forked from OSchip/llvm-project
fix PR36582
The error occurs when reading i16 elements (as in the test case) from a v8i8 with the index pattern <0,2,4,6>. Because all the data in the vector is accessed, the operation is not a VUZP. The patch stops VUZP pattern recognition when the vector that EXTRACT_VECTOR_ELT reads from has a different element type than the BUILD_VECTOR being lowered. llvm-svn: 326722
This commit is contained in:
parent
77a719cb9e
commit
ac0bfb5938
|
@ -6777,11 +6777,17 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
const SDNode *N = V.getNode();
|
||||
if (!isa<ConstantSDNode>(N->getOperand(1)))
|
||||
break;
|
||||
SDValue N0 = N->getOperand(0);
|
||||
|
||||
// All elements are extracted from the same vector.
|
||||
if (!Vector)
|
||||
Vector = N->getOperand(0).getNode();
|
||||
else if (Vector != N->getOperand(0).getNode()) {
|
||||
if (!Vector) {
|
||||
Vector = N0.getNode();
|
||||
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
|
||||
// BUILD_VECTOR.
|
||||
if (VT.getVectorElementType() !=
|
||||
N0.getValueType().getVectorElementType())
|
||||
break;
|
||||
} else if (Vector != N0.getNode()) {
|
||||
Odd = false;
|
||||
Even = false;
|
||||
break;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
; CHECK-LABEL: fun1:
|
||||
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK-NOT: mov
|
||||
|
@ -48,4 +50,15 @@ entry:
|
|||
ret i32 undef
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)
|
||||
; CHECK-LABEL: pr36582:
|
||||
; Check that this does not ICE.
|
||||
define void @pr36582(i8* %p1, i32* %p2) {
|
||||
entry:
|
||||
%x = bitcast i8* %p1 to <8 x i8>*
|
||||
%wide.vec = load <8 x i8>, <8 x i8>* %x, align 1
|
||||
%strided.vec = shufflevector <8 x i8> %wide.vec, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%y = zext <4 x i8> %strided.vec to <4 x i32>
|
||||
%z = bitcast i32* %p2 to <4 x i32>*
|
||||
store <4 x i32> %y, <4 x i32>* %z, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue