forked from OSchip/llvm-project
[SDAG] fold insert_vector_elt with undef index
Similar to rG4c47617627fb. This makes the DAG behavior consistent with IR's insertelement. See https://bugs.llvm.org/show_bug.cgi?id=42689. I've tried to maintain test intent for AArch64 and WebAssembly by replacing undef index operands with something else.
This commit is contained in:
parent
f067dd839e
commit
85a2146c15
|
@ -16574,10 +16574,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
|||
SDValue EltNo = N->getOperand(2);
|
||||
SDLoc DL(N);
|
||||
|
||||
// If the inserted element is an UNDEF, just use the input vector.
|
||||
if (InVal.isUndef())
|
||||
return InVec;
|
||||
|
||||
EVT VT = InVec.getValueType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
|
|
|
@ -5506,6 +5506,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
|
||||
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
|
||||
return getUNDEF(VT);
|
||||
|
||||
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
|
||||
if (N3.isUndef())
|
||||
return getUNDEF(VT);
|
||||
|
||||
// If the inserted element is an UNDEF, just use the input vector.
|
||||
if (N2.isUndef())
|
||||
return N1;
|
||||
|
||||
break;
|
||||
}
|
||||
case ISD::INSERT_SUBVECTOR: {
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
|
||||
; CHECK: fmla
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: fmla
|
||||
; CHECK-NEXT: fmla
|
||||
; CHECK-NEXT: fmla
|
||||
|
@ -16,21 +18,17 @@ target triple = "aarch64--linux-gnu"
|
|||
%Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 {
|
||||
define linkonce_odr void @func(%Struct* nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> undef, float undef, i32 0
|
||||
%1 = insertelement <4 x float> %0, float undef, i32 1
|
||||
%2 = insertelement <4 x float> %1, float undef, i32 2
|
||||
%3 = insertelement <4 x float> %2, float undef, i32 3
|
||||
%scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0
|
||||
%struct_ptr = bitcast float* %scevgep to i8*
|
||||
%vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr)
|
||||
%ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
|
||||
%fm1 = fmul <4 x float> %0, %ev1
|
||||
%av1 = fadd <4 x float> %1, %fm1
|
||||
%fm1 = fmul <4 x float> %f, %ev1
|
||||
%av1 = fadd <4 x float> %f, %fm1
|
||||
%ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
|
||||
%fm2 = fmul <4 x float> %2, %ev2
|
||||
%av2 = fadd <4 x float> %3, %fm2
|
||||
%fm2 = fmul <4 x float> %f, %ev2
|
||||
%av2 = fadd <4 x float> %f, %fm2
|
||||
%scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0
|
||||
%struct_ptr2 = bitcast float* %scevgep2 to i8*
|
||||
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, i8* %struct_ptr2)
|
||||
|
@ -38,11 +36,11 @@ entry:
|
|||
%struct_ptr3 = bitcast float* %scevgep3 to i8*
|
||||
%vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3)
|
||||
%ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
|
||||
%fm3 = fmul <4 x float> %0, %ev3
|
||||
%av3 = fadd <4 x float> %1, %fm3
|
||||
%fm3 = fmul <4 x float> %f, %ev3
|
||||
%av3 = fadd <4 x float> %f, %fm3
|
||||
%ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
|
||||
%fm4 = fmul <4 x float> %2, %ev4
|
||||
%av4 = fadd <4 x float> %3, %fm4
|
||||
%fm4 = fmul <4 x float> %f, %ev4
|
||||
%av4 = fadd <4 x float> %f, %fm4
|
||||
%scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0
|
||||
%struct_ptr4 = bitcast float* %scevgep4 to i8*
|
||||
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4)
|
||||
|
|
|
@ -193,13 +193,13 @@ define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) {
|
|||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v16i8:
|
||||
; CHECK-LABEL: replace_zero_v16i8:
|
||||
; NO-SIMD128-NOT: i8x16
|
||||
; SIMD128-NEXT: .functype replace_undef_v16i8 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) {
|
||||
%res = insertelement <16 x i8> %v, i8 %x, i32 undef
|
||||
define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) {
|
||||
%res = insertelement <16 x i8> %v, i8 %x, i32 0
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
|
@ -464,13 +464,13 @@ define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) {
|
|||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v8i16:
|
||||
; CHECK-LABEL: replace_zero_v8i16:
|
||||
; NO-SIMD128-NOT: i16x8
|
||||
; SIMD128-NEXT: .functype replace_undef_v8i16 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) {
|
||||
%res = insertelement <8 x i16> %v, i16 %x, i32 undef
|
||||
define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) {
|
||||
%res = insertelement <8 x i16> %v, i16 %x, i32 0
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
|
@ -625,13 +625,13 @@ define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) {
|
|||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v4i32:
|
||||
; CHECK-LABEL: replace_zero_v4i32:
|
||||
; NO-SIMD128-NOT: i32x4
|
||||
; SIMD128-NEXT: .functype replace_undef_v4i32 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) {
|
||||
%res = insertelement <4 x i32> %v, i32 %x, i32 undef
|
||||
define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) {
|
||||
%res = insertelement <4 x i32> %v, i32 %x, i32 0
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
|
@ -781,14 +781,14 @@ define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) {
|
|||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v2i64:
|
||||
; CHECK-LABEL: replace_zero_v2i64:
|
||||
; NO-SIMD128-NOT: i64x2
|
||||
; SIMD128-VM-NOT: i64x2
|
||||
; SIMD128-NEXT: .functype replace_undef_v2i64 (v128, i64) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) {
|
||||
%res = insertelement <2 x i64> %v, i64 %x, i32 undef
|
||||
define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) {
|
||||
%res = insertelement <2 x i64> %v, i64 %x, i32 0
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
|
@ -931,13 +931,13 @@ define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) {
|
|||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v4f32:
|
||||
; CHECK-LABEL: replace_zero_v4f32:
|
||||
; NO-SIMD128-NOT: f32x4
|
||||
; SIMD128-NEXT: .functype replace_undef_v4f32 (v128, f32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) {
|
||||
%res = insertelement <4 x float> %v, float %x, i32 undef
|
||||
define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) {
|
||||
%res = insertelement <4 x float> %v, float %x, i32 0
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
@ -1086,14 +1086,14 @@ define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) {
|
|||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: replace_undef_v2f64:
|
||||
; CHECK-LABEL: replace_zero_v2f64:
|
||||
; NO-SIMD128-NOT: f64x2
|
||||
; SIMD128-VM-NOT: f64x2
|
||||
; SIMD128-NEXT: .functype replace_undef_v2f64 (v128, f64) -> (v128){{$}}
|
||||
; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) {
|
||||
%res = insertelement <2 x double> %v, double %x, i32 undef
|
||||
define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) {
|
||||
%res = insertelement <2 x double> %v, double %x, i32 0
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,22 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
|
||||
|
||||
define <16 x i8> @undef_index(i8 %x) nounwind {
|
||||
; ALL-LABEL: undef_index:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: retq
|
||||
%ins = insertelement <16 x i8> undef, i8 %x, i64 undef
|
||||
ret <16 x i8> %ins
|
||||
}
|
||||
|
||||
define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
|
||||
; ALL-LABEL: undef_scalar:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: retq
|
||||
%ins = insertelement <16 x i8> %x, i8 undef, i32 %index
|
||||
ret <16 x i8> %ins
|
||||
}
|
||||
|
||||
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
|
||||
; SSE-LABEL: arg_i8_v16i8:
|
||||
; SSE: # %bb.0:
|
||||
|
|
Loading…
Reference in New Issue