forked from OSchip/llvm-project
[InstCombine] improve demanded element analysis for vector insert-of-extract
InstCombine currently has odd rules for folding insert-extract chains to shuffles, so we miss collapsing seemingly simple cases as shown in the tests here. But poison makes this not quite as easy as we might have guessed. Alive2 tests to show the subtle difference (similar to the regression tests): https://alive2.llvm.org/ce/z/hp4hv3 (this is ok) https://alive2.llvm.org/ce/z/ehEWaN (poison leakage) SLP tends to create these patterns (as shown in the SLP tests), and this could help with solving PR16739. Differential Revision: https://reviews.llvm.org/D86460
This commit is contained in:
parent
7661c8c040
commit
557b890ff4
|
@ -1161,6 +1161,17 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
|
|||
return I->getOperand(0);
|
||||
}
|
||||
|
||||
// If we only demand the element that is being inserted and that element
|
||||
// was extracted from the same index in another vector with the same type,
|
||||
// replace this insert with that other vector.
|
||||
Value *Vec;
|
||||
if (PreInsertDemandedElts == 0 &&
|
||||
match(I->getOperand(1),
|
||||
m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
|
||||
Vec->getType() == I->getType()) {
|
||||
return Vec;
|
||||
}
|
||||
|
||||
// The inserted element is defined.
|
||||
UndefElts.clearBit(IdxNo);
|
||||
break;
|
||||
|
|
|
@ -749,9 +749,7 @@ define <4 x i8> @select_cond_(<4 x i8> %x, <4 x i8> %min, <4 x i1> %cmp, i1 %poi
|
|||
|
||||
define <4 x float> @ins_of_ext(<4 x float> %x, float %y) {
|
||||
; CHECK-LABEL: @ins_of_ext(
|
||||
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[E0]], i32 0
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[X:%.*]], float [[Y:%.*]], i32 1
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[Y]], i32 2
|
||||
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[Y]], i32 3
|
||||
; CHECK-NEXT: ret <4 x float> [[I3]]
|
||||
|
@ -766,11 +764,7 @@ define <4 x float> @ins_of_ext(<4 x float> %x, float %y) {
|
|||
|
||||
define <4 x float> @ins_of_ext_twice(<4 x float> %x, float %y) {
|
||||
; CHECK-LABEL: @ins_of_ext_twice(
|
||||
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[E0]], i32 0
|
||||
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x float> [[X]], i32 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[E1]], i32 1
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[Y:%.*]], i32 2
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[X:%.*]], float [[Y:%.*]], i32 2
|
||||
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[Y]], i32 3
|
||||
; CHECK-NEXT: ret <4 x float> [[I3]]
|
||||
;
|
||||
|
@ -783,6 +777,9 @@ define <4 x float> @ins_of_ext_twice(<4 x float> %x, float %y) {
|
|||
ret <4 x float> %i3
|
||||
}
|
||||
|
||||
; Negative test - element 3 of the result must be undef to be poison safe.
|
||||
; TODO: Could convert insert/extract to identity shuffle with undef mask elements.
|
||||
|
||||
define <4 x float> @ins_of_ext_wrong_demand(<4 x float> %x, float %y) {
|
||||
; CHECK-LABEL: @ins_of_ext_wrong_demand(
|
||||
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
|
||||
|
@ -798,6 +795,9 @@ define <4 x float> @ins_of_ext_wrong_demand(<4 x float> %x, float %y) {
|
|||
ret <4 x float> %i2
|
||||
}
|
||||
|
||||
; Negative test - can't replace i0 with x.
|
||||
; TODO: Could convert insert/extract to identity shuffle with undef mask elements.
|
||||
|
||||
define <4 x float> @ins_of_ext_wrong_type(<5 x float> %x, float %y) {
|
||||
; CHECK-LABEL: @ins_of_ext_wrong_type(
|
||||
; CHECK-NEXT: [[E0:%.*]] = extractelement <5 x float> [[X:%.*]], i32 0
|
||||
|
|
|
@ -51,13 +51,13 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
|
|||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[Z:%.*]], i32 3
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.cond.cleanup.loopexit:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP21:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP20:%.*]], i32 1
|
||||
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
||||
; CHECK: for.cond.cleanup:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP20]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0
|
||||
|
@ -83,12 +83,11 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
|
|||
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
|
||||
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]]
|
||||
; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
|
||||
; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[ADD11]], i32 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
|
||||
; CHECK-NEXT: [[TMP20]] = add nsw <2 x i32> [[TMP18]], [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP21]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
|
||||
;
|
||||
entry:
|
||||
|
@ -157,13 +156,13 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
|
|||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.cond.cleanup.loopexit:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP18:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP17:%.*]], i32 1
|
||||
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
|
||||
; CHECK: for.cond.cleanup:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP4]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[T4]] to i64
|
||||
|
@ -188,12 +187,11 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
|
|||
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
|
||||
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
|
||||
; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[ADD11]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> <i32 1, i32 undef>, i32 [[T12]], i32 1
|
||||
; CHECK-NEXT: [[TMP17]] = add nsw <2 x i32> [[TMP15]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP18]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -183,13 +183,11 @@ define void @vecload_vs_broadcast5(double * noalias %from, double * noalias %to,
|
|||
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[P]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
|
||||
; CHECK-NEXT: br i1 undef, label [[LP]], label [[EXT:%.*]]
|
||||
; CHECK: ext:
|
||||
; CHECK-NEXT: ret void
|
||||
|
|
Loading…
Reference in New Issue