[InstCombine] Revert rL341831: relax one-use check in foldICmpAddConstant() (PR44100)

rL341831 moved one-use check higher up, restricting a few folds
that produced a single instruction from two instructions to the case
where the inner instruction would go away.

Original commit message:
> InstCombine: move hasOneUse check to the top of foldICmpAddConstant
>
> There were two combines not covered by the check before now,
> neither of which actually differed from normal in the benefit analysis.
>
> The most recent seems to be because it was just added at the top of the
> function (naturally). The older is from way back in 2008 (r46687)
> when we just didn't put those checks in so routinely, and has been
> diligently maintained since.

From the commit message alone, there doesn't seem to be a
deeper motivation, deeper problem that was trying to solve,
other than 'fixing the wrong one-use check'.

As i have briefly discusses in IRC with Tim, the original motivation
can no longer be recovered, too much time has passed.

However i believe that the original fold was doing the right thing,
we should be performing such a transformation even if the inner `add`
will not go away - that will still unchain the comparison from `add`,
it will no longer need to wait for `add` to compute.

Doing so doesn't seem to break any particular idioms,
as least as far as i can see.

References https://bugs.llvm.org/show_bug.cgi?id=44100
This commit is contained in:
Roman Lebedev 2019-12-02 17:34:55 +03:00
parent 241cbf201a
commit 0f22e783a0
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
5 changed files with 52 additions and 52 deletions

View File

@ -2566,9 +2566,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
Type *Ty = Add->getType();
CmpInst::Predicate Pred = Cmp.getPredicate();
if (!Add->hasOneUse())
return nullptr;
// If the add does not wrap, we can always adjust the compare by subtracting
// the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
// are canonicalized to SGT/SLT/UGT/ULT.
@ -2602,6 +2599,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
}
if (!Add->hasOneUse())
return nullptr;
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2

View File

@ -77,14 +77,14 @@ define i1 @test4(i32 %a) {
define { i32, i1 } @test4multiuse(i32 %a) {
; CHECK-LABEL: @test4multiuse(
; CHECK-NEXT: [[B:%.*]] = add i32 [[A:%.*]], -2147483644
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[B]], -4
; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[A:%.*]], -2147483644
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 2147483640
; CHECK-NEXT: [[TMP:%.*]] = insertvalue { i32, i1 } undef, i32 [[B]], 0
; CHECK-NEXT: [[RES:%.*]] = insertvalue { i32, i1 } [[TMP]], i1 [[C]], 1
; CHECK-NEXT: ret { i32, i1 } [[RES]]
;
%b = add i32 %a, -2147483644
%b = add nsw i32 %a, -2147483644
%c = icmp slt i32 %b, -4
%tmp = insertvalue { i32, i1 } undef, i32 %b, 0

View File

@ -578,10 +578,10 @@ define void @test8() {
; PROLOG: %lcmp.mod = icmp eq i64
; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader
; PROLOG: latch.6:
; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8
; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
; PROLOG: latch.7
; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100
; PROLOG: latch.7:
; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8
; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp3, 92
; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa
bb:
br label %outerloop

View File

@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
@ -22,33 +22,33 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !3
; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i1> [[TMP11]], [[TMP16]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[PREDPHI]]
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP18]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !3
; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
; CHECK-NEXT: [[TMP12:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP10]], [[TMP15]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI]]
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP17]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@ -58,16 +58,16 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]]
; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]]
; CHECK: if.then:
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], 19
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], 19
; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]]
; CHECK: if.else:
; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP21]], 4
; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP20]], 4
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5
; CHECK-NEXT: br label [[IF_END14]]
; CHECK: if.end14:

View File

@ -19,7 +19,7 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1, !dbg !9
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg !9
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1, !dbg !9
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4, !dbg !9
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3, !dbg !9
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg !9
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1, !dbg !9
@ -29,34 +29,34 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP5]], !dbg !9
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[A]], !dbg !9
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]], !dbg !9
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, !dbg !9
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg !9
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg !9
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]], !dbg !9
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*, !dbg !9
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4, !dbg !9, !alias.scope !10
; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, !dbg !9
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]], !dbg !9
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*, !dbg !9
; CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[TMP11]], align 4, !dbg !9, !alias.scope !13, !noalias !10
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]], !dbg !9
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*, !dbg !9
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !dbg !9, !alias.scope !10
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, !dbg !9
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]], !dbg !9
; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>*, !dbg !9
; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP10]], align 4, !dbg !9, !alias.scope !13, !noalias !10
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4, !dbg !9
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, !llvm.loop !15
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, !llvm.loop !15
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]], !dbg !9
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]], !dbg !9
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ], !dbg !9
; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !9
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], !dbg !9
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]], !dbg !9
; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg !9
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], 3.000000e+00, !dbg !9
; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg !9
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP12]], 3.000000e+00, !dbg !9
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]], !dbg !9
; CHECK-NEXT: store float [[MUL]], float* [[ARRAYIDX2]], align 4, !dbg !9
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1, !dbg !9