Do actual DCE in LoopUnroll (try 4)

Turns out simplifyLoopIVs sometimes returns a non-dead instruction in its DeadInsts out param.  I had done a bit of NFC cleanup which was only NFC if simplifyLoopIVs obeyed its documentation.  I'm simply dropping that part of the change.

Commit message from try 3:

Recommitting after fixing a bug found post commit. Amusingly, try 1 had been correct, and by reverting to incorporate last minute review feedback, I introduced the bug. Oops. :)

Original commit message:

The problem was that recursively deleting an instruction can delete instructions beyond the current iterator (via a dead phi), thus invalidating iteration. Test case added in LoopUnroll/dce.ll to cover this case.

LoopUnroll does a limited DCE pass after unrolling, but if you have a chain of dead instructions, it only deletes the last one. Improve the code to recursively delete all trivially dead instructions.

Differential Revision: https://reviews.llvm.org/D102511
This commit is contained in:
Philip Reames 2021-05-19 09:50:21 -07:00
parent 76b8754d1b
commit 449d14ebd2
11 changed files with 77 additions and 63 deletions

View File

@ -220,26 +220,24 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
// At this point, the code is well formed. Perform constprop, instsimplify,
// and dce.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
if (isInstructionTriviallyDead(Inst))
BB->getInstList().erase(Inst);
DeadInsts.emplace_back(Inst);
}
// We can't do recursive deletion until we're done iterating, as we might
// have a phi which (potentially indirectly) uses instructions later in
// the block we're iterating through.
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
// TODO: after peeling or unrolling, previously loop variant conditions are
// likely to fold to constants, eagerly propagating those here will require
// fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
// appropriate.
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling

View File

@ -11,12 +11,13 @@
; }
;
; This test is meant to check that this loop is unrolled into four iterations.
; Note that the load on the last iteration is dead and thus doesn't appear in
; the output.
; UNROLL-LABEL: @test
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL: load i32, i32*
; UNROLL-NOT: load i32, i32*
; NOUNROLL-LABEL: @test
; NOUNROLL: load i32, i32*

View File

@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-unroll -S < %s | FileCheck %s
; Test named after PR50368. The phi %c.addr.07 has no users, and its incoming
; value %conv1 is defined later in the same block and is used only by that phi.
; Can't recursively delete %c.addr.07 without deleting %conv1
; and thus invalidating iteration.
define void @PR50368(i32 %c, i64 %x) {
; CHECK-LABEL: @PR50368(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]]
; CHECK: loop.peel.begin:
; CHECK-NEXT: br label [[LOOP_PEEL:%.*]]
; CHECK: loop.peel:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[LOOP_PEEL_NEXT:%.*]]
; CHECK: loop.peel.next:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]]
; CHECK: loop.peel.next1:
; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
; CHECK: entry.peel.newph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[EXIT_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%0 = phi i64 [ 0, %loop ], [ %x, %entry ]
%c.addr.07 = phi i32 [ %conv1, %loop ], [ %c, %entry ]
%conv1 = trunc i64 undef to i32
br i1 false, label %exit, label %loop
exit:
ret void
}
; A chain of dead instructions: %shl has no users, %and is used only by %shl,
; and %conv1 is used only by %and. The CHECK lines expect the whole chain to be
; removed from the loop block, not just the final %shl.
define void @dead_chain(i64 %a) {
; CHECK-LABEL: @dead_chain(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%conv1 = trunc i64 %a to i32
%and = and i32 %conv1, 15
%shl = shl i32 %and, 15
br i1 true, label %exit, label %loop
exit:
ret void
}

View File

@ -34,25 +34,7 @@ define i32 @test2(i8 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[AND:%.*]] = and i32 [[ZEXT]], 31
; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[ZEXT_1]], 31
; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[ZEXT_2]], 31
; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[ZEXT_3]], 31
; CHECK-NEXT: [[ZEXT_4:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[ZEXT_4]], 31
; CHECK-NEXT: [[ZEXT_5:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[ZEXT_5]], 31
; CHECK-NEXT: [[ZEXT_6:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[ZEXT_6]], 31
; CHECK-NEXT: [[ZEXT_7:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[ZEXT_7]], 31
; CHECK-NEXT: [[ZEXT_8:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[AND_8:%.*]] = and i32 [[ZEXT_8]], 31
; CHECK-NEXT: [[ZEXT_9:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_9:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[AND_9:%.*]] = and i32 [[ZEXT_9]], 31
; CHECK-NEXT: [[SHL_9:%.*]] = shl i32 [[AND_9]], 15
; CHECK-NEXT: ret i32 [[SHL_9]]
@ -79,16 +61,7 @@ define i32 @test3(i8 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_4:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_5:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_6:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_7:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_8:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_9:%.*]] = zext i8 [[A]] to i32
; CHECK-NEXT: [[ZEXT_9:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[DIV_9:%.*]] = udiv i32 [[ZEXT_9]], 31
; CHECK-NEXT: ret i32 [[DIV_9]]
;

View File

@ -40,7 +40,6 @@ define void @test1(i32* noalias %A) {
; CHECK: for.body.3:
; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.for.body_crit_edge.3:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
; CHECK-NEXT: unreachable
;
entry:
@ -124,7 +123,7 @@ define void @test2(i32* noalias %A) {
; CHECK: for.body.for.body_crit_edge.3:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]]
; CHECK-NEXT: [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4
; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop !0
; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
;
entry:
br i1 true, label %for.preheader, label %for.end
@ -202,7 +201,7 @@ define void @test3(i32* noalias %A, i1 %cond) {
; CHECK: for.body.for.body_crit_edge.3:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]]
; CHECK-NEXT: [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4
; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop !2
; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
;
entry:
%0 = load i32, i32* %A, align 4

View File

@ -12,7 +12,6 @@ define i32 @test(i32 %a, i32 %b, i32 %c) optsize {
; CHECK-NEXT: store i32 [[B:%.*]], i32* [[ARRAYINIT_ELEMENT]], align 4
; CHECK-NEXT: [[ARRAYINIT_ELEMENT1:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[REF_TMP]], i64 0, i64 2
; CHECK-NEXT: store i32 [[C:%.*]], i32* [[ARRAYINIT_ELEMENT1]], align 4
; CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[REF_TMP]], i64 0, i64 3
; CHECK-NEXT: [[CMP_I_I_I3:%.*]] = icmp slt i32 [[A]], [[B]]
; CHECK-NEXT: [[SPEC_SELECT_I_I4:%.*]] = select i1 [[CMP_I_I_I3]], i32* [[ARRAYINIT_ELEMENT]], i32* [[ARRAYINIT_BEGIN]]
; CHECK-NEXT: [[INCDEC_PTR_I_I5:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[REF_TMP]], i64 0, i64 2
@ -22,7 +21,6 @@ define i32 @test(i32 %a, i32 %b, i32 %c) optsize {
; CHECK-NEXT: [[DOTPRE2:%.*]] = load i32, i32* [[INCDEC_PTR_I_I5]], align 4
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp slt i32 [[DOTPRE]], [[DOTPRE2]]
; CHECK-NEXT: [[SPEC_SELECT_I_I:%.*]] = select i1 [[CMP_I_I_I]], i32* [[INCDEC_PTR_I_I5]], i32* [[SPEC_SELECT_I_I4]]
; CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_I_I5]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SPEC_SELECT_I_I]], align 4
; CHECK-NEXT: ret i32 [[TMP1]]
;

View File

@ -163,7 +163,6 @@ define void @test1() {
; PEEL8-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]]
; PEEL8-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; PEEL8-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX_7]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; PEEL8-NEXT: br label [[FOR_EXIT]]
; PEEL8: for.exit:
; PEEL8-NEXT: ret void
@ -206,7 +205,7 @@ define void @test1() {
; PEEL2UNROLL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4
; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; PEEL2UNROLL2-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8
; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop !0
; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL2UNROLL2: for.exit.loopexit:
; PEEL2UNROLL2-NEXT: br label [[FOR_EXIT]]
; PEEL2UNROLL2: for.exit:

View File

@ -10,7 +10,6 @@
define i32 @sansCanonical(i32* %base) nounwind {
; CHECK-LABEL: @sansCanonical(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 0 to i64
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[ADR:%.*]] = getelementptr inbounds i32, i32* [[BASE:%.*]], i64 9
@ -39,7 +38,6 @@ define i32 @sansCanonical(i32* %base) nounwind {
; CHECK-NEXT: [[ADR_8:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 1
; CHECK-NEXT: [[TMP_8:%.*]] = load i32, i32* [[ADR_8]], align 8
; CHECK-NEXT: [[SUM_NEXT_8:%.*]] = add i32 [[SUM_NEXT_7]], [[TMP_8]]
; CHECK-NEXT: [[TMP_9:%.*]] = load i32, i32* [[BASE]], align 8
; CHECK-NEXT: ret i32 [[SUM_NEXT_8]]
;
entry:

View File

@ -10,23 +10,18 @@ define i16 @full_unroll(i16* %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: br label [[FOR_COND_CLEANUP3:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[DOTLCSSA10_LCSSA:%.*]] = phi i16 [ [[TMP2_2:%.*]], [[FOR_COND_CLEANUP3_2:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @func(i16 [[DOTLCSSA10_LCSSA]])
; CHECK-NEXT: ret i16 0
; CHECK: for.cond.cleanup3:
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1
; CHECK-NEXT: [[TMP2_1:%.*]] = load i16, i16* [[PTR_1]], align 2
; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_1:%.*]]
; CHECK: for.cond.cleanup3.1:
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 2
; CHECK-NEXT: [[TMP2_2]] = load i16, i16* [[PTR_2]], align 2
; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_2]]
; CHECK: for.cond.cleanup3.2:
; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3
; CHECK-NEXT: [[TMP2_3:%.*]] = load i16, i16* [[PTR_3]], align 2
; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP3_3:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup3.3:
; CHECK-NEXT: unreachable
@ -59,8 +54,6 @@ define i16 @partial_unroll(i16* %A) {
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC9_2:%.*]], [[FOR_COND_CLEANUP3_2:%.*]] ]
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[PTR]], align 2
; CHECK-NEXT: br label [[FOR_COND_CLEANUP3:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[DOTLCSSA10_LCSSA:%.*]] = phi i16 [ [[TMP2_1:%.*]], [[FOR_COND_CLEANUP3_1:%.*]] ]
@ -68,13 +61,11 @@ define i16 @partial_unroll(i16* %A) {
; CHECK-NEXT: ret i16 0
; CHECK: for.cond.cleanup3:
; CHECK-NEXT: [[INC9:%.*]] = add nuw nsw i64 [[I_0]], 1
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INC9]]
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[INC9]]
; CHECK-NEXT: [[TMP2_1]] = load i16, i16* [[PTR_1]], align 2
; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_1]]
; CHECK: for.cond.cleanup3.1:
; CHECK-NEXT: [[INC9_1:%.*]] = add nuw nsw i64 [[INC9]], 1
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INC9_1]]
; CHECK-NEXT: [[TMP2_2:%.*]] = load i16, i16* [[PTR_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i64 [[INC9_1]], 200
; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_COND_CLEANUP3_2]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup3.2:

View File

@ -68,7 +68,6 @@ define double @test_with_lcssa(double %arg1, double* %arg2) {
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi double [ [[RES_1]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret double [[RES_LCSSA]]
; CHECK: loop.latch.1:
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds double, double* [[ARG2]], i64 2
; CHECK-NEXT: unreachable
;

View File

@ -141,8 +141,6 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]]
; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa !0
; CHECK-NEXT: [[ADD8_EPIL_2:%.*]] = add nuw i32 [[ADD8_EPIL_1]], 1
; CHECK-NEXT: [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
entry: