[LoopUnroll] Use smallest exact trip count from any exit

This is a more general alternative/extension to D102635. Rather than
handling the special case of "header exit with non-exiting latch",
this unrolls against the smallest exact trip count from any exit.
The latch exit is no longer treated as priviledged when it comes to
full unrolling.

The motivating case is in full-unroll-one-unpredictable-exit.ll.
Here the header exit is an IV-based exit, while the latch exit is
a data comparison. This kind of loop does not get rotated, because
the latch is already exiting, and loop rotation doesn't try to
distinguish IV-based/analyzable latches.

Differential Revision: https://reviews.llvm.org/D102982
This commit is contained in:
Nikita Popov 2021-06-19 09:44:28 +02:00
parent 558ee5843f
commit 1ae266f452
8 changed files with 299 additions and 151 deletions

View File

@ -1120,18 +1120,29 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
// Find trip count and trip multiple if count is not available
// Find the smallest exact trip count for any exit. This is an upper bound
// on the loop trip count, but an exit at an earlier iteration is still
// possible. An unroll by the smallest exact trip count guarantees that all
// brnaches relating to at least one exit can be eliminated. This is unlike
// the max trip count, which only guarantees that the backedge can be broken.
unsigned TripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
// block for the trip count estimation.
BasicBlock *ExitingBlock = L->getLoopLatch();
if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
ExitingBlock = L->getExitingBlock();
if (ExitingBlock) {
TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *ExitingBlock : ExitingBlocks)
if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
if (!TripCount || TC < TripCount)
TripCount = TripMultiple = TC;
if (!TripCount) {
// If no exact trip count is known, determine the trip multiple of either
// the loop latch or the single exiting block.
// TODO: Relax for multiple exits.
BasicBlock *ExitingBlock = L->getLoopLatch();
if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
ExitingBlock = L->getExitingBlock();
if (ExitingBlock)
TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
}
// If the loop contains a convergent operation, the prelude we'd add

View File

@ -87,20 +87,30 @@ define i1 @test_non_latch() {
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 2
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]]
; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 [[IV]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 [[IV]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ true, [[LOOP_2:%.*]] ], [ false, [[LATCH_2:%.*]] ]
; CHECK-NEXT: ret i1 [[EXIT_VAL]]
; CHECK: loop.1:
; CHECK-NEXT: br label [[LATCH_1]]
; CHECK: latch.1:
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]]
; CHECK: loop.2:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LATCH_2]]
; CHECK: latch.2:
; CHECK-NEXT: br label [[EXIT]]
;
start:
%a1 = alloca [2 x i64], align 8

View File

@ -72,24 +72,96 @@ exit:
ret void
}
; TODO: We should fully unroll this by 10, leave the unrolled latch
; Fully unroll this loop by 10, but leave the unrolled latch
; tests since we don't know if %N < 10, and break the backedge.
define void @test2(i64 %N) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i64 [[IV]], 10
; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT:%.*]]
; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i64 [[IV]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT]]
; CHECK-NEXT: br i1 true, label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: loop.1:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ule i64 1, [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
; CHECK: loop.2:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_2:%.*]]
; CHECK: latch.2:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ule i64 2, [[N]]
; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3:%.*]], label [[EXIT]]
; CHECK: loop.3:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ule i64 3, [[N]]
; CHECK-NEXT: br i1 [[CMP2_3]], label [[LOOP_4:%.*]], label [[EXIT]]
; CHECK: loop.4:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_4:%.*]]
; CHECK: latch.4:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_4:%.*]] = icmp ule i64 4, [[N]]
; CHECK-NEXT: br i1 [[CMP2_4]], label [[LOOP_5:%.*]], label [[EXIT]]
; CHECK: loop.5:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_5:%.*]]
; CHECK: latch.5:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_5:%.*]] = icmp ule i64 5, [[N]]
; CHECK-NEXT: br i1 [[CMP2_5]], label [[LOOP_6:%.*]], label [[EXIT]]
; CHECK: loop.6:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_6:%.*]]
; CHECK: latch.6:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_6:%.*]] = icmp ule i64 6, [[N]]
; CHECK-NEXT: br i1 [[CMP2_6]], label [[LOOP_7:%.*]], label [[EXIT]]
; CHECK: loop.7:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_7:%.*]]
; CHECK: latch.7:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_7:%.*]] = icmp ule i64 7, [[N]]
; CHECK-NEXT: br i1 [[CMP2_7]], label [[LOOP_8:%.*]], label [[EXIT]]
; CHECK: loop.8:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_8:%.*]]
; CHECK: latch.8:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_8:%.*]] = icmp ule i64 8, [[N]]
; CHECK-NEXT: br i1 [[CMP2_8]], label [[LOOP_9:%.*]], label [[EXIT]]
; CHECK: loop.9:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_9:%.*]]
; CHECK: latch.9:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_9:%.*]] = icmp ule i64 9, [[N]]
; CHECK-NEXT: br i1 [[CMP2_9]], label [[LOOP_10:%.*]], label [[EXIT]]
; CHECK: loop.10:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[LATCH_10:%.*]]
; CHECK: latch.10:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[CMP2_10:%.*]] = icmp ule i64 10, [[N]]
; CHECK-NEXT: br i1 [[CMP2_10]], label [[LOOP_11:%.*]], label [[EXIT]]
; CHECK: loop.11:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br i1 false, label [[LATCH_11:%.*]], label [[EXIT]]
; CHECK: latch.11:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[EXIT]]
;
entry:
br label %loop

View File

@ -10,20 +10,60 @@ define i1 @test(i64* %a1, i64* %a2) {
; CHECK-NEXT: start:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 24
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT_4:%.*]], [[LATCH_4:%.*]] ]
; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A1:%.*]], i64 [[IV]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A2:%.*]], i64 [[IV]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ false, [[LATCH_2:%.*]] ], [ false, [[LATCH_3:%.*]] ], [ true, [[LOOP_4:%.*]] ], [ false, [[LATCH_4]] ]
; CHECK-NEXT: ret i1 [[EXIT_VAL]]
; CHECK: loop.1:
; CHECK-NEXT: br label [[LATCH_1]]
; CHECK: latch.1:
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 1
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
; CHECK: loop.2:
; CHECK-NEXT: br label [[LATCH_2]]
; CHECK: latch.2:
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 1
; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_1]]
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_1]]
; CHECK-NEXT: [[LOAD1_2:%.*]] = load i64, i64* [[GEP1_2]], align 8
; CHECK-NEXT: [[LOAD2_2:%.*]] = load i64, i64* [[GEP2_2]], align 8
; CHECK-NEXT: [[EXITCOND2_2:%.*]] = icmp eq i64 [[LOAD1_2]], [[LOAD2_2]]
; CHECK-NEXT: br i1 [[EXITCOND2_2]], label [[LOOP_3:%.*]], label [[EXIT]]
; CHECK: loop.3:
; CHECK-NEXT: br label [[LATCH_3]]
; CHECK: latch.3:
; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_NEXT_2]], 1
; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_2]]
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_2]]
; CHECK-NEXT: [[LOAD1_3:%.*]] = load i64, i64* [[GEP1_3]], align 8
; CHECK-NEXT: [[LOAD2_3:%.*]] = load i64, i64* [[GEP2_3]], align 8
; CHECK-NEXT: [[EXITCOND2_3:%.*]] = icmp eq i64 [[LOAD1_3]], [[LOAD2_3]]
; CHECK-NEXT: br i1 [[EXITCOND2_3]], label [[LOOP_4]], label [[EXIT]]
; CHECK: loop.4:
; CHECK-NEXT: [[EXITCOND_4:%.*]] = icmp eq i64 [[IV_NEXT_3]], 24
; CHECK-NEXT: br i1 [[EXITCOND_4]], label [[EXIT]], label [[LATCH_4]]
; CHECK: latch.4:
; CHECK-NEXT: [[IV_NEXT_4]] = add nuw nsw i64 [[IV_NEXT_3]], 1
; CHECK-NEXT: [[GEP1_4:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_3]]
; CHECK-NEXT: [[GEP2_4:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_3]]
; CHECK-NEXT: [[LOAD1_4:%.*]] = load i64, i64* [[GEP1_4]], align 8
; CHECK-NEXT: [[LOAD2_4:%.*]] = load i64, i64* [[GEP2_4]], align 8
; CHECK-NEXT: [[EXITCOND2_4:%.*]] = icmp eq i64 [[LOAD1_4]], [[LOAD2_4]]
; CHECK-NEXT: br i1 [[EXITCOND2_4]], label [[LOOP]], label [[EXIT]]
;
start:
br label %loop

View File

@ -10,80 +10,19 @@ define void @test(i32 %s, i32 %n) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N2:%.*]] = add i32 [[S:%.*]], 123
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[S]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], [[S]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP1]], 7
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]]
; CHECK: loop.prol.preheader:
; CHECK-NEXT: br label [[LOOP_PROL:%.*]]
; CHECK: loop.prol:
; CHECK-NEXT: [[I_PROL:%.*]] = phi i32 [ [[S]], [[LOOP_PROL_PREHEADER]] ], [ [[I_INC_PROL:%.*]], [[LATCH_PROL:%.*]] ]
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ]
; CHECK-NEXT: [[C1_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N2]]
; CHECK-NEXT: br i1 [[C1_PROL]], label [[EXIT1_LOOPEXIT1:%.*]], label [[LATCH_PROL]]
; CHECK: latch.prol:
; CHECK-NEXT: [[C2_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N]]
; CHECK-NEXT: [[I_INC_PROL]] = add i32 [[I_PROL]], 1
; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: loop.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[I_INC_PROL]], [[LATCH_PROL]] ]
; CHECK-NEXT: br label [[LOOP_PROL_LOOPEXIT]]
; CHECK: loop.prol.loopexit:
; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT2:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_UNR]], [[ENTRY_NEW]] ], [ [[I_INC_7:%.*]], [[LATCH_7:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[I]], [[N2]]
; CHECK-NEXT: br i1 [[C1]], label [[EXIT1_LOOPEXIT:%.*]], label [[LATCH:%.*]]
; CHECK-NEXT: br i1 [[C1]], label [[EXIT1:%.*]], label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1
; CHECK-NEXT: [[C1_1:%.*]] = icmp eq i32 [[I_INC]], [[N2]]
; CHECK-NEXT: br i1 [[C1_1]], label [[EXIT1_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: exit1.loopexit:
; CHECK-NEXT: br label [[EXIT1:%.*]]
; CHECK: exit1.loopexit1:
; CHECK-NEXT: br label [[EXIT1]]
; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[I]], [[N:%.*]]
; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
; CHECK-NEXT: br i1 [[C2]], label [[EXIT2:%.*]], label [[LOOP]]
; CHECK: exit1:
; CHECK-NEXT: ret void
; CHECK: exit2.unr-lcssa:
; CHECK-NEXT: br label [[EXIT2]]
; CHECK: exit2:
; CHECK-NEXT: ret void
; CHECK: latch.1:
; CHECK-NEXT: [[I_INC_1:%.*]] = add i32 [[I_INC]], 1
; CHECK-NEXT: [[C1_2:%.*]] = icmp eq i32 [[I_INC_1]], [[N2]]
; CHECK-NEXT: br i1 [[C1_2]], label [[EXIT1_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
; CHECK-NEXT: [[I_INC_2:%.*]] = add i32 [[I_INC_1]], 1
; CHECK-NEXT: [[C1_3:%.*]] = icmp eq i32 [[I_INC_2]], [[N2]]
; CHECK-NEXT: br i1 [[C1_3]], label [[EXIT1_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: [[I_INC_3:%.*]] = add i32 [[I_INC_2]], 1
; CHECK-NEXT: [[C1_4:%.*]] = icmp eq i32 [[I_INC_3]], [[N2]]
; CHECK-NEXT: br i1 [[C1_4]], label [[EXIT1_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
; CHECK-NEXT: [[I_INC_4:%.*]] = add i32 [[I_INC_3]], 1
; CHECK-NEXT: [[C1_5:%.*]] = icmp eq i32 [[I_INC_4]], [[N2]]
; CHECK-NEXT: br i1 [[C1_5]], label [[EXIT1_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
; CHECK-NEXT: [[I_INC_5:%.*]] = add i32 [[I_INC_4]], 1
; CHECK-NEXT: [[C1_6:%.*]] = icmp eq i32 [[I_INC_5]], [[N2]]
; CHECK-NEXT: br i1 [[C1_6]], label [[EXIT1_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
; CHECK-NEXT: [[I_INC_6:%.*]] = add i32 [[I_INC_5]], 1
; CHECK-NEXT: [[C1_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N2]]
; CHECK-NEXT: br i1 [[C1_7]], label [[EXIT1_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
; CHECK-NEXT: [[C2_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N]]
; CHECK-NEXT: [[I_INC_7]] = add i32 [[I_INC_6]], 1
; CHECK-NEXT: br i1 [[C2_7]], label [[EXIT2_UNR_LCSSA:%.*]], label [[LOOP]]
;
entry:
%n2 = add i32 %s, 123

View File

@ -60,30 +60,46 @@ exit:
; SCEV unrolling properly handles loops with multiple exits. In this
; case, the computed trip count based on a canonical IV is *not* for a
; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
; unrolling does not.
; latch block.
define i64 @earlyLoopTest(i64* %base) nounwind {
; CHECK-LABEL: @earlyLoopTest(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ]
; CHECK-NEXT: [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ]
; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 4
; CHECK-NEXT: [[S_NEXT]] = add i64 [[S]], [[VAL]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[INC]], 4
; CHECK-NEXT: br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]]
; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4
; CHECK-NEXT: br label [[TAIL:%.*]]
; CHECK: tail:
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]]
; CHECK: exit1:
; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ]
; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ]
; CHECK-NEXT: ret i64 [[S_LCSSA]]
; CHECK: exit2:
; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ]
; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1:%.*]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ], [ [[S_NEXT_3:%.*]], [[TAIL_3:%.*]] ]
; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]]
; CHECK: loop.1:
; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1
; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4
; CHECK-NEXT: [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]]
; CHECK-NEXT: br label [[TAIL_1]]
; CHECK: tail.1:
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0
; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]]
; CHECK: loop.2:
; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2
; CHECK-NEXT: [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4
; CHECK-NEXT: [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]]
; CHECK-NEXT: br label [[TAIL_2]]
; CHECK: tail.2:
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0
; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]]
; CHECK: loop.3:
; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3
; CHECK-NEXT: [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4
; CHECK-NEXT: [[S_NEXT_3]] = add i64 [[S_NEXT_2]], [[VAL_3]]
; CHECK-NEXT: br i1 false, label [[TAIL_3]], label [[EXIT1:%.*]]
; CHECK: tail.3:
; CHECK-NEXT: br label [[EXIT2]]
;
entry:
br label %loop
@ -115,18 +131,12 @@ define i32 @multiExit(i32* %base) nounwind {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[L1:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ]
; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[IV1]], 1
; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
; CHECK-NEXT: br i1 false, label [[L2]], label [[EXIT1:%.*]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]]
; CHECK: l2:
; CHECK-NEXT: br i1 true, label [[L1]], label [[EXIT2:%.*]]
; CHECK-NEXT: ret i32 [[VAL]]
; CHECK: exit1:
; CHECK-NEXT: ret i32 1
; CHECK: exit2:
; CHECK-NEXT: [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], [[L2]] ]
; CHECK-NEXT: ret i32 [[VAL_LCSSA1]]
;
entry:
br label %l1
@ -149,32 +159,69 @@ exit2:
}
; SCEV should not unroll a multi-exit loops unless the latch block has
; a known trip count, regardless of the early exit trip counts. The
; LoopUnroll utility uses this assumption to optimize the latch
; block's branch.
; SCEV can unroll a multi-exit loops even if the latch block has no
; known trip count, but an early exit has a known trip count. In this
; case we must be careful not to optimize the latch branch away.
define i32 @multiExitIncomplete(i32* %base) nounwind {
; CHECK-LABEL: @multiExitIncomplete(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[L1:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ]
; CHECK-NEXT: [[INC1]] = add nuw i32 [[IV1]], 1
; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5
; CHECK-NEXT: br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4
; CHECK-NEXT: br label [[L2:%.*]]
; CHECK: l2:
; CHECK-NEXT: br i1 true, label [[L3]], label [[EXIT2:%.*]]
; CHECK-NEXT: br label [[L3:%.*]]
; CHECK: l3:
; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0
; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]]
; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]]
; CHECK: exit1:
; CHECK-NEXT: ret i32 1
; CHECK: exit2:
; CHECK-NEXT: ret i32 2
; CHECK: exit3:
; CHECK-NEXT: ret i32 3
; CHECK: l1.1:
; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1
; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4
; CHECK-NEXT: br label [[L2_1:%.*]]
; CHECK: l2.1:
; CHECK-NEXT: br label [[L3_1:%.*]]
; CHECK: l3.1:
; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0
; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]]
; CHECK: l1.2:
; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2
; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4
; CHECK-NEXT: br label [[L2_2:%.*]]
; CHECK: l2.2:
; CHECK-NEXT: br label [[L3_2:%.*]]
; CHECK: l3.2:
; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0
; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]]
; CHECK: l1.3:
; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3
; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4
; CHECK-NEXT: br label [[L2_3:%.*]]
; CHECK: l2.3:
; CHECK-NEXT: br label [[L3_3:%.*]]
; CHECK: l3.3:
; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0
; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]]
; CHECK: l1.4:
; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4
; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4
; CHECK-NEXT: br label [[L2_4:%.*]]
; CHECK: l2.4:
; CHECK-NEXT: br label [[L3_4:%.*]]
; CHECK: l3.4:
; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0
; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]]
; CHECK: l1.5:
; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]]
; CHECK: l2.5:
; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]]
; CHECK: l3.5:
; CHECK-NEXT: br label [[EXIT3]]
;
entry:
br label %l1
@ -263,17 +310,18 @@ define void @nsw_latch(i32* %a) nounwind {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[B_03]], 8
; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: br i1 false, label [[RETURN]], label [[FOR_BODY]]
; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]]
; CHECK: return:
; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ]
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ]
; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4
; CHECK-NEXT: ret void
; CHECK: for.body.1:
; CHECK-NEXT: br i1 false, label [[FOR_COND_1]], label [[RETURN]]
; CHECK: for.cond.1:
; CHECK-NEXT: br label [[RETURN]]
;
entry:
br label %for.body

View File

@ -483,7 +483,7 @@ define void @unloopCriticalEdge() nounwind {
; CHECK: for.cond.i:
; CHECK-NEXT: br label [[FOR_COND_I]]
; CHECK: Proc2.exit:
; CHECK-NEXT: br label [[FOR_COND31]]
; CHECK-NEXT: unreachable
; CHECK: for.end94:
; CHECK-NEXT: ret void
;

View File

@ -8,25 +8,53 @@ define i16 @full_unroll_multiple_exiting_blocks(i16* %A, i16 %x, i16 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 123, [[ENTRY:%.*]] ], [ [[RES_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC9:%.*]], [[LATCH]] ]
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]]
; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[PTR]], align 2
; CHECK-NEXT: [[RES_NEXT]] = add i16 [[RES]], [[LV]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_0]], 3
; CHECK-NEXT: br i1 [[CMP]], label [[EXITING_1:%.*]], label [[EXIT:%.*]]
; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT: [[RES_NEXT:%.*]] = add i16 123, [[LV]]
; CHECK-NEXT: br label [[EXITING_1:%.*]]
; CHECK: exiting.1:
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i16 [[LV]], [[X:%.*]]
; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[EXITING_2:%.*]]
; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT:%.*]], label [[EXITING_2:%.*]]
; CHECK: exiting.2:
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i16 [[LV]], [[Y:%.*]]
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH]]
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[INC9]] = add i64 [[I_0]], 1
; CHECK-NEXT: br label [[HEADER]]
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1
; CHECK-NEXT: [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2
; CHECK-NEXT: [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]]
; CHECK-NEXT: br label [[EXITING_1_1:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ]
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ]
; CHECK-NEXT: ret i16 [[RES_LCSSA]]
; CHECK: exiting.1.1:
; CHECK-NEXT: [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]]
; CHECK-NEXT: br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]]
; CHECK: exiting.2.1:
; CHECK-NEXT: [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]]
; CHECK-NEXT: br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2
; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2
; CHECK-NEXT: [[RES_NEXT_2:%.*]] = add i16 [[RES_NEXT_1]], [[LV_2]]
; CHECK-NEXT: br label [[EXITING_1_2]]
; CHECK: exiting.1.2:
; CHECK-NEXT: [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]]
; CHECK-NEXT: br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]]
; CHECK: exiting.2.2:
; CHECK-NEXT: [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]]
; CHECK-NEXT: br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]]
; CHECK: latch.2:
; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3
; CHECK-NEXT: [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2
; CHECK-NEXT: [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], [[LV_3]]
; CHECK-NEXT: br i1 false, label [[EXITING_1_3]], label [[EXIT]]
; CHECK: exiting.1.3:
; CHECK-NEXT: [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]]
; CHECK-NEXT: br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]]
; CHECK: exiting.2.3:
; CHECK-NEXT: [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]]
; CHECK-NEXT: br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: unreachable
;
entry:
br label %header