[LoopUnroll] Separate peeling from unrolling

Loop peeling is currently performed as part of UnrollLoop().
Outside test scenarios, it is always performed with an unroll
count of 1. This means that unrolling doesn't actually do anything
apart from performing post-unroll simplification.

When testing, it's currently possible to specify both an explicit
peel count and an explicit unroll count. This doesn't perform any
sensible operation and may result in miscompiles, see
https://bugs.llvm.org/show_bug.cgi?id=45939.

This patch moves peeling from UnrollLoop() into tryToUnrollLoop(),
so that peeling does not also perform a subsequent unroll. We only
run the post-unroll simplifications. Specifying both an explicit
peel count and unroll count is forbidden.

In the future, we may want to support both (non-PGO) peeling a
loop and unrolling it, but this needs to be done by first performing
the peel and then recalculating unrolling heuristics on a now
possibly analyzable loop.

Differential Revision: https://reviews.llvm.org/D103362
This commit is contained in:
Nikita Popov 2021-05-29 18:33:31 +02:00
parent cf29a92b90
commit db45746821
8 changed files with 81 additions and 192 deletions

View File

@ -71,7 +71,6 @@ struct UnrollLoopOptions {
bool AllowRuntime;
bool AllowExpensiveTripCount;
unsigned TripMultiple;
unsigned PeelCount;
bool UnrollRemainder;
bool ForgetAllSCEV;
};

View File

@ -771,6 +771,18 @@ bool llvm::computeUnrollCount(
UnrollCostEstimator UCE(*L, LoopSize);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
if (UnrollCount.getNumOccurrences() > 0) {
report_fatal_error("Cannot specify both explicit peel count and "
"explicit unroll count");
}
UP.Count = 1;
UP.Runtime = false;
return true;
}
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@ -1158,6 +1170,28 @@ static LoopUnrollResult tryToUnrollLoop(
if (TripCount && UP.Count > TripCount)
UP.Count = TripCount;
if (PP.PeelCount) {
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
<< " with iteration count " << PP.PeelCount << "!\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
L->getHeader())
<< " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
<< " iterations";
});
if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (PP.PeelProfiledIterations)
L->setLoopAlreadyUnrolled();
return LoopUnrollResult::PartiallyUnrolled;
}
return LoopUnrollResult::Unmodified;
}
// Save loop properties before it is transformed.
MDNode *OrigLoopID = L->getLoopID();
@ -1166,7 +1200,7 @@ static LoopUnrollResult tryToUnrollLoop(
LoopUnrollResult UnrollResult = UnrollLoop(
L,
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
TripMultiple, PP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV},
TripMultiple, UP.UnrollRemainder, ForgetAllSCEV},
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
@ -1194,10 +1228,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If loop has an unroll count pragma or unrolled by explicitly set count
// mark loop as unrolled to prevent unrolling beyond that requested.
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
(IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
L->setLoopAlreadyUnrolled();
return UnrollResult;

View File

@ -59,7 +59,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@ -259,9 +258,6 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// If we want to perform PGO-based loop peeling, PeelCount is set to the
/// number of iterations we want to peel off.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
@ -311,7 +307,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
ULO.Count = ULO.TripCount;
// Don't enter the unroll code if there is nothing to do.
if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
if (ULO.TripCount == 0 && ULO.Count < 2) {
LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
@ -320,25 +316,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(ULO.TripMultiple > 0);
assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
bool Peeled = false;
if (ULO.PeelCount) {
Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Successful peeling may result in a change in the loop preheader/trip
// counts. If we later unroll the loop, we want these to be updated.
if (Peeled) {
// According to our guards and profitability checks the only
// meaningful exit should be latch block. Other exits go to deopt,
// so we do not worry about them.
BasicBlock *ExitingBlock = L->getLoopLatch();
assert(ExitingBlock && "Loop without exiting block?");
assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
}
// Are we eliminating the loop control altogether? Note that we can know
// we're eliminating the backedge without knowing exactly which iteration
// of the unrolled body exits.
@ -350,10 +327,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
bool RuntimeTripCount =
(ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
assert((!RuntimeTripCount || !ULO.PeelCount) &&
"Did not expect runtime trip-count unrolling "
"and peeling for the same loop");
// All these values should be taken only after peeling because they might have
// changed.
BasicBlock *Preheader = L->getLoopPreheader();
@ -396,9 +369,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
else if (BasicBlock *ExitingBlock = L->getExitingBlock())
ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
// If the peeling guard is changed this assert may be relaxed or even
// deleted.
assert(!Peeled && "Peeling guard changed!");
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
@ -473,16 +443,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
<< "completely unrolled loop with "
<< NV("UnrollCount", ULO.TripCount) << " iterations";
});
} else if (ULO.PeelCount) {
LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
<< " with iteration count " << ULO.PeelCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
L->getHeader())
<< " peeled loop by " << NV("PeelCount", ULO.PeelCount)
<< " iterations";
});
} else {
auto DiagBuilder = [&]() {
OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
@ -835,8 +795,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
SE, DT, AC, TTI);
simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
TTI);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;

View File

@ -987,7 +987,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
{/*Count*/ Count - 1, /*TripCount*/ Count - 1,
/*Force*/ false, /*AllowRuntime*/ false,
/*AllowExpensiveTripCount*/ false, /*TripMultiple*/ 1,
/*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
/*UnrollRemainder*/ false, ForgetAllSCEV},
LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
}

View File

@ -0,0 +1,22 @@
; RUN: not --crash opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S < %s 2>&1 | FileCheck %s
; CHECK: LLVM ERROR: Cannot specify both explicit peel count and explicit unroll count
@a = global [8 x i32] zeroinitializer, align 16
; Counted loop that runs for indices 0..7 and stores the index (truncated to
; i32) into the matching element of @a. The RUN line passes both
; -unroll-peel-count and -unroll-count, so opt is expected to abort with the
; fatal error matched by the CHECK line rather than transform this loop.
define void @test1() {
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Leave the loop once the incremented index reaches 8.
%exitcond = icmp ne i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.body, label %for.exit
for.exit: ; preds = %for.body
ret void
}

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-unroll -unroll-count=4 -unroll-peel-count=1 < %s | FileCheck %s
; RUN: opt -S -loop-unroll -unroll-peel-count=1 < %s | FileCheck %s
declare zeroext i8 @patatino()
@ -7,6 +7,8 @@ define fastcc void @tinky() {
; CHECK-LABEL: @tinky(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[NEXT:%.*]]
; CHECK: loopexit.loopexit:
; CHECK-NEXT: br label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: ret void
; CHECK: next:
@ -15,7 +17,7 @@ define fastcc void @tinky() {
; CHECK-NEXT: br label [[LOOP_PEEL:%.*]]
; CHECK: loop.peel:
; CHECK-NEXT: [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT:%.*]]
; CHECK-NEXT: br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT]]
; CHECK: loop.peel.next:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]]
; CHECK: loop.peel.next1:
@ -24,7 +26,7 @@ define fastcc void @tinky() {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: br label [[LOOPEXIT]]
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
;
entry:
br label %next
@ -66,19 +68,11 @@ define void @tinky2() {
; CHECK: next.peel.newph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT_3:%.*]], [[LOOP_2:%.*]] ]
; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: [[B_NEXT:%.*]] = add nuw nsw i32 [[B]], 1
; CHECK-NEXT: [[CALL593_1:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: [[B_NEXT_1:%.*]] = add nuw nsw i32 [[B_NEXT]], 1
; CHECK-NEXT: [[COND_1:%.*]] = icmp ne i32 [[B_NEXT]], 30
; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_2]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !0
; CHECK: loop.2:
; CHECK-NEXT: [[CALL593_2:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: [[B_NEXT_2:%.*]] = add nuw nsw i32 [[B_NEXT_1]], 1
; CHECK-NEXT: [[CALL593_3:%.*]] = tail call zeroext i8 @patatino()
; CHECK-NEXT: [[B_NEXT_3]] = add nuw nsw i32 [[B_NEXT_2]], 1
; CHECK-NEXT: br label [[LOOP]], !llvm.loop !2
; CHECK-NEXT: [[B_NEXT]] = add nuw nsw i32 [[B]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[B]], 30
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
br label %next

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-unroll -unroll-peel-count=2 -S %s | FileCheck --check-prefix=PEEL2 %s
; RUN: opt -loop-unroll -unroll-peel-count=8 -S %s | FileCheck --check-prefix=PEEL8 %s
; RUN: opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S %s | FileCheck --check-prefix=PEEL2UNROLL2 %s
; Test case for PR45939. Make sure unroll count is adjusted when loop is peeled and unrolled.
@ -36,47 +35,17 @@ define void @test1() {
; PEEL2: entry.peel.newph:
; PEEL2-NEXT: br label [[FOR_BODY:%.*]]
; PEEL2: for.body:
; PEEL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_6:%.*]] ]
; PEEL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; PEEL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
; PEEL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; PEEL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; PEEL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; PEEL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; PEEL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; PEEL2-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]]
; PEEL2-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
; PEEL2-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX_2]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; PEEL2-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]]
; PEEL2-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
; PEEL2-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX_3]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; PEEL2-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]]
; PEEL2-NEXT: [[TMP6:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
; PEEL2-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX_4]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; PEEL2-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]]
; PEEL2-NEXT: [[TMP7:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
; PEEL2-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX_5]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; PEEL2-NEXT: [[EXITCOND_5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_5]], 8
; PEEL2-NEXT: br i1 [[EXITCOND_5]], label [[FOR_BODY_6]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; PEEL2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 8
; PEEL2-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL2: for.exit.loopexit:
; PEEL2-NEXT: br label [[FOR_EXIT]]
; PEEL2: for.exit:
; PEEL2-NEXT: ret void
; PEEL2: for.body.6:
; PEEL2-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]]
; PEEL2-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
; PEEL2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX_6]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; PEEL2-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]]
; PEEL2-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; PEEL2-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX_7]], align 4
; PEEL2-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; PEEL2-NEXT: br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
;
; PEEL8-LABEL: @test1(
; PEEL8-NEXT: entry:
@ -160,102 +129,16 @@ define void @test1() {
; PEEL8: entry.peel.newph:
; PEEL8-NEXT: br label [[FOR_BODY:%.*]]
; PEEL8: for.body:
; PEEL8-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ]
; PEEL8-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; PEEL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
; PEEL8-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; PEEL8-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_1:%.*]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL8-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL8: for.exit.loopexit:
; PEEL8-NEXT: br label [[FOR_EXIT]]
; PEEL8: for.exit:
; PEEL8-NEXT: ret void
; PEEL8: for.body.1:
; PEEL8-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; PEEL8-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; PEEL8-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX_1]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_2:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.2:
; PEEL8-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]]
; PEEL8-NEXT: [[TMP10:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
; PEEL8-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX_2]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_3:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.3:
; PEEL8-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]]
; PEEL8-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
; PEEL8-NEXT: store i32 [[TMP11]], i32* [[ARRAYIDX_3]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_4:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.4:
; PEEL8-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]]
; PEEL8-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
; PEEL8-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX_4]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_5:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.5:
; PEEL8-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]]
; PEEL8-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
; PEEL8-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX_5]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_6:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.6:
; PEEL8-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]]
; PEEL8-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
; PEEL8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX_6]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY_7]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
; PEEL8: for.body.7:
; PEEL8-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]]
; PEEL8-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; PEEL8-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX_7]], align 4
; PEEL8-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; PEEL8-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
;
; PEEL2UNROLL2-LABEL: @test1(
; PEEL2UNROLL2-NEXT: entry:
; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; PEEL2UNROLL2: for.body.peel.begin:
; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; PEEL2UNROLL2: for.body.peel:
; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0
; PEEL2UNROLL2-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32
; PEEL2UNROLL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4
; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1
; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8
; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]]
; PEEL2UNROLL2: for.body.peel.next:
; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
; PEEL2UNROLL2: for.body.peel2:
; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]]
; PEEL2UNROLL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32
; PEEL2UNROLL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4
; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1
; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8
; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]]
; PEEL2UNROLL2: for.body.peel.next1:
; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
; PEEL2UNROLL2: for.body.peel.next6:
; PEEL2UNROLL2-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
; PEEL2UNROLL2: entry.peel.newph:
; PEEL2UNROLL2-NEXT: br label [[FOR_BODY:%.*]]
; PEEL2UNROLL2: for.body:
; PEEL2UNROLL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ]
; PEEL2UNROLL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
; PEEL2UNROLL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; PEEL2UNROLL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4
; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; PEEL2UNROLL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; PEEL2UNROLL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; PEEL2UNROLL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4
; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; PEEL2UNROLL2-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8
; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PEEL2UNROLL2: for.exit.loopexit:
; PEEL2UNROLL2-NEXT: br label [[FOR_EXIT]]
; PEEL2UNROLL2: for.exit:
; PEEL2UNROLL2-NEXT: ret void
;
entry:
br label %for.body

View File

@ -39,13 +39,13 @@ define i32 @test() {
; CHECK-NEXT: [[TMP4]] = add nsw i32 [[TMP3]], [[TMP]]
; CHECK-NEXT: br label [[BB5:%.*]]
; CHECK: bb5:
; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]]
; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]]
; CHECK: bb7:
; CHECK-NEXT: br i1 undef, label [[BB10:%.*]], label [[BB10]]
; CHECK: bb10:
; CHECK-NEXT: br i1 false, label [[BB12]], label [[BB17_LOOPEXIT:%.*]]
; CHECK-NEXT: br i1 undef, label [[BB12]], label [[BB17_LOOPEXIT:%.*]]
; CHECK: bb12:
; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop !0
; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: bb13.loopexit:
; CHECK-NEXT: br label [[BB13]]
; CHECK: bb13: