Revert "[Loop Peeling] Add possibility to enable peeling on loop nests."

This reverts commit 3f3017e because there's a failure on peel-loop-nests.ll
with LLVM_ENABLE_EXPENSIVE_CHECKS on.

Differential Revision: https://reviews.llvm.org/D70304
This commit is contained in:
Arkady Shlykov 2020-01-16 08:35:00 -08:00
parent d51a15d86a
commit c87982b467
5 changed files with 93 additions and 194 deletions

View File

@ -490,8 +490,6 @@ public:
bool UpperBound;
/// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow peeling off loop iterations for loop nests.
bool AllowLoopNestsPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.

View File

@ -154,10 +154,6 @@ static cl::opt<bool>
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
static cl::opt<bool> UnrollAllowLoopNestsPeeling(
"unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
cl::desc("Allows loop nests to be peeled."));
static cl::opt<bool> UnrollUnrollRemainder(
"unroll-remainder", cl::Hidden,
cl::desc("Allow the loop remainder to be unrolled."));
@ -219,7 +215,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Force = false;
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;
UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
@ -260,8 +255,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;

View File

@ -289,10 +289,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!canPeel(L))
return;
// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
if (!UP.AllowLoopNestsPeeling && !L->empty())
// Only try to peel innermost loops.
if (!L->empty())
return;
// If the user provided a peel count, use that.

View File

@ -403,11 +403,76 @@ for.end:
ret void
}
; Two-level loop nest: the outer loop (outer.header .. outer.inc) carries %j and
; the inner loop (for.body .. for.inc) carries %i.05. The branch inside the
; inner loop tests `%j ult 2`, which depends only on the outer loop's phi.
; In this case we cannot peel the inner loop, because the condition involves
; the outer induction variable.
; The assertions below expect the same blocks and instructions as the input,
; with no peeled copies of either loop.
define void @test5(i32 %k) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @f1()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: call void @f2()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
; CHECK: outer.inc:
; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
for.body.lr.ph:
br label %outer.header
; Outer loop header: %j starts at 0 and takes %j.inc on the back edge.
outer.header:
%j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
br label %for.body
; Inner loop header: %i.05 restarts at 0 on every entry from outer.header.
for.body:
%i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
; The condition reads %j (outer phi), not %i.05, so it does not change across
; inner-loop iterations.
%cmp1 = icmp ult i32 %j, 2
br i1 %cmp1, label %if.then, label %if.else
if.then:
call void @f1()
br label %for.inc
if.else:
call void @f2()
br label %for.inc
; Inner latch: continue while the incremented %i.05 is (signed) less than %k.
for.inc:
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %outer.inc
; Outer latch: continue while the incremented %j is (signed) less than %k.
outer.inc:
%j.inc = add nsw i32 %j, 1
%outer.cmp = icmp slt i32 %j.inc, %k
br i1 %outer.cmp, label %outer.header, label %for.end
for.end:
ret void
}
; In this test, the condition involves 2 AddRecs. Without evaluating both
; AddRecs, we cannot prove that the condition becomes known in the loop body
; after peeling.
define void @test5(i32 %k) {
; CHECK-LABEL: @test5(
define void @test6(i32 %k) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
@ -456,8 +521,8 @@ for.end:
ret void
}
define void @test6(i32 %k) {
; CHECK-LABEL: @test6(
define void @test7(i32 %k) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:
@ -550,8 +615,8 @@ for.end:
ret void
}
define void @test7(i32 %k) {
; CHECK-LABEL: @test7(
define void @test8(i32 %k) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:
@ -646,8 +711,8 @@ for.end:
; Comparison with non-monotonic predicate due to possible wrapping, loop
; body cannot be simplified.
define void @test8(i32 %k) {
; CHECK-LABEL: @test8(
define void @test9(i32 %k) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
@ -686,8 +751,8 @@ for.end:
}
; CHECK-NOT: llvm.loop.unroll.disable
define void @test_9__peel_first_iter_via_slt_pred(i32 %len) {
; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred(
define void @test_10__peel_first_iter_via_slt_pred(i32 %len) {
; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -753,8 +818,8 @@ if.end: ; preds = %if.then, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) {
; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred(
define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) {
; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -822,8 +887,8 @@ if.end: ; preds = %if.then, %for.body
; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.
define void @test11__peel_first_iter_via_eq_pred(i32 %len) {
; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred(
define void @test12__peel_first_iter_via_eq_pred(i32 %len) {
; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -891,8 +956,8 @@ if.end: ; preds = %if.then, %for.body
; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.
define void @test12__peel_first_iter_via_ne_pred(i32 %len) {
; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred(
define void @test13__peel_first_iter_via_ne_pred(i32 %len) {
; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -959,8 +1024,8 @@ if.end: ; preds = %if.then, %for.body
}
; No peeling is profitable here.
define void @test13__ivar_mod2_is_1(i32 %len) {
; CHECK-LABEL: @test13__ivar_mod2_is_1(
define void @test14__ivar_mod2_is_1(i32 %len) {
; CHECK-LABEL: @test14__ivar_mod2_is_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -1009,8 +1074,8 @@ if.end: ; preds = %if.then, %for.body
}
; No peeling is profitable here.
define void @test14__ivar_mod2_is_0(i32 %len) {
; CHECK-LABEL: @test14__ivar_mod2_is_0(
define void @test15__ivar_mod2_is_0(i32 %len) {
; CHECK-LABEL: @test15__ivar_mod2_is_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@ -1058,10 +1123,10 @@ if.end: ; preds = %if.then, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Similar to @test6, we need to peel one extra iteration, and we can't do that
; Similar to @test7, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
define void @test15(i32 %k) {
; CHECK-LABEL: @test15(
define void @test16(i32 %k) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
@ -1099,10 +1164,10 @@ for.end:
ret void
}
; Similar to @test7, we need to peel one extra iteration, and we can't do that
; Similar to @test8, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
define void @test16(i32 %k) {
; CHECK-LABEL: @test16(
define void @test17(i32 %k) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:

View File

@ -1,155 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s
; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED
declare void @f1()
declare void @f2()
; Two-level loop nest: the outer loop (outer.header .. outer.inc) carries %j and
; the inner loop (for.body .. for.inc) carries %i.05. The branch inside the
; inner loop tests `%j ult 2`, which depends only on the outer loop's phi.
; In this case we cannot peel the inner loop, because the condition involves
; the outer induction variable.
; Peel the loop nest if allowed by the flag -unroll-allow-loop-nests-peeling.
; The first group of assertions (run without the flag) expects the nest to keep
; its original shape, including the !llvm.loop attachment on the outer latch.
; The second group (PEELED prefix, run with the flag) expects two peeled copies
; of the outer loop body ahead of the loop, and the inner branch condition in
; the remaining loop folded to `false`.
define void @test1(i32 %k) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @f1()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: call void @f2()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
; CHECK: outer.inc:
; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}}
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; PEELED-LABEL: @test1(
; PEELED-NEXT: for.body.lr.ph:
; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]]
; PEELED: outer.header.peel.begin:
; PEELED-NEXT: br label [[OUTER_HEADER_PEEL:%.*]]
; PEELED: outer.header.peel:
; PEELED-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; PEELED: for.body.peel:
; PEELED-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ]
; PEELED-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
; PEELED-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
; PEELED: if.else.peel:
; PEELED-NEXT: call void @f2()
; PEELED-NEXT: br label [[FOR_INC_PEEL]]
; PEELED: if.then.peel:
; PEELED-NEXT: call void @f1()
; PEELED-NEXT: br label [[FOR_INC_PEEL]]
; PEELED: for.inc.peel:
; PEELED-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1
; PEELED-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
; PEELED-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]]
; PEELED: outer.inc.peel:
; PEELED-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 0, 1
; PEELED-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]]
; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
; Verify that MD_loop metadata is dropped.
; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
; PEELED: outer.header.peel.next:
; PEELED-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]]
; PEELED: outer.header.peel2:
; PEELED-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
; PEELED: for.body.peel3:
; PEELED-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ]
; PEELED-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2
; PEELED-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]]
; PEELED: if.else.peel6:
; PEELED-NEXT: call void @f2()
; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
; PEELED: if.then.peel7:
; PEELED-NEXT: call void @f1()
; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
; PEELED: for.inc.peel8:
; PEELED-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1
; PEELED-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]]
; PEELED-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]]
; PEELED: outer.inc.peel11:
; PEELED-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1
; PEELED-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]]
; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]]
; Verify that MD_loop metadata is dropped.
; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
; PEELED: outer.header.peel.next1:
; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]]
; PEELED: outer.header.peel.next14:
; PEELED-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
; PEELED: for.body.lr.ph.peel.newph:
; PEELED-NEXT: br label [[OUTER_HEADER:%.*]]
; PEELED: outer.header:
; PEELED-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
; PEELED-NEXT: br label [[FOR_BODY:%.*]]
; PEELED: for.body:
; PEELED-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; PEELED-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; PEELED: if.then:
; PEELED-NEXT: call void @f1()
; PEELED-NEXT: br label [[FOR_INC]]
; PEELED: if.else:
; PEELED-NEXT: call void @f2()
; PEELED-NEXT: br label [[FOR_INC]]
; PEELED: for.inc:
; PEELED-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
; PEELED-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
; PEELED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
; PEELED: outer.inc:
; PEELED-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
; PEELED-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
; PEELED-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
; PEELED: for.end.loopexit:
; PEELED-NEXT: br label [[FOR_END]]
; PEELED: for.end:
; PEELED-NEXT: ret void
;
for.body.lr.ph:
br label %outer.header
; Outer loop header: %j starts at 0 and takes %j.inc on the back edge.
outer.header:
%j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
br label %for.body
; Inner loop header: %i.05 restarts at 0 on every entry from outer.header.
for.body:
%i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
; The condition reads %j (outer phi), not %i.05, so it does not change across
; inner-loop iterations.
%cmp1 = icmp ult i32 %j, 2
br i1 %cmp1, label %if.then, label %if.else
if.then:
call void @f1()
br label %for.inc
if.else:
call void @f2()
br label %for.inc
; Inner latch: continue while the incremented %i.05 is (signed) less than %k.
for.inc:
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %outer.inc
; Outer latch: carries the !llvm.loop attachment that the assertions above look
; for on the remaining loop and require to be absent from the peeled copies.
outer.inc:
%j.inc = add nsw i32 %j, 1
%outer.cmp = icmp slt i32 %j.inc, %k
br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0
for.end:
ret void
}
!0 = distinct !{!0}