forked from OSchip/llvm-project
[SCEV] More accurate calculation of max backedge count of some less-than loops
In loops that look something like i = n; do { ... } while(i++ < n+k); where k is a constant, the maximum backedge count is k (in fact the backedge count will be either 0 or k, depending on whether n+k wraps). More generally for LHS < RHS if RHS-(LHS of first comparison) is a constant then the loop will iterate either 0 or that constant number of times. This allows for more loop unrolling with the recent upper bound loop unrolling changes, and I'm working on a patch that will let loop unrolling additionally make use of the loop being executed either 0 or k times (we need to retain the loop comparison only on the first unrolled iteration). Differential Revision: https://reviews.llvm.org/D25607 llvm-svn: 284465
This commit is contained in:
parent
692f4e95cf
commit
ecf79300dd
|
@ -8655,43 +8655,68 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
|
|||
: ICmpInst::ICMP_ULT;
|
||||
const SCEV *Start = IV->getStart();
|
||||
const SCEV *End = RHS;
|
||||
if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
|
||||
// If the backedge is taken at least once, then it will be taken
|
||||
// (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
|
||||
// is the LHS value of the less-than comparison the first time it is evaluated
|
||||
// and End is the RHS.
|
||||
const SCEV *BECountIfBackedgeTaken =
|
||||
computeBECount(getMinusSCEV(End, Start), Stride, false);
|
||||
// If the loop entry is guarded by the result of the backedge test of the
|
||||
// first loop iteration, then we know the backedge will be taken at least
|
||||
// once and so the backedge taken count is as above. If not then we use the
|
||||
// expression (max(End,Start)-Start)/Stride to describe the backedge count,
|
||||
// as if the backedge is taken at least once max(End,Start) is End and so the
|
||||
// result is as above, and if not max(End,Start) is Start so we get a backedge
|
||||
// count of zero.
|
||||
const SCEV *BECount;
|
||||
if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
|
||||
BECount = BECountIfBackedgeTaken;
|
||||
else {
|
||||
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
|
||||
|
||||
const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
|
||||
|
||||
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
|
||||
: getUnsignedRange(Start).getUnsignedMin();
|
||||
|
||||
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
|
||||
|
||||
APInt StrideForMaxBECount;
|
||||
|
||||
if (PositiveStride)
|
||||
StrideForMaxBECount = IsSigned ? getSignedRange(Stride).getSignedMin()
|
||||
: getUnsignedRange(Stride).getUnsignedMin();
|
||||
else
|
||||
// Using a stride of 1 is safe when computing max backedge taken count for
|
||||
// a loop with unknown stride.
|
||||
StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
|
||||
|
||||
APInt Limit =
|
||||
IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
|
||||
: APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
|
||||
|
||||
// Although End can be a MAX expression we estimate MaxEnd considering only
|
||||
// the case End = RHS. This is safe because in the other case (End - Start)
|
||||
// is zero, leading to a zero maximum backedge taken count.
|
||||
APInt MaxEnd =
|
||||
IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
|
||||
: APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
|
||||
BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
|
||||
}
|
||||
|
||||
const SCEV *MaxBECount;
|
||||
if (isa<SCEVConstant>(BECount))
|
||||
MaxBECount = BECount;
|
||||
else
|
||||
else if (isa<SCEVConstant>(BECountIfBackedgeTaken))
|
||||
// If we know exactly how many times the backedge will be taken if it's
|
||||
// taken at least once, then the backedge count will either be that or
|
||||
// zero.
|
||||
MaxBECount = BECountIfBackedgeTaken;
|
||||
else {
|
||||
// Calculate the maximum backedge count based on the range of values
|
||||
// permitted by Start, End, and Stride.
|
||||
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
|
||||
: getUnsignedRange(Start).getUnsignedMin();
|
||||
|
||||
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
|
||||
|
||||
APInt StrideForMaxBECount;
|
||||
|
||||
if (PositiveStride)
|
||||
StrideForMaxBECount =
|
||||
IsSigned ? getSignedRange(Stride).getSignedMin()
|
||||
: getUnsignedRange(Stride).getUnsignedMin();
|
||||
else
|
||||
// Using a stride of 1 is safe when computing max backedge taken count for
|
||||
// a loop with unknown stride.
|
||||
StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
|
||||
|
||||
APInt Limit =
|
||||
IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
|
||||
: APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
|
||||
|
||||
// Although End can be a MAX expression we estimate MaxEnd considering only
|
||||
// the case End = RHS. This is safe because in the other case (End - Start)
|
||||
// is zero, leading to a zero maximum backedge taken count.
|
||||
APInt MaxEnd =
|
||||
IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
|
||||
: APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
|
||||
|
||||
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
|
||||
getConstant(StrideForMaxBECount), false);
|
||||
}
|
||||
|
||||
if (isa<SCEVCouldNotCompute>(MaxBECount))
|
||||
MaxBECount = BECount;
|
||||
|
|
|
@ -14,7 +14,7 @@ loop:
|
|||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u_0
|
||||
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) umax %rhs))
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
|
||||
|
||||
leave:
|
||||
ret void
|
||||
|
@ -34,7 +34,7 @@ loop:
|
|||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u_1
|
||||
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) umax %start))
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
|
||||
|
||||
leave:
|
||||
ret void
|
||||
|
@ -54,7 +54,7 @@ loop:
|
|||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s_0
|
||||
; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) smax %rhs))
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
|
||||
|
||||
leave:
|
||||
ret void
|
||||
|
@ -74,7 +74,7 @@ loop:
|
|||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s_1
|
||||
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) smax %start))
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
|
||||
; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
|
||||
|
||||
leave:
|
||||
ret void
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s
|
||||
|
||||
define void @s32_max1(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 1
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp slt i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s32_max1
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @s32_max2(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 2
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp slt i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s32_max2
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %x, %n
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp slt i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s32_maxx
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 2
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
|
||||
%cmp = icmp eq i32 %i.0, %x
|
||||
br i1 %cmp, label %do.end, label %if.end ; unpredictable
|
||||
|
||||
if.end:
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp1 = icmp slt i32 %i.0, %add
|
||||
br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
|
||||
; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @u32_max1(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 1
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp ult i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u32_max1
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) umax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @u32_max2(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 2
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp ult i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u32_max2
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) umax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %x, %n
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp = icmp ult i32 %i.0, %add
|
||||
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u32_maxx
|
||||
; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) umax %n))
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
|
||||
entry:
|
||||
%add = add i32 %n, 2
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
|
||||
%cmp = icmp eq i32 %i.0, %x
|
||||
br i1 %cmp, label %do.end, label %if.end ; unpredictable
|
||||
|
||||
if.end:
|
||||
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
|
||||
store i32 %i.0, i32* %arrayidx, align 4
|
||||
%inc = add i32 %i.0, 1
|
||||
%cmp1 = icmp ult i32 %i.0, %add
|
||||
br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
|
||||
|
||||
; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
|
||||
; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
|
||||
; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
|
||||
|
||||
do.end:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue