forked from OSchip/llvm-project
[IndVarSimplify] Use control-dependent range information to prove non-negativity
This change is motivated by a case where IndVarSimplify doesn't widen a comparison of the IV increment because it can't prove that the IV increment is non-negative. As a result we end up with a redundant trunc of the widened increment in this example:

for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %within_limits = icmp ult i32 %i, 64
  br i1 %within_limits, label %continue, label %for.end

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

There is a range check inside of the loop which guarantees the IV to be non-negative. NSW on the increment guarantees that the increment is also non-negative. Teach IndVarSimplify to use the range check to prove non-negativity of loop increments.

Reviewed By: sanjoy
Differential Revision: https://reviews.llvm.org/D25738
llvm-svn: 284629
This commit is contained in:
parent
16970a847c
commit
f2d5dc5dc6
|
@ -81,6 +81,11 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
|
|||
clEnumValN(AlwaysRepl, "always",
|
||||
"always replace exit value whenever possible")));
|
||||
|
||||
// Hidden command-line switch, on by default. When set, createWideIV computes
// control-dependent ranges for post-increment IV uses before widening.
static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden, cl::init(true),
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"));
|
||||
|
||||
namespace {
|
||||
struct RewritePhi;
|
||||
|
||||
|
@ -903,6 +908,33 @@ class WidenIV {
|
|||
// Value: the kind of extension used to widen this Instruction.
|
||||
DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
|
||||
|
||||
typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
|
||||
// A map with control-dependent ranges for post increment IV uses. The key is
|
||||
// a pair of IV def and a use of this def denoting the context. The value is
|
||||
// a ConstantRange representing possible values of the def at the given
|
||||
// context.
|
||||
DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
|
||||
|
||||
/// Look up the control-dependent range recorded for \p Def in the context of
/// its user \p UseI.
///
/// \returns the stored range, or None if nothing was recorded for that pair.
Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                            Instruction *UseI) {
  auto Found = PostIncRangeInfos.find(DefUserPair(Def, UseI));
  if (Found == PostIncRangeInfos.end())
    return Optional<ConstantRange>(None);
  return Optional<ConstantRange>(Found->second);
}
|
||||
|
||||
void calculatePostIncRanges(PHINode *OrigPhi);
|
||||
void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
|
||||
/// Record that \p Def lies within \p R when observed at its user \p UseI.
///
/// The first range seen for a (def, user) pair is stored as is; every later
/// range for the same pair narrows the stored one by intersection.
void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
  auto Inserted = PostIncRangeInfos.insert({DefUserPair(Def, UseI), R});
  if (Inserted.second)
    return;

  // An entry already existed: keep only the values both ranges allow.
  ConstantRange &Known = Inserted.first->second;
  Known = R.intersectWith(Known);
}
|
||||
|
||||
public:
|
||||
WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
|
||||
ScalarEvolution *SEv, DominatorTree *DTree,
|
||||
|
@ -1429,7 +1461,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
|||
///
|
||||
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
|
||||
const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
|
||||
bool NeverNegative =
|
||||
bool NonNegativeDef =
|
||||
SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
|
||||
SE->getConstant(NarrowSCEV->getType(), 0));
|
||||
for (User *U : NarrowDef->users()) {
|
||||
|
@ -1439,7 +1471,15 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
|
|||
if (!Widened.insert(NarrowUser).second)
|
||||
continue;
|
||||
|
||||
NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
|
||||
bool NonNegativeUse = false;
|
||||
if (!NonNegativeDef) {
|
||||
// We might have control-dependent range information for this context.
|
||||
if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
|
||||
NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
|
||||
}
|
||||
|
||||
NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
|
||||
NonNegativeDef || NonNegativeUse);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1479,6 +1519,19 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
|
|||
SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
|
||||
"Loop header phi recurrence inputs do not dominate the loop");
|
||||
|
||||
// Iterate over IV uses (including transitive ones) looking for IV increments
|
||||
// of the form 'add nsw %iv, <const>'. For each increment and each use of
|
||||
// the increment, calculate control-dependent range information based on
|
||||
// dominating conditions inside of the loop (e.g. a range check inside of the
|
||||
// loop). Calculated ranges are stored in PostIncRangeInfos map.
|
||||
//
|
||||
// Control-dependent range information is later used to prove that a narrow
|
||||
// definition is not negative (see pushNarrowIVUsers). It's difficult to do
|
||||
// this on demand because when pushNarrowIVUsers needs this information some
|
||||
// of the dominating conditions might be already widened.
|
||||
if (UsePostIncrementRanges)
|
||||
calculatePostIncRanges(OrigPhi);
|
||||
|
||||
// The rewriter provides a value for the desired IV expression. This may
|
||||
// either find an existing phi or materialize a new one. Either way, we
|
||||
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
|
||||
|
@ -1523,6 +1576,99 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
|
|||
return WidePhi;
|
||||
}
|
||||
|
||||
/// Calculates control-dependent range for the given def at the given context
|
||||
/// by looking at dominating conditions inside of the loop
|
||||
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
|
||||
Instruction *NarrowUser) {
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
Value *NarrowDefLHS;
|
||||
const APInt *NarrowDefRHS;
|
||||
if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
|
||||
m_APInt(NarrowDefRHS))) ||
|
||||
!NarrowDefRHS->isNonNegative())
|
||||
return;
|
||||
|
||||
auto UpdateRangeFromCondition = [&] (Value *Condition,
|
||||
bool TrueDest) {
|
||||
CmpInst::Predicate Pred;
|
||||
Value *CmpRHS;
|
||||
if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
|
||||
m_Value(CmpRHS))))
|
||||
return;
|
||||
|
||||
CmpInst::Predicate P =
|
||||
TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
|
||||
|
||||
auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
|
||||
auto CmpConstrainedLHSRange =
|
||||
ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
|
||||
auto NarrowDefRange =
|
||||
CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
|
||||
|
||||
updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
|
||||
};
|
||||
|
||||
BasicBlock *NarrowUserBB = NarrowUser->getParent();
|
||||
// If NarrowUserBB is statically unreachable asking dominator queries may
|
||||
// yield suprising results. (e.g. the block may not have a dom tree node)
|
||||
if (!DT->isReachableFromEntry(NarrowUserBB))
|
||||
return;
|
||||
|
||||
for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
|
||||
L->contains(DTB->getBlock());
|
||||
DTB = DTB->getIDom()) {
|
||||
auto *BB = DTB->getBlock();
|
||||
auto *TI = BB->getTerminator();
|
||||
|
||||
auto *BI = dyn_cast<BranchInst>(TI);
|
||||
if (!BI || !BI->isConditional())
|
||||
continue;
|
||||
|
||||
auto *TrueSuccessor = BI->getSuccessor(0);
|
||||
auto *FalseSuccessor = BI->getSuccessor(1);
|
||||
|
||||
auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
|
||||
return BBE.isSingleEdge() &&
|
||||
DT->dominates(BBE, NarrowUser->getParent());
|
||||
};
|
||||
|
||||
if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
|
||||
UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
|
||||
|
||||
if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
|
||||
UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates PostIncRangeInfos map for the given IV
|
||||
void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
|
||||
SmallPtrSet<Instruction *, 16> Visited;
|
||||
SmallVector<Instruction *, 6> Worklist;
|
||||
Worklist.push_back(OrigPhi);
|
||||
Visited.insert(OrigPhi);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *NarrowDef = Worklist.pop_back_val();
|
||||
|
||||
for (Use &U : NarrowDef->uses()) {
|
||||
auto *NarrowUser = cast<Instruction>(U.getUser());
|
||||
|
||||
// Don't go looking outside the current loop.
|
||||
auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
|
||||
if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
|
||||
continue;
|
||||
|
||||
if (!Visited.insert(NarrowUser).second)
|
||||
continue;
|
||||
|
||||
Worklist.push_back(NarrowUser);
|
||||
|
||||
calculatePostIncRange(NarrowDef, NarrowUser);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Live IV Reduction - Minimize IVs live across the loop.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,175 @@
|
|||
; RUN: opt < %s -indvars -indvars-post-increment-ranges -S | FileCheck %s
|
||||
|
||||
target datalayout = "p:64:64:64-n32:64"
|
||||
|
||||
; When the IV in this loop is widened we want to widen this use as well:
|
||||
; icmp slt i32 %i.inc, %limit
|
||||
; In order to do this indvars needs to prove that the narrow IV def (%i.inc)
|
||||
; is non-negative from the range check inside of the loop.
|
||||
define void @test(i32* %base, i32 %limit, i32 %start) {
; CHECK-LABEL: @test(
; CHECK-NOT: trunc

for.body.lr.ph:
  br label %for.body

; Range check on the IV: on the true edge %i is unsigned-less-than 64.
for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %within_limits = icmp ult i32 %i, 64
  br i1 %within_limits, label %continue, label %for.end

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

; The post-increment value is compared against %limit with a signed
; predicate; this is the use that should be widened.
for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

for.end:
  br label %exit

exit:
  ret void
}
|
||||
|
||||
; Same shape as @test, but the guarded path is reached through the false edge
; of the range check.
define void @test_false_edge(i32* %base, i32 %limit, i32 %start) {
; CHECK-LABEL: @test_false_edge(
; CHECK-NOT: trunc

for.body.lr.ph:
  br label %for.body

; On the false edge %i is known not to be unsigned-greater-than 64.
for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %out_of_bounds = icmp ugt i32 %i, 64
  br i1 %out_of_bounds, label %for.end, label %continue

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

for.end:
  br label %exit

exit:
  ret void
}
|
||||
|
||||
; The bound of the range check is not a constant here; it is a loaded value
; whose !range metadata restricts it to [0, 64).
define void @test_range_metadata(i32* %array_length_ptr, i32* %base,
                                 i32 %limit, i32 %start) {
; CHECK-LABEL: @test_range_metadata(
; CHECK-NOT: trunc

for.body.lr.ph:
  br label %for.body

for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %array_length = load i32, i32* %array_length_ptr, !range !{i32 0, i32 64 }
  %within_limits = icmp ult i32 %i, %array_length
  br i1 %within_limits, label %continue, label %for.end

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

for.end:
  br label %exit

exit:
  ret void
}
|
||||
|
||||
; Negative version of the test above: nothing is known about the range of
|
||||
; the value loaded from %array_length_ptr.
|
||||
define void @test_neg(i32* %array_length_ptr, i32* %base,
                      i32 %limit, i32 %start) {
; CHECK-LABEL: @test_neg(
; CHECK: trunc i64

for.body.lr.ph:
  br label %for.body

; The loaded bound carries no !range metadata, so the check below gives no
; usable upper bound for %i.
for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %array_length = load i32, i32* %array_length_ptr
  %within_limits = icmp ult i32 %i, %array_length
  br i1 %within_limits, label %continue, label %for.end

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

for.end:
  br label %exit

exit:
  ret void
}
|
||||
|
||||
; The range-checked value is a transitive use of the IV (%i * 3) rather than
; the IV itself. The signed compare of its increment against %limit is
; expected to be performed on i64 after widening.
define void @test_transitive_use(i32* %base, i32 %limit, i32 %start) {
; CHECK-LABEL: @test_transitive_use(
; CHECK-NOT: trunc
; CHECK: %result = icmp slt i64

for.body.lr.ph:
  br label %for.body

for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %within_limits = icmp ult i32 %i, 64
  br i1 %within_limits, label %continue, label %for.end

; Second range check, this time on the derived value %i.mul.3.
continue:
  %i.mul.3 = mul nsw nuw i32 %i, 3
  %mul_within = icmp ult i32 %i.mul.3, 64
  br i1 %mul_within, label %guarded, label %continue.2

; Reached only through the true edge of the %mul_within check.
guarded:
  %i.mul.3.inc = add nsw nuw i32 %i.mul.3, 1
  %result = icmp slt i32 %i.mul.3.inc, %limit
  br i1 %result, label %continue.2, label %for.end

continue.2:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

for.end:
  br label %exit

exit:
  ret void
}
|
Loading…
Reference in New Issue