[IndVarSimplify] Use control-dependent range information to prove non-negativity

This change is motivated by the case where IndVarSimplify doesn't widen a comparison of an IV increment because it can't prove that the IV increment is non-negative. We end up with a redundant trunc of the widened increment in this example:

for.body:
  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
  %within_limits = icmp ult i32 %i, 64
  br i1 %within_limits, label %continue, label %for.end

continue:
  %i.i64 = zext i32 %i to i64
  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
  %val = load i32, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %i.inc = add nsw nuw i32 %i, 1
  %cmp = icmp slt i32 %i.inc, %limit
  br i1 %cmp, label %for.body, label %for.end

There is a range check inside of the loop which guarantees the IV to be non-negative. NSW on the increment guarantees that the increment is also non-negative. Teach IndVarSimplify to use the range check to prove non-negativity of loop increments.

Reviewed By: sanjoy

Differential Revision: https://reviews.llvm.org/D25738

llvm-svn: 284629
2016-10-19 18:59:03 +00:00 · 2016-10-19 18:59:03 +00:00 · f2d5dc5dc6
parent 16970a847c
commit f2d5dc5dc6
2 changed files with 323 additions and 2 deletions
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -81,6 +81,11 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

+// Hidden option, enabled by default, that gates the computation of
+// control-dependent ranges for post-increment IV uses when widening an IV.
+static cl::opt<bool> UsePostIncrementRanges(
+  "indvars-post-increment-ranges", cl::Hidden,
+  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
+  cl::init(true));
+
 namespace {
 struct RewritePhi;

@ -903,6 +908,33 @@ class WidenIV {
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

+  typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
+  // A map with control-dependent ranges for post increment IV uses. The key is
+  // a pair of IV def and a use of this def denoting the context. The value is
+  // a ConstantRange representing possible values of the def at the given
+  // context.
+  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+  /// Returns the range recorded in PostIncRangeInfos for \p Def at the
+  /// context of \p UseI, or None if nothing was recorded for that pair.
+  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+                                              Instruction *UseI) {
+    auto Found = PostIncRangeInfos.find(DefUserPair(Def, UseI));
+    if (Found == PostIncRangeInfos.end())
+      return None;
+    return Found->second;
+  }
+
+  void calculatePostIncRanges(PHINode *OrigPhi);
+  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+  /// Records \p R as a range of \p Def at the context of \p UseI. If a range
+  /// is already recorded for this pair, the stored range becomes the
+  /// intersection of \p R and the previously stored one.
+  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+    auto Inserted = PostIncRangeInfos.insert({DefUserPair(Def, UseI), R});
+    if (Inserted.second)
+      return;
+
+    // The pair was already present: refine the existing entry.
+    ConstantRange &Known = Inserted.first->second;
+    Known = R.intersectWith(Known);
+  }
+
 public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
          ScalarEvolution *SEv, DominatorTree *DTree,
@ -1429,7 +1461,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
 ///
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
-  bool NeverNegative =
+  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
@ -1439,7 +1471,15 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
    if (!Widened.insert(NarrowUser).second)
      continue;

-    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
+    bool NonNegativeUse = false;
+    if (!NonNegativeDef) {
+      // We might have control-dependent range information for this context.
+      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+    }
+
+    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+                               NonNegativeDef || NonNegativeUse);
  }
 }

@ -1479,6 +1519,19 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

+  // Iterate over IV uses (including transitive ones) looking for IV increments
+  // of the form 'add nsw %iv, <const>'. For each increment and each use of
+  // the increment calculate control-dependent range information based on
+  // dominating conditions inside of the loop (e.g. a range check inside of the
+  // loop). Calculated ranges are stored in PostIncRangeInfos map.
+  //
+  // Control-dependent range information is later used to prove that a narrow
+  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+  // this on demand because when pushNarrowIVUsers needs this information some
+  // of the dominating conditions might be already widened.
+  if (UsePostIncrementRanges)
+    calculatePostIncRanges(OrigPhi);
+
  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
@ -1523,6 +1576,99 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  return WidePhi;
 }

+/// Calculates a control-dependent range for \p NarrowDef at the context of
+/// \p NarrowUser by looking at dominating conditions inside of the loop.
+///
+/// Only defs of the form 'add nsw %x, C' with a non-negative constant C are
+/// handled; for anything else this is a no-op. Every range derived from a
+/// dominating branch is recorded through updatePostIncRangeInfo under the
+/// (NarrowDef, NarrowUser) key.
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+                                    Instruction *NarrowUser) {
+  using namespace llvm::PatternMatch;
+
+  // Bail out unless NarrowDef is 'add nsw NarrowDefLHS, NarrowDefRHS' with a
+  // non-negative constant on the right-hand side.
+  Value *NarrowDefLHS;
+  const APInt *NarrowDefRHS;
+  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+                                 m_APInt(NarrowDefRHS))) ||
+      !NarrowDefRHS->isNonNegative())
+    return;
+
+  // Given a branch condition and the edge taken (TrueDest), records the range
+  // NarrowDef can have if the condition is an icmp whose first operand is
+  // NarrowDefLHS. Conditions of any other shape are ignored.
+  auto UpdateRangeFromCondition = [&] (Value *Condition,
+                                       bool TrueDest) {
+    CmpInst::Predicate Pred;
+    Value *CmpRHS;
+    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
+                                 m_Value(CmpRHS))))
+      return;
+
+    // On the false edge it is the inverse predicate that holds.
+    CmpInst::Predicate P =
+            TrueDest ? Pred : CmpInst::getInversePredicate(Pred);  
+
+    // Constrain NarrowDefLHS by the comparison against the signed range of
+    // the other operand, then shift that range by the added constant (the
+    // add is nsw) to get a range for NarrowDef itself.
+    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+    auto CmpConstrainedLHSRange =
+            ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+    auto NarrowDefRange =
+            CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
+
+    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+  };
+
+  BasicBlock *NarrowUserBB = NarrowUser->getParent();
+  // If NarrowUserBB is statically unreachable, asking dominator queries may
+  // yield surprising results (e.g. the block may not have a dom tree node).
+  if (!DT->isReachableFromEntry(NarrowUserBB))
+    return;
+
+  // Walk up the dominator tree, starting at the immediate dominator of
+  // NarrowUserBB, for as long as the visited block belongs to the loop.
+  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+       L->contains(DTB->getBlock());
+       DTB = DTB->getIDom()) {
+    auto *BB = DTB->getBlock();
+    auto *TI = BB->getTerminator();
+
+    // Only conditional branches are looked at.
+    auto *BI = dyn_cast<BranchInst>(TI);
+    if (!BI || !BI->isConditional())
+      continue;
+
+    auto *TrueSuccessor = BI->getSuccessor(0);
+    auto *FalseSuccessor = BI->getSuccessor(1);
+
+    // A condition may only be used if the corresponding outgoing edge is a
+    // single edge that dominates the block containing NarrowUser.
+    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
+      return BBE.isSingleEdge() &&
+             DT->dominates(BBE, NarrowUser->getParent());
+    };
+
+    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
+      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
+
+    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
+      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
+  }
+}
+
+/// Fills in the PostIncRangeInfos map for the IV rooted at \p OrigPhi by
+/// visiting the users of the phi transitively and calling
+/// calculatePostIncRange for each (def, user) pair reached inside the loop.
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+  SmallPtrSet<Instruction *, 16> Seen;
+  SmallVector<Instruction *, 6> Pending;
+  Seen.insert(OrigPhi);
+  Pending.push_back(OrigPhi);
+
+  while (!Pending.empty()) {
+    Instruction *Def = Pending.pop_back_val();
+
+    for (User *U : Def->users()) {
+      auto *UserI = cast<Instruction>(U);
+
+      // Users outside of the current loop are not followed.
+      auto *UserLoop = (*LI)[UserI->getParent()];
+      bool InCurrentLoop = UserLoop && L->contains(UserLoop);
+
+      // Each in-loop user is handled once, for the first def that reaches it.
+      if (!InCurrentLoop || !Seen.insert(UserI).second)
+        continue;
+
+      calculatePostIncRange(Def, UserI);
+      Pending.push_back(UserI);
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //  Live IV Reduction - Minimize IVs live across the loop.
 //===----------------------------------------------------------------------===//
--- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
+++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
@ -0,0 +1,175 @@
+; RUN: opt < %s -indvars -indvars-post-increment-ranges -S | FileCheck %s
+
+target datalayout = "p:64:64:64-n32:64"
+
+; When the IV in this loop is widened we want to widen this use as well:
+; icmp slt i32 %i.inc, %limit
+; In order to do this indvars needs to prove that the narrow IV def (%i.inc)
+; is non-negative from the range check inside of the loop.
+define void @test(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: trunc
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %within_limits = icmp ult i32 %i, 64
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Same loop as in @test, but the range test is written with the opposite
+; predicate (icmp ugt) and the loop body is reached through the false edge of
+; the branch.
+define void @test_false_edge(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_false_edge(
+; CHECK-NOT: trunc
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %out_of_bounds = icmp ugt i32 %i, 64
+  br i1 %out_of_bounds, label %for.end, label %continue
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Here the IV is compared against a loaded array length instead of a
+; constant. The only bound on the loaded value comes from the !range metadata
+; attached to the load.
+define void @test_range_metadata(i32* %array_length_ptr, i32* %base,
+                                 i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_range_metadata(
+; CHECK-NOT: trunc
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %array_length = load i32, i32* %array_length_ptr, !range !{i32 0, i32 64 }
+  %within_limits = icmp ult i32 %i, %array_length
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Negative version of the test above: the load carries no !range metadata, so
+; we don't know anything about the range of the loaded array length and a
+; trunc of the wide IV is expected to remain.
+define void @test_neg(i32* %array_length_ptr, i32* %base,
+                      i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_neg(
+; CHECK: trunc i64
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %array_length = load i32, i32* %array_length_ptr
+  %within_limits = icmp ult i32 %i, %array_length
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; The increment compared in %result (%i.mul.3.inc) is not an increment of the
+; IV itself but of %i.mul.3, a transitive use of the IV that is guarded by its
+; own range test. The comparison is expected to be widened to i64 as well.
+define void @test_transitive_use(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_transitive_use(
+; CHECK-NOT: trunc
+; CHECK: %result = icmp slt i64
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %within_limits = icmp ult i32 %i, 64
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.mul.3 = mul nsw nuw i32 %i, 3
+  %mul_within = icmp ult i32 %i.mul.3, 64
+  br i1 %mul_within, label %guarded, label %continue.2
+  
+guarded:
+  %i.mul.3.inc = add nsw nuw i32 %i.mul.3, 1
+  %result = icmp slt i32 %i.mul.3.inc, %limit
+  br i1 %result, label %continue.2, label %for.end
+
+continue.2:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}