[IndVars] Remove monotonic checks with unknown exit count

Even if the exact exit count is unknown, we can still prove that this exit will not be taken. If we can prove that the predicate is monotonic, fulfilled on first & last iteration, and no overflow happened in between, then the check can be removed. Differential Revision: https://reviews.llvm.org/D87832 Reviewed By: apilipenko
2020-10-27 11:35:16 +07:00 · 2020-10-27 11:35:16 +07:00 · c6ca26c0bf
parent d38788b357
commit c6ca26c0bf
5 changed files with 105 additions and 16 deletions
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@ -961,6 +961,17 @@ public:
                                const SCEV *&InvariantLHS,
                                const SCEV *&InvariantRHS);

+  /// Return true if the result of the predicate LHS `Pred` RHS is loop
+  /// invariant with respect to L at given Context during at least first
+  /// MaxIter iterations.  Set InvariantPred, InvariantLHS and InvariantLHS so
+  /// that InvariantLHS `InvariantPred` InvariantRHS is the loop invariant form
+  /// of LHS `Pred` RHS. The predicate should be the loop's exit condition.
+  bool isLoopInvariantExitCondDuringFirstIterations(
+      ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+      const Instruction *Context, const SCEV *MaxIter,
+      ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+      const SCEV *&InvariantRHS);
+
  /// Simplify LHS and RHS in a comparison with predicate Pred. Return true
  /// iff any changes were made. If the operands are provably equal or
  /// unequal, LHS and RHS are set to the same value and Pred is set to either
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@ -9279,6 +9279,75 @@ bool ScalarEvolution::isLoopInvariantPredicate(
  return true;
 }

+bool ScalarEvolution::isLoopInvariantExitCondDuringFirstIterations(
+    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+    const Instruction *Context, const SCEV *MaxIter,
+    ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+    const SCEV *&InvariantRHS) {
+  // Try to prove the following set of facts:
+  // - The predicate is monotonic.
+  // - If the check does not fail on the 1st iteration:
+  //   - No overflow will happen during first MaxIter iterations;
+  //   - It will not fail on the MaxIter'th iteration.
+  // If the check does fail on the 1st iteration, we leave the loop and no
+  // other checks matter.
+
+  // If there is a loop-invariant, force it into the RHS, otherwise bail out.
+  if (!isLoopInvariant(RHS, L)) {
+    if (!isLoopInvariant(LHS, L))
+      return false;
+
+    std::swap(LHS, RHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
+  // TODO: Lift affinity limitation in the future.
+  if (!AR || AR->getLoop() != L || !AR->isAffine())
+    return false;
+
+  // The predicate must be relational (i.e. <, <=, >=, >).
+  if (!ICmpInst::isRelational(Pred))
+    return false;
+
+  // TODO: Support steps other than +/- 1.
+  const SCEV *Step = AR->getOperand(1);
+  auto *One = getOne(Step->getType());
+  auto *MinusOne = getNegativeSCEV(One);
+  if (Step != One && Step != MinusOne)
+    return false;
+
+  // Type mismatch here means that MaxIter is potentially larger than max
+  // unsigned value in start type, which mean we cannot prove no wrap for the
+  // indvar.
+  if (AR->getType() != MaxIter->getType())
+    return false;
+
+  // Value of IV on suggested last iteration.
+  const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
+  // Does it still meet the requirement?
+  if (!isKnownPredicateAt(Pred, Last, RHS, Context))
+    return false;
+  // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does
+  // not exceed max unsigned value of this type), this effectively proves
+  // that there is no wrap during the iteration. To prove that there is no
+  // signed/unsigned wrap, we need to check that
+  // Start <= Last for step = 1 or Start >= Last for step = -1.
+  ICmpInst::Predicate NoOverflowPred =
+      CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  if (Step == MinusOne)
+    NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
+  const SCEV *Start = AR->getStart();
+  if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
+    return false;
+
+  // Everything is fine.
+  InvariantPred = Pred;
+  InvariantLHS = Start;
+  InvariantRHS = RHS;
+  return true;
+}
+
 bool ScalarEvolution::isKnownPredicateViaConstantRanges(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  if (HasSameValue(LHS, RHS))
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -2307,9 +2307,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
 }

 // Returns true if the condition of \p BI being checked is invariant and can be
-// proved to be trivially true.
+// proved to be trivially true during at least first \p MaxIter iterations.
 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
-                          bool ProvingLoopExit) {
+                          bool ProvingLoopExit, const SCEV *MaxIter) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
@ -2335,7 +2335,20 @@ static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
    return true;

-  return false;
+  if (ProvingLoopExit)
+    return false;
+
+  ICmpInst::Predicate InvariantPred;
+  const SCEV *InvariantLHS, *InvariantRHS;
+
+  // Check if there is a loop-invariant predicate equivalent to our check.
+  if (!SE->isLoopInvariantExitCondDuringFirstIterations(
+           Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
+           InvariantRHS))
+    return false;
+
+  // Can we prove it to be trivially true?
+  return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI);
 }

 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
@ -2416,14 +2429,15 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
      // Okay, we do not know the exit count here. Can we at least prove that it
      // will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
-      auto OptimizeCond = [&](bool Inverted) {
-        if (isTrivialCond(L, BI, SE, Inverted)) {
+      auto OptimizeCond = [&](bool Inverted, const SCEV *MaxIter) {
+        if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) {
          FoldExit(ExitingBB, Inverted);
          return true;
        }
        return false;
      };
-      if (OptimizeCond(false) || OptimizeCond(true))
+      if (OptimizeCond(false, MaxExitCount) ||
+          OptimizeCond(true, MaxExitCount))
        Changed = true;
      continue;
    }
--- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll
+++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll
@ -12,8 +12,7 @@ define i32 @test_01(i32* %p) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], -1
-; CHECK-NEXT:    [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@ -93,8 +92,7 @@ define i32 @test_02(i32* %p) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT:    [[RC:%.*]] = icmp sgt i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@ -172,8 +170,7 @@ define i32 @test_03(i32* %p) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@ -211,8 +208,7 @@ define i32 @test_04(i32* %p) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], -1
-; CHECK-NEXT:    [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@ -71,8 +71,7 @@ define void @test_predicated_simple_signed(i32* %p, i32* %arr) {
 ; CHECK-NEXT:    br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]]
 ; CHECK:       range_check_block:
 ; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], 1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
 ; CHECK-NEXT:    [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4