[IndVars] Use knowledge about execution on last iteration when removing checks

If we know that some check will not be executed on the last iteration, we can use this fact to eliminate that check. Differential Revision: https://reviews.llvm.org/D88210 Reviewed By: ebrevnov
2020-11-03 13:18:46 +07:00 · 2020-11-03 13:18:46 +07:00 · f847094c24
parent b969dfe26f
commit f847094c24
2 changed files with 37 additions and 6 deletions
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -2418,6 +2418,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  };

  bool Changed = false;
+  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
@ -2425,18 +2426,49 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
      // Okay, we do not know the exit count here. Can we at least prove that it
      // will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
-      auto OptimizeCond = [&](bool Inverted) {
-        if (isTrivialCond(L, BI, SE, Inverted, MaxExitCount)) {
+      auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit](
+          bool Inverted, bool SkipLastIter) {
+        const SCEV *MaxIter = MaxExitCount;
+        if (SkipLastIter) {
+          const SCEV *One = SE->getOne(MaxIter->getType());
+          MaxIter = SE->getMinusSCEV(MaxIter, One);
+        }
+        if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) {
          FoldExit(ExitingBB, Inverted);
          return true;
        }
        return false;
      };
-      if (OptimizeCond(false) || OptimizeCond(true))
+
+      // TODO: We might have proved that we can skip the last iteration for
+      // this check. In this case, we only want to check the condition on the
+      // pre-last iteration (MaxExitCount - 1). However, there is a nasty
+      // corner case:
+      //
+      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
+      //
+      // If we could not prove that len != 0, then we also could not prove that
+      // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
+      // OptimizeCond will likely not prove anything for it, even if it could
+      // prove the same fact for len.
+      //
+      // As a temporary solution, we query both last and pre-last iterations in
+      // hope that we will be able to prove triviality for at least one of
+      // them. We can stop querying MaxExitCount for this case once SCEV
+      // understands that (MaxExitCount - 1) will not overflow here.
+      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
+      else if (SkipLastIter)
+        if (OptimizeCond(false, true) || OptimizeCond(true, true))
+          Changed = true;
      continue;
    }

+    if (MaxExitCount == ExitCount)
+      // If the loop has more than 1 iteration, all further checks will be
+      // executed on one fewer iteration.
+      SkipLastIter = true;
+
    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@ -2,7 +2,7 @@
 ; RUN: opt -indvars -S < %s | FileCheck %s
 ; RUN: opt -passes=indvars -S < %s | FileCheck %s

-; TODO: should be able to remove the range check basing on the following facts:
+; Check that we are able to remove the range check based on the following facts:
 ; 0 <= len <= MAX_INT [1];
 ; iv starts from len and goes down stopping at zero and [1], therefore
 ;   0 <= iv <= len [2];
@ -21,8 +21,7 @@ define void @test_predicated_simple_unsigned(i32* %p, i32* %arr) {
 ; CHECK-NEXT:    br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]]
 ; CHECK:       range_check_block:
 ; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], 1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
 ; CHECK-NEXT:    [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4