[IRCE] Fix miscompile with range checks against negative values

In the patch rL329547, we have lifted the over-restrictive limitation on collected range checks, allowing to work with range checks with the end of their range not being provably non-negative. However it appeared that the non-negativity of this value was assumed in the utility function `ClampedSubtract`. In particular, its reasoning is based on the fact that `0 <= SINT_MAX - X`, which is not true if `X` is negative. The function `ClampedSubtract` is only called twice, once with `X = 0` (which is OK) and the second time with `X = IRC.getEnd()`, where we may now see the problem if the end is actually a negative value. In this case, we may sometimes miscompile. This patch is the conservative fix of the miscompile problem. Rather than rejecting non-provably non-negative `getEnd()` values, we will check it for non-negativity in runtime. For this, we use function `smax(smin(X, 0), -1) + 1` that is equal to `1` if `X` is non-negative and is equal to 0 if `X` is negative. If we multiply `Begin, End` of safe iteration space by this function calculated for `X = IRC.getEnd()`, we will get the original `[Begin, End)` if `IRC.getEnd()` was non-negative (and, thus, `ClampedSubtract` worked correctly) and the empty range `[0, 0)` in case if ` IRC.getEnd()` was negative. So we in fact prohibit execution of the main loop if at least one of range checks was made against a negative value (and we figured it out in runtime). It is still better than what we have before (non-negativity had to be proved in compile time) and prevents us from miscompile, however it is sometiles too restrictive for unsigned range checks against a negative value (which in fact can be eliminated). Once we re-implement `ClampedSubtract` in a way that it handles negative `X` correctly, this limitation can be lifted, too. Differential Revision: https://reviews.llvm.org/D46860 Reviewed By: samparker llvm-svn: 332809
2018-05-19 13:06:37 +00:00 · 2018-05-19 13:06:37 +00:00 · c0b268f90c
parent a76b64ff80
commit c0b268f90c
2 changed files with 639 additions and 2 deletions
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@ -813,6 +813,13 @@ static bool isKnownNonNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, BoundSCEV, Zero);
 }

+static bool isKnownNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
+                                  ScalarEvolution &SE) {
+  const SCEV *Zero = SE.getZero(BoundSCEV->getType());
+  return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
+         SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, BoundSCEV, Zero);
+}
+
 Optional<LoopStructure>
 LoopStructure::parseLoopStructure(ScalarEvolution &SE,
                                  BranchProbabilityInfo *BPI, Loop &L,
@ -1700,6 +1707,10 @@ InductiveRangeCheck::computeSafeIterationSpace(
  // values, depending on type of latch condition that defines IV iteration
  // space.
  auto ClampedSubtract = [&](const SCEV *X, const SCEV *Y) {
+    // FIXME: The current implementation assumes that X is in [0, SINT_MAX].
+    // This is required to ensure that SINT_MAX - X does not overflow signed and
+    // that X - Y does not overflow unsigned if Y is negative. Can we lift this
+    // restriction and make it work for negative X either?
    if (IsLatchSigned) {
      // X is a number from signed range, Y is interpreted as signed.
      // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
@ -1729,8 +1740,34 @@ InductiveRangeCheck::computeSafeIterationSpace(
  };
  const SCEV *M = SE.getMinusSCEV(C, A);
  const SCEV *Zero = SE.getZero(M->getType());
-  const SCEV *Begin = ClampedSubtract(Zero, M);
-  const SCEV *End = ClampedSubtract(getEnd(), M);
+
+  // This function returns SCEV equal to 1 if X is non-negative 0 otherwise.
+  auto SCEVCheckNonNegative = [&](const SCEV *X) {
+    const Loop *L = IndVar->getLoop();
+    const SCEV *One = SE.getOne(X->getType());
+    // Can we trivially prove that X is a non-negative or negative value?
+    if (isKnownNonNegativeInLoop(X, L, SE))
+      return One;
+    else if (isKnownNegativeInLoop(X, L, SE))
+      return Zero;
+    // If not, we will have to figure it out during the execution.
+    // Function smax(smin(X, 0), -1) + 1 equals to 1 if X >= 0 and 0 if X < 0.
+    const SCEV *NegOne = SE.getNegativeSCEV(One);
+    return SE.getAddExpr(SE.getSMaxExpr(SE.getSMinExpr(X, Zero), NegOne), One);
+  };
+  // FIXME: Current implementation of ClampedSubtract implicitly assumes that
+  // X is non-negative (in sense of a signed value). We need to re-implement
+  // this function in a way that it will correctly handle negative X as well.
+  // We use it twice: for X = 0 everything is fine, but for X = getEnd() we can
+  // end up with a negative X and produce wrong results. So currently we ensure
+  // that if getEnd() is negative then both ends of the safe range are zero.
+  // Note that this may pessimize elimination of unsigned range checks against
+  // negative values.
+  const SCEV *REnd = getEnd();
+  const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);
+
+  const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
+  const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);
  return InductiveRangeCheck::Range(Begin, End);
 }

--- a/llvm/test/Transforms/IRCE/rc-negative-bound.ll
+++ b/llvm/test/Transforms/IRCE/rc-negative-bound.ll
@ -0,0 +1,600 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes='require<branch-prob>,loop(irce)' -S < %s 2>&1 | FileCheck %s
+
+; CHECK-NOT: irce: in function test_01: constrained Loop
+; CHECK-NOT: irce: in function test_02: constrained Loop
+; CHECK: irce: in function test_03: constrained Loop
+
+; RC against known negative value. We should not do IRCE here.
+define void @test_01(i32 *%arr, i32 %n) {
+; CHECK-LABEL: @test_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], -9
+; CHECK-NEXT:    br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, -9
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; Same as test_01, but the latch condition is unsigned.
+define void @test_02(i32 *%arr, i32 %n) {
+; CHECK-LABEL: @test_02(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], -9
+; CHECK-NEXT:    br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, -9
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; RC against a value which is not known to be non-negative. Here we should
+; expand runtime checks against bound being positive or negative.
+define void @test_03(i32 *%arr, i32 %n, i32 %bound) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
+; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
+; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader5:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit6:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0
+; CHECK:       in.bounds.postloop:
+; CHECK-NEXT:    [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR_POSTLOOP]]
+; CHECK-NEXT:    [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !1, !irce.loop.clone !6
+;
+
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, %bound
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; RC against a value which is not known to be non-negative. Here we should
+; expand runtime checks against bound being positive or negative.
+define void @test_04(i32 *%arr, i32 %n, i32 %bound) {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
+; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader2:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit3:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0
+; CHECK:       in.bounds.postloop:
+; CHECK-NEXT:    [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR_POSTLOOP]]
+; CHECK-NEXT:    [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !7, !irce.loop.clone !6
+;
+
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, %bound
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; Same as test_01, unsigned range check.
+; FIXME: We could remove the range check here, but it does not happen due to the
+; limintation we posed to fix the miscompile (see comments in the method
+; computeSafeIterationSpace).
+define void @test_05(i32 *%arr, i32 %n) {
+; CHECK-LABEL: @test_05(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], -9
+; CHECK-NEXT:    br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp ult i32 %idx, -9
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; Same as test_02, unsigned range check.
+; FIXME: We could remove the range check here, but it does not happen due to the
+; limintation we posed to fix the miscompile (see comments in the method
+; computeSafeIterationSpace).
+define void @test_06(i32 *%arr, i32 %n) {
+; CHECK-LABEL: @test_06(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], -9
+; CHECK-NEXT:    br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp ult i32 %idx, -9
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; Same as test_03, unsigned range check.
+; FIXME: Currently we remove the check, but we will not execute the main loop if
+; %bound is negative (i.e. in [SINT_MAX + 1, UINT_MAX)). We should be able to
+; safely remove this check (see comments in the method
+; computeSafeIterationSpace).
+define void @test_07(i32 *%arr, i32 %n, i32 %bound) {
+; CHECK-LABEL: @test_07(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
+; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
+; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader5:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit6:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_POSTLOOP]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0
+; CHECK:       in.bounds.postloop:
+; CHECK-NEXT:    [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR_POSTLOOP]]
+; CHECK-NEXT:    [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !8, !irce.loop.clone !6
+;
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp ult i32 %idx, %bound
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+
+; Same as test_04, unsigned range check.
+; FIXME: Currently we remove the check, but we will not execute the main loop if
+; %bound is negative (i.e. in [SINT_MAX + 1, UINT_MAX)). We should be able to
+; safely remove this check (see comments in the method
+; computeSafeIterationSpace).
+define void @test_08(i32 *%arr, i32 %n, i32 %bound) {
+; CHECK-LABEL: @test_08(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
+; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       loop.preheader2:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
+; CHECK:       in.bounds:
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       out.of.bounds.loopexit:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK:       out.of.bounds.loopexit3:
+; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+; CHECK:       out.of.bounds:
+; CHECK-NEXT:    ret void
+; CHECK:       exit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[LOOP_POSTLOOP:%.*]]
+; CHECK:       loop.postloop:
+; CHECK-NEXT:    [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT:    [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_POSTLOOP]], [[BOUND]]
+; CHECK-NEXT:    br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof !0
+; CHECK:       in.bounds.postloop:
+; CHECK-NEXT:    [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT:    store i32 0, i32* [[ADDR_POSTLOOP]]
+; CHECK-NEXT:    [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], [[N]]
+; CHECK-NEXT:    br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !9, !irce.loop.clone !6
+;
+  entry:
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp ult i32 %idx, %bound
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !0
+
+  in.bounds:
+  %addr = getelementptr i32, i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp ult i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+  out.of.bounds:
+  ret void
+
+  exit:
+  ret void
+}
+!0 = !{!"branch_weights", i32 64, i32 4}