[LICM] Ignore exits provably not taken on first iteration when computing must execute

It is common to have conditional exits within a loop which are known not to be taken on some iterations, but not necessarily all. This patches extends our reasoning around guaranteed to execute (used when establishing whether it's safe to dereference a location from the preheader) to handle the case where an exit is known not to be taken on the first iteration and the instruction of interest *is* known to be taken on the first iteration.

This case comes up in two major ways:
* If we have a range check which we've been unable to eliminate, we frequently know that it doesn't fail on the first iteration.
* Pass ordering. We may have a check which will be eliminated through some sequence of other passes, but depending on the exact pass sequence we might never actually do so or we might miss other optimizations from passes run before the check is finally eliminated.

The initial version (here) is implemented via InstSimplify. At the moment, it catches a few cases, but misses a lot too. I added test cases for missing cases in InstSimplify which I'll follow up on separately. Longer term, we should probably wire SCEV through to here to get much smarter loop aware simplification of the first iteration predicate.

Differential Revision: https://reviews.llvm.org/D44287

llvm-svn: 327664
This commit is contained in:
Philip Reames 2018-03-15 21:04:28 +00:00
parent d4254ac1b9
commit a21d5f1e18
3 changed files with 347 additions and 1 deletions

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@ -1515,6 +1516,46 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
/// Return true if we can prove that the given ExitBlock is not reached on the
/// first iteration of the given loop. That is, the backedge of the loop must
/// be executed before the ExitBlock is executed in any dynamic execution trace.
static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
const DominatorTree *DT,
const Loop *CurLoop) {
auto *CondExitBlock = ExitBlock->getSinglePredecessor();
if (!CondExitBlock)
// expect unique exits
return false;
assert(CurLoop->contains(CondExitBlock) && "meaning of exit block");
auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator());
if (!BI || !BI->isConditional())
return false;
// todo: handle fcmp someday
// todo: this would be a lot more powerful if we used scev, but all the
// plumbing is currently missing to pass a pointer in from the pass
auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
if (!ICI)
return false;
// Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
auto *LHS = dyn_cast<PHINode>(ICI->getOperand(0));
auto *RHS = ICI->getOperand(1);
if (!LHS || LHS->getParent() != CurLoop->getHeader())
return false;
auto DL = ExitBlock->getModule()->getDataLayout();
auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
auto *SimpleValOrNull = SimplifyICmpInst(ICI->getPredicate(),
IVStart, RHS,
{DL, /*TLI*/ nullptr,
DT, /*AC*/ nullptr, BI});
auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
if (!SimpleCst)
return false;
if (ExitBlock == BI->getSuccessor(0))
return SimpleCst->isZeroValue();
assert(ExitBlock == BI->getSuccessor(1) && "implied by above");
return SimpleCst->isAllOnesValue();
}
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
@ -1537,6 +1578,22 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
if (SafetyInfo->MayThrow)
return false;
// Note: There are two styles of reasoning intermixed below for
// implementation efficiency reasons. They are:
// 1) If we can prove that the instruction dominates all exit blocks, then we
// know the instruction must have executed on *some* iteration before we
// exit. We do not prove *which* iteration the instruction must execute on.
// 2) If we can prove that the instruction dominates the latch and all exits
// which might be taken on the first iteration, we know the instruction must
// execute on the first iteration. This second style allows a conditional
// exit before the instruction of interest which is provably not taken on the
// first iteration. This is a quite common case for range check like
// patterns. TODO: support loops with multiple latches.
const bool InstDominatesLatch =
CurLoop->getLoopLatch() != nullptr &&
DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
// Get the exit blocks for the current loop.
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
@ -1544,7 +1601,9 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
// Verify that the block dominates each of the exit blocks of the loop.
for (BasicBlock *ExitBlock : ExitBlocks)
if (!DT->dominates(Inst.getParent(), ExitBlock))
return false;
if (!InstDominatesLatch ||
!CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
return false;
// As a degenerate case, if the loop is statically infinite then we haven't
// proven anything since there are no exit blocks.

View File

@ -0,0 +1,249 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare void @f() nounwind
; constant fold on first ieration
define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test1(
entry:
; CHECK: %i1 = load i32, i32* %a, align 4
; CHECK-NEXT: br label %for.body
br label %for.body
for.body:
%iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue ]
%r.chk = icmp ult i32 %iv, 2000
br i1 %r.chk, label %continue, label %fail
continue:
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
; Count down from a.length w/entry guard
; TODO: currently unable to prove the following:
; ule i32 (add nsw i32 %len, -1), %len where len is [0, 512]
define i32 @test2(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test2(
entry:
%len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
%is.non.pos = icmp eq i32 %len, 0
br i1 %is.non.pos, label %fail, label %preheader
preheader:
%lenminusone = add nsw i32 %len, -1
br label %for.body
for.body:
%iv = phi i32 [ %lenminusone, %preheader ], [ %dec, %continue ]
%acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
%r.chk = icmp ule i32 %iv, %len
br i1 %r.chk, label %continue, label %fail
continue:
; CHECK-LABEL: continue
; CHECK: %i1 = load i32, i32* %a, align 4
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%dec = add nsw i32 %iv, -1
%exitcond = icmp eq i32 %dec, 0
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
; trivially true for zero
define i32 @test3(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test3(
entry:
%len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
%is.zero = icmp eq i32 %len, 0
br i1 %is.zero, label %fail, label %preheader
preheader:
; CHECK: %i1 = load i32, i32* %a, align 4
; CHECK-NEXT: br label %for.body
br label %for.body
for.body:
%iv = phi i32 [ 0, %preheader ], [ %inc, %continue ]
%acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
%r.chk = icmp ule i32 %iv, %len
br i1 %r.chk, label %continue, label %fail
continue:
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
; requires fact length is non-zero
; TODO: IsKnownNonNullFromDominatingConditions is currently only be done for
; pointers; should handle integers too
define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test4(
entry:
%len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
%is.zero = icmp eq i32 %len, 0
br i1 %is.zero, label %fail, label %preheader
preheader:
br label %for.body
for.body:
%iv = phi i32 [ 0, %preheader ], [ %inc, %continue ]
%acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
%r.chk = icmp ult i32 %iv, %len
br i1 %r.chk, label %continue, label %fail
continue:
; CHECK-LABEL: continue
; CHECK: %i1 = load i32, i32* %a, align 4
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
; variation on test1 with branch swapped
define i32 @test-brswap(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test-brswap(
entry:
; CHECK: %i1 = load i32, i32* %a, align 4
; CHECK-NEXT: br label %for.body
br label %for.body
for.body:
%iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue ]
%r.chk = icmp ugt i32 %iv, 2000
br i1 %r.chk, label %fail, label %continue
continue:
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
define i32 @test-nonphi(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test-nonphi(
entry:
br label %for.body
for.body:
; CHECK-LABEL: continue
; CHECK: %i1 = load i32, i32* %a, align 4
%iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue ]
%xor = xor i32 %iv, 72
%r.chk = icmp ugt i32 %xor, 2000
br i1 %r.chk, label %fail, label %continue
continue:
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
define i32 @test-wrongphi(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test-wrongphi(
entry:
br label %for.body
for.body:
%iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue ]
br label %dummy_block
dummy_block:
%wrongphi = phi i32 [12, %for.body]
%r.chk = icmp ugt i32 %wrongphi, 2000
br i1 %r.chk, label %fail, label %continue
continue:
; CHECK-LABEL: continue
; CHECK: %i1 = load i32, i32* %a, align 4
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}
; This works because loop-simplify is run implicitly, but test for it anyways
define i32 @test-multiple-latch(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test-multiple-latch(
entry:
; CHECK: %i1 = load i32, i32* %a, align 4
; CHECK-NEXT: br label %for.body
br label %for.body
for.body:
%iv = phi i32 [ 0, %entry ], [ %inc, %continue1 ], [ %inc, %continue2 ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue1 ], [ %add, %continue2 ]
%r.chk = icmp ult i32 %iv, 2000
br i1 %r.chk, label %continue1, label %fail
continue1:
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = add nuw nsw i32 %iv, 1
%cmp = icmp eq i32 %add, 0
br i1 %cmp, label %continue2, label %for.body
continue2:
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
call void @f()
ret i32 -1
}

View File

@ -405,6 +405,44 @@ Out:
}
; Early exit is known not to be taken on first iteration and thus doesn't
; effect whether load is known to execute.
define void @test11(i32 %i) {
Entry:
br label %Loop
; CHECK-LABEL: @test11(
; CHECK: Entry:
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: br label %Loop
Loop: ; preds = %Loop, %0
%j = phi i32 [ 0, %Entry ], [ %Next, %body ] ; <i32> [#uses=1]
%early.test = icmp ult i32 %j, 32
br i1 %early.test, label %body, label %Early
body:
%x = load i32, i32* @X ; <i32> [#uses=1]
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
%Next = add i32 %j, 1 ; <i32> [#uses=2]
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
br i1 %cond, label %Out, label %Loop
Early:
; CHECK: Early:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X
; CHECK-NEXT: ret void
ret void
Out:
ret void
; CHECK: Out:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X
; CHECK-NEXT: ret void
}
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}