[LICM/mustexec] Extend first iteration must execute logic to fcmps

This builds on the work from https://reviews.llvm.org/D44287. It turned out supporting fcmp was much easier than I realized, so let's do that now.

As an aside, our -O3 handling of floating point IVs leaves a lot to be desired. We do convert the float IV to an integer IV, but do so late enough that many other optimizations are missed (e.g. we don't vectorize).

Differential Revision: https://reviews.llvm.org/D44542

llvm-svn: 327722
This commit is contained in:
Philip Reames 2018-03-16 16:33:49 +00:00
parent a0cd09d4a2
commit 8a106272e8
2 changed files with 37 additions and 10 deletions

View File

@ -1530,20 +1530,19 @@ static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator());
if (!BI || !BI->isConditional())
return false;
// todo: handle fcmp someday
auto *Cond = dyn_cast<CmpInst>(BI->getCondition());
if (!Cond)
return false;
// todo: this would be a lot more powerful if we used scev, but all the
// plumbing is currently missing to pass a pointer in from the pass
auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
if (!ICI)
return false;
// Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
auto *LHS = dyn_cast<PHINode>(ICI->getOperand(0));
auto *RHS = ICI->getOperand(1);
auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0));
auto *RHS = Cond->getOperand(1);
if (!LHS || LHS->getParent() != CurLoop->getHeader())
return false;
auto DL = ExitBlock->getModule()->getDataLayout();
auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
auto *SimpleValOrNull = SimplifyICmpInst(ICI->getPredicate(),
auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(),
IVStart, RHS,
{DL, /*TLI*/ nullptr,
DT, /*AC*/ nullptr, BI});

View File

@ -33,6 +33,34 @@ fail:
ret i32 -1
}
; Same as test1, but with a floating point IV and fcmp
; Loop driven by a float induction variable that starts at 0.0 and is
; advanced by 1.0 per iteration. The in-loop guard compares that variable
; against 2000.0 with an ordered less-than, which holds for the start value,
; so the 'continue' block is reached on the first iteration.
define i32 @test_fcmp(i32* noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: @test_fcmp(
entry:
; The two directives below expect the load of %a to be immediately followed
; by the branch into the loop, i.e. the load no longer sits in 'continue'.
; CHECK: %i1 = load i32, i32* %a, align 4
; CHECK-NEXT: br label %for.body
br label %for.body
for.body:
; %iv is the float induction variable; %acc accumulates the loaded values.
%iv = phi float [ 0.0, %entry ], [ %inc, %continue ]
%acc = phi i32 [ 0, %entry ], [ %add, %continue ]
; Guard: stay in the loop while %iv < 2000.0, otherwise leave through 'fail'.
%r.chk = fcmp olt float %iv, 2000.0
br i1 %r.chk, label %continue, label %fail
continue:
; Loop-invariant load from %a (the pointer is marked noalias and readonly).
%i1 = load i32, i32* %a, align 4
%add = add nsw i32 %i1, %acc
%inc = fadd float %iv, 1.0
; Normal exit once the incremented induction variable exceeds 1000.0.
%exitcond = fcmp ogt float %inc, 1000.0
br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.cond.cleanup:
ret i32 %add
fail:
; Side exit taken when the guard fails: calls @f and returns -1.
call void @f()
ret i32 -1
}
; Count down from a.length w/entry guard
; TODO: currently unable to prove the following:
; ule i32 (add nsw i32 %len, -1), %len where len is [0, 512]