Extend ScalarEvolution's multiple-exit support to compute exact

trip counts in more cases.

Generalize ScalarEvolution's isLoopGuardedByCond code to recognize
And and Or conditions, splitting the code out into an
isNecessaryCond helper function so that it can evaluate Ands and Ors
recursively, and make SCEVExpander be much more aggressive about
hoisting instructions out of loops.

test/CodeGen/X86/pr3495.ll has an additional instruction now, but
it appears to be due to an arbitrary register allocation difference.

llvm-svn: 74048
This commit is contained in:
Dan Gohman 2009-06-24 01:18:18 +00:00
parent 33420090ae
commit f19aeec3f5
6 changed files with 192 additions and 182 deletions

View File

@ -334,6 +334,12 @@ namespace llvm {
/// found.
BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
/// isNecessaryCond - Test whether the given CondValue value is a condition
/// which is at least as strict as the one described by Pred, LHS, and RHS.
bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
bool Inverse);
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence

View File

@ -2813,7 +2813,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
const SCEV* BECount = CouldNotCompute;
const SCEV* MaxBECount = CouldNotCompute;
bool CouldNotComputeBECount = false;
bool CouldNotComputeMaxBECount = false;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BackedgeTakenInfo NewBTI =
ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@ -2826,25 +2825,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
} else if (!CouldNotComputeBECount) {
if (BECount == CouldNotCompute)
BECount = NewBTI.Exact;
else {
// TODO: More analysis could be done here. For example, a
// loop with a short-circuiting && operator has an exact count
// of the min of both sides.
CouldNotComputeBECount = true;
BECount = CouldNotCompute;
}
}
if (NewBTI.Max == CouldNotCompute) {
// We couldn't compute an maximum value for this exit, so
// we won't be able to compute an maximum value for the loop.
CouldNotComputeMaxBECount = true;
MaxBECount = CouldNotCompute;
} else if (!CouldNotComputeMaxBECount) {
if (MaxBECount == CouldNotCompute)
MaxBECount = NewBTI.Max;
else
MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
}
if (MaxBECount == CouldNotCompute)
MaxBECount = NewBTI.Max;
else if (NewBTI.Max != CouldNotCompute)
MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
}
return BackedgeTakenInfo(BECount, MaxBECount);
@ -2925,9 +2912,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB) {
// Check if the controlling expression for this loop is an and or or. In
// such cases, an exact backedge-taken count may be infeasible, but a
// maximum count may still be feasible.
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
@ -3899,90 +3884,113 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
if (!ICI) continue;
// Now that we found a conditional branch that dominates the loop, check to
// see if it is the comparison we are looking for.
Value *PreCondLHS = ICI->getOperand(0);
Value *PreCondRHS = ICI->getOperand(1);
ICmpInst::Predicate Cond;
if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
Cond = ICI->getPredicate();
else
Cond = ICI->getInversePredicate();
if (Cond == Pred)
; // An exact match.
else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
; // The actual condition is beyond sufficient.
else
// Check a few special cases.
switch (Cond) {
case ICmpInst::ICMP_UGT:
if (Pred == ICmpInst::ICMP_ULT) {
std::swap(PreCondLHS, PreCondRHS);
Cond = ICmpInst::ICMP_ULT;
break;
}
continue;
case ICmpInst::ICMP_SGT:
if (Pred == ICmpInst::ICMP_SLT) {
std::swap(PreCondLHS, PreCondRHS);
Cond = ICmpInst::ICMP_SLT;
break;
}
continue;
case ICmpInst::ICMP_NE:
// Expressions like (x >u 0) are often canonicalized to (x != 0),
// so check for this case by checking if the NE is comparing against
// a minimum or maximum constant.
if (!ICmpInst::isTrueWhenEqual(Pred))
if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
const APInt &A = CI->getValue();
switch (Pred) {
case ICmpInst::ICMP_SLT:
if (A.isMaxSignedValue()) break;
continue;
case ICmpInst::ICMP_SGT:
if (A.isMinSignedValue()) break;
continue;
case ICmpInst::ICMP_ULT:
if (A.isMaxValue()) break;
continue;
case ICmpInst::ICMP_UGT:
if (A.isMinValue()) break;
continue;
default:
continue;
}
Cond = ICmpInst::ICMP_NE;
// NE is symmetric but the original comparison may not be. Swap
// the operands if necessary so that they match below.
if (isa<SCEVConstant>(LHS))
std::swap(PreCondLHS, PreCondRHS);
break;
}
continue;
default:
// We weren't able to reconcile the condition.
continue;
}
if (!PreCondLHS->getType()->isInteger()) continue;
const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
if ((HasSameValue(LHS, PreCondLHSSCEV) &&
HasSameValue(RHS, PreCondRHSSCEV)) ||
(HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
return true;
}
return false;
}
/// isNecessaryCond - Test whether the given CondValue value is a condition
/// which is at least as strict as the one described by Pred, LHS, and RHS.
bool ScalarEvolution::isNecessaryCond(Value *CondValue,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
bool Inverse) {
// Recursivly handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
}
}
ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
if (!ICI) return false;
// Now that we found a conditional branch that dominates the loop, check to
// see if it is the comparison we are looking for.
Value *PreCondLHS = ICI->getOperand(0);
Value *PreCondRHS = ICI->getOperand(1);
ICmpInst::Predicate Cond;
if (Inverse)
Cond = ICI->getInversePredicate();
else
Cond = ICI->getPredicate();
if (Cond == Pred)
; // An exact match.
else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
; // The actual condition is beyond sufficient.
else
// Check a few special cases.
switch (Cond) {
case ICmpInst::ICMP_UGT:
if (Pred == ICmpInst::ICMP_ULT) {
std::swap(PreCondLHS, PreCondRHS);
Cond = ICmpInst::ICMP_ULT;
break;
}
return false;
case ICmpInst::ICMP_SGT:
if (Pred == ICmpInst::ICMP_SLT) {
std::swap(PreCondLHS, PreCondRHS);
Cond = ICmpInst::ICMP_SLT;
break;
}
return false;
case ICmpInst::ICMP_NE:
// Expressions like (x >u 0) are often canonicalized to (x != 0),
// so check for this case by checking if the NE is comparing against
// a minimum or maximum constant.
if (!ICmpInst::isTrueWhenEqual(Pred))
if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
const APInt &A = CI->getValue();
switch (Pred) {
case ICmpInst::ICMP_SLT:
if (A.isMaxSignedValue()) break;
return false;
case ICmpInst::ICMP_SGT:
if (A.isMinSignedValue()) break;
return false;
case ICmpInst::ICMP_ULT:
if (A.isMaxValue()) break;
return false;
case ICmpInst::ICMP_UGT:
if (A.isMinValue()) break;
return false;
default:
return false;
}
Cond = ICmpInst::ICMP_NE;
// NE is symmetric but the original comparison may not be. Swap
// the operands if necessary so that they match below.
if (isa<SCEVConstant>(LHS))
std::swap(PreCondLHS, PreCondRHS);
break;
}
return false;
default:
// We weren't able to reconcile the condition.
return false;
}
if (!PreCondLHS->getType()->isInteger()) return false;
const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
return (HasSameValue(LHS, PreCondLHSSCEV) &&
HasSameValue(RHS, PreCondRHSSCEV)) ||
(HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
}
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.

View File

@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
if (Argument *A = dyn_cast<Argument>(V)) {
// Check to see if there is already a cast!
for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
UI != E; ++UI) {
UI != E; ++UI)
if ((*UI)->getType() == Ty)
if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
if (CI->getOpcode() == opcode) {
// If the cast isn't the first instruction of the function, move it.
if (BasicBlock::iterator(CI) !=
A->getParent()->getEntryBlock().begin()) {
// If the CastInst is the insert point, change the insert point.
if (CI == InsertPt) ++InsertPt;
// Splice the cast at the beginning of the entry block.
CI->moveBefore(A->getParent()->getEntryBlock().begin());
// Recreate the cast at the beginning of the entry block.
// The old cast is left in place in case it is being used
// as an insert point.
Instruction *NewCI =
CastInst::Create(opcode, V, Ty, "",
A->getParent()->getEntryBlock().begin());
NewCI->takeName(CI);
CI->replaceAllUsesWith(NewCI);
return NewCI;
}
return CI;
}
}
Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
A->getParent()->getEntryBlock().begin());
InsertedValues.insert(I);
@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
It = cast<InvokeInst>(I)->getNormalDest()->begin();
while (isa<PHINode>(It)) ++It;
if (It != BasicBlock::iterator(CI)) {
// If the CastInst is the insert point, change the insert point.
if (CI == InsertPt) ++InsertPt;
// Splice the cast immediately after the operand in question.
CI->moveBefore(It);
// Recreate the cast at the beginning of the entry block.
// The old cast is left in place in case it is being used
// as an insert point.
Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
NewCI->takeName(CI);
CI->replaceAllUsesWith(NewCI);
return NewCI;
}
return CI;
}
@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
}
}
Value *RestV = expand(Rest);
return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
// Just do a normal add. Pre-expand the operands to suppress folding.
return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
SE.getUnknown(expand(Rest))));
}
// {0,+,1} --> Insert a canonical induction variable into the loop!
@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
getOrInsertCanonicalInductionVariable(L, Ty);
// If this is a simple linear addrec, emit it now as a special case.
if (S->isAffine()) { // {0,+,F} --> i*F
Value *F = expandCodeFor(S->getOperand(1), Ty);
// If the insert point is directly inside of the loop, emit the multiply at
// the insert point. Otherwise, L is a loop that is a parent of the insert
// point loop. If we can, move the multiply to the outer most loop that it
// is safe to be in.
BasicBlock::iterator MulInsertPt = getInsertionPoint();
Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
if (InsertPtLoop != L && InsertPtLoop &&
L->contains(InsertPtLoop->getHeader())) {
do {
// If we cannot hoist the multiply out of this loop, don't.
if (!InsertPtLoop->isLoopInvariant(F)) break;
BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
// If this loop hasn't got a preheader, we aren't able to hoist the
// multiply.
if (!InsertPtLoopPH)
break;
// Otherwise, move the insert point to the preheader.
MulInsertPt = InsertPtLoopPH->getTerminator();
InsertPtLoop = InsertPtLoop->getParentLoop();
} while (InsertPtLoop != L);
}
return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
}
if (S->isAffine()) // {0,+,F} --> i*F
return
expand(SE.getTruncateOrNoop(
SE.getMulExpr(SE.getUnknown(I),
SE.getNoopOrAnyExtend(S->getOperand(1),
I->getType())),
Ty));
// If this is a chain of recurrences, turn it into a closed form, using the
// folders, then expandCodeFor the closed form. This allows the folders to
@ -672,7 +658,30 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (I != InsertedExpressions.end())
return I->second;
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
BasicBlock::iterator InsertPt = getInsertionPoint();
BasicBlock::iterator SaveInsertPt = InsertPt;
for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
L = L->getParentLoop())
if (S->isLoopInvariant(L)) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && S->hasComputableLoopEvolution(L))
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt)) ++InsertPt;
break;
}
setInsertionPoint(InsertPt);
Value *V = visit(S);
setInsertionPoint(SaveInsertPt);
InsertedExpressions[S] = V;
return V;
}
@ -686,6 +695,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isInteger() && "Can only insert integer induction variables!");
const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
SE.getIntegerSCEV(1, Ty), L);
return expand(H);
SE.getIntegerSCEV(1, Ty), L);
BasicBlock::iterator SaveInsertPt = getInsertionPoint();
Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
setInsertionPoint(SaveInsertPt);
return V;
}

View File

@ -104,7 +104,8 @@ namespace {
void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
void RewriteIVExpressions(Loop *L, const Type *LargestType,
SCEVExpander &Rewriter);
SCEVExpander &Rewriter,
BasicBlock::iterator InsertPt);
void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
@ -170,6 +171,8 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
}
// Expand the code for the iteration count into the preheader of the loop.
assert(RHS->isLoopInvariant(L) &&
"Computed iteration count is not loop invariant!");
BasicBlock *Preheader = L->getLoopPreheader();
Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
Preheader->getTerminator());
@ -434,10 +437,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
ExitingBlock, BI, Rewriter);
}
Rewriter.setInsertionPoint(Header->getFirstNonPHI());
BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
// Rewrite IV-derived expressions. Clears the rewriter cache.
RewriteIVExpressions(L, LargestType, Rewriter);
RewriteIVExpressions(L, LargestType, Rewriter, InsertPt);
// The Rewriter may only be used for isInsertedInstruction queries from this
// point on.
@ -462,7 +465,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
}
void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
SCEVExpander &Rewriter) {
SCEVExpander &Rewriter,
BasicBlock::iterator InsertPt) {
SmallVector<WeakVH, 16> DeadInsts;
// Rewrite all induction variable expressions in terms of the canonical
@ -488,29 +492,17 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
// Compute the final addrec to expand into code.
const SCEV* AR = IU->getReplacementExpr(*UI);
Value *NewVal = 0;
if (AR->isLoopInvariant(L)) {
BasicBlock::iterator I = Rewriter.getInsertionPoint();
// Expand loop-invariant values in the loop preheader. They will
// be sunk to the exit block later, if possible.
NewVal =
Rewriter.expandCodeFor(AR, UseTy,
L->getLoopPreheader()->getTerminator());
Rewriter.setInsertionPoint(I);
++NumReplaced;
} else {
// FIXME: It is an extremely bad idea to indvar substitute anything more
// complex than affine induction variables. Doing so will put expensive
// polynomial evaluations inside of the loop, and the str reduction pass
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
if (!Stride->isLoopInvariant(L))
continue;
// FIXME: It is an extremely bad idea to indvar substitute anything more
// complex than affine induction variables. Doing so will put expensive
// polynomial evaluations inside of the loop, and the str reduction pass
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
continue;
// Now expand it into actual Instructions and patch it into place.
NewVal = Rewriter.expandCodeFor(AR, UseTy);
}
// Now expand it into actual Instructions and patch it into place.
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
// Patch the new value into place.
if (Op->hasName())

View File

@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
const SCEV* NewValSCEV = SE->getUnknown(Base);
// If there is no immediate value, skip the next part.
if (!Imm->isZero()) {
// If we are inserting the base and imm values in the same block, make sure
// to adjust the IP position if insertion reused a result.
if (IP == BaseInsertPt)
IP = Rewriter.getInsertionPoint();
// Always emit the immediate (if non-zero) into the same block as the user.
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
}
// Always emit the immediate into the same block as the user.
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}

View File

@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
; PR3495
; The loop reversal kicks in once here, resulting in one fewer instruction.