Re-implement the main strength-reduction portion of LoopStrengthReduction.

This new version is much more aggressive about doing "full" reduction in cases where it reduces register pressure, and it is also more aggressive about rewriting induction variables to count down (or up) to zero when doing so reduces register pressure. It currently uses fairly simplistic algorithms for finding reuse opportunities, but it introduces a new framework that allows it to combine multiple strategies at once to form hybrid solutions, instead of doing all full-reduction or all base+index.

llvm-svn: 94061
This commit is contained in:
parent 626aba43d0
commit 51ad99d2c5
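As a rough illustration of the count-down rewriting described above, here is a minimal sketch in C (not taken from this commit; the function and variable names are hypothetical). A loop that counts up keeps both the induction variable and the trip count live across the loop body, while the count-down form compares against zero and frees the register that held the trip count.

    /* Count-up form: both i and n stay live inside the loop. */
    void store_up(int *p, int n, int a) {
      for (int i = 0; i != n; ++i)
        p[i] = a;
    }

    /* Count-down form preferred when profitable: the exit test is a
       compare against zero, so n no longer needs its own register. */
    void store_down(int *p, int n, int a) {
      int *q = p + n;
      for (int i = n; i != 0; --i)
        *--q = a;
    }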
@@ -26,19 +26,44 @@ namespace llvm {
 /// Clients should create an instance of this class when rewriting is needed,
 /// and destroy it when finished to allow the release of the associated
 /// memory.
-struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
 ScalarEvolution &SE;
 std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
 InsertedExpressions;
 std::set<Value*> InsertedValues;
 
+/// PostIncLoop - When non-null, expanded addrecs referring to the given
+/// loop expanded in post-inc mode. For example, expanding {1,+,1}<L> in
+/// post-inc mode returns the add instruction that adds one to the phi
+/// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
+/// supported in non-canonical mode.
+const Loop *PostIncLoop;
+
+/// IVIncInsertPos - When this is non-null, addrecs expanded in the
+/// loop it indicates should be inserted with increments at
+/// IVIncInsertPos.
+const Loop *IVIncInsertLoop;
+
+/// IVIncInsertPos - When expanding addrecs in the IVIncInsertLoop loop,
+/// insert the IV increment at this position.
+Instruction *IVIncInsertPos;
+
+/// CanonicalMode - When true, expressions are expanded in "canonical"
+/// form. In particular, addrecs are expanded as arithmetic based on
+/// a canonical induction variable. When false, expression are expanded
+/// in a more literal form.
+bool CanonicalMode;
+
+protected:
 typedef IRBuilder<true, TargetFolder> BuilderType;
 BuilderType Builder;
 
 friend struct SCEVVisitor<SCEVExpander, Value*>;
 public:
+/// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
 explicit SCEVExpander(ScalarEvolution &se)
-: SE(se), Builder(se.getContext(), TargetFolder(se.TD)) {}
+: SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+Builder(se.getContext(), TargetFolder(se.TD)) {}
 
 /// clear - Erase the contents of the InsertedExpressions map so that users
 /// trying to expand the same expression into multiple BasicBlocks or
@@ -54,11 +79,36 @@ namespace llvm {
 /// expandCodeFor - Insert code to directly compute the specified SCEV
 /// expression into the program. The inserted code is inserted into the
 /// specified block.
-Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *IP) {
+Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I) {
+BasicBlock::iterator IP = I;
+while (isInsertedInstruction(IP)) ++IP;
 Builder.SetInsertPoint(IP->getParent(), IP);
 return expandCodeFor(SH, Ty);
 }
 
+/// setIVIncInsertPos - Set the current IV increment loop and position.
+void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
+assert(!CanonicalMode &&
+"IV increment positions are not supported in CanonicalMode");
+IVIncInsertLoop = L;
+IVIncInsertPos = Pos;
+}
+
+/// setPostInc - If L is non-null, enable post-inc expansion for addrecs
+/// referring to the given loop. If L is null, disable post-inc expansion
+/// completely. Post-inc expansion is only supported in non-canonical
+/// mode.
+void setPostInc(const Loop *L) {
+assert(!CanonicalMode &&
+"Post-inc expansion is not supported in CanonicalMode");
+PostIncLoop = L;
+}
+
+/// disableCanonicalMode - Disable the behavior of expanding expressions in
+/// canonical form rather than in a more literal form. Non-canonical mode
+/// is useful for late optimization passes.
+void disableCanonicalMode() { CanonicalMode = false; }
+
 private:
 LLVMContext &getContext() const { return SE.getContext(); }
 
@@ -121,6 +171,16 @@ namespace llvm {
 Value *visitUnknown(const SCEVUnknown *S) {
 return S->getValue();
 }
 
+void rememberInstruction(Value *I) {
+if (!PostIncLoop) InsertedValues.insert(I);
+}
+
+Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
+PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+const Loop *L,
+const Type *ExpandTy,
+const Type *IntTy);
 };
 }
 
@@ -324,12 +324,6 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
 // the actual replacement value.
 if (U.isUseOfPostIncrementedValue())
 RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
-// Evaluate the expression out of the loop, if possible.
-if (!L->contains(U.getUser())) {
-const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
-if (ExitVal->isLoopInvariant(L))
-RetVal = ExitVal;
-}
 return RetVal;
 }
 
@@ -1089,6 +1089,15 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
 if (!isa<SCEVSignExtendExpr>(SExt))
 return SExt;
 
+// Force the cast to be folded into the operands of an addrec.
+if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+SmallVector<const SCEV *, 4> Ops;
+for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+I != E; ++I)
+Ops.push_back(getAnyExtendExpr(*I, Ty));
+return getAddRecExpr(Ops, AR->getLoop());
+}
+
 // If the expression is obviously signed, use the sext cast value.
 if (isa<SCEVSMaxExpr>(Op))
 return SExt;
@@ -1204,6 +1213,17 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 "SCEVAddExpr operand types don't match!");
 #endif
 
+// If HasNSW is true and all the operands are non-negative, infer HasNUW.
+if (!HasNUW && HasNSW) {
+bool All = true;
+for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+if (!isKnownNonNegative(Ops[i])) {
+All = false;
+break;
+}
+if (All) HasNUW = true;
+}
+
 // Sort by complexity, this groups all similar expression types together.
 GroupByComplexity(Ops, LI);
 
@@ -1521,10 +1541,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
 ID.AddPointer(Ops[i]);
 void *IP = 0;
-if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
+SCEVAddExpr *S =
+static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+if (!S) {
+S = SCEVAllocator.Allocate<SCEVAddExpr>();
 new (S) SCEVAddExpr(ID, Ops);
 UniqueSCEVs.InsertNode(S, IP);
+}
 if (HasNUW) S->setHasNoUnsignedWrap(true);
 if (HasNSW) S->setHasNoSignedWrap(true);
 return S;
@@ -1535,6 +1558,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 bool HasNUW, bool HasNSW) {
 assert(!Ops.empty() && "Cannot get empty mul!");
+if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
 for (unsigned i = 1, e = Ops.size(); i != e; ++i)
 assert(getEffectiveSCEVType(Ops[i]->getType()) ==
@@ -1542,6 +1566,17 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 "SCEVMulExpr operand types don't match!");
 #endif
 
+// If HasNSW is true and all the operands are non-negative, infer HasNUW.
+if (!HasNUW && HasNSW) {
+bool All = true;
+for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+if (!isKnownNonNegative(Ops[i])) {
+All = false;
+break;
+}
+if (All) HasNUW = true;
+}
+
 // Sort by complexity, this groups all similar expression types together.
 GroupByComplexity(Ops, LI);
 
@@ -1576,6 +1611,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
 // If we have a multiply of zero, it will always be zero.
 return Ops[0];
+} else if (Ops[0]->isAllOnesValue()) {
+// If we have a mul by -1 of an add, try distributing the -1 among the
+// add operands.
+if (Ops.size() == 2)
+if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+SmallVector<const SCEV *, 4> NewOps;
+bool AnyFolded = false;
+for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+I != E; ++I) {
+const SCEV *Mul = getMulExpr(Ops[0], *I);
+if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+NewOps.push_back(Mul);
+}
+if (AnyFolded)
+return getAddExpr(NewOps);
+}
 }
 }
 
@@ -1642,7 +1693,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 
 // It's tempting to propagate the NSW flag here, but nsw multiplication
 // is not associative so this isn't necessarily safe.
-const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+HasNUW && AddRec->hasNoUnsignedWrap(),
+/*HasNSW=*/false);
 
 // If all of the other operands were loop invariant, we are done.
 if (Ops.size() == 1) return NewRec;
@@ -1696,10 +1749,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
 ID.AddPointer(Ops[i]);
 void *IP = 0;
-if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
+SCEVMulExpr *S =
+static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+if (!S) {
+S = SCEVAllocator.Allocate<SCEVMulExpr>();
 new (S) SCEVMulExpr(ID, Ops);
 UniqueSCEVs.InsertNode(S, IP);
+}
 if (HasNUW) S->setHasNoUnsignedWrap(true);
 if (HasNSW) S->setHasNoSignedWrap(true);
 return S;
@@ -1842,10 +1898,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
 return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
 }
 
+// If HasNSW is true and all the operands are non-negative, infer HasNUW.
+if (!HasNUW && HasNSW) {
+bool All = true;
+for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+if (!isKnownNonNegative(Operands[i])) {
+All = false;
+break;
+}
+if (All) HasNUW = true;
+}
+
 // Canonicalize nested AddRecs in by nesting them in order of loop depth.
 if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
 const Loop *NestedLoop = NestedAR->getLoop();
-if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+if (L->contains(NestedLoop->getHeader()) ?
+(L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+(!NestedLoop->contains(L->getHeader()) &&
+DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
 SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
 NestedAR->op_end());
 Operands[0] = NestedAR->getStart();
@@ -1884,10 +1954,13 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
 ID.AddPointer(Operands[i]);
 ID.AddPointer(L);
 void *IP = 0;
-if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+SCEVAddRecExpr *S =
+static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+if (!S) {
+S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
 new (S) SCEVAddRecExpr(ID, Operands, L);
 UniqueSCEVs.InsertNode(S, IP);
+}
 if (HasNUW) S->setHasNoUnsignedWrap(true);
 if (HasNSW) S->setHasNoSignedWrap(true);
 return S;
@@ -2525,31 +2598,28 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
 if (Accum->isLoopInvariant(L) ||
 (isa<SCEVAddRecExpr>(Accum) &&
 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+bool HasNUW = false;
+bool HasNSW = false;
+
+// If the increment doesn't overflow, then neither the addrec nor
+// the post-increment will overflow.
+if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+if (OBO->hasNoUnsignedWrap())
+HasNUW = true;
+if (OBO->hasNoSignedWrap())
+HasNSW = true;
+}
+
 const SCEV *StartVal =
 getSCEV(PN->getIncomingValue(IncomingEdge));
-const SCEVAddRecExpr *PHISCEV =
-cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
+const SCEV *PHISCEV =
+getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
 
-// If the increment doesn't overflow, then neither the addrec nor the
-// post-increment will overflow.
-if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
-if (OBO->getOperand(0) == PN &&
-getSCEV(OBO->getOperand(1)) ==
-PHISCEV->getStepRecurrence(*this)) {
-const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
-if (OBO->hasNoUnsignedWrap()) {
-const_cast<SCEVAddRecExpr *>(PHISCEV)
-->setHasNoUnsignedWrap(true);
-const_cast<SCEVAddRecExpr *>(PostInc)
-->setHasNoUnsignedWrap(true);
-}
-if (OBO->hasNoSignedWrap()) {
-const_cast<SCEVAddRecExpr *>(PHISCEV)
-->setHasNoSignedWrap(true);
-const_cast<SCEVAddRecExpr *>(PostInc)
-->setHasNoSignedWrap(true);
-}
-}
+// Since the no-wrap flags are on the increment, they apply to the
+// post-incremented value as well.
+if (Accum->isLoopInvariant(L))
+(void)getAddRecExpr(getAddExpr(StartVal, Accum),
+Accum, L, HasNUW, HasNSW);
 
 // Okay, for the entire analysis of this edge we assumed the PHI
 // to be symbolic. We now need to go back and purge all of the
@@ -2781,26 +2851,29 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
 const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
 const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-if (!Trip) return FullSet;
+ConstantRange ConservativeResult = FullSet;
+
+// If there's no unsigned wrap, the value will never be less than its
+// initial value.
+if (AddRec->hasNoUnsignedWrap())
+if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ConservativeResult =
+ConstantRange(C->getValue()->getValue(),
+APInt(getTypeSizeInBits(C->getType()), 0));
+
 // TODO: non-affine addrec
-if (AddRec->isAffine()) {
+if (Trip && AddRec->isAffine()) {
 const Type *Ty = AddRec->getType();
 const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
 if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
 MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
 const SCEV *Start = AddRec->getStart();
-const SCEV *Step = AddRec->getStepRecurrence(*this);
 const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
 // Check for overflow.
-// TODO: This is very conservative.
-if (!(Step->isOne() &&
-isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
-!(Step->isAllOnesValue() &&
-isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
-return FullSet;
+if (!AddRec->hasNoUnsignedWrap())
+return ConservativeResult;
 
 ConstantRange StartRange = getUnsignedRange(Start);
 ConstantRange EndRange = getUnsignedRange(End);
@@ -2809,10 +2882,12 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
 APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
 EndRange.getUnsignedMax());
 if (Min.isMinValue() && Max.isMaxValue())
-return FullSet;
+return ConservativeResult;
 return ConstantRange(Min, Max+1);
 }
 }
 
+return ConservativeResult;
 }
 
 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2891,26 +2966,39 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
 const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
 const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-if (!Trip) return FullSet;
+ConstantRange ConservativeResult = FullSet;
+
+// If there's no signed wrap, and all the operands have the same sign or
+// zero, the value won't ever change sign.
+if (AddRec->hasNoSignedWrap()) {
+bool AllNonNeg = true;
+bool AllNonPos = true;
+for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+}
+unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+if (AllNonNeg)
+ConservativeResult = ConstantRange(APInt(BitWidth, 0),
+APInt::getSignedMinValue(BitWidth));
+else if (AllNonPos)
+ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth),
+APInt(BitWidth, 1));
+}
+
 // TODO: non-affine addrec
-if (AddRec->isAffine()) {
+if (Trip && AddRec->isAffine()) {
 const Type *Ty = AddRec->getType();
 const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
 if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
 MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
 const SCEV *Start = AddRec->getStart();
-const SCEV *Step = AddRec->getStepRecurrence(*this);
 const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
 // Check for overflow.
-// TODO: This is very conservative.
-if (!(Step->isOne() &&
-isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
-!(Step->isAllOnesValue() &&
-isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
-return FullSet;
+if (!AddRec->hasNoSignedWrap())
+return ConservativeResult;
 
 ConstantRange StartRange = getSignedRange(Start);
 ConstantRange EndRange = getSignedRange(End);
@@ -2919,15 +3007,19 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
 APInt Max = APIntOps::smax(StartRange.getSignedMax(),
 EndRange.getSignedMax());
 if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-return FullSet;
+return ConservativeResult;
 return ConstantRange(Min, Max+1);
 }
 }
 
+return ConservativeResult;
 }
 
 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
 // For a SCEVUnknown, ask ValueTracking.
 unsigned BitWidth = getTypeSizeInBits(U->getType());
+if (!U->getValue()->getType()->isInteger() && !TD)
+return FullSet;
 unsigned NS = ComputeNumSignBits(U->getValue(), TD);
 if (NS == 1)
 return FullSet;
@@ -81,7 +81,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
 
 Instruction *I = CastInst::Create(Op, V, Ty, V->getName(),
 A->getParent()->getEntryBlock().begin());
-InsertedValues.insert(I);
+rememberInstruction(I);
 return I;
 }
 
@@ -114,7 +114,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
 IP = II->getNormalDest()->begin();
 while (isa<PHINode>(IP)) ++IP;
 Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP);
-InsertedValues.insert(CI);
+rememberInstruction(CI);
 return CI;
 }
 
@@ -144,7 +144,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
 
 // If we haven't found this binop, insert it.
 Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
-InsertedValues.insert(BO);
+rememberInstruction(BO);
 return BO;
 }
 
@@ -491,22 +491,39 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
 
 // Emit a GEP.
 Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
-InsertedValues.insert(GEP);
+rememberInstruction(GEP);
 return GEP;
 }
 
 // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
 // because ScalarEvolution may have changed the address arithmetic to
 // compute a value which is beyond the end of the allocated object.
-Value *GEP = Builder.CreateGEP(V,
+Value *Casted = V;
+if (V->getType() != PTy)
+Casted = InsertNoopCastOfTo(Casted, PTy);
+Value *GEP = Builder.CreateGEP(Casted,
 GepIndices.begin(),
 GepIndices.end(),
 "scevgep");
 Ops.push_back(SE.getUnknown(GEP));
-InsertedValues.insert(GEP);
+rememberInstruction(GEP);
 return expand(SE.getAddExpr(Ops));
 }
 
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+if (!Mul) return false;
+
+// If there is a constant factor, it will be first.
+const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+if (!SC) return false;
+
+// Return true if the value is negative, this matches things like (-42 * V).
+return SC->getValue()->getValue().isNegative();
+}
+
 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 int NumOperands = S->getNumOperands();
 const Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -539,9 +556,15 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 // Emit a bunch of add instructions
 for (int i = NumOperands-1; i >= 0; --i) {
 if (i == PIdx) continue;
-Value *W = expandCodeFor(S->getOperand(i), Ty);
+const SCEV *Op = S->getOperand(i);
+if (isNonConstantNegative(Op)) {
+Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+V = InsertBinop(Instruction::Sub, V, W);
+} else {
+Value *W = expandCodeFor(Op, Ty);
 V = InsertBinop(Instruction::Add, V, W);
 }
+}
 return V;
 }
 
@@ -603,7 +626,175 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
 }
 }
 
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+const Loop *L,
+const Type *ExpandTy,
+const Type *IntTy) {
+// Reuse a previously-inserted PHI, if present.
+for (BasicBlock::iterator I = L->getHeader()->begin();
+PHINode *PN = dyn_cast<PHINode>(I); ++I)
+if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
+return PN;
+
+// Save the original insertion point so we can restore it when we're done.
+BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+// Expand code for the start value.
+Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+L->getHeader()->begin());
+
+// Expand code for the step value. Insert instructions right before the
+// terminator corresponding to the back-edge. Do this before creating the PHI
+// so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+// negative, insert a sub instead of an add for the increment (unless it's a
+// constant, because subtracts of constants are canonicalized to adds).
+const SCEV *Step = Normalized->getStepRecurrence(SE);
+bool isPointer = isa<PointerType>(ExpandTy);
+bool isNegative = !isPointer && isNonConstantNegative(Step);
+if (isNegative)
+Step = SE.getNegativeSCEV(Step);
+Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+// Create the PHI.
+Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin());
+PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv");
+rememberInstruction(PN);
+
+// Create the step instructions and populate the PHI.
+BasicBlock *Header = L->getHeader();
+for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+HPI != HPE; ++HPI) {
+BasicBlock *Pred = *HPI;
+
+// Add a start value.
+if (!L->contains(Pred)) {
+PN->addIncoming(StartV, Pred);
+continue;
+}
+
+// Create a step value and add it to the PHI. If IVIncInsertLoop is
+// non-null and equal to the addrec's loop, insert the instructions
+// at IVIncInsertPos.
+Instruction *InsertPos = L == IVIncInsertLoop ?
+IVIncInsertPos : Pred->getTerminator();
+Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+Value *IncV;
+// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+if (isPointer) {
+const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+// If the step isn't constant, don't use an implicitly scaled GEP, because
+// that would require a multiply inside the loop.
+if (!isa<ConstantInt>(StepV))
+GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+GEPPtrTy->getAddressSpace());
+const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+if (IncV->getType() != PN->getType()) {
+IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+rememberInstruction(IncV);
+}
+} else {
+IncV = isNegative ?
+Builder.CreateSub(PN, StepV, "lsr.iv.next") :
+Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+rememberInstruction(IncV);
+}
+PN->addIncoming(IncV, Pred);
+}
+
+// Restore the original insert point.
+if (SaveInsertBB)
+Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+// Remember this PHI, even in post-inc mode.
+InsertedValues.insert(PN);
+
+return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+const Type *STy = S->getType();
+const Type *IntTy = SE.getEffectiveSCEVType(STy);
+const Loop *L = S->getLoop();
+
+// Determine a normalized form of this expression, which is the expression
+// before any post-inc adjustment is made.
+const SCEVAddRecExpr *Normalized = S;
+if (L == PostIncLoop) {
+const SCEV *Step = S->getStepRecurrence(SE);
+Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step));
+}
+
+// Strip off any non-loop-dominating component from the addrec start.
+const SCEV *Start = Normalized->getStart();
+const SCEV *PostLoopOffset = 0;
+if (!Start->properlyDominates(L->getHeader(), SE.DT)) {
+PostLoopOffset = Start;
+Start = SE.getIntegerSCEV(0, Normalized->getType());
+Normalized =
+cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start,
+Normalized->getStepRecurrence(SE),
+Normalized->getLoop()));
+}
+
+// Strip off any non-loop-dominating component from the addrec step.
+const SCEV *Step = Normalized->getStepRecurrence(SE);
+const SCEV *PostLoopScale = 0;
+if (!Step->hasComputableLoopEvolution(L) &&
+!Step->dominates(L->getHeader(), SE.DT)) {
+PostLoopScale = Step;
+Step = SE.getIntegerSCEV(1, Normalized->getType());
+Normalized =
+cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+Normalized->getLoop()));
+}
+
+// Expand the core addrec. If we need post-loop scaling, force it to
+// expand to an integer type to avoid the need for additional casting.
+const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+// Accomodate post-inc mode, if necessary.
+Value *Result;
+if (L != PostIncLoop)
+Result = PN;
+else {
+// In PostInc mode, use the post-incremented value.
+BasicBlock *LatchBlock = L->getLoopLatch();
+assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+Result = PN->getIncomingValueForBlock(LatchBlock);
+}
+
+// Re-apply any non-loop-dominating scale.
+if (PostLoopScale) {
+Result = Builder.CreateMul(Result,
+expandCodeFor(PostLoopScale, IntTy));
+rememberInstruction(Result);
+}
+
+// Re-apply any non-loop-dominating offset.
+if (PostLoopOffset) {
+if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+const SCEV *const OffsetArray[1] = { PostLoopOffset };
+Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+} else {
+Result = Builder.CreateAdd(Result,
+expandCodeFor(PostLoopOffset, IntTy));
+rememberInstruction(Result);
+}
+}
+
+return Result;
+}
+
 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
 const Type *Ty = SE.getEffectiveSCEVType(S->getType());
 const Loop *L = S->getLoop();
 
@@ -681,7 +872,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
 // specified loop.
 BasicBlock *Header = L->getHeader();
 PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
-InsertedValues.insert(PN);
+rememberInstruction(PN);
 
 Constant *One = ConstantInt::get(Ty, 1);
 for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
@@ -691,7 +882,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
 // corresponding to the back-edge.
 Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
 (*HPI)->getTerminator());
-InsertedValues.insert(Add);
+rememberInstruction(Add);
 PN->addIncoming(Add, *HPI);
 } else {
 PN->addIncoming(Constant::getNullValue(Ty), *HPI);
@@ -738,7 +929,7 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
 Value *V = expandCodeFor(S->getOperand(),
 SE.getEffectiveSCEVType(S->getOperand()->getType()));
 Value *I = Builder.CreateTrunc(V, Ty, "tmp");
-InsertedValues.insert(I);
+rememberInstruction(I);
 return I;
 }
 
@@ -747,7 +938,7 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
 Value *V = expandCodeFor(S->getOperand(),
 SE.getEffectiveSCEVType(S->getOperand()->getType()));
 Value *I = Builder.CreateZExt(V, Ty, "tmp");
-InsertedValues.insert(I);
+rememberInstruction(I);
 return I;
 }
 
@@ -756,7 +947,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
 Value *V = expandCodeFor(S->getOperand(),
 SE.getEffectiveSCEVType(S->getOperand()->getType()));
 Value *I = Builder.CreateSExt(V, Ty, "tmp");
-InsertedValues.insert(I);
+rememberInstruction(I);
 return I;
 }
 
@@ -772,9 +963,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
 }
 Value *RHS = expandCodeFor(S->getOperand(i), Ty);
 Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
-InsertedValues.insert(ICmp);
+rememberInstruction(ICmp);
 Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
-InsertedValues.insert(Sel);
+rememberInstruction(Sel);
 LHS = Sel;
 }
 // In the case of mixed integer and pointer types, cast the
@@ -796,9 +987,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
 }
 Value *RHS = expandCodeFor(S->getOperand(i), Ty);
 Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
-InsertedValues.insert(ICmp);
+rememberInstruction(ICmp);
 Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
-InsertedValues.insert(Sel);
+rememberInstruction(Sel);
 LHS = Sel;
 }
 // In the case of mixed integer and pointer types, cast the
@@ -863,6 +1054,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
 Value *V = visit(S);
 
 // Remember the expanded value for this SCEV at this location.
+if (!PostIncLoop)
 InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
 Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
@@ -944,7 +944,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 // Okay, we know that we have a scale by now. However, if the scaled
 // value is an add of something and a constant, we can fold the
 // constant into the disp field here.
-if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+if (ShVal.getNode()->getOpcode() == ISD::ADD &&
 isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
 AM.IndexReg = ShVal.getNode()->getOperand(0);
 ConstantSDNode *AddVal =
@@ -471,6 +471,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
 // Compute the final addrec to expand into code.
 const SCEV *AR = IU->getReplacementExpr(*UI);
 
+// Evaluate the expression out of the loop, if possible.
+if (!L->contains(UI->getUser())) {
+const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+if (ExitVal->isLoopInvariant(L))
+AR = ExitVal;
+}
+
 // FIXME: It is an extremely bad idea to indvar substitute anything more
 // complex than affine induction variables. Doing so will put expensive
 // polynomial evaluations inside of the loop, and the str reduction pass
@@ -522,11 +529,10 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
 Rewriter.clear();
 // Now that we're done iterating through lists, clean up any instructions
 // which are now dead.
-while (!DeadInsts.empty()) {
-Instruction *Inst = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
-if (Inst)
+while (!DeadInsts.empty())
+if (Instruction *Inst =
+dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
 RecursivelyDeleteTriviallyDeadInstructions(Inst);
-}
 }
 
 /// If there's a single exit block, sink any loop-invariant values that
File diff suppressed because it is too large
@@ -1,8 +1,11 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
+; This loop is rewritten with an indvar which counts down, which
+; frees up a register from holding the trip count.
+
 define void @test(i32* %P, i32 %A, i32 %i) nounwind {
 entry:
-; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
+; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
 icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
 br i1 %0, label %return, label %bb
 
@@ -19,3 +22,26 @@ return: ; preds = %bb, %entry
 ret void
 }
+
+; This loop has a non-address use of the count-up indvar, so
+; it'll remain. Now the original store uses a negative-stride address.
+
+define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
+entry:
+; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
+icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
+br i1 %0, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
+%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+store i32 %A, i32* %tmp2
+store i32 %indvar, i32* null
+%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
+br i1 %1, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ret void
+}
@@ -1,5 +1,5 @@
-; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
-; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only
 ; has a single store in it, and that cond_true55 only has code to materialize
 ; the constant and do a store. We do *not* want something like this:
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin
-; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization"
 
 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
 %struct.LOCBOX = type { i32, i32, i32, i32 }
@@ -1,25 +1,29 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
 ; rdar://7387640
 
-; FIXME: We still need to rewrite array reference iv of stride -4 with loop
-; count iv of stride -1.
+; This now reduces to a single induction variable.
+
+; TODO: It still gets a GPR shuffle at the end of the loop
+; This is because something in instruction selection has decided
+; that comparing the pre-incremented value with zero is better
+; than comparing the post-incremented value with -4.
 
 @G = external global i32 ; <i32*> [#uses=2]
 @array = external global i32* ; <i32**> [#uses=1]
 
 define arm_apcscc void @t() nounwind optsize {
 ; CHECK: t:
-; CHECK: mov.w r2, #4000
-; CHECK: movw r3, #1001
+; CHECK: mov.w r2, #1000
 entry:
 %.pre = load i32* @G, align 4 ; <i32> [#uses=1]
 br label %bb
 
 bb: ; preds = %bb, %entry
 ; CHECK: LBB1_1:
-; CHECK: subs r3, #1
-; CHECK: cmp r3, #0
-; CHECK: sub.w r2, r2, #4
+; CHECK: cmp r2, #0
+; CHECK: sub.w r9, r2, #1
+; CHECK: mov r2, r9
 
 %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
 %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
 %tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
 
-define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK: t1:
 ; CHECK: it ne
 ; CHECK: cmpne
@@ -20,12 +20,12 @@ cond_next:
 }
 
 ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
-define i32 @t2(i32 %a, i32 %b) {
+define i32 @t2(i32 %a, i32 %b) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: ite le
-; CHECK: suble
+; CHECK: ite gt
 ; CHECK: subgt
+; CHECK: suble
 %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
 br i1 %tmp1434, label %bb17, label %bb.outer
 
@@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
 
 @x = external global i32* ; <i32**> [#uses=1]
 
-define void @foo(i32 %a) {
+define void @foo(i32 %a) nounwind {
 entry:
 %tmp = load i32** @x ; <i32*> [#uses=1]
 store i32 %a, i32* %tmp
 ret void
 }
 
-define void @t3(i32 %a, i32 %b) {
+define void @t3(i32 %a, i32 %b) nounwind {
 entry:
 ; CHECK: t3:
 ; CHECK: it lt
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN: grep {asm-printer} | grep 31
+; RUN: grep {asm-printer} | grep 34
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
 %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
 store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
 %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
-%tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
+%tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
 %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
 br i1 %tmp.upgrd.8, label %cond_true, label %return
@@ -1,102 +0,0 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin | grep "48(%esp)" | count 5
-
-%struct..0anon = type { i32 }
-%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
-%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
-@rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1]
-@rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1]
-
-declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
-
-define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
-entry:
-%tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
-br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
-
-cond_next: ; preds = %entry
-%tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1]
-%tmp7 = load i16* %tmp6 ; <i16> [#uses=2]
-%tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2]
-%tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1]
-br i1 %tmp10, label %cond_true13, label %cond_next32
-
-cond_true13: ; preds = %cond_next
-%tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1]
-%tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
-%tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1]
-%tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1]
-%tmp21 = load i16* %tmp20 ; <i16> [#uses=1]
-%tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1]
-br i1 %tmp22, label %cond_true25, label %cond_next32
-
-cond_true25: ; preds = %cond_true13
-%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1]
-ret %struct.rtx_def* %tmp29
-
-cond_next32: ; preds = %cond_true13, %cond_next
-%tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1]
-%tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1]
-%tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1]
-%tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1]
-%i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2]
-%tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1]
-br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
-
-bb: ; preds = %bb123, %cond_next32
-%indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2]
-%i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5]
-%tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2]
-%tmp43 = load i8* %tmp42 ; <i8> [#uses=1]
-switch i8 %tmp43, label %bb123 [
-i8 101, label %cond_true47
-i8 69, label %bb105.preheader
-]
-
-cond_true47: ; preds = %bb
-%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
-%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
-%tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
-%tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
-%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
-%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
-store i32 %tmp58.c, i32* %tmp62
-%tmp6816 = load i8* %tmp42 ; <i8> [#uses=1]
-%tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1]
-br i1 %tmp6917, label %bb105.preheader, label %bb123
-
-bb105.preheader: ; preds = %cond_true47, %bb
-%tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
-%tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3]
-%tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
-%tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1]
-%tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1]
-%tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1]
-br i1 %tmp11625, label %bb123, label %bb73
-
-bb73: ; preds = %bb73, %bb105.preheader
-%j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3]
-%tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2]
-%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
-%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
-%tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
-%tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1]
-%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
-%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
-store i32 %tmp98.c, i32* %tmp101
-%tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2]
-%tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
-%tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1]
-%tmp115 = load i32* %tmp114 ; <i32> [#uses=1]
-%tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1]
-br i1 %tmp116, label %bb73, label %bb123
-
-bb123: ; preds = %bb73, %bb105.preheader, %cond_true47, %bb
-%i.0 = add i32 %i.01.0, -1 ; <i32> [#uses=1]
-%tmp125 = icmp sgt i32 %i.0, -1 ; <i1> [#uses=1]
-%indvar.next26 = add i32 %indvar, 1 ; <i32> [#uses=1]
-br i1 %tmp125, label %bb, label %UnifiedReturnBlock
-
-UnifiedReturnBlock: ; preds = %bb123, %cond_next32, %entry
-%UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ] ; <%struct.rtx_def*> [#uses=1]
-ret %struct.rtx_def* %UnifiedRetVal
-}
@@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
 bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
 ; CHECK: %bb.i28.i
 ; CHECK: addl $2
-; CHECK: addl $2
+; CHECK: addl $-2
 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
 %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
 %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
@@ -1,5 +1,11 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s
 
+; This test shouldn't require spills.
+
+; CHECK: subq $8, %rsp
+; CHECK-NOT: $rsp
+; CHECK: addq $8, %rsp
+
 %struct..0anon = type { i32 }
 %struct.rtvec_def = type { i32, [1 x %struct..0anon] }
 %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
@@ -10,9 +16,6 @@ declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*
 
 define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
 entry:
-; CHECK: Spill
-; CHECK: Folded Spill
-; CHECK: Reload
 %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1]
 br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
 
@@ -1,6 +1,12 @@
-; RUN: llc < %s -march=x86 -enable-full-lsr >%t
+; RUN: llc < %s -march=x86 >%t
-; RUN: grep {addl \\\$4,} %t | count 3
-; RUN: not grep {,%} %t
+; TODO: Enhance full lsr mode to get this:
+; RUNX: grep {addl \\\$4,} %t | count 3
+; RUNX: not grep {,%} %t
 
+; For now, it should find this, which is still pretty good:
+; RUN: not grep {addl \\\$4,} %t
+; RUN: grep {,%} %t | count 6
+
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
 entry:
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=x86-64 -o %t
-; RUN: grep inc %t | count 1
+; RUN: not grep inc %t
 ; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 13
+; RUN: grep addq %t | count 10
 ; RUN: not grep addb %t
 ; RUN: grep leaq %t | count 9
-; RUN: grep leal %t | count 3
+; RUN: grep leal %t | count 2
-; RUN: grep movq %t | count 5
+; RUN: grep movq %t | count 10
 
 ; IV users in each of the loops from other loops shouldn't cause LSR
 ; to insert new induction variables. Previously it would create a
@@ -1,5 +1,19 @@
-; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -march=x86 | not grep inc
+; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
 
+; By starting the IV at -64 instead of 0, a cmp is eliminated,
+; as the flags from the add can be used directly.
+
+; STATIC: movl $-64, %ecx
+
+; STATIC: movl %eax, _state+76(%ecx)
+; STATIC: addl $16, %ecx
+; STATIC: jne
+
+; In PIC mode the symbol can't be folded, so the change-compare-stride
+; trick applies.
+
+; PIC: cmpl $64
+
 @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
 @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@@ -1,4 +1,10 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
 
+; CHECK: leal 16(%eax), %edx
+; CHECK: align
+; CHECK: addl $4, %edx
+; CHECK: decl %ecx
+; CHECK: jne LBB1_2
+
 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
 %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
@@ -0,0 +1,159 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+target datalayout = "e-p:64:64:64"
+target triple = "x86_64-unknown-unknown"
+
+; Full strength reduction reduces register pressure from 5 to 4 here.
+
+; CHECK: full_me:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: addq $8, %rsi
+; CHECK: addq $8, %rdx
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+%t0 = icmp sgt i64 %n, 0
+br i1 %t0, label %loop, label %return
+
+loop:
+%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+%Ai = getelementptr inbounds double* %A, i64 %i
+%Bi = getelementptr inbounds double* %B, i64 %i
+%Ci = getelementptr inbounds double* %C, i64 %i
+%t1 = load double* %Bi
+%t2 = load double* %Ci
+%m = fmul double %t1, %t2
+store double %m, double* %Ai
+%i.next = add nsw i64 %i, 1
+%exitcond = icmp eq i64 %i.next, %n
+br i1 %exitcond, label %return, label %loop
+
+return:
+ret void
+}
+
+; In this test, the counting IV exit value is used, so full strength reduction
+; would not reduce register pressure. IndVarSimplify ought to simplify such
+; cases away, but it's useful here to verify that LSR's register pressure
+; heuristics are working as expected.
+
+; CHECK: count_me_0:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+%t0 = icmp sgt i64 %n, 0
+br i1 %t0, label %loop, label %return
+
+loop:
+%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+%Ai = getelementptr inbounds double* %A, i64 %i
+%Bi = getelementptr inbounds double* %B, i64 %i
+%Ci = getelementptr inbounds double* %C, i64 %i
+%t1 = load double* %Bi
+%t2 = load double* %Ci
+%m = fmul double %t1, %t2
+store double %m, double* %Ai
+%i.next = add nsw i64 %i, 1
+%exitcond = icmp eq i64 %i.next, %n
+br i1 %exitcond, label %return, label %loop
+
+return:
+%q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ret i64 %q
+}
+
+; In this test, the trip count value is used, so full strength reduction
+; would not reduce register pressure.
+; (though it would reduce register pressure inside the loop...)
+
+; CHECK: count_me_1:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+%t0 = icmp sgt i64 %n, 0
+br i1 %t0, label %loop, label %return
+
+loop:
+%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+%Ai = getelementptr inbounds double* %A, i64 %i
+%Bi = getelementptr inbounds double* %B, i64 %i
+%Ci = getelementptr inbounds double* %C, i64 %i
+%t1 = load double* %Bi
+%t2 = load double* %Ci
+%m = fmul double %t1, %t2
+store double %m, double* %Ai
+%i.next = add nsw i64 %i, 1
+%exitcond = icmp eq i64 %i.next, %n
+br i1 %exitcond, label %return, label %loop
+
+return:
+%q = phi i64 [ 0, %entry ], [ %n, %loop ]
+ret i64 %q
+}
+
+; This should be fully strength-reduced to reduce register pressure, however
+; the current heuristics get distracted by all the reuse with the stride-1
+; induction variable first.
+
+; But even so, be clever and start the stride-1 variable at a non-zero value
+; to eliminate an in-loop immediate value.
+
+; CHECK: count_me_2:
+; CHECK: movl $5, %eax
+; CHECK: align
+; CHECK: BB4_1:
+; CHECK: movsd (%rdi,%rax,8), %xmm0
+; CHECK: addsd (%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdx,%rax,8)
+; CHECK: movsd 40(%rdi,%rax,8), %xmm0
+; CHECK: addsd 40(%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq $5005, %rax
+; CHECK: jne
+
+define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
+entry:
+br label %loop
+
+loop:
+%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+%i5 = add i64 %i, 5
+%Ai = getelementptr double* %A, i64 %i5
+%t2 = load double* %Ai
+%Bi = getelementptr double* %B, i64 %i5
+%t4 = load double* %Bi
+%t5 = fadd double %t2, %t4
+%Ci = getelementptr double* %C, i64 %i5
+store double %t5, double* %Ci
+%i10 = add i64 %i, 10
+%Ai10 = getelementptr double* %A, i64 %i10
+%t9 = load double* %Ai10
+%Bi10 = getelementptr double* %B, i64 %i10
+%t11 = load double* %Bi10
+%t12 = fadd double %t9, %t11
+%Ci10 = getelementptr double* %C, i64 %i10
+store double %t12, double* %Ci10
+%i.next = add i64 %i, 1
+%exitcond = icmp eq i64 %i.next, 5000
+br i1 %exitcond, label %return, label %loop
+
+return:
+ret void
+}
@@ -4,9 +4,9 @@
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
 ; RUN: grep add %t | count 2
-; RUN: grep inc %t | count 4
+; RUN: grep inc %t | count 3
 ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
@@ -127,6 +127,9 @@ return:
 ret void
 }
 
+; TODO: If we could handle all the loads and stores as post-inc users, we could
+; use {-1,+,1} in the induction variable register, and we'd get another inc,
+; one fewer add, and a comparison with zero.
 define void @another_count_up(double* %d, i64 %n) nounwind {
 entry:
 br label %loop
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
 
+target datalayout = "e-p:32:32:32"
 target triple = "i386-apple-darwin9.6"
 %struct.constraintVCGType = type { i32, i32, i32, i32 }
 %struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
@@ -1,13 +1,33 @@
-; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 ; CodeGen should remat the zero instead of spilling it.
 
 declare void @foo(i64 %p)
 
+; CHECK: bar:
+; CHECK: xorl %edi, %edi
+; CHECK: xorl %edi, %edi
 define void @bar() nounwind {
-call void @foo(i64 0)
-call void @foo(i64 0)
 call void @foo(i64 0)
 call void @foo(i64 0)
 ret void
 }
 
+; CHECK: bat:
+; CHECK: movq $-1, %rdi
+; CHECK: movq $-1, %rdi
+define void @bat() nounwind {
+call void @foo(i64 -1)
+call void @foo(i64 -1)
+ret void
+}
+
+; CHECK: bau:
+; CHECK: movl $1, %edi
+; CHECK: movl $1, %edi
+define void @bau() nounwind {
+call void @foo(i64 1)
+call void @foo(i64 1)
+ret void
+}
+
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=x86 | grep -- -1 | grep mov | count 2
-
-%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
-%struct._CompT = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, i8, %struct._PixT*, %struct._CompT*, i8, %struct._CompT* }
-%struct._PixT = type { i32, i32, %struct._PixT* }
-%struct.__sFILEX = type opaque
-%struct.__sbuf = type { i8*, i32 }
-
-declare fastcc void @MergeComponents(%struct._CompT*, %struct._CompT*, %struct._CompT*, %struct._CompT**, %struct.ImgT*) nounwind
-
-define fastcc void @MergeToLeft(%struct._CompT* %comp, %struct._CompT** %head, %struct.ImgT* %img) nounwind {
-entry:
-br label %bb208
-
-bb105: ; preds = %bb200
-br i1 false, label %bb197, label %bb149
-
-bb149: ; preds = %bb105
-%tmp151 = getelementptr %struct._CompT* %comp, i32 0, i32 0 ; <i32*> [#uses=1]
-br label %bb193
-
-bb193: ; preds = %bb184, %bb149
-%tmp196 = load i32* %tmp151, align 4 ; <i32> [#uses=1]
-br label %bb197
-
-bb197: ; preds = %bb193, %bb105
-%last_comp.0 = phi i32 [ %tmp196, %bb193 ], [ 0, %bb105 ] ; <i32> [#uses=0]
-%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
-br label %bb200
-
-bb200: ; preds = %bb208, %bb197
-%indvar = phi i32 [ 0, %bb208 ], [ %indvar.next, %bb197 ] ; <i32> [#uses=2]
-%xm.0 = sub i32 %indvar, 0 ; <i32> [#uses=1]
-%tmp202 = icmp slt i32 %xm.0, 1 ; <i1> [#uses=1]
-br i1 %tmp202, label %bb105, label %bb208
-
-bb208: ; preds = %bb200, %entry
-br label %bb200
-}
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep addl %t
-; RUN: not egrep {movl|movq} %t
-
-define float @foo(float* %B) nounwind {
-entry:
-br label %bb2
-
-bb2: ; preds = %bb3, %entry
-%B_addr.0.rec = phi i64 [ %indvar.next154, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2]
-%z = icmp slt i64 %B_addr.0.rec, 20000
-br i1 %z, label %bb3, label %bb4
-
-bb3: ; preds = %bb2
-%indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
-br label %bb2
-
-bb4: ; preds = %bb2
-%B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=1]
-%t1 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
-%t2 = and i64 %t1, 4294967295 ; <i64> [#uses=1]
-%t3 = icmp eq i64 %t2, 0 ; <i1> [#uses=1]
-br i1 %t3, label %bb5, label %bb10.preheader
-
-bb10.preheader: ; preds = %bb4
-br label %bb9
-
-bb5: ; preds = %bb4
-ret float 7.0
-
-bb9: ; preds = %bb10.preheader
-%t5 = getelementptr float* %B, i64 0 ; <float*> [#uses=1]
-%t7 = load float* %t5 ; <float> [#uses=1]
-ret float %t7
-}
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -indvars -S > %t
-; RUN: grep add %t | count 8
+; RUN: grep add %t | count 6
-; RUN: grep mul %t | count 7
+; RUN: grep sub %t | count 2
+; RUN: grep mul %t | count 6
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
 entry:
@@ -1,5 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep ugt
+; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
-; PR2535
 
 @.str = internal constant [4 x i8] c"%d\0A\00"
 
@@ -16,7 +15,7 @@ forbody:
 %add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
 call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
 %inc = add i32 %i.0, 1 ; <i32> [#uses=3]
-%cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
+%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
 br i1 %cmp, label %forbody, label %afterfor
 
 afterfor: ; preds = %forcond
@@ -1,10 +1,9 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
+; RUN: llc < %s -o - | grep {testl %ecx, %ecx}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-1.ll except the comparison
+; The comparison happens before the relevant use, but it can still be rewritten
-; happens before the relevant use, so the comparison stride can't be
+; to compare with zero.
-; easily changed.
 
 define void @foo() nounwind {
 entry:
@@ -1,10 +1,10 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpq \$8}
+; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$8}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
 
-; This is like change-compare-stride-trickiness-0.ll except the comparison
+; The comparison happens after the relevant use, so the stride can easily
-; happens after the relevant use, so the comparison stride can be
+; be changed. The comparison can be done in a narrower mode than the
-; easily changed.
+; induction variable.
 
 define void @foo() nounwind {
 entry:
@@ -19,7 +19,7 @@ bb3: ; preds = %bb1
 %tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
 %c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
 %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
-; CHECK: sub i32 %lsr.iv, 1
+; CHECK: add i32 %lsr.iv, -1
 br label %bb6
 
 bb6: ; preds = %bb3, %entry
@@ -1,27 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-br i1 undef, label %bb4.preheader, label %bb.nph8
-
-bb4.preheader: ; preds = %entry
-br label %bb4
-
-bb1: ; preds = %bb4
-br i1 undef, label %bb.nph8, label %bb3
-
-bb3: ; preds = %bb1
-%phitmp = add i32 %indvar, 1 ; <i32> [#uses=1]
-br label %bb4
-
-bb4: ; preds = %bb3, %bb4.preheader
-; CHECK: %lsr.iv = phi
-; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
-; CHECK: %0 = icmp slt i32 %lsr.iv.next, %argc
-%indvar = phi i32 [ 1, %bb4.preheader ], [ %phitmp, %bb3 ] ; <i32> [#uses=2]
-%0 = icmp slt i32 %indvar, %argc ; <i1> [#uses=1]
-br i1 %0, label %bb1, label %bb.nph8
-
-bb.nph8: ; preds = %bb4, %bb1, %entry
-unreachable
-}