forked from OSchip/llvm-project
Reapply the new LoopStrengthReduction code, with compile time and
bug fixes, and with improved heuristics for analyzing foreign-loop addrecs. This change also flattens IVUsers, eliminating the stride-oriented groupings, which makes it easier to work with. llvm-svn: 95975
This commit is contained in:
parent
c7ef4cc9fc
commit
45774ce0ad
|
@ -16,29 +16,27 @@
|
|||
#define LLVM_ANALYSIS_IVUSERS_H
|
||||
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include <map>
|
||||
#include "llvm/Support/ValueHandle.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class DominatorTree;
|
||||
class Instruction;
|
||||
class Value;
|
||||
struct IVUsersOfOneStride;
|
||||
class IVUsers;
|
||||
class ScalarEvolution;
|
||||
class SCEV;
|
||||
|
||||
/// IVStrideUse - Keep track of one use of a strided induction variable, where
|
||||
/// the stride is stored externally. The Offset member keeps track of the
|
||||
/// offset from the IV, User is the actual user of the operand, and
|
||||
/// 'OperandValToReplace' is the operand of the User that is the use.
|
||||
/// IVStrideUse - Keep track of one use of a strided induction variable.
|
||||
/// The Expr member keeps track of the expression, User is the actual user
|
||||
/// instruction of the operand, and 'OperandValToReplace' is the operand of
|
||||
/// the User that is the use.
|
||||
class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
|
||||
public:
|
||||
IVStrideUse(IVUsersOfOneStride *parent,
|
||||
const SCEV *offset,
|
||||
IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off,
|
||||
Instruction* U, Value *O)
|
||||
: CallbackVH(U), Parent(parent), Offset(offset),
|
||||
OperandValToReplace(O),
|
||||
IsUseOfPostIncrementedValue(false) {
|
||||
: CallbackVH(U), Parent(P), Stride(S), Offset(Off),
|
||||
OperandValToReplace(O), IsUseOfPostIncrementedValue(false) {
|
||||
}
|
||||
|
||||
/// getUser - Return the user instruction for this use.
|
||||
|
@ -51,9 +49,17 @@ public:
|
|||
setValPtr(NewUser);
|
||||
}
|
||||
|
||||
/// getParent - Return a pointer to the IVUsersOfOneStride that owns
|
||||
/// getParent - Return a pointer to the IVUsers that owns
|
||||
/// this IVStrideUse.
|
||||
IVUsersOfOneStride *getParent() const { return Parent; }
|
||||
IVUsers *getParent() const { return Parent; }
|
||||
|
||||
/// getStride - Return the expression for the stride for the use.
|
||||
const SCEV *getStride() const { return Stride; }
|
||||
|
||||
/// setStride - Assign a new stride to this use.
|
||||
void setStride(const SCEV *Val) {
|
||||
Stride = Val;
|
||||
}
|
||||
|
||||
/// getOffset - Return the offset to add to a theoretical induction
|
||||
/// variable that starts at zero and counts up by the stride to compute
|
||||
|
@ -92,8 +98,11 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/// Parent - a pointer to the IVUsersOfOneStride that owns this IVStrideUse.
|
||||
IVUsersOfOneStride *Parent;
|
||||
/// Parent - a pointer to the IVUsers that owns this IVStrideUse.
|
||||
IVUsers *Parent;
|
||||
|
||||
/// Stride - The stride for this use.
|
||||
const SCEV *Stride;
|
||||
|
||||
/// Offset - The offset to add to the base induction expression.
|
||||
const SCEV *Offset;
|
||||
|
@ -138,42 +147,8 @@ private:
|
|||
mutable ilist_node<IVStrideUse> Sentinel;
|
||||
};
|
||||
|
||||
/// IVUsersOfOneStride - This structure keeps track of all instructions that
|
||||
/// have an operand that is based on the trip count multiplied by some stride.
|
||||
struct IVUsersOfOneStride : public ilist_node<IVUsersOfOneStride> {
|
||||
private:
|
||||
IVUsersOfOneStride(const IVUsersOfOneStride &I); // do not implement
|
||||
void operator=(const IVUsersOfOneStride &I); // do not implement
|
||||
|
||||
public:
|
||||
IVUsersOfOneStride() : Stride(0) {}
|
||||
|
||||
explicit IVUsersOfOneStride(const SCEV *stride) : Stride(stride) {}
|
||||
|
||||
/// Stride - The stride for all the contained IVStrideUses. This is
|
||||
/// a constant for affine strides.
|
||||
const SCEV *Stride;
|
||||
|
||||
/// Users - Keep track of all of the users of this stride as well as the
|
||||
/// initial value and the operand that uses the IV.
|
||||
ilist<IVStrideUse> Users;
|
||||
|
||||
void addUser(const SCEV *Offset, Instruction *User, Value *Operand) {
|
||||
Users.push_back(new IVStrideUse(this, Offset, User, Operand));
|
||||
}
|
||||
|
||||
void removeUser(IVStrideUse *User) {
|
||||
Users.erase(User);
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS) const;
|
||||
|
||||
/// dump - This method is used for debugging.
|
||||
void dump() const;
|
||||
};
|
||||
|
||||
class IVUsers : public LoopPass {
|
||||
friend class IVStrideUserVH;
|
||||
friend class IVStrideUse;
|
||||
Loop *L;
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
|
@ -182,19 +157,8 @@ class IVUsers : public LoopPass {
|
|||
|
||||
/// IVUses - A list of all tracked IV uses of induction variable expressions
|
||||
/// we are interested in.
|
||||
ilist<IVUsersOfOneStride> IVUses;
|
||||
ilist<IVStrideUse> IVUses;
|
||||
|
||||
public:
|
||||
/// IVUsesByStride - A mapping from the strides in StrideOrder to the
|
||||
/// uses in IVUses.
|
||||
std::map<const SCEV *, IVUsersOfOneStride*> IVUsesByStride;
|
||||
|
||||
/// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
|
||||
/// We use this to iterate over the IVUsesByStride collection without being
|
||||
/// dependent on random ordering of pointers in the process.
|
||||
SmallVector<const SCEV *, 16> StrideOrder;
|
||||
|
||||
private:
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
|
||||
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
|
||||
|
@ -210,8 +174,8 @@ public:
|
|||
/// return true. Otherwise, return false.
|
||||
bool AddUsersIfInteresting(Instruction *I);
|
||||
|
||||
void AddUser(const SCEV *Stride, const SCEV *Offset,
|
||||
Instruction *User, Value *Operand);
|
||||
IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset,
|
||||
Instruction *User, Value *Operand);
|
||||
|
||||
/// getReplacementExpr - Return a SCEV expression which computes the
|
||||
/// value of the OperandValToReplace of the given IVStrideUse.
|
||||
|
@ -222,6 +186,14 @@ public:
|
|||
/// isUseOfPostIncrementedValue flag.
|
||||
const SCEV *getCanonicalExpr(const IVStrideUse &U) const;
|
||||
|
||||
typedef ilist<IVStrideUse>::iterator iterator;
|
||||
typedef ilist<IVStrideUse>::const_iterator const_iterator;
|
||||
iterator begin() { return IVUses.begin(); }
|
||||
iterator end() { return IVUses.end(); }
|
||||
const_iterator begin() const { return IVUses.begin(); }
|
||||
const_iterator end() const { return IVUses.end(); }
|
||||
bool empty() const { return IVUses.empty(); }
|
||||
|
||||
void print(raw_ostream &OS, const Module* = 0) const;
|
||||
|
||||
/// dump - This method is used for debugging.
|
||||
|
|
|
@ -27,10 +27,7 @@ namespace llvm {
|
|||
/// and destroy it when finished to allow the release of the associated
|
||||
/// memory.
|
||||
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
|
||||
public:
|
||||
ScalarEvolution &SE;
|
||||
|
||||
private:
|
||||
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
|
||||
InsertedExpressions;
|
||||
std::set<Value*> InsertedValues;
|
||||
|
|
|
@ -36,42 +36,30 @@ Pass *llvm::createIVUsersPass() {
|
|||
return new IVUsers();
|
||||
}
|
||||
|
||||
/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
|
||||
/// subexpression that is an AddRec from a loop other than L. An outer loop
|
||||
/// of L is OK, but not an inner loop nor a disjoint loop.
|
||||
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
|
||||
// This is very common, put it first.
|
||||
if (isa<SCEVConstant>(S))
|
||||
return false;
|
||||
if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
|
||||
for (unsigned int i=0; i< AE->getNumOperands(); i++)
|
||||
if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
|
||||
if (const Loop *newLoop = AE->getLoop()) {
|
||||
if (newLoop == L)
|
||||
return false;
|
||||
// if newLoop is an outer loop of L, this is OK.
|
||||
if (newLoop->contains(L))
|
||||
return false;
|
||||
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
|
||||
/// separate registers.
|
||||
static void CollectSubexprs(const SCEV *S,
|
||||
SmallVectorImpl<const SCEV *> &Ops,
|
||||
ScalarEvolution &SE) {
|
||||
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
|
||||
// Break out add operands.
|
||||
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
|
||||
I != E; ++I)
|
||||
CollectSubexprs(*I, Ops, SE);
|
||||
return;
|
||||
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
|
||||
// Split a non-zero base out of an addrec.
|
||||
if (!AR->getStart()->isZero()) {
|
||||
CollectSubexprs(AR->getStart(), Ops, SE);
|
||||
CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
|
||||
AR->getStepRecurrence(SE),
|
||||
AR->getLoop()), Ops, SE);
|
||||
return;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
|
||||
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
|
||||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
|
||||
#if 0
|
||||
// SCEVSDivExpr has been backed out temporarily, but will be back; we'll
|
||||
// need this when it is.
|
||||
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
|
||||
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
|
||||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
|
||||
#endif
|
||||
if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
|
||||
return containsAddRecFromDifferentLoop(CE->getOperand(), L);
|
||||
return false;
|
||||
|
||||
// Otherwise use the value itself.
|
||||
Ops.push_back(S);
|
||||
}
|
||||
|
||||
/// getSCEVStartAndStride - Compute the start and stride of this expression,
|
||||
|
@ -90,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
|
|||
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
|
||||
for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
|
||||
if (const SCEVAddRecExpr *AddRec =
|
||||
dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
|
||||
if (AddRec->getLoop() == L)
|
||||
TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
|
||||
else
|
||||
return false; // Nested IV of some sort?
|
||||
} else {
|
||||
dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
|
||||
TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
|
||||
else
|
||||
Start = SE->getAddExpr(Start, AE->getOperand(i));
|
||||
}
|
||||
} else if (isa<SCEVAddRecExpr>(SH)) {
|
||||
TheAddRec = SH;
|
||||
} else {
|
||||
return false; // not analyzable.
|
||||
}
|
||||
|
||||
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
|
||||
if (!AddRec || AddRec->getLoop() != L) return false;
|
||||
// Break down TheAddRec into its component parts.
|
||||
SmallVector<const SCEV *, 4> Subexprs;
|
||||
CollectSubexprs(TheAddRec, Subexprs, *SE);
|
||||
|
||||
// Look for an addrec on the current loop among the parts.
|
||||
const SCEV *AddRecStride = 0;
|
||||
for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
|
||||
E = Subexprs.end(); I != E; ++I) {
|
||||
const SCEV *S = *I;
|
||||
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
|
||||
if (AR->getLoop() == L) {
|
||||
*I = AR->getStart();
|
||||
AddRecStride = AR->getStepRecurrence(*SE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!AddRecStride)
|
||||
return false;
|
||||
|
||||
// Add up everything else into a start value (which may not be
|
||||
// loop-invariant).
|
||||
const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
|
||||
|
||||
// Use getSCEVAtScope to attempt to simplify other loops out of
|
||||
// the picture.
|
||||
const SCEV *AddRecStart = AddRec->getStart();
|
||||
AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
|
||||
const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
|
||||
|
||||
// FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
|
||||
// than an outer loop of the current loop, reject it. LSR has no concept of
|
||||
// operating on more than one loop at a time so don't confuse it with such
|
||||
// expressions.
|
||||
if (containsAddRecFromDifferentLoop(AddRecStart, L))
|
||||
return false;
|
||||
|
||||
Start = SE->getAddExpr(Start, AddRecStart);
|
||||
|
||||
|
@ -131,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
|
|||
|
||||
DEBUG(dbgs() << "[";
|
||||
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
|
||||
dbgs() << "] Variable stride: " << *AddRec << "\n");
|
||||
dbgs() << "] Variable stride: " << *AddRecStride << "\n");
|
||||
}
|
||||
|
||||
Stride = AddRecStride;
|
||||
|
@ -247,14 +242,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
|
|||
}
|
||||
|
||||
if (AddUserToIVUsers) {
|
||||
IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
|
||||
if (!StrideUses) { // First occurrence of this stride?
|
||||
StrideOrder.push_back(Stride);
|
||||
StrideUses = new IVUsersOfOneStride(Stride);
|
||||
IVUses.push_back(StrideUses);
|
||||
IVUsesByStride[Stride] = StrideUses;
|
||||
}
|
||||
|
||||
// Okay, we found a user that we cannot reduce. Analyze the instruction
|
||||
// and decide what to do with it. If we are a use inside of the loop, use
|
||||
// the value before incrementation, otherwise use it after incrementation.
|
||||
|
@ -262,27 +249,21 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
|
|||
// The value used will be incremented by the stride more than we are
|
||||
// expecting, so subtract this off.
|
||||
const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
|
||||
StrideUses->addUser(NewStart, User, I);
|
||||
StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
|
||||
IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
|
||||
IVUses.back().setIsUseOfPostIncrementedValue(true);
|
||||
DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
|
||||
} else {
|
||||
StrideUses->addUser(Start, User, I);
|
||||
IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
|
||||
Instruction *User, Value *Operand) {
|
||||
IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
|
||||
if (!StrideUses) { // First occurrence of this stride?
|
||||
StrideOrder.push_back(Stride);
|
||||
StrideUses = new IVUsersOfOneStride(Stride);
|
||||
IVUses.push_back(StrideUses);
|
||||
IVUsesByStride[Stride] = StrideUses;
|
||||
}
|
||||
IVUsesByStride[Stride]->addUser(Offset, User, Operand);
|
||||
IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
|
||||
Instruction *User, Value *Operand) {
|
||||
IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
|
||||
return IVUses.back();
|
||||
}
|
||||
|
||||
IVUsers::IVUsers()
|
||||
|
@ -316,15 +297,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
|
|||
/// value of the OperandValToReplace of the given IVStrideUse.
|
||||
const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
|
||||
// Start with zero.
|
||||
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
|
||||
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
|
||||
// Create the basic add recurrence.
|
||||
RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
|
||||
RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
|
||||
// Add the offset in a separate step, because it may be loop-variant.
|
||||
RetVal = SE->getAddExpr(RetVal, U.getOffset());
|
||||
// For uses of post-incremented values, add an extra stride to compute
|
||||
// the actual replacement value.
|
||||
if (U.isUseOfPostIncrementedValue())
|
||||
RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
|
||||
RetVal = SE->getAddExpr(RetVal, U.getStride());
|
||||
return RetVal;
|
||||
}
|
||||
|
||||
|
@ -333,9 +314,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
|
|||
/// isUseOfPostIncrementedValue flag.
|
||||
const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
|
||||
// Start with zero.
|
||||
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
|
||||
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
|
||||
// Create the basic add recurrence.
|
||||
RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
|
||||
RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
|
||||
// Add the offset in a separate step, because it may be loop-variant.
|
||||
RetVal = SE->getAddExpr(RetVal, U.getOffset());
|
||||
return RetVal;
|
||||
|
@ -358,24 +339,17 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
|
|||
OS << ":\n";
|
||||
|
||||
IVUsersAsmAnnotator Annotator;
|
||||
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
|
||||
std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
|
||||
IVUsesByStride.find(StrideOrder[Stride]);
|
||||
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
|
||||
OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
|
||||
|
||||
for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
|
||||
E = SI->second->Users.end(); UI != E; ++UI) {
|
||||
OS << " ";
|
||||
WriteAsOperand(OS, UI->getOperandValToReplace(), false);
|
||||
OS << " = ";
|
||||
OS << *getReplacementExpr(*UI);
|
||||
if (UI->isUseOfPostIncrementedValue())
|
||||
OS << " (post-inc)";
|
||||
OS << " in ";
|
||||
UI->getUser()->print(OS, &Annotator);
|
||||
OS << '\n';
|
||||
}
|
||||
for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
|
||||
E = IVUses.end(); UI != E; ++UI) {
|
||||
OS << " ";
|
||||
WriteAsOperand(OS, UI->getOperandValToReplace(), false);
|
||||
OS << " = "
|
||||
<< *getReplacementExpr(*UI);
|
||||
if (UI->isUseOfPostIncrementedValue())
|
||||
OS << " (post-inc)";
|
||||
OS << " in ";
|
||||
UI->getUser()->print(OS, &Annotator);
|
||||
OS << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -384,37 +358,12 @@ void IVUsers::dump() const {
|
|||
}
|
||||
|
||||
void IVUsers::releaseMemory() {
|
||||
IVUsesByStride.clear();
|
||||
StrideOrder.clear();
|
||||
Processed.clear();
|
||||
IVUses.clear();
|
||||
}
|
||||
|
||||
void IVStrideUse::deleted() {
|
||||
// Remove this user from the list.
|
||||
Parent->Users.erase(this);
|
||||
Parent->IVUses.erase(this);
|
||||
// this now dangles!
|
||||
}
|
||||
|
||||
void IVUsersOfOneStride::print(raw_ostream &OS) const {
|
||||
OS << "IV Users of one stride:\n";
|
||||
|
||||
if (Stride)
|
||||
OS << " Stride: " << *Stride << '\n';
|
||||
|
||||
OS << " Users:\n";
|
||||
|
||||
unsigned Count = 1;
|
||||
|
||||
for (ilist<IVStrideUse>::const_iterator
|
||||
I = Users.begin(), E = Users.end(); I != E; ++I) {
|
||||
const IVStrideUse &SU = *I;
|
||||
OS << " " << Count++ << '\n';
|
||||
OS << " Offset: " << *SU.getOffset() << '\n';
|
||||
OS << " Instr: " << *SU << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
void IVUsersOfOneStride::dump() const {
|
||||
print(dbgs());
|
||||
}
|
||||
|
|
|
@ -641,8 +641,24 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
|
|||
// Reuse a previously-inserted PHI, if present.
|
||||
for (BasicBlock::iterator I = L->getHeader()->begin();
|
||||
PHINode *PN = dyn_cast<PHINode>(I); ++I)
|
||||
if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
|
||||
return PN;
|
||||
if (SE.isSCEVable(PN->getType()) &&
|
||||
(SE.getEffectiveSCEVType(PN->getType()) ==
|
||||
SE.getEffectiveSCEVType(Normalized->getType())) &&
|
||||
SE.getSCEV(PN) == Normalized)
|
||||
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
|
||||
// Remember this PHI, even in post-inc mode.
|
||||
InsertedValues.insert(PN);
|
||||
// Remember the increment.
|
||||
Instruction *IncV =
|
||||
cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)
|
||||
->stripPointerCasts());
|
||||
rememberInstruction(IncV);
|
||||
// Make sure the increment is where we want it. But don't move it
|
||||
// down past a potential existing post-inc user.
|
||||
if (L == IVIncInsertLoop && !SE.DT->dominates(IncV, IVIncInsertPos))
|
||||
IncV->moveBefore(IVIncInsertPos);
|
||||
return PN;
|
||||
}
|
||||
|
||||
// Save the original insertion point so we can restore it when we're done.
|
||||
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include "llvm/Assembly/PrintModulePass.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
|
@ -234,6 +235,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
|
|||
PM.add(createLoopStrengthReducePass(getTargetLowering()));
|
||||
if (PrintLSR)
|
||||
PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
|
||||
#ifndef NDEBUG
|
||||
PM.add(createVerifierPass());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Turn exception handling constructs into something the code generators can
|
||||
|
|
|
@ -364,20 +364,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||
if (ExitingBlock)
|
||||
NeedCannIV = true;
|
||||
}
|
||||
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
|
||||
const SCEV *Stride = IU->StrideOrder[i];
|
||||
const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
|
||||
for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
|
||||
const Type *Ty =
|
||||
SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
|
||||
if (!LargestType ||
|
||||
SE->getTypeSizeInBits(Ty) >
|
||||
SE->getTypeSizeInBits(LargestType))
|
||||
LargestType = Ty;
|
||||
|
||||
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
|
||||
IU->IVUsesByStride.find(IU->StrideOrder[i]);
|
||||
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
|
||||
|
||||
if (!SI->second->Users.empty())
|
||||
NeedCannIV = true;
|
||||
NeedCannIV = true;
|
||||
}
|
||||
|
||||
// Now that we know the largest of the induction variable expressions
|
||||
|
@ -455,72 +449,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
|
|||
// add the offsets to the primary induction variable and cast, avoiding
|
||||
// the need for the code evaluation methods to insert induction variables
|
||||
// of different sizes.
|
||||
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
|
||||
const SCEV *Stride = IU->StrideOrder[i];
|
||||
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
|
||||
const SCEV *Stride = UI->getStride();
|
||||
Value *Op = UI->getOperandValToReplace();
|
||||
const Type *UseTy = Op->getType();
|
||||
Instruction *User = UI->getUser();
|
||||
|
||||
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
|
||||
IU->IVUsesByStride.find(IU->StrideOrder[i]);
|
||||
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
|
||||
ilist<IVStrideUse> &List = SI->second->Users;
|
||||
for (ilist<IVStrideUse>::iterator UI = List.begin(),
|
||||
E = List.end(); UI != E; ++UI) {
|
||||
Value *Op = UI->getOperandValToReplace();
|
||||
const Type *UseTy = Op->getType();
|
||||
Instruction *User = UI->getUser();
|
||||
// Compute the final addrec to expand into code.
|
||||
const SCEV *AR = IU->getReplacementExpr(*UI);
|
||||
|
||||
// Compute the final addrec to expand into code.
|
||||
const SCEV *AR = IU->getReplacementExpr(*UI);
|
||||
|
||||
// Evaluate the expression out of the loop, if possible.
|
||||
if (!L->contains(UI->getUser())) {
|
||||
const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
|
||||
if (ExitVal->isLoopInvariant(L))
|
||||
AR = ExitVal;
|
||||
}
|
||||
|
||||
// FIXME: It is an extremely bad idea to indvar substitute anything more
|
||||
// complex than affine induction variables. Doing so will put expensive
|
||||
// polynomial evaluations inside of the loop, and the str reduction pass
|
||||
// currently can only reduce affine polynomials. For now just disable
|
||||
// indvar subst on anything more complex than an affine addrec, unless
|
||||
// it can be expanded to a trivial value.
|
||||
if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
|
||||
continue;
|
||||
|
||||
// Determine the insertion point for this user. By default, insert
|
||||
// immediately before the user. The SCEVExpander class will automatically
|
||||
// hoist loop invariants out of the loop. For PHI nodes, there may be
|
||||
// multiple uses, so compute the nearest common dominator for the
|
||||
// incoming blocks.
|
||||
Instruction *InsertPt = User;
|
||||
if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
|
||||
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
|
||||
if (PHI->getIncomingValue(i) == Op) {
|
||||
if (InsertPt == User)
|
||||
InsertPt = PHI->getIncomingBlock(i)->getTerminator();
|
||||
else
|
||||
InsertPt =
|
||||
DT->findNearestCommonDominator(InsertPt->getParent(),
|
||||
PHI->getIncomingBlock(i))
|
||||
->getTerminator();
|
||||
}
|
||||
|
||||
// Now expand it into actual Instructions and patch it into place.
|
||||
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
|
||||
|
||||
// Patch the new value into place.
|
||||
if (Op->hasName())
|
||||
NewVal->takeName(Op);
|
||||
User->replaceUsesOfWith(Op, NewVal);
|
||||
UI->setOperandValToReplace(NewVal);
|
||||
DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
|
||||
<< " into = " << *NewVal << "\n");
|
||||
++NumRemoved;
|
||||
Changed = true;
|
||||
|
||||
// The old value may be dead now.
|
||||
DeadInsts.push_back(Op);
|
||||
// Evaluate the expression out of the loop, if possible.
|
||||
if (!L->contains(UI->getUser())) {
|
||||
const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
|
||||
if (ExitVal->isLoopInvariant(L))
|
||||
AR = ExitVal;
|
||||
}
|
||||
|
||||
// FIXME: It is an extremely bad idea to indvar substitute anything more
|
||||
// complex than affine induction variables. Doing so will put expensive
|
||||
// polynomial evaluations inside of the loop, and the str reduction pass
|
||||
// currently can only reduce affine polynomials. For now just disable
|
||||
// indvar subst on anything more complex than an affine addrec, unless
|
||||
// it can be expanded to a trivial value.
|
||||
if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
|
||||
continue;
|
||||
|
||||
// Determine the insertion point for this user. By default, insert
|
||||
// immediately before the user. The SCEVExpander class will automatically
|
||||
// hoist loop invariants out of the loop. For PHI nodes, there may be
|
||||
// multiple uses, so compute the nearest common dominator for the
|
||||
// incoming blocks.
|
||||
Instruction *InsertPt = User;
|
||||
if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
|
||||
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
|
||||
if (PHI->getIncomingValue(i) == Op) {
|
||||
if (InsertPt == User)
|
||||
InsertPt = PHI->getIncomingBlock(i)->getTerminator();
|
||||
else
|
||||
InsertPt =
|
||||
DT->findNearestCommonDominator(InsertPt->getParent(),
|
||||
PHI->getIncomingBlock(i))
|
||||
->getTerminator();
|
||||
}
|
||||
|
||||
// Now expand it into actual Instructions and patch it into place.
|
||||
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
|
||||
|
||||
// Patch the new value into place.
|
||||
if (Op->hasName())
|
||||
NewVal->takeName(Op);
|
||||
User->replaceUsesOfWith(Op, NewVal);
|
||||
UI->setOperandValToReplace(NewVal);
|
||||
DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
|
||||
<< " into = " << *NewVal << "\n");
|
||||
++NumRemoved;
|
||||
Changed = true;
|
||||
|
||||
// The old value may be dead now.
|
||||
DeadInsts.push_back(Op);
|
||||
}
|
||||
|
||||
// Clear the rewriter cache, because values that are in the rewriter's cache
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,11 @@
|
|||
; RUN: llc < %s -march=arm | FileCheck %s
|
||||
|
||||
; This loop is rewritten with an indvar which counts down, which
|
||||
; frees up a register from holding the trip count.
|
||||
|
||||
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
|
||||
entry:
|
||||
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
|
||||
; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
|
||||
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
|
||||
br i1 %0, label %return, label %bb
|
||||
|
||||
|
@ -19,3 +22,26 @@ return: ; preds = %bb, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
; This loop has a non-address use of the count-up indvar, so
|
||||
; it'll remain. Now the original store uses a negative-stride address.
|
||||
|
||||
define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
|
||||
entry:
|
||||
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
|
||||
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
|
||||
br i1 %0, label %return, label %bb
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
||||
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
|
||||
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
|
||||
store i32 %A, i32* %tmp2
|
||||
store i32 %indvar, i32* null
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
||||
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
|
||||
br i1 %1, label %return, label %bb
|
||||
|
||||
return: ; preds = %bb, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
|
||||
; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
|
||||
; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
|
||||
; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
|
||||
; This test really wants to check that the resultant "cond_true" block only
|
||||
; has a single store in it, and that cond_true55 only has code to materialize
|
||||
; the constant and do a store. We do *not* want something like this:
|
||||
|
|
|
@ -1,25 +1,29 @@
|
|||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
|
||||
; rdar://7387640
|
||||
|
||||
; FIXME: We still need to rewrite array reference iv of stride -4 with loop
|
||||
; count iv of stride -1.
|
||||
; This now reduces to a single induction variable.
|
||||
|
||||
; TODO: It still gets a GPR shuffle at the end of the loop
|
||||
; This is because something in instruction selection has decided
|
||||
; that comparing the pre-incremented value with zero is better
|
||||
; than comparing the post-incremented value with -4.
|
||||
|
||||
@G = external global i32 ; <i32*> [#uses=2]
|
||||
@array = external global i32* ; <i32**> [#uses=1]
|
||||
|
||||
define arm_apcscc void @t() nounwind optsize {
|
||||
; CHECK: t:
|
||||
; CHECK: mov.w r2, #4000
|
||||
; CHECK: movw r3, #1001
|
||||
; CHECK: mov.w r2, #1000
|
||||
entry:
|
||||
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
|
||||
br label %bb
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
; CHECK: LBB1_1:
|
||||
; CHECK: subs r3, #1
|
||||
; CHECK: cmp r3, #0
|
||||
; CHECK: sub.w r2, r2, #4
|
||||
; CHECK: cmp r2, #0
|
||||
; CHECK: sub.w r9, r2, #1
|
||||
; CHECK: mov r2, r9
|
||||
|
||||
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
|
||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
||||
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
|
||||
|
||||
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
||||
; CHECK: t1:
|
||||
; CHECK: it ne
|
||||
; CHECK: cmpne
|
||||
|
@ -20,12 +20,12 @@ cond_next:
|
|||
}
|
||||
|
||||
; FIXME: Check the number of unconditional branches after adding branch folding post ifcvt.
|
||||
define i32 @t2(i32 %a, i32 %b) {
|
||||
define i32 @t2(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: ite le
|
||||
; CHECK: suble
|
||||
; CHECK: ite gt
|
||||
; CHECK: subgt
|
||||
; CHECK: suble
|
||||
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
||||
br i1 %tmp1434, label %bb17, label %bb.outer
|
||||
|
||||
|
@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
|
|||
|
||||
@x = external global i32* ; <i32**> [#uses=1]
|
||||
|
||||
define void @foo(i32 %a) {
|
||||
define void @foo(i32 %a) nounwind {
|
||||
entry:
|
||||
%tmp = load i32** @x ; <i32*> [#uses=1]
|
||||
store i32 %a, i32* %tmp
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @t3(i32 %a, i32 %b) {
|
||||
define void @t3(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: t3:
|
||||
; CHECK: it lt
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
|
||||
; RUN: grep {asm-printer} | grep 31
|
||||
; RUN: grep {asm-printer} | grep 34
|
||||
|
||||
target datalayout = "e-p:32:32"
|
||||
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
|
||||
|
@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
|
|||
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
|
||||
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
|
||||
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
|
||||
%tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
|
||||
%tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||
br i1 %tmp.upgrd.8, label %cond_true, label %return
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
|
||||
; RUN: grep push | count 3
|
||||
|
||||
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
|
||||
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
|
||||
entry:
|
||||
icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1]
|
||||
br i1 %0, label %bb.preheader, label %return
|
||||
|
|
|
@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
|
|||
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
|
||||
; CHECK: %bb.i28.i
|
||||
; CHECK: addl $2
|
||||
; CHECK: addl $2
|
||||
; CHECK: addl $-2
|
||||
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
|
||||
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
|
||||
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
|
||||
|
|
|
@ -1,12 +1,7 @@
|
|||
; RUN: llc < %s -march=x86 >%t
|
||||
|
||||
; TODO: Enhance full lsr mode to get this:
|
||||
; RUNX: grep {addl \\\$4,} %t | count 3
|
||||
; RUNX: not grep {,%} %t
|
||||
|
||||
; For now, it should find this, which is still pretty good:
|
||||
; RUN: not grep {addl \\\$4,} %t
|
||||
; RUN: grep {,%} %t | count 6
|
||||
; RUN: grep {addl \\\$4,} %t | count 3
|
||||
; RUN: not grep {,%} %t
|
||||
|
||||
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
|
||||
entry:
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
; RUN: llc < %s -march=x86-64 -o %t
|
||||
; RUN: grep inc %t | count 1
|
||||
; RUN: not grep inc %t
|
||||
; RUN: grep dec %t | count 2
|
||||
; RUN: grep addq %t | count 13
|
||||
; RUN: not grep addb %t
|
||||
; RUN: grep leaq %t | count 9
|
||||
; RUN: grep leal %t | count 3
|
||||
; RUN: grep movq %t | count 5
|
||||
; RUN: not grep leaq %t
|
||||
; RUN: not grep leal %t
|
||||
; RUN: not grep movq %t
|
||||
|
||||
; IV users in each of the loops from other loops shouldn't cause LSR
|
||||
; to insert new induction variables. Previously it would create a
|
||||
|
|
|
@ -1,11 +1,24 @@
|
|||
; RUN: llc < %s -march=x86 -relocation-model=pic | \
|
||||
; RUN: grep {, 4} | count 1
|
||||
; RUN: llc < %s -march=x86 | not grep lea
|
||||
; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
|
||||
; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
|
||||
;
|
||||
; Make sure the common loop invariant A is hoisted up to preheader,
|
||||
; since too many registers are needed to subsume it into the addressing modes.
|
||||
; It's safe to sink A in when it's not pic.
|
||||
|
||||
; PIC: align
|
||||
; PIC: movl $4, -4([[REG:%e[a-z]+]])
|
||||
; PIC: movl $5, ([[REG]])
|
||||
; PIC: addl $4, [[REG]]
|
||||
; PIC: decl {{%e[[a-z]+}}
|
||||
; PIC: jne
|
||||
|
||||
; STATIC: align
|
||||
; STATIC: movl $4, -4(%ecx)
|
||||
; STATIC: movl $5, (%ecx)
|
||||
; STATIC: addl $4, %ecx
|
||||
; STATIC: decl %eax
|
||||
; STATIC: jne
|
||||
|
||||
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
|
||||
|
||||
define void @test(i32 %row, i32 %N.in) nounwind {
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
|
||||
; RUN: grep {A+} | count 2
|
||||
;
|
||||
; Make sure the common loop invariant A is not hoisted up to preheader,
|
||||
; since it can be subsumed into the addressing modes.
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
|
||||
|
||||
; CHECK: align
|
||||
; CHECK: movl $4, -4(%ecx)
|
||||
; CHECK: movl $5, (%ecx)
|
||||
; CHECK: addl $4, %ecx
|
||||
; CHECK: decl %eax
|
||||
; CHECK: jne
|
||||
|
||||
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
|
||||
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
; RUN: llc < %s -march=x86 -relocation-model=static | \
|
||||
; RUN: grep {A+} | count 2
|
||||
;
|
||||
; Make sure the common loop invariant A is not hoisted up to preheader,
|
||||
; since it can be subsumed into the addressing mode in all uses.
|
||||
; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
|
||||
|
||||
; CHECK: align
|
||||
; CHECK: movl $4, -4(%ecx)
|
||||
; CHECK: movl $5, (%ecx)
|
||||
; CHECK: addl $4, %ecx
|
||||
; CHECK: decl %eax
|
||||
; CHECK: jne
|
||||
|
||||
@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
|
||||
|
||||
|
|
|
@ -1,5 +1,19 @@
|
|||
; RUN: llc < %s -march=x86 | grep cmp | grep 64
|
||||
; RUN: llc < %s -march=x86 | not grep inc
|
||||
; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
|
||||
; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
|
||||
|
||||
; By starting the IV at -64 instead of 0, a cmp is eliminated,
|
||||
; as the flags from the add can be used directly.
|
||||
|
||||
; STATIC: movl $-64, %ecx
|
||||
|
||||
; STATIC: movl %eax, _state+76(%ecx)
|
||||
; STATIC: addl $16, %ecx
|
||||
; STATIC: jne
|
||||
|
||||
; In PIC mode the symbol can't be folded, so the change-compare-stride
|
||||
; trick applies.
|
||||
|
||||
; PIC: cmpl $64
|
||||
|
||||
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
||||
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
|
||||
|
||||
; CHECK: leal 16(%eax), %edx
|
||||
; CHECK: align
|
||||
; CHECK: addl $4, %edx
|
||||
; CHECK: decl %ecx
|
||||
; CHECK: jne LBB1_2
|
||||
|
||||
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
|
||||
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
|
||||
|
|
|
@ -0,0 +1,386 @@
|
|||
; RUN: llc < %s -march=x86-64 -O3 | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
; Full strength reduction reduces register pressure from 5 to 4 here.
|
||||
; Instruction selection should use the FLAGS value from the dec for
|
||||
; the branch. Scheduling should push the adds upwards.
|
||||
|
||||
; CHECK: full_me_0:
|
||||
; CHECK: movsd (%rsi), %xmm0
|
||||
; CHECK: addq $8, %rsi
|
||||
; CHECK: mulsd (%rdx), %xmm0
|
||||
; CHECK: addq $8, %rdx
|
||||
; CHECK: movsd %xmm0, (%rdi)
|
||||
; CHECK: addq $8, %rdi
|
||||
; CHECK: decq %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; full_me_0: element-wise product A[i] = B[i] * C[i] for i = 0 .. n-1.
; %i is used only to index the three arrays and to form the exit test.
define void @full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
; Load B[i] and C[i], multiply, and store the product to A[i].
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Mostly-full strength reduction means we do full strength reduction on all
|
||||
; except for the offsets.
|
||||
;
|
||||
; Given a choice between constant offsets -2048 and 2048, choose the negative
|
||||
; value, because at boundary conditions it has a smaller encoding.
|
||||
; TODO: That's an over-general heuristic. It would be better for the target
|
||||
; to indicate what the encoding cost would be. Then using a 2048 offset
|
||||
; would be better on x86-64, since the start value would be 0 instead of
|
||||
; 2048.
|
||||
|
||||
; CHECK: mostly_full_me_0:
|
||||
; CHECK: movsd -2048(%rsi), %xmm0
|
||||
; CHECK: mulsd -2048(%rdx), %xmm0
|
||||
; CHECK: movsd %xmm0, -2048(%rdi)
|
||||
; CHECK: movsd (%rsi), %xmm0
|
||||
; CHECK: addq $8, %rsi
|
||||
; CHECK: divsd (%rdx), %xmm0
|
||||
; CHECK: addq $8, %rdx
|
||||
; CHECK: movsd %xmm0, (%rdi)
|
||||
; CHECK: addq $8, %rdi
|
||||
; CHECK: decq %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; mostly_full_me_0: each iteration touches two positions of every array:
;   A[i]       = B[i]       * C[i]
;   A[i + 256] = B[i + 256] / C[i + 256]
; The two access groups are a constant 256 elements apart (2048 bytes for
; 8-byte doubles).
define void @mostly_full_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
; First group: addresses at index i, product stored to A[i].
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Second group: addresses at index j = i + 256, quotient stored to A[j].
%j = add i64 %i, 256
|
||||
%Aj = getelementptr inbounds double* %A, i64 %j
|
||||
%Bj = getelementptr inbounds double* %B, i64 %j
|
||||
%Cj = getelementptr inbounds double* %C, i64 %j
|
||||
%t3 = load double* %Bj
|
||||
%t4 = load double* %Cj
|
||||
%o = fdiv double %t3, %t4
|
||||
store double %o, double* %Aj
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; A minor variation on mostly_full_me_0.
|
||||
; Prefer to start the indvar at 0.
|
||||
|
||||
; CHECK: mostly_full_me_1:
|
||||
; CHECK: movsd (%rsi), %xmm0
|
||||
; CHECK: mulsd (%rdx), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdi)
|
||||
; CHECK: movsd -2048(%rsi), %xmm0
|
||||
; CHECK: addq $8, %rsi
|
||||
; CHECK: divsd -2048(%rdx), %xmm0
|
||||
; CHECK: addq $8, %rdx
|
||||
; CHECK: movsd %xmm0, -2048(%rdi)
|
||||
; CHECK: addq $8, %rdi
|
||||
; CHECK: decq %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; mostly_full_me_1: same shape as mostly_full_me_0, but the second access
; group sits 256 elements *behind* the first instead of ahead of it:
;   A[i]       = B[i]       * C[i]
;   A[i - 256] = B[i - 256] / C[i - 256]
define void @mostly_full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
; First group: addresses at index i, product stored to A[i].
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Second group: addresses at index j = i - 256, quotient stored to A[j].
%j = sub i64 %i, 256
|
||||
%Aj = getelementptr inbounds double* %A, i64 %j
|
||||
%Bj = getelementptr inbounds double* %B, i64 %j
|
||||
%Cj = getelementptr inbounds double* %C, i64 %j
|
||||
%t3 = load double* %Bj
|
||||
%t4 = load double* %Cj
|
||||
%o = fdiv double %t3, %t4
|
||||
store double %o, double* %Aj
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; A slightly less minor variation on mostly_full_me_0.
|
||||
|
||||
; CHECK: mostly_full_me_2:
|
||||
; CHECK: movsd (%rsi), %xmm0
|
||||
; CHECK: mulsd (%rdx), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdi)
|
||||
; CHECK: movsd -4096(%rsi), %xmm0
|
||||
; CHECK: addq $8, %rsi
|
||||
; CHECK: divsd -4096(%rdx), %xmm0
|
||||
; CHECK: addq $8, %rdx
|
||||
; CHECK: movsd %xmm0, -4096(%rdi)
|
||||
; CHECK: addq $8, %rdi
|
||||
; CHECK: decq %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; mostly_full_me_2: neither access group uses the raw counter; both are
; offset from it, in opposite directions:
;   A[i + 256] = B[i + 256] * C[i + 256]
;   A[i - 256] = B[i - 256] / C[i - 256]
; The groups are therefore 512 elements apart (4096 bytes for 8-byte doubles).
define void @mostly_full_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
; First group: addresses at index k = i + 256, product stored to A[k].
%k = add i64 %i, 256
|
||||
%Ak = getelementptr inbounds double* %A, i64 %k
|
||||
%Bk = getelementptr inbounds double* %B, i64 %k
|
||||
%Ck = getelementptr inbounds double* %C, i64 %k
|
||||
%t1 = load double* %Bk
|
||||
%t2 = load double* %Ck
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ak
|
||||
; Second group: addresses at index j = i - 256, quotient stored to A[j].
%j = sub i64 %i, 256
|
||||
%Aj = getelementptr inbounds double* %A, i64 %j
|
||||
%Bj = getelementptr inbounds double* %B, i64 %j
|
||||
%Cj = getelementptr inbounds double* %C, i64 %j
|
||||
%t3 = load double* %Bj
|
||||
%t4 = load double* %Cj
|
||||
%o = fdiv double %t3, %t4
|
||||
store double %o, double* %Aj
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; In this test, the counting IV exit value is used, so full strength reduction
|
||||
; would not reduce register pressure. IndVarSimplify ought to simplify such
|
||||
; cases away, but it's useful here to verify that LSR's register pressure
|
||||
; heuristics are working as expected.
|
||||
|
||||
; CHECK: count_me_0:
|
||||
; CHECK: movsd (%rsi,%rax,8), %xmm0
|
||||
; CHECK: mulsd (%rdx,%rax,8), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdi,%rax,8)
|
||||
; CHECK: incq %rax
|
||||
; CHECK: cmpq %rax, %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; count_me_0: same loop body as full_me_0 (A[i] = B[i] * C[i]), but the
; function returns the counter's value on exit, so %i.next stays live after
; the loop. Returns 0 when the loop is skipped.
define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
; Load B[i] and C[i], multiply, and store the product to A[i].
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
; Result: 0 if the guard failed, otherwise the final incremented counter.
%q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
|
||||
ret i64 %q
|
||||
}
|
||||
|
||||
; In this test, the trip count value is used, so full strength reduction
|
||||
; would not reduce register pressure.
|
||||
; (though it would reduce register pressure inside the loop...)
|
||||
|
||||
; CHECK: count_me_1:
|
||||
; CHECK: movsd (%rsi,%rax,8), %xmm0
|
||||
; CHECK: mulsd (%rdx,%rax,8), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdi,%rax,8)
|
||||
; CHECK: incq %rax
|
||||
; CHECK: cmpq %rax, %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; count_me_1: same loop body as full_me_0 (A[i] = B[i] * C[i]), but the
; function returns the bound %n itself when the loop ran, so %n stays live
; after the loop. Returns 0 when the loop is skipped.
define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
; Load B[i] and C[i], multiply, and store the product to A[i].
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
; Result: 0 if the guard failed, otherwise the bound n.
%q = phi i64 [ 0, %entry ], [ %n, %loop ]
|
||||
ret i64 %q
|
||||
}
|
||||
|
||||
; Full strength reduction doesn't save any registers here because the
|
||||
; loop tripcount is a constant.
|
||||
|
||||
; CHECK: count_me_2:
|
||||
; CHECK: movl $10, %eax
|
||||
; CHECK: align
|
||||
; CHECK: BB7_1:
|
||||
; CHECK: movsd -40(%rdi,%rax,8), %xmm0
|
||||
; CHECK: addsd -40(%rsi,%rax,8), %xmm0
|
||||
; CHECK: movsd %xmm0, -40(%rdx,%rax,8)
|
||||
; CHECK: movsd (%rdi,%rax,8), %xmm0
|
||||
; CHECK: subsd (%rsi,%rax,8), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdx,%rax,8)
|
||||
; CHECK: incq %rax
|
||||
; CHECK: cmpq $5010, %rax
|
||||
; CHECK: jne
|
||||
|
||||
; count_me_2: fixed-trip-count loop (exit when i.next == 5000) with no entry
; guard and no %n parameter. Each iteration computes
;   C[i + 5]  = A[i + 5]  + B[i + 5]
;   C[i + 10] = A[i + 10] - B[i + 10]
define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
|
||||
; First group: index i + 5, sum stored to C.
%i5 = add i64 %i, 5
|
||||
%Ai = getelementptr double* %A, i64 %i5
|
||||
%t2 = load double* %Ai
|
||||
%Bi = getelementptr double* %B, i64 %i5
|
||||
%t4 = load double* %Bi
|
||||
%t5 = fadd double %t2, %t4
|
||||
%Ci = getelementptr double* %C, i64 %i5
|
||||
store double %t5, double* %Ci
|
||||
; Second group: index i + 10, difference stored to C.
%i10 = add i64 %i, 10
|
||||
%Ai10 = getelementptr double* %A, i64 %i10
|
||||
%t9 = load double* %Ai10
|
||||
%Bi10 = getelementptr double* %B, i64 %i10
|
||||
%t11 = load double* %Bi10
|
||||
%t12 = fsub double %t9, %t11
|
||||
%Ci10 = getelementptr double* %C, i64 %i10
|
||||
store double %t12, double* %Ci10
|
||||
; Step the counter by one; the bound is the constant 5000.
%i.next = add i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, 5000
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; This should be fully strength-reduced to reduce register pressure.
|
||||
|
||||
; CHECK: full_me_1:
|
||||
; CHECK: align
|
||||
; CHECK: BB8_1:
|
||||
; CHECK: movsd (%rdi), %xmm0
|
||||
; CHECK: addsd (%rsi), %xmm0
|
||||
; CHECK: movsd %xmm0, (%rdx)
|
||||
; CHECK: movsd 40(%rdi), %xmm0
|
||||
; CHECK: addq $8, %rdi
|
||||
; CHECK: subsd 40(%rsi), %xmm0
|
||||
; CHECK: addq $8, %rsi
|
||||
; CHECK: movsd %xmm0, 40(%rdx)
|
||||
; CHECK: addq $8, %rdx
|
||||
; CHECK: decq %rcx
|
||||
; CHECK: jne
|
||||
|
||||
; full_me_1: same loop body as count_me_2, but the exit bound is the
; parameter %n rather than a constant. There is no entry guard: the loop
; body always runs at least once. Each iteration computes
;   C[i + 5]  = A[i + 5]  + B[i + 5]
;   C[i + 10] = A[i + 10] - B[i + 10]
define void @full_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
|
||||
; First group: index i + 5, sum stored to C.
%i5 = add i64 %i, 5
|
||||
%Ai = getelementptr double* %A, i64 %i5
|
||||
%t2 = load double* %Ai
|
||||
%Bi = getelementptr double* %B, i64 %i5
|
||||
%t4 = load double* %Bi
|
||||
%t5 = fadd double %t2, %t4
|
||||
%Ci = getelementptr double* %C, i64 %i5
|
||||
store double %t5, double* %Ci
|
||||
; Second group: index i + 10, difference stored to C.
%i10 = add i64 %i, 10
|
||||
%Ai10 = getelementptr double* %A, i64 %i10
|
||||
%t9 = load double* %Ai10
|
||||
%Bi10 = getelementptr double* %B, i64 %i10
|
||||
%t11 = load double* %Bi10
|
||||
%t12 = fsub double %t9, %t11
|
||||
%Ci10 = getelementptr double* %C, i64 %i10
|
||||
store double %t12, double* %Ci10
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; This is a variation on full_me_0 in which the 0,+,1 induction variable
|
||||
; has a non-address use, pinning that value in a register.
|
||||
|
||||
; CHECK: count_me_3:
|
||||
; CHECK: call
|
||||
; CHECK: movsd (%r15,%r13,8), %xmm0
|
||||
; CHECK: mulsd (%r14,%r13,8), %xmm0
|
||||
; CHECK: movsd %xmm0, (%r12,%r13,8)
|
||||
; CHECK: incq %r13
|
||||
; CHECK: cmpq %r13, %rbx
|
||||
; CHECK: jne
|
||||
|
||||
declare void @use(i64)
|
||||
|
||||
; count_me_3: same loop as full_me_0 (A[i] = B[i] * C[i]) plus a call that
; passes the raw counter %i to the external function @use on every
; iteration, giving %i a use that is not an address computation.
define void @count_me_3(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
||||
entry:
|
||||
; Guard: the loop is entered only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
|
||||
br i1 %t0, label %loop, label %return
|
||||
|
||||
loop:
|
||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
||||
; Non-address use of the counter.
call void @use(i64 %i)
|
||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
||||
; Load B[i] and C[i], multiply, and store the product to A[i].
%t1 = load double* %Bi
|
||||
%t2 = load double* %Ci
|
||||
%m = fmul double %t1, %t2
|
||||
store double %m, double* %Ai
|
||||
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
|
||||
%exitcond = icmp eq i64 %i.next, %n
|
||||
br i1 %exitcond, label %return, label %loop
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
|
@ -169,7 +169,7 @@ loop:
|
|||
%indvar.i24 = and i64 %indvar, 16777215
|
||||
%t3 = getelementptr double* %d, i64 %indvar.i24
|
||||
%t4 = load double* %t3
|
||||
%t5 = fmul double %t4, 2.3
|
||||
%t5 = fdiv double %t4, 2.3
|
||||
store double %t5, double* %t3
|
||||
%t6 = getelementptr double* %d, i64 %indvar
|
||||
%t7 = load double* %t6
|
||||
|
@ -199,7 +199,7 @@ loop:
|
|||
%indvar.i24 = ashr i64 %s1, 24
|
||||
%t3 = getelementptr double* %d, i64 %indvar.i24
|
||||
%t4 = load double* %t3
|
||||
%t5 = fmul double %t4, 2.3
|
||||
%t5 = fdiv double %t4, 2.3
|
||||
store double %t5, double* %t3
|
||||
%t6 = getelementptr double* %d, i64 %indvar
|
||||
%t7 = load double* %t6
|
||||
|
@ -229,7 +229,7 @@ loop:
|
|||
%indvar.i24 = ashr i64 %s1, 24
|
||||
%t3 = getelementptr double* %d, i64 %indvar.i24
|
||||
%t4 = load double* %t3
|
||||
%t5 = fmul double %t4, 2.3
|
||||
%t5 = fdiv double %t4, 2.3
|
||||
store double %t5, double* %t3
|
||||
%t6 = getelementptr double* %d, i64 %indvar
|
||||
%t7 = load double* %t6
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 37
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34
|
||||
; PR3495
|
||||
; The loop reversal kicks in once here, resulting in one fewer instruction.
|
||||
|
||||
target triple = "i386-pc-linux-gnu"
|
||||
@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1]
|
||||
|
|
|
@ -25,7 +25,7 @@ bb1: ; preds = %bb2, %bb.nph
|
|||
%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
|
||||
%tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
|
||||
%tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
|
||||
%z0 = add i64 %tmp4, 5203
|
||||
%z0 = add i64 %tmp3, 5203
|
||||
%tmp5 = getelementptr double* %p, i64 %z0 ; <double*> [#uses=1]
|
||||
%tmp6 = load double* %tmp5, align 8 ; <double> [#uses=1]
|
||||
%tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: opt < %s -loop-reduce -S | grep ugt
|
||||
; PR2535
|
||||
; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
|
||||
|
||||
@.str = internal constant [4 x i8] c"%d\0A\00"
|
||||
|
||||
|
@ -16,7 +15,7 @@ forbody:
|
|||
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
|
||||
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
|
||||
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
|
||||
%cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
|
||||
%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
|
||||
br i1 %cmp, label %forbody, label %afterfor
|
||||
|
||||
afterfor: ; preds = %forcond
|
||||
|
|
|
@ -1,10 +1,15 @@
|
|||
; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
|
||||
; RUN: llc < %s -o - | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-apple-darwin9"
|
||||
|
||||
; This is like change-compare-stride-trickiness-1.ll except the comparison
|
||||
; happens before the relevant use, so the comparison stride can't be
|
||||
; easily changed.
|
||||
; The comparison happens before the relevant use, but it can still be rewritten
|
||||
; to compare with zero.
|
||||
|
||||
; CHECK: foo:
|
||||
; CHECK: align
|
||||
; CHECK: incl %eax
|
||||
; CHECK-NEXT: decl %ecx
|
||||
; CHECK-NEXT: jne
|
||||
|
||||
define void @foo() nounwind {
|
||||
entry:
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$8}
|
||||
; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$10}
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-apple-darwin9"
|
||||
|
||||
; The comparison happens after the relevant use, so the stride can easily
|
||||
; be changed. The comparison can be done in a narrower mode than the
|
||||
; induction variable.
|
||||
; TODO: By making the first store post-increment as well, the loop setup
|
||||
; could be made simpler.
|
||||
|
||||
define void @foo() nounwind {
|
||||
entry:
|
||||
|
|
|
@ -19,7 +19,7 @@ bb3: ; preds = %bb1
|
|||
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
|
||||
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
|
||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||
; CHECK: sub i32 %lsr.iv, 1
|
||||
; CHECK: add i32 %lsr.iv, -1
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb3, %entry
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; Check that the index of 'P[outer]' is pulled out of the loop.
|
||||
; RUN: opt < %s -loop-reduce -S | \
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
|
||||
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
|
||||
|
||||
declare i1 @pred()
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; Check that the index of 'P[outer]' is pulled out of the loop.
|
||||
; RUN: opt < %s -loop-reduce -S | \
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
|
||||
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
|
||||
|
||||
declare i1 @pred()
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; Check that this test makes INDVAR and related stuff dead, because P[indvar]
|
||||
; gets reduced, making INDVAR dead.
|
||||
|
||||
; RUN: opt < %s -loop-reduce -S | not grep INDVAR
|
||||
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | not grep INDVAR
|
||||
|
||||
declare i1 @pred()
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -analyze -iv-users | grep {Stride i64 {3,+,2}<%loop>:}
|
||||
; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)}
|
||||
|
||||
; The value of %r is dependent on a polynomial iteration expression.
|
||||
|
||||
|
|
|
@ -7,10 +7,12 @@ define void @test(i32* %P) {
|
|||
; <label>:0
|
||||
br label %Loop
|
||||
Loop: ; preds = %Loop, %0
|
||||
%i = phi i32 [ 0, %0 ], [ %i.next, %Loop ]
|
||||
%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ] ; <i32> [#uses=2]
|
||||
%STRRED = getelementptr i32* %P, i32 %INDVAR ; <i32*> [#uses=1]
|
||||
store i32 0, i32* %STRRED
|
||||
%INDVAR2 = add i32 %INDVAR, 1 ; <i32> [#uses=1]
|
||||
%i.next = add i32 %i, 1
|
||||
%cond = call i1 @pred( ) ; <i1> [#uses=1]
|
||||
br i1 %cond, label %Loop, label %Out
|
||||
Out: ; preds = %Loop
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt < %s -loop-reduce -S | \
|
||||
; RUN: grep {add i32 %lsr.iv.next, 1}
|
||||
; RUN: grep {add i32 %indvar630.ui, 1}
|
||||
;
|
||||
; Make sure that the use of the IV outside of the loop (the store) uses the
|
||||
; post incremented value of the IV, not the preincremented value. This
|
||||
|
|
Loading…
Reference in New Issue