2004-10-19 05:08:22 +08:00
|
|
|
//===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2004-10-19 05:08:22 +08:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file was developed by Nate Begeman and is distributed under the
|
|
|
|
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2004-10-19 05:08:22 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This pass performs a strength reduction on array references inside loops that
|
|
|
|
// have as one or more of their components the loop induction variable. This is
|
|
|
|
// accomplished by creating a new Value to hold the initial value of the array
|
|
|
|
// access for the first iteration, and then creating a new GEP instruction in
|
|
|
|
// the loop to increment the value by the appropriate amount.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-08-04 07:30:08 +08:00
|
|
|
#define DEBUG_TYPE "loop-reduce"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
|
|
|
#include "llvm/Constants.h"
|
|
|
|
#include "llvm/Instructions.h"
|
|
|
|
#include "llvm/Type.h"
|
2005-03-04 12:04:26 +08:00
|
|
|
#include "llvm/DerivedTypes.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Analysis/Dominators.h"
|
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Support/CFG.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Support/GetElementPtrTypeIterator.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2005-03-04 12:04:26 +08:00
|
|
|
#include "llvm/Target/TargetData.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2005-07-31 02:22:27 +08:00
|
|
|
#include <algorithm>
|
2004-10-19 05:08:22 +08:00
|
|
|
#include <set>
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
// NumReduced - Counts GEP instructions strength-reduced by this pass;
// reported under "-stats" with the "loop-reduce" label.
Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced");
|
|
|
|
|
2005-03-07 05:58:22 +08:00
|
|
|
/// GEPCache - A recursive map keyed on GEP operand values.  Each node holds
/// a cached PHI node and a map from the next operand value to the child
/// node.  NOTE(review): presumably used to share partially-computed GEP
/// expressions across GEPs with a common operand prefix — the consumer
/// (strengthReduceGEP) is declared but not defined in this chunk; confirm.
class GEPCache {
public:
  GEPCache() : CachedPHINode(0), Map() {}

  /// get - Return the child cache node for operand value 'v', creating an
  /// empty node the first time 'v' is seen.
  GEPCache *get(Value *v) {
    std::map<Value *, GEPCache>::iterator I = Map.find(v);
    if (I == Map.end())
      I = Map.insert(std::pair<Value *, GEPCache>(v, GEPCache())).first;
    return &I->second;
  }

  // CachedPHINode - PHI node memoized for this operand prefix, or null if
  // none has been created yet.
  PHINode *CachedPHINode;
  // Map - Child nodes, keyed by the next GEP operand value.
  std::map<Value *, GEPCache> Map;
};
|
2005-08-04 06:21:05 +08:00
|
|
|
|
|
|
|
/// IVStrideUse - Keep track of one use of a strided induction variable, where
|
|
|
|
/// the stride is stored externally. The Offset member keeps track of the
|
|
|
|
/// offset from the IV, User is the actual user of the operand, and 'Operand'
|
|
|
|
/// is the operand # of the User that is the use.
|
|
|
|
struct IVStrideUse {
  // Offset - The offset of this use from the induction variable; the stride
  // itself is stored externally (as the key of IVUsesByStride).
  SCEVHandle Offset;
  // User - The instruction containing the use of the IV.
  Instruction *User;
  // OperandValToReplace - The operand value of User that will be rewritten
  // with the strength-reduced expression.
  Value *OperandValToReplace;

  IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
    : Offset(Offs), User(U), OperandValToReplace(O) {}
};
|
|
|
|
|
|
|
|
/// IVUsersOfOneStride - This structure keeps track of all instructions that
|
|
|
|
/// have an operand that is based on the trip count multiplied by some stride.
|
|
|
|
/// The stride for all of these users is common and kept external to this
|
|
|
|
/// structure.
|
|
|
|
struct IVUsersOfOneStride {
  /// Users - Keep track of all of the users of this stride as well as the
  /// initial value and the operand that uses the IV.  All entries share one
  /// common stride, kept external to this structure.
  std::vector<IVStrideUse> Users;

  /// addUser - Record one more use of the externally-held stride: the use
  /// starts at Offset, lives in instruction User, and replaces Operand.
  void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
    Users.push_back(IVStrideUse(Offset, User, Operand));
  }
};
|
|
|
|
|
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
/// LoopStrengthReduce - A FunctionPass that strength-reduces GEP-based array
/// accesses whose subscripts depend on a loop induction variable.
class LoopStrengthReduce : public FunctionPass {
  LoopInfo *LI;               // Loop nest information for the function.
  DominatorSet *DS;           // Dominator information.
  ScalarEvolution *SE;        // SCEV analysis used to model IV expressions.
  const TargetData *TD;       // Target layout info (type sizes, intptr type).
  const Type *UIntPtrTy;      // The unsigned pointer-sized integer type.
  bool Changed;               // True once any transformation has been made.

  /// MaxTargetAMSize - This is the maximum power-of-two scale value that the
  /// target can handle for free with its addressing modes.
  unsigned MaxTargetAMSize;

  /// IVUsesByStride - Keep track of all uses of induction variables that we
  /// are interested in.  The key of the map is the stride of the access.
  std::map<Value*, IVUsersOfOneStride> IVUsesByStride;

  /// CastedBasePointers - As we need to lower getelementptr instructions, we
  /// cast the pointer input to uintptr_t.  This keeps track of the casted
  /// values for the pointers we have processed so far.
  std::map<Value*, Value*> CastedBasePointers;

  /// DeadInsts - Keep track of instructions we may have made dead, so that
  /// we can remove them after we are done working.
  std::set<Instruction*> DeadInsts;
public:
  // MTAMS defaults to 1, i.e. only scale-by-one addressing is free.
  LoopStrengthReduce(unsigned MTAMS = 1)
    : MaxTargetAMSize(MTAMS) {
  }

  // runOnFunction - Cache the required analyses, then visit every top-level
  // loop in the function.  Returns true if the IR was modified.
  virtual bool runOnFunction(Function &) {
    LI = &getAnalysis<LoopInfo>();
    DS = &getAnalysis<DominatorSet>();
    SE = &getAnalysis<ScalarEvolution>();
    TD = &getAnalysis<TargetData>();
    UIntPtrTy = TD->getIntPtrType();
    Changed = false;

    for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
      runOnLoop(*I);
    return Changed;
  }

  // getAnalysisUsage - This pass rewrites instructions but never changes the
  // CFG; it requires canonicalized loops (LoopSimplify) plus the analyses
  // cached in runOnFunction above.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequired<LoopInfo>();
    AU.addRequired<DominatorSet>();
    AU.addRequired<TargetData>();
    AU.addRequired<ScalarEvolution>();
  }
private:
  // runOnLoop - Process one loop (and, presumably, its subloops); defined
  // outside this chunk.
  void runOnLoop(Loop *L);
  // AddUsersIfInteresting - If I is a reducible IV expression, record its
  // users in IVUsesByStride and return true.
  bool AddUsersIfInteresting(Instruction *I, Loop *L);
  // AnalyzeGetElementPtrUsers - Decompose a GEP whose operand 'I' is an IV
  // into base + stride and record the GEP's users.
  void AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP, Instruction *I,
                                 Loop *L);

  // StrengthReduceStridedIVUsers - Rewrite all users that share one stride.
  void StrengthReduceStridedIVUsers(Value *Stride, IVUsersOfOneStride &Uses,
                                    Loop *L, bool isOnlyStride);

  // strengthReduceGEP - Older per-GEP reduction entry point using GEPCache;
  // defined outside this chunk.
  void strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
                         GEPCache* GEPCache,
                         Instruction *InsertBefore,
                         std::set<Instruction*> &DeadInsts);
  // DeleteTriviallyDeadInstructions - Erase dead instructions in Insts,
  // transitively including operands that become dead.
  void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
};
|
2005-04-22 07:48:37 +08:00
|
|
|
// Register the pass so it is available as -loop-reduce on the opt command
// line.
RegisterOpt<LoopStrengthReduce> X("loop-reduce",
                                  "Strength Reduce GEP Uses of Ind. Vars");
|
|
|
|
}
|
|
|
|
|
2005-03-04 12:04:26 +08:00
|
|
|
/// createLoopStrengthReducePass - Public factory for the pass.
/// MaxTargetAMSize is the largest power-of-two scale the target's addressing
/// modes can apply for free (defaults to 1 in the pass itself).  The caller
/// owns the returned pass.
FunctionPass *llvm::createLoopStrengthReducePass(unsigned MaxTargetAMSize) {
  LoopStrengthReduce *LSR = new LoopStrengthReduce(MaxTargetAMSize);
  return LSR;
}
|
|
|
|
|
|
|
|
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
|
|
|
|
/// specified set are trivially dead, delete them and see if this makes any of
|
|
|
|
/// their operands subsequently dead.
|
|
|
|
void LoopStrengthReduce::
|
|
|
|
DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
|
|
|
|
while (!Insts.empty()) {
|
|
|
|
Instruction *I = *Insts.begin();
|
|
|
|
Insts.erase(Insts.begin());
|
|
|
|
if (isInstructionTriviallyDead(I)) {
|
2005-03-01 11:46:11 +08:00
|
|
|
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
|
|
|
|
if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
|
|
|
|
Insts.insert(U);
|
2005-08-04 05:36:09 +08:00
|
|
|
SE->deleteInstructionFromRecords(I);
|
|
|
|
I->eraseFromParent();
|
2004-10-19 05:08:22 +08:00
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
/// CanReduceSCEV - Return true if we can strength reduce this scalar evolution
|
|
|
|
/// in the specified loop.
|
|
|
|
static bool CanReduceSCEV(const SCEVHandle &SH, Loop *L) {
|
|
|
|
SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH);
|
|
|
|
if (!AddRec || AddRec->getLoop() != L) return false;
|
|
|
|
|
|
|
|
// FIXME: Generalize to non-affine IV's.
|
|
|
|
if (!AddRec->isAffine()) return false;
|
|
|
|
|
|
|
|
// FIXME: generalize to IV's with more complex strides (must emit stride
|
|
|
|
// expression outside of loop!)
|
|
|
|
if (isa<SCEVConstant>(AddRec->getOperand(1)))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// We handle steps by unsigned values, because we know we won't have to insert
|
|
|
|
// a cast for them.
|
|
|
|
if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(AddRec->getOperand(1)))
|
|
|
|
if (SU->getValue()->getType()->isUnsigned())
|
|
|
|
return true;
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// Otherwise, no, we can't handle it yet.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// GetAdjustedIndex - Adjust the specified GEP sequential type index to match
|
|
|
|
/// the size of the pointer type, and scale it by the type size.
|
|
|
|
/// GetAdjustedIndex - Adjust the specified GEP sequential type index to match
/// the size of the pointer type (truncating or zero-extending as needed), and
/// scale it by the size of the type being indexed.
static SCEVHandle GetAdjustedIndex(const SCEVHandle &Idx, uint64_t TySize,
                                   const Type *UIntPtrTy) {
  SCEVHandle Adjusted = Idx;

  // Normalize the index to the pointer-sized unsigned integer type.
  const Type *IdxTy = Adjusted->getType();
  if (IdxTy->getUnsignedVersion() != UIntPtrTy) {
    if (IdxTy->getPrimitiveSize() > UIntPtrTy->getPrimitiveSize())
      Adjusted = SCEVTruncateExpr::get(Adjusted, UIntPtrTy);
    else
      Adjusted = SCEVZeroExtendExpr::get(Adjusted, UIntPtrTy);
  }

  // This index is scaled by the type size being indexed; skip the multiply
  // entirely when the scale is one.
  if (TySize != 1) {
    ConstantUInt *Scale = ConstantUInt::get(UIntPtrTy, TySize);
    Adjusted = SCEVMulExpr::get(Adjusted, SCEVConstant::get(Scale));
  }
  return Adjusted;
}
|
|
|
|
|
|
|
|
/// AnalyzeGetElementPtrUsers - Analyze all of the users of the specified
|
|
|
|
/// getelementptr instruction, adding them to the IVUsesByStride table. Note
|
|
|
|
/// that we only want to analyze a getelementptr instruction once, and it can
|
|
|
|
/// have multiple operands that are uses of the indvar (e.g. A[i][i]). Because
|
|
|
|
/// of this, we only process a GEP instruction if its first recurrent operand is
|
|
|
|
/// "op", otherwise we will either have already processed it or we will sometime
|
|
|
|
/// later.
|
|
|
|
/// AnalyzeGetElementPtrUsers - Analyze all of the users of the specified
/// getelementptr instruction, adding them to the IVUsesByStride table.  Note
/// that we only want to analyze a getelementptr instruction once, and it can
/// have multiple operands that are uses of the indvar (e.g. A[i][i]).  Because
/// of this, we only process a GEP instruction if its first recurrent operand
/// is "Op", otherwise we will either have already processed it or we will
/// sometime later.
///
/// Defect fixed vs. the previous revision: the debug message printed
/// "non-invaiant" (typo) when ignoring a GEP with a loop-variant base.
void LoopStrengthReduce::AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP,
                                                   Instruction *Op, Loop *L) {
  // Analyze all of the subscripts of this getelementptr instruction, looking
  // for uses that are determined by the trip count of L.  First, skip all
  // operands that are not dependent on the IV.

  // Build up the base expression.  Insert an LLVM cast of the pointer to
  // uintptr_t first.
  Value *BasePtr;
  if (Constant *CB = dyn_cast<Constant>(GEP->getOperand(0)))
    BasePtr = ConstantExpr::getCast(CB, UIntPtrTy);
  else {
    // Cache the cast so each base pointer is cast to uintptr_t only once.
    Value *&BP = CastedBasePointers[GEP->getOperand(0)];
    if (BP == 0) {
      BasicBlock::iterator InsertPt;
      if (isa<Argument>(GEP->getOperand(0))) {
        // Arguments are available on function entry: cast in the entry block.
        InsertPt = GEP->getParent()->getParent()->begin()->begin();
      } else {
        // Otherwise cast immediately after the defining instruction; invoke
        // results are only available in the normal destination block.
        InsertPt = cast<Instruction>(GEP->getOperand(0));
        if (InvokeInst *II = dyn_cast<InvokeInst>(GEP->getOperand(0)))
          InsertPt = II->getNormalDest()->begin();
        else
          ++InsertPt;
      }

      // Do not insert casts into the middle of PHI node blocks.
      while (isa<PHINode>(InsertPt)) ++InsertPt;

      BP = new CastInst(GEP->getOperand(0), UIntPtrTy,
                        GEP->getOperand(0)->getName(), InsertPt);
    }
    BasePtr = BP;
  }

  SCEVHandle Base = SCEVUnknown::get(BasePtr);

  // Fold every operand before Op into the base expression.
  gep_type_iterator GTI = gep_type_begin(GEP);
  unsigned i = 1;
  for (; GEP->getOperand(i) != Op; ++i, ++GTI) {
    // If this is a use of a recurrence that we can analyze, and it comes
    // before Op does in the GEP operand list, we will handle this when we
    // process this operand.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      // Struct field: a compile-time constant byte offset.
      const StructLayout *SL = TD->getStructLayout(STy);
      unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
      uint64_t Offset = SL->MemberOffsets[Idx];
      Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
                                                                UIntPtrTy));
    } else {
      SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));

      // If this operand is reducible, and it's not the one we are looking at
      // currently, do not process the GEP at this time.
      if (CanReduceSCEV(Idx, L))
        return;
      Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
                             TD->getTypeSize(GTI.getIndexedType()),
                             UIntPtrTy));
    }
  }

  // Get the index, convert it to intptr_t.
  SCEVHandle GEPIndexExpr =
    GetAdjustedIndex(SE->getSCEV(Op), TD->getTypeSize(GTI.getIndexedType()),
                     UIntPtrTy);

  // Process all remaining subscripts in the GEP instruction: recurrent
  // indices accumulate into GEPIndexExpr, everything else into Base.
  for (++i, ++GTI; i != GEP->getNumOperands(); ++i, ++GTI)
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      const StructLayout *SL = TD->getStructLayout(STy);
      unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
      uint64_t Offset = SL->MemberOffsets[Idx];
      Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
                                                                UIntPtrTy));
    } else {
      SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));
      if (CanReduceSCEV(Idx, L)) {   // Another IV subscript
        GEPIndexExpr = SCEVAddExpr::get(GEPIndexExpr,
                   GetAdjustedIndex(Idx, TD->getTypeSize(GTI.getIndexedType()),
                                    UIntPtrTy));
        assert(CanReduceSCEV(GEPIndexExpr, L) &&
               "Cannot reduce the sum of two reducible SCEV's??");
      } else {
        Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
                               TD->getTypeSize(GTI.getIndexedType()),
                               UIntPtrTy));
      }
    }

  assert(CanReduceSCEV(GEPIndexExpr, L) && "Non reducible idx??");

  // FIXME: If the base is not loop invariant, we currently cannot emit this.
  if (!Base->isLoopInvariant(L)) {
    DEBUG(std::cerr << "IGNORING GEP due to non-invariant base: "
                    << *Base << "\n");
    return;
  }

  // Split the addrec into its loop-invariant start (folded into Base) and
  // its per-iteration step (the stride).
  Base = SCEVAddExpr::get(Base,
                          cast<SCEVAddRecExpr>(GEPIndexExpr)->getStart());
  SCEVHandle Stride = cast<SCEVAddRecExpr>(GEPIndexExpr)->getOperand(1);

  DEBUG(std::cerr << "GEP BASE : " << *Base << "\n");
  DEBUG(std::cerr << "GEP STRIDE: " << *Stride << "\n");

  Value *Step = 0;   // Step of ISE.
  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride))
    // Always get the step value as an unsigned value, so that lookups in
    // IVUsesByStride match regardless of the source type's signedness.
    Step = ConstantExpr::getCast(SC->getValue(),
                          SC->getValue()->getType()->getUnsignedVersion());
  else
    Step = cast<SCEVUnknown>(Stride)->getValue();
  assert(Step->getType()->isUnsigned() && "Bad step value!");

  // Now that we know the base and stride contributed by the GEP instruction,
  // process all users.
  for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Do not infinitely recurse on PHI nodes.
    if (isa<PHINode>(User) && User->getParent() == L->getHeader())
      continue;

    // If this is an instruction defined in a nested loop, or outside this
    // loop, don't mess with it.
    if (LI->getLoopFor(User->getParent()) != L)
      continue;

    DEBUG(std::cerr << "FOUND USER: " << *User
          << " OF STRIDE: " << *Step << " BASE = " << *Base << "\n");

    // Okay, we found a user that we cannot reduce.  Analyze the instruction
    // and decide what to do with it.
    IVUsesByStride[Step].addUser(Base, User, GEP);
  }
}
|
|
|
|
|
|
|
|
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
|
|
|
|
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
|
|
|
|
/// return true. Otherwise, return false.
|
|
|
|
/// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true.  Otherwise, return false.
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L) {
  // Void-typed instructions produce no value and so cannot be IV expressions.
  if (I->getType() == Type::VoidTy) return false;
  SCEVHandle ISE = SE->getSCEV(I);
  if (!CanReduceSCEV(ISE, L)) return false;

  // CanReduceSCEV guarantees this is an affine addrec of L.
  SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(ISE);
  SCEVHandle Start = AR->getStart();

  // Get the step value, canonicalizing to an unsigned integer type so that
  // lookups in the map will match.
  Value *Step = 0;   // Step of ISE.
  if (SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getOperand(1)))
    // Always get the step value as an unsigned value.
    Step = ConstantExpr::getCast(SC->getValue(),
                          SC->getValue()->getType()->getUnsignedVersion());
  else
    Step = cast<SCEVUnknown>(AR->getOperand(1))->getValue();
  assert(Step->getType()->isUnsigned() && "Bad step value!");

  // Guards against analyzing the same GEP twice when several of its operands
  // use this IV (e.g. A[i][i]).
  std::set<GetElementPtrInst*> AnalyzedGEPs;

  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
       ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Do not infinitely recurse on PHI nodes.
    if (isa<PHINode>(User) && User->getParent() == L->getHeader())
      continue;

    // If this is an instruction defined in a nested loop, or outside this
    // loop, handle the use here but don't recurse into it.
    if (LI->getLoopFor(User->getParent()) != L) {
      DEBUG(std::cerr << "FOUND USER in nested loop: " << *User
            << " OF SCEV: " << *ISE << "\n");

      // Okay, we found a user that we cannot reduce.  Analyze the instruction
      // and decide what to do with it.
      IVUsesByStride[Step].addUser(Start, User, I);
      continue;
    }

    // Next, see if this user is analyzable itself!
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a getelementptr instruction, figure out what linear
      // expression of induction variable is actually being used.
      if (AnalyzedGEPs.insert(GEP).second)   // Not already analyzed?
        AnalyzeGetElementPtrUsers(GEP, I, L);
    } else if (!AddUsersIfInteresting(User, L)) {
      DEBUG(std::cerr << "FOUND USER: " << *User
            << " OF SCEV: " << *ISE << "\n");

      // Okay, we found a user that we cannot reduce.  Analyze the instruction
      // and decide what to do with it.
      IVUsesByStride[Step].addUser(Start, User, I);
    }
  }
  return true;
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// BasedUser - For a particular base value, keep information about how we've
|
|
|
|
/// partitioned the expression so far.
|
|
|
|
struct BasedUser {
|
|
|
|
/// Inst - The instruction using the induction variable.
|
|
|
|
Instruction *Inst;
|
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
/// OperandValToReplace - The operand value of Inst to replace with the
|
|
|
|
/// EmittedBase.
|
|
|
|
Value *OperandValToReplace;
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
/// Imm - The immediate value that should be added to the base immediately
|
|
|
|
/// before Inst, because it will be folded into the imm field of the
|
|
|
|
/// instruction.
|
|
|
|
SCEVHandle Imm;
|
|
|
|
|
|
|
|
/// EmittedBase - The actual value* to use for the base value of this
|
|
|
|
/// operation. This is null if we should just use zero so far.
|
|
|
|
Value *EmittedBase;
|
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
BasedUser(Instruction *I, Value *Op, const SCEVHandle &IMM)
|
|
|
|
: Inst(I), OperandValToReplace(Op), Imm(IMM), EmittedBase(0) {}
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
|
|
|
|
// No need to compare these.
|
|
|
|
bool operator<(const BasedUser &BU) const { return 0; }
|
|
|
|
|
|
|
|
void dump() const;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
// dump - Debugging helper: print this BasedUser's immediate, emitted base
// (when one has been materialized), and the using instruction to stderr.
void BasedUser::dump() const {
  std::cerr << " Imm=" << *Imm;
  if (EmittedBase)
    std::cerr << " EB=" << *EmittedBase;

  std::cerr << " Inst: " << *Inst;
}
|
|
|
|
|
|
|
|
/// isTargetConstant - Return true if the following can be referenced by the
|
|
|
|
/// immediate field of a target instruction.
|
|
|
|
/// isTargetConstant - Return true if the following can be referenced by the
/// immediate field of a target instruction.  Currently only SCEV constants
/// qualify; the global-address case below is deliberately disabled.
static bool isTargetConstant(const SCEVHandle &V) {

  // FIXME: Look at the target to decide if &GV is a legal constant immediate.
  if (isa<SCEVConstant>(V)) return true;

  return false;    // ENABLE this for x86

  // NOTE: the code below is intentionally unreachable until the early return
  // above is removed for targets (like x86) whose addressing modes can fold
  // a global address as an immediate.
  if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
      if (CE->getOpcode() == Instruction::Cast)
        if (isa<GlobalValue>(CE->getOperand(0)))
          // FIXME: should check to see that the dest is uintptr_t!
          return true;
  return false;
}
|
|
|
|
|
|
|
|
/// GetImmediateValues - Look at Val, and pull out any additions of constants
|
|
|
|
/// that can fit into the immediate field of instructions in the target.
|
|
|
|
/// GetImmediateValues - Look at Val, and pull out any additions of constants
/// that can fit into the immediate field of instructions in the target.
/// Returns the extracted immediate portion, or an integer zero of Val's type
/// when nothing can be folded (e.g. for non-address uses).
static SCEVHandle GetImmediateValues(SCEVHandle Val, bool isAddress) {
  // Only loads/stores can fold immediates into their addressing mode.
  if (!isAddress)
    return SCEVUnknown::getIntegerSCEV(0, Val->getType());
  // If the whole value is a legal immediate, take it wholesale.
  if (isTargetConstant(Val))
    return Val;

  if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
    unsigned i = 0;
    for (; i != SAE->getNumOperands(); ++i)
      if (isTargetConstant(SAE->getOperand(i))) {
        SCEVHandle ImmVal = SAE->getOperand(i);

        // If there are any other immediates that we can handle here, pull
        // them out too.
        for (++i; i != SAE->getNumOperands(); ++i)
          if (isTargetConstant(SAE->getOperand(i)))
            ImmVal = SCEVAddExpr::get(ImmVal, SAE->getOperand(i));
        return ImmVal;
      }
  } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
    // Try to pull immediates out of the start value of nested addrec's.
    return GetImmediateValues(SARE->getStart(), isAddress);
  }

  // No foldable immediate found.
  return SCEVUnknown::getIntegerSCEV(0, Val->getType());
}
|
|
|
|
|
|
|
|
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
|
|
|
|
/// stride of IV. All of the users may have different starting values, and this
|
|
|
|
/// may not be the only stride (we know it is if isOnlyStride is true).
|
|
|
|
void LoopStrengthReduce::StrengthReduceStridedIVUsers(Value *Stride,
|
2005-08-04 06:21:05 +08:00
|
|
|
IVUsersOfOneStride &Uses,
|
|
|
|
Loop *L,
|
2005-07-30 08:15:07 +08:00
|
|
|
bool isOnlyStride) {
|
|
|
|
// Transform our list of users and offsets to a bit more complex table. In
|
|
|
|
// this new vector, the first entry for each element is the base of the
|
|
|
|
// strided access, and the second is the BasedUser object for the use. We
|
|
|
|
// progressively move information from the first to the second entry, until we
|
|
|
|
// eventually emit the object.
|
|
|
|
std::vector<std::pair<SCEVHandle, BasedUser> > UsersToProcess;
|
|
|
|
UsersToProcess.reserve(Uses.Users.size());
|
2005-07-31 02:33:25 +08:00
|
|
|
|
|
|
|
SCEVHandle ZeroBase = SCEVUnknown::getIntegerSCEV(0,
|
2005-08-04 06:21:05 +08:00
|
|
|
Uses.Users[0].Offset->getType());
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i)
|
2005-08-04 06:21:05 +08:00
|
|
|
UsersToProcess.push_back(std::make_pair(Uses.Users[i].Offset,
|
|
|
|
BasedUser(Uses.Users[i].User,
|
|
|
|
Uses.Users[i].OperandValToReplace,
|
2005-07-30 08:15:07 +08:00
|
|
|
ZeroBase)));
|
|
|
|
|
|
|
|
// First pass, figure out what we can represent in the immediate fields of
|
|
|
|
// instructions. If we can represent anything there, move it to the imm
|
|
|
|
// fields of the BasedUsers.
|
|
|
|
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
|
|
|
|
bool isAddress = isa<LoadInst>(UsersToProcess[i].second.Inst) ||
|
|
|
|
isa<StoreInst>(UsersToProcess[i].second.Inst);
|
2005-07-31 02:33:25 +08:00
|
|
|
UsersToProcess[i].second.Imm = GetImmediateValues(UsersToProcess[i].first,
|
2005-07-30 08:15:07 +08:00
|
|
|
isAddress);
|
|
|
|
UsersToProcess[i].first = SCEV::getMinusSCEV(UsersToProcess[i].first,
|
|
|
|
UsersToProcess[i].second.Imm);
|
|
|
|
|
|
|
|
DEBUG(std::cerr << "BASE: " << *UsersToProcess[i].first);
|
|
|
|
DEBUG(UsersToProcess[i].second.dump());
|
2005-03-07 05:58:22 +08:00
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
SCEVExpander Rewriter(*SE, *LI);
|
|
|
|
BasicBlock *Preheader = L->getLoopPreheader();
|
|
|
|
Instruction *PreInsertPt = Preheader->getTerminator();
|
|
|
|
Instruction *PhiInsertBefore = L->getHeader()->begin();
|
|
|
|
|
2005-07-31 02:33:25 +08:00
|
|
|
assert(isa<PHINode>(PhiInsertBefore) &&
|
2005-07-30 08:15:07 +08:00
|
|
|
"How could this loop have IV's without any phis?");
|
|
|
|
PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
|
|
|
|
assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
|
|
|
|
"This loop isn't canonicalized right");
|
|
|
|
BasicBlock *LatchBlock =
|
|
|
|
SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-08-04 07:30:08 +08:00
|
|
|
DEBUG(std::cerr << "INSERTING IVs of STRIDE " << *Stride << ":\n");
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// FIXME: This loop needs increasing levels of intelligence.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// STAGE 0: just emit everything as its own base.
|
2005-07-30 08:15:07 +08:00
|
|
|
// STAGE 1: factor out common vars from bases, and try and push resulting
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// constants into Imm field. <-- We are here
|
2005-07-30 08:15:07 +08:00
|
|
|
// STAGE 2: factor out large constants to try and make more constants
|
|
|
|
// acceptable for target loads and stores.
|
|
|
|
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// Sort by the base value, so that all IVs with identical bases are next to
|
|
|
|
// each other.
|
|
|
|
std::sort(UsersToProcess.begin(), UsersToProcess.end());
|
2005-07-30 08:15:07 +08:00
|
|
|
while (!UsersToProcess.empty()) {
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
SCEVHandle Base = UsersToProcess.front().first;
|
2005-08-04 07:30:08 +08:00
|
|
|
|
|
|
|
DEBUG(std::cerr << " INSERTING PHI with BASE = " << *Base << ":\n");
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// Create a new Phi for this base, and stick it in the loop header.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
const Type *ReplacedTy = Base->getType();
|
|
|
|
PHINode *NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-07-31 02:33:25 +08:00
|
|
|
// Emit the initial base value into the loop preheader, and add it to the
|
2005-07-30 08:15:07 +08:00
|
|
|
// Phi node.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
Value *BaseV = Rewriter.expandCodeFor(Base, PreInsertPt, ReplacedTy);
|
2005-07-30 08:15:07 +08:00
|
|
|
NewPHI->addIncoming(BaseV, Preheader);
|
|
|
|
|
|
|
|
// Emit the increment of the base value before the terminator of the loop
|
|
|
|
// latch block, and add it to the Phi node.
|
|
|
|
SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
|
|
|
SCEVUnknown::get(Stride));
|
|
|
|
|
|
|
|
Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(),
|
|
|
|
ReplacedTy);
|
|
|
|
IncV->setName(NewPHI->getName()+".inc");
|
|
|
|
NewPHI->addIncoming(IncV, LatchBlock);
|
|
|
|
|
|
|
|
// Emit the code to add the immediate offset to the Phi value, just before
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// the instructions that we identified as using this stride and base.
|
|
|
|
while (!UsersToProcess.empty() && UsersToProcess.front().first == Base) {
|
|
|
|
BasedUser &User = UsersToProcess.front().second;
|
|
|
|
|
|
|
|
// Clear the SCEVExpander's expression map so that we are guaranteed
|
|
|
|
// to have the code emitted where we expect it.
|
|
|
|
Rewriter.clear();
|
|
|
|
SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
|
|
|
User.Imm);
|
2005-08-04 07:30:08 +08:00
|
|
|
Value *Replaced = User.OperandValToReplace;
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
Value *newVal = Rewriter.expandCodeFor(NewValSCEV, User.Inst,
|
|
|
|
Replaced->getType());
|
|
|
|
|
|
|
|
// Replace the use of the operand Value with the new Phi we just created.
|
|
|
|
User.Inst->replaceUsesOfWith(Replaced, newVal);
|
2005-08-04 07:30:08 +08:00
|
|
|
DEBUG(std::cerr << " CHANGED: IMM =" << *User.Imm << " Inst = "
|
|
|
|
<< *User.Inst);
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
|
|
|
|
// Mark old value we replaced as possibly dead, so that it is elminated
|
|
|
|
// if we just replaced the last use of that value.
|
|
|
|
DeadInsts.insert(cast<Instruction>(Replaced));
|
|
|
|
|
|
|
|
UsersToProcess.erase(UsersToProcess.begin());
|
|
|
|
++NumReduced;
|
|
|
|
}
|
2005-07-30 08:15:07 +08:00
|
|
|
// TODO: Next, find out which base index is the most common, pull it out.
|
|
|
|
}
|
|
|
|
|
|
|
|
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
|
|
|
|
// different starting values, into different PHIs.
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// BEFORE writing this, it's probably useful to handle GEP's.
|
|
|
|
|
|
|
|
// NOTE: pull all constants together, for REG+IMM addressing, include &GV in
|
|
|
|
// 'IMM' if the target supports it.
|
2004-10-19 05:08:22 +08:00
|
|
|
}
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
/// runOnLoop - Recursively strength-reduce IV users in this loop and all
/// loops nested inside of it.  Inner loops are transformed first, then the
/// IV uses of this loop are gathered (categorized by stride) and each stride
/// group is strength-reduced.  Finally, instructions made trivially dead by
/// the transformation -- including now-unneeded canonical induction variable
/// PHI nodes -- are cleaned up.
void LoopStrengthReduce::runOnLoop(Loop *L) {
  // First step, transform all loops nesting inside of this loop.
  for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
    runOnLoop(*I);

  // Next, find all uses of induction variables in this loop, and categorize
  // them by stride.  Start by finding all of the PHI nodes in the header for
  // this loop.  If they are induction variables, inspect their uses.
  // AddUsersIfInteresting populates IVUsesByStride as a side effect.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
    AddUsersIfInteresting(I, L);

  // If we have nothing to do, return.
  //if (IVUsesByStride.empty()) return;

  // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
  // doing computation in byte values, promote to 32-bit values if safe.

  // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
  // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
  // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.  Need
  // to be careful that IV's are all the same type.  Only works for intptr_t
  // indvars.

  // If we only have one stride, we can more aggressively eliminate some things.
  bool HasOneStride = IVUsesByStride.size() == 1;

  // Strength-reduce each group of IV users that share a common stride.
  for (std::map<Value*, IVUsersOfOneStride>::iterator SI
        = IVUsesByStride.begin(), E = IVUsesByStride.end(); SI != E; ++SI)
    StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);

  // Clean up after ourselves
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    // Scan the header PHIs for induction variables that are now dead except
    // for their own increment cycle, and remove them too.
    BasicBlock::iterator I = L->getHeader()->begin();
    PHINode *PN;
    while ((PN = dyn_cast<PHINode>(I))) {
      ++I;  // Preincrement iterator to avoid invalidating it when deleting PN.

      // At this point, we know that we have killed one or more GEP
      // instructions.  It is worth checking to see if the cann indvar is also
      // dead, so that we can remove it as well.  The requirements for the cann
      // indvar to be considered dead are:
      // 1. the cann indvar has one use
      // 2. the use is an add instruction
      // 3. the add has one use
      // 4. the add is used by the cann indvar
      // If all four cases above are true, then we can remove both the add and
      // the cann indvar.
      // FIXME: this needs to eliminate an induction variable even if it's being
      // compared against some value to decide loop termination.
      if (PN->hasOneUse()) {
        BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
        if (BO && BO->hasOneUse()) {
          if (PN == *(BO->use_begin())) {
            // PN and BO form a self-contained PHI/add cycle with no other
            // users: queue the add for deletion and remove the PHI directly.
            DeadInsts.insert(BO);
            // Break the cycle, then delete the PHI.
            PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
            // Drop cached analysis info for PN before erasing it
            // (presumably SE is the ScalarEvolution analysis -- its records
            // would otherwise hold a stale pointer).
            SE->deleteInstructionFromRecords(PN);
            PN->eraseFromParent();
          }
        }
      }
    }
    // Delete the increment instructions queued while erasing dead PHIs.
    DeleteTriviallyDeadInstructions(DeadInsts);
  }

  // Reset per-loop state so the next loop starts fresh.
  IVUsesByStride.clear();
  CastedBasePointers.clear();
  return;
}
|