forked from OSchip/llvm-project
Commit a new LoopStrengthReduce pass that can use scalar evolutions and
target data to decide which loop induction variables to strength reduce and how to do so. This work is mostly by Chris Lattner, with tweaks by me to get it working on some of MultiSource. llvm-svn: 22558
This commit is contained in:
parent
2bca4d9b7b
commit
e68bcd1946
|
@ -13,9 +13,6 @@
|
|||
// access for the first iteration, and then creating a new GEP instruction in
|
||||
// the loop to increment the value by the appropriate amount.
|
||||
//
|
||||
// There are currently several deficiencies in the implementation, marked with
|
||||
// FIXME in the code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
|
@ -25,10 +22,13 @@
|
|||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/Support/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <set>
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -50,11 +50,40 @@ namespace {
|
|||
std::map<Value *, GEPCache> Map;
|
||||
};
|
||||
|
||||
struct IVUse {
|
||||
/// Users - Keep track of all of the users of this stride as well as the
|
||||
/// initial value.
|
||||
std::vector<std::pair<SCEVHandle, Instruction*> > Users;
|
||||
std::vector<Instruction *> UserOperands;
|
||||
|
||||
void addUser(SCEVHandle &SH, Instruction *U, Instruction *V) {
|
||||
Users.push_back(std::make_pair(SH, U));
|
||||
UserOperands.push_back(V);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class LoopStrengthReduce : public FunctionPass {
|
||||
LoopInfo *LI;
|
||||
DominatorSet *DS;
|
||||
ScalarEvolution *SE;
|
||||
const TargetData *TD;
|
||||
const Type *UIntPtrTy;
|
||||
bool Changed;
|
||||
unsigned MaxTargetAMSize;
|
||||
|
||||
/// IVUsesByStride - Keep track of all uses of induction variables that we
|
||||
/// are interested in. The key of the map is the stride of the access.
|
||||
std::map<Value*, IVUse> IVUsesByStride;
|
||||
|
||||
/// CastedBasePointers - As we need to lower getelementptr instructions, we
|
||||
/// cast the pointer input to uintptr_t. This keeps track of the casted
|
||||
/// values for the pointers we have processed so far.
|
||||
std::map<Value*, Value*> CastedBasePointers;
|
||||
|
||||
/// DeadInsts - Keep track of instructions we may have made dead, so that
|
||||
/// we can remove them after we are done working.
|
||||
std::set<Instruction*> DeadInsts;
|
||||
public:
|
||||
LoopStrengthReduce(unsigned MTAMS = 1)
|
||||
: MaxTargetAMSize(MTAMS) {
|
||||
|
@ -63,6 +92,9 @@ namespace {
|
|||
virtual bool runOnFunction(Function &) {
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
DS = &getAnalysis<DominatorSet>();
|
||||
SE = &getAnalysis<ScalarEvolution>();
|
||||
TD = &getAnalysis<TargetData>();
|
||||
UIntPtrTy = TD->getIntPtrType();
|
||||
Changed = false;
|
||||
|
||||
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
|
||||
|
@ -76,9 +108,17 @@ namespace {
|
|||
AU.addRequired<LoopInfo>();
|
||||
AU.addRequired<DominatorSet>();
|
||||
AU.addRequired<TargetData>();
|
||||
AU.addRequired<ScalarEvolution>();
|
||||
}
|
||||
private:
|
||||
void runOnLoop(Loop *L);
|
||||
bool AddUsersIfInteresting(Instruction *I, Loop *L);
|
||||
void AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP, Instruction *I,
|
||||
Loop *L);
|
||||
|
||||
void StrengthReduceStridedIVUsers(Value *Stride, IVUse &Uses, Loop *L,
|
||||
bool isOnlyStride);
|
||||
|
||||
void strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
|
||||
GEPCache* GEPCache,
|
||||
Instruction *InsertBefore,
|
||||
|
@ -111,207 +151,505 @@ DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
|
|||
}
|
||||
}
|
||||
|
||||
void LoopStrengthReduce::strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L,
|
||||
GEPCache *Cache,
|
||||
Instruction *InsertBefore,
|
||||
std::set<Instruction*> &DeadInsts) {
|
||||
// We will strength reduce the GEP by splitting it into two parts. The first
|
||||
// is a GEP to hold the initial value of the non-strength-reduced GEP upon
|
||||
// entering the loop, which we will insert at the end of the loop preheader.
|
||||
// The second is a GEP to hold the incremented value of the initial GEP.
|
||||
// The LoopIndVarSimplify pass guarantees that loop counts start at zero, so
|
||||
// we will replace the indvar with a constant zero value to create the first
|
||||
// GEP.
|
||||
//
|
||||
// We currently only handle GEP instructions that consist of zero or more
|
||||
// constants or loop invariable expressions prior to an instance of the
|
||||
// canonical induction variable.
|
||||
unsigned indvar = 0;
|
||||
std::vector<Value *> pre_op_vector;
|
||||
std::vector<Value *> inc_op_vector;
|
||||
const Type *ty = GEPI->getOperand(0)->getType();
|
||||
Value *CanonicalIndVar = L->getCanonicalInductionVariable();
|
||||
BasicBlock *Header = L->getHeader();
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
bool AllConstantOperands = true;
|
||||
Cache = Cache->get(GEPI->getOperand(0));
|
||||
|
||||
for (unsigned op = 1, e = GEPI->getNumOperands(); op != e; ++op) {
|
||||
Value *operand = GEPI->getOperand(op);
|
||||
if (ty->getTypeID() == Type::StructTyID) {
|
||||
assert(isa<ConstantUInt>(operand));
|
||||
ConstantUInt *c = dyn_cast<ConstantUInt>(operand);
|
||||
ty = ty->getContainedType(unsigned(c->getValue()));
|
||||
} else {
|
||||
ty = ty->getContainedType(0);
|
||||
}
|
||||
/// CanReduceSCEV - Return true if we can strength reduce this scalar evolution
|
||||
/// in the specified loop.
|
||||
static bool CanReduceSCEV(const SCEVHandle &SH, Loop *L) {
|
||||
SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH);
|
||||
if (!AddRec || AddRec->getLoop() != L) return false;
|
||||
|
||||
if (operand == CanonicalIndVar) {
|
||||
// FIXME: use getCanonicalInductionVariableIncrement to choose between
|
||||
// one and neg one maybe? We need to support int *foo = GEP base, -1
|
||||
const Type *Ty = CanonicalIndVar->getType();
|
||||
pre_op_vector.push_back(Constant::getNullValue(Ty));
|
||||
inc_op_vector.push_back(ConstantInt::get(Ty, 1));
|
||||
indvar = op;
|
||||
break;
|
||||
} else if (isa<Argument>(operand)) {
|
||||
pre_op_vector.push_back(operand);
|
||||
AllConstantOperands = false;
|
||||
} else if (isa<Constant>(operand)) {
|
||||
pre_op_vector.push_back(operand);
|
||||
} else if (Instruction *inst = dyn_cast<Instruction>(operand)) {
|
||||
if (!DS->dominates(inst, Preheader->getTerminator()))
|
||||
return;
|
||||
pre_op_vector.push_back(operand);
|
||||
AllConstantOperands = false;
|
||||
} else {
|
||||
return; // Cannot handle this.
|
||||
}
|
||||
Cache = Cache->get(operand);
|
||||
}
|
||||
assert(indvar > 0 && "Indvar used by GEP not found in operand list");
|
||||
// FIXME: Generalize to non-affine IV's.
|
||||
if (!AddRec->isAffine()) return false;
|
||||
|
||||
// Ensure the pointer base is loop invariant. While strength reduction
|
||||
// makes sense even if the pointer changed on every iteration, there is no
|
||||
// realistic way of handling it unless GEPs were completely decomposed into
|
||||
// their constituent operations so we have explicit multiplications to work
|
||||
// with.
|
||||
if (Instruction *GepPtrOp = dyn_cast<Instruction>(GEPI->getOperand(0)))
|
||||
if (!DS->dominates(GepPtrOp, Preheader->getTerminator()))
|
||||
return;
|
||||
// FIXME: generalize to IV's with more complex strides (must emit stride
|
||||
// expression outside of loop!)
|
||||
if (isa<SCEVConstant>(AddRec->getOperand(1)))
|
||||
return true;
|
||||
|
||||
// Don't reduce multiplies that the target can handle via addressing modes.
|
||||
uint64_t sz = getAnalysis<TargetData>().getTypeSize(ty);
|
||||
if (sz && (sz & (sz-1)) == 0) // Power of two?
|
||||
if (sz <= (1ULL << (MaxTargetAMSize-1)))
|
||||
return;
|
||||
|
||||
// If all operands of the GEP we are going to insert into the preheader
|
||||
// are constants, generate a GEP ConstantExpr instead.
|
||||
//
|
||||
// If there is only one operand after the initial non-constant one, we know
|
||||
// that it was the induction variable, and has been replaced by a constant
|
||||
// null value. In this case, replace the GEP with a use of pointer directly.
|
||||
PHINode *NewPHI;
|
||||
if (Cache->CachedPHINode == 0) {
|
||||
Value *PreGEP;
|
||||
if (AllConstantOperands && isa<Constant>(GEPI->getOperand(0))) {
|
||||
Constant *C = dyn_cast<Constant>(GEPI->getOperand(0));
|
||||
PreGEP = ConstantExpr::getGetElementPtr(C, pre_op_vector);
|
||||
} else if (pre_op_vector.size() == 1) {
|
||||
PreGEP = GEPI->getOperand(0);
|
||||
} else {
|
||||
PreGEP = new GetElementPtrInst(GEPI->getOperand(0),
|
||||
pre_op_vector, GEPI->getName()+".pre",
|
||||
Preheader->getTerminator());
|
||||
}
|
||||
|
||||
// The next step of the strength reduction is to create a PHI that will
|
||||
// choose between the initial GEP we created and inserted into the
|
||||
// preheader, and the incremented GEP that we will create below and insert
|
||||
// into the loop body.
|
||||
NewPHI = new PHINode(PreGEP->getType(),
|
||||
GEPI->getName()+".str", InsertBefore);
|
||||
NewPHI->addIncoming(PreGEP, Preheader);
|
||||
|
||||
// Now, create the GEP instruction to increment by one the value selected
|
||||
// by the PHI instruction we just created above, and add it as the second
|
||||
// incoming Value/BasicBlock pair to the PHINode. It is inserted before
|
||||
// the increment of the canonical induction variable.
|
||||
Instruction *IncrInst =
|
||||
const_cast<Instruction*>(L->getCanonicalInductionVariableIncrement());
|
||||
GetElementPtrInst *StrGEP = new GetElementPtrInst(NewPHI, inc_op_vector,
|
||||
GEPI->getName()+".inc",
|
||||
IncrInst);
|
||||
pred_iterator PI = pred_begin(Header);
|
||||
if (*PI == Preheader)
|
||||
++PI;
|
||||
NewPHI->addIncoming(StrGEP, *PI);
|
||||
Cache->CachedPHINode = NewPHI;
|
||||
} else {
|
||||
// Reuse previously created pointer, as it is identical to the one we were
|
||||
// about to create.
|
||||
NewPHI = Cache->CachedPHINode;
|
||||
}
|
||||
|
||||
if (GEPI->getNumOperands() - 1 == indvar) {
|
||||
// If there were no operands following the induction variable, replace all
|
||||
// uses of the old GEP instruction with the new PHI.
|
||||
GEPI->replaceAllUsesWith(NewPHI);
|
||||
} else {
|
||||
// Create a new GEP instruction using the new PHI as the base. The
|
||||
// operands of the original GEP past the induction variable become
|
||||
// operands of this new GEP.
|
||||
std::vector<Value *> op_vector;
|
||||
const Type *Ty = CanonicalIndVar->getType();
|
||||
op_vector.push_back(Constant::getNullValue(Ty));
|
||||
for (unsigned op = indvar + 1; op < GEPI->getNumOperands(); op++)
|
||||
op_vector.push_back(GEPI->getOperand(op));
|
||||
GetElementPtrInst *newGEP = new GetElementPtrInst(NewPHI, op_vector,
|
||||
GEPI->getName() + ".lsr",
|
||||
GEPI);
|
||||
GEPI->replaceAllUsesWith(newGEP);
|
||||
}
|
||||
|
||||
// The old GEP is now dead.
|
||||
DeadInsts.insert(GEPI);
|
||||
++NumReduced;
|
||||
// We handle steps by unsigned values, because we know we won't have to insert
|
||||
// a cast for them.
|
||||
if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(AddRec->getOperand(1)))
|
||||
if (SU->getValue()->getType()->isUnsigned())
|
||||
return true;
|
||||
|
||||
// Otherwise, no, we can't handle it yet.
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// GetAdjustedIndex - Adjust the specified GEP sequential type index to match
|
||||
/// the size of the pointer type, and scale it by the type size.
|
||||
static SCEVHandle GetAdjustedIndex(const SCEVHandle &Idx, uint64_t TySize,
|
||||
const Type *UIntPtrTy) {
|
||||
SCEVHandle Result = Idx;
|
||||
if (Result->getType()->getUnsignedVersion() != UIntPtrTy) {
|
||||
if (UIntPtrTy->getPrimitiveSize() < Result->getType()->getPrimitiveSize())
|
||||
Result = SCEVTruncateExpr::get(Result, UIntPtrTy);
|
||||
else
|
||||
Result = SCEVZeroExtendExpr::get(Result, UIntPtrTy);
|
||||
}
|
||||
|
||||
// This index is scaled by the type size being indexed.
|
||||
if (TySize != 1)
|
||||
Result = SCEVMulExpr::get(Result,
|
||||
SCEVConstant::get(ConstantUInt::get(UIntPtrTy,
|
||||
TySize)));
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// AnalyzeGetElementPtrUsers - Analyze all of the users of the specified
|
||||
/// getelementptr instruction, adding them to the IVUsesByStride table. Note
|
||||
/// that we only want to analyze a getelementptr instruction once, and it can
|
||||
/// have multiple operands that are uses of the indvar (e.g. A[i][i]). Because
|
||||
/// of this, we only process a GEP instruction if its first recurrent operand is
|
||||
/// "op", otherwise we will either have already processed it or we will sometime
|
||||
/// later.
|
||||
void LoopStrengthReduce::AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP,
|
||||
Instruction *Op, Loop *L) {
|
||||
// Analyze all of the subscripts of this getelementptr instruction, looking
|
||||
// for uses that are determined by the trip count of L. First, skip all
|
||||
// operands the are not dependent on the IV.
|
||||
|
||||
// Build up the base expression. Insert an LLVM cast of the pointer to
|
||||
// uintptr_t first.
|
||||
Value *BasePtr;
|
||||
if (Constant *CB = dyn_cast<Constant>(GEP->getOperand(0)))
|
||||
BasePtr = ConstantExpr::getCast(CB, UIntPtrTy);
|
||||
else {
|
||||
Value *&BP = CastedBasePointers[GEP->getOperand(0)];
|
||||
if (BP == 0) {
|
||||
BasicBlock::iterator InsertPt;
|
||||
if (isa<Argument>(GEP->getOperand(0))) {
|
||||
InsertPt = GEP->getParent()->getParent()->begin()->begin();
|
||||
} else {
|
||||
InsertPt = cast<Instruction>(GEP->getOperand(0));
|
||||
if (InvokeInst *II = dyn_cast<InvokeInst>(GEP->getOperand(0)))
|
||||
InsertPt = II->getNormalDest()->begin();
|
||||
else
|
||||
++InsertPt;
|
||||
}
|
||||
BP = new CastInst(GEP->getOperand(0), UIntPtrTy,
|
||||
GEP->getOperand(0)->getName(), InsertPt);
|
||||
}
|
||||
BasePtr = BP;
|
||||
}
|
||||
|
||||
SCEVHandle Base = SCEVUnknown::get(BasePtr);
|
||||
|
||||
gep_type_iterator GTI = gep_type_begin(GEP);
|
||||
unsigned i = 1;
|
||||
for (; GEP->getOperand(i) != Op; ++i, ++GTI) {
|
||||
// If this is a use of a recurrence that we can analyze, and it comes before
|
||||
// Op does in the GEP operand list, we will handle this when we process this
|
||||
// operand.
|
||||
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
|
||||
const StructLayout *SL = TD->getStructLayout(STy);
|
||||
unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
|
||||
uint64_t Offset = SL->MemberOffsets[Idx];
|
||||
Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
|
||||
UIntPtrTy));
|
||||
} else {
|
||||
SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));
|
||||
if (CanReduceSCEV(Idx, L))
|
||||
return;
|
||||
Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
|
||||
TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy));
|
||||
}
|
||||
}
|
||||
|
||||
// Get the index, convert it to intptr_t.
|
||||
SCEVHandle GEPIndexExpr =
|
||||
GetAdjustedIndex(SE->getSCEV(Op), TD->getTypeSize(GTI.getIndexedType()),
|
||||
UIntPtrTy);
|
||||
|
||||
// Process all remaining subscripts in the GEP instruction.
|
||||
for (++i, ++GTI; i != GEP->getNumOperands(); ++i, ++GTI)
|
||||
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
|
||||
const StructLayout *SL = TD->getStructLayout(STy);
|
||||
unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
|
||||
uint64_t Offset = SL->MemberOffsets[Idx];
|
||||
Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset,
|
||||
UIntPtrTy));
|
||||
} else {
|
||||
SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i));
|
||||
if (CanReduceSCEV(Idx, L)) { // Another IV subscript
|
||||
GEPIndexExpr = SCEVAddExpr::get(GEPIndexExpr,
|
||||
GetAdjustedIndex(Idx, TD->getTypeSize(GTI.getIndexedType()),
|
||||
UIntPtrTy));
|
||||
assert(CanReduceSCEV(GEPIndexExpr, L) &&
|
||||
"Cannot reduce the sum of two reducible SCEV's??");
|
||||
} else {
|
||||
Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx,
|
||||
TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy));
|
||||
}
|
||||
}
|
||||
|
||||
assert(CanReduceSCEV(GEPIndexExpr, L) && "Non reducible idx??");
|
||||
|
||||
Base = SCEVAddExpr::get(Base, cast<SCEVAddRecExpr>(GEPIndexExpr)->getStart());
|
||||
SCEVHandle Stride = cast<SCEVAddRecExpr>(GEPIndexExpr)->getOperand(1);
|
||||
|
||||
DEBUG(std::cerr << "GEP BASE : " << *Base << "\n");
|
||||
DEBUG(std::cerr << "GEP STRIDE: " << *Stride << "\n");
|
||||
|
||||
Value *Step = 0; // Step of ISE.
|
||||
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride))
|
||||
/// Always get the step value as an unsigned value.
|
||||
Step = ConstantExpr::getCast(SC->getValue(),
|
||||
SC->getValue()->getType()->getUnsignedVersion());
|
||||
else
|
||||
Step = cast<SCEVUnknown>(Stride)->getValue();
|
||||
assert(Step->getType()->isUnsigned() && "Bad step value!");
|
||||
|
||||
|
||||
// Now that we know the base and stride contributed by the GEP instruction,
|
||||
// process all users.
|
||||
for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
|
||||
UI != E; ++UI) {
|
||||
Instruction *User = cast<Instruction>(*UI);
|
||||
|
||||
// Do not infinitely recurse on PHI nodes.
|
||||
if (isa<PHINode>(User) && User->getParent() == L->getHeader())
|
||||
continue;
|
||||
|
||||
// If this is an instruction defined in a nested loop, or outside this loop,
|
||||
// don't mess with it.
|
||||
if (LI->getLoopFor(User->getParent()) != L)
|
||||
continue;
|
||||
|
||||
DEBUG(std::cerr << "FOUND USER: " << *User
|
||||
<< " OF STRIDE: " << *Step << " BASE = " << *Base << "\n");
|
||||
|
||||
|
||||
// Okay, we found a user that we cannot reduce. Analyze the instruction
|
||||
// and decide what to do with it.
|
||||
IVUsesByStride[Step].addUser(Base, User, GEP);
|
||||
}
|
||||
}
|
||||
|
||||
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
|
||||
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
|
||||
/// return true. Otherwise, return false.
|
||||
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L) {
|
||||
if (I->getType() == Type::VoidTy) return false
|
||||
SCEVHandle ISE = SE->getSCEV(I);
|
||||
if (!CanReduceSCEV(ISE, L)) return false;
|
||||
|
||||
SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(ISE);
|
||||
SCEVHandle Start = AR->getStart();
|
||||
|
||||
// Get the step value, canonicalizing to an unsigned integer type so that
|
||||
// lookups in the map will match.
|
||||
Value *Step = 0; // Step of ISE.
|
||||
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getOperand(1)))
|
||||
/// Always get the step value as an unsigned value.
|
||||
Step = ConstantExpr::getCast(SC->getValue(),
|
||||
SC->getValue()->getType()->getUnsignedVersion());
|
||||
else
|
||||
Step = cast<SCEVUnknown>(AR->getOperand(1))->getValue();
|
||||
assert(Step->getType()->isUnsigned() && "Bad step value!");
|
||||
|
||||
std::set<GetElementPtrInst*> AnalyzedGEPs;
|
||||
|
||||
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){
|
||||
Instruction *User = cast<Instruction>(*UI);
|
||||
|
||||
// Do not infinitely recurse on PHI nodes.
|
||||
if (isa<PHINode>(User) && User->getParent() == L->getHeader())
|
||||
continue;
|
||||
|
||||
// If this is an instruction defined in a nested loop, or outside this loop,
|
||||
// don't mess with it.
|
||||
if (LI->getLoopFor(User->getParent()) != L)
|
||||
continue;
|
||||
|
||||
// Next, see if this user is analyzable itself!
|
||||
if (!AddUsersIfInteresting(User, L)) {
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
|
||||
// If this is a getelementptr instruction, figure out what linear
|
||||
// expression of induction variable is actually being used.
|
||||
//
|
||||
if (AnalyzedGEPs.insert(GEP).second) // Not already analyzed?
|
||||
AnalyzeGetElementPtrUsers(GEP, I, L);
|
||||
} else {
|
||||
DEBUG(std::cerr << "FOUND USER: " << *User
|
||||
<< " OF SCEV: " << *ISE << "\n");
|
||||
|
||||
// Okay, we found a user that we cannot reduce. Analyze the instruction
|
||||
// and decide what to do with it.
|
||||
IVUsesByStride[Step].addUser(Start, User, I);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// BasedUser - For a particular base value, keep information about how we've
|
||||
/// partitioned the expression so far.
|
||||
struct BasedUser {
|
||||
/// Inst - The instruction using the induction variable.
|
||||
Instruction *Inst;
|
||||
|
||||
/// Op - The value to replace with the EmittedBase.
|
||||
Value *Op;
|
||||
|
||||
/// Imm - The immediate value that should be added to the base immediately
|
||||
/// before Inst, because it will be folded into the imm field of the
|
||||
/// instruction.
|
||||
SCEVHandle Imm;
|
||||
|
||||
/// EmittedBase - The actual value* to use for the base value of this
|
||||
/// operation. This is null if we should just use zero so far.
|
||||
Value *EmittedBase;
|
||||
|
||||
BasedUser(Instruction *I, Value *V, const SCEVHandle &IMM)
|
||||
: Inst(I), Op(V), Imm(IMM), EmittedBase(0) {}
|
||||
|
||||
|
||||
// No need to compare these.
|
||||
bool operator<(const BasedUser &BU) const { return 0; }
|
||||
|
||||
void dump() const;
|
||||
};
|
||||
}
|
||||
|
||||
void BasedUser::dump() const {
|
||||
std::cerr << " Imm=" << *Imm;
|
||||
if (EmittedBase)
|
||||
std::cerr << " EB=" << *EmittedBase;
|
||||
|
||||
std::cerr << " Inst: " << *Inst;
|
||||
}
|
||||
|
||||
/// isTargetConstant - Return true if the following can be referenced by the
|
||||
/// immediate field of a target instruction.
|
||||
static bool isTargetConstant(const SCEVHandle &V) {
|
||||
|
||||
// FIXME: Look at the target to decide if &GV is a legal constant immediate.
|
||||
if (isa<SCEVConstant>(V)) return true;
|
||||
|
||||
return false; // ENABLE this for x86
|
||||
|
||||
if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
|
||||
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
|
||||
if (CE->getOpcode() == Instruction::Cast)
|
||||
if (isa<GlobalValue>(CE->getOperand(0)))
|
||||
// FIXME: should check to see that the dest is uintptr_t!
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// GetImmediateValues - Look at Val, and pull out any additions of constants
|
||||
/// that can fit into the immediate field of instructions in the target.
|
||||
static SCEVHandle GetImmediateValues(SCEVHandle Val, bool isAddress) {
|
||||
if (!isAddress)
|
||||
return SCEVUnknown::getIntegerSCEV(0, Val->getType());
|
||||
if (isTargetConstant(Val))
|
||||
return Val;
|
||||
|
||||
SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val);
|
||||
if (SAE) {
|
||||
unsigned i = 0;
|
||||
for (; i != SAE->getNumOperands(); ++i)
|
||||
if (isTargetConstant(SAE->getOperand(i))) {
|
||||
SCEVHandle ImmVal = SAE->getOperand(i);
|
||||
|
||||
// If there are any other immediates that we can handle here, pull them
|
||||
// out too.
|
||||
for (++i; i != SAE->getNumOperands(); ++i)
|
||||
if (isTargetConstant(SAE->getOperand(i)))
|
||||
ImmVal = SCEVAddExpr::get(ImmVal, SAE->getOperand(i));
|
||||
return ImmVal;
|
||||
}
|
||||
}
|
||||
|
||||
return SCEVUnknown::getIntegerSCEV(0, Val->getType());
|
||||
}
|
||||
|
||||
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
|
||||
/// stride of IV. All of the users may have different starting values, and this
|
||||
/// may not be the only stride (we know it is if isOnlyStride is true).
|
||||
void LoopStrengthReduce::StrengthReduceStridedIVUsers(Value *Stride,
|
||||
IVUse &Uses, Loop *L,
|
||||
bool isOnlyStride) {
|
||||
// Transform our list of users and offsets to a bit more complex table. In
|
||||
// this new vector, the first entry for each element is the base of the
|
||||
// strided access, and the second is the BasedUser object for the use. We
|
||||
// progressively move information from the first to the second entry, until we
|
||||
// eventually emit the object.
|
||||
std::vector<std::pair<SCEVHandle, BasedUser> > UsersToProcess;
|
||||
UsersToProcess.reserve(Uses.Users.size());
|
||||
|
||||
SCEVHandle ZeroBase = SCEVUnknown::getIntegerSCEV(0,
|
||||
Uses.Users[0].first->getType());
|
||||
|
||||
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i)
|
||||
UsersToProcess.push_back(std::make_pair(Uses.Users[i].first,
|
||||
BasedUser(Uses.Users[i].second,
|
||||
Uses.UserOperands[i],
|
||||
ZeroBase)));
|
||||
|
||||
// First pass, figure out what we can represent in the immediate fields of
|
||||
// instructions. If we can represent anything there, move it to the imm
|
||||
// fields of the BasedUsers.
|
||||
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
|
||||
bool isAddress = isa<LoadInst>(UsersToProcess[i].second.Inst) ||
|
||||
isa<StoreInst>(UsersToProcess[i].second.Inst);
|
||||
UsersToProcess[i].second.Imm = GetImmediateValues(UsersToProcess[i].first,
|
||||
isAddress);
|
||||
UsersToProcess[i].first = SCEV::getMinusSCEV(UsersToProcess[i].first,
|
||||
UsersToProcess[i].second.Imm);
|
||||
|
||||
DEBUG(std::cerr << "BASE: " << *UsersToProcess[i].first);
|
||||
DEBUG(UsersToProcess[i].second.dump());
|
||||
}
|
||||
|
||||
SCEVExpander Rewriter(*SE, *LI);
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
Instruction *PreInsertPt = Preheader->getTerminator();
|
||||
Instruction *PhiInsertBefore = L->getHeader()->begin();
|
||||
|
||||
assert(isa<PHINode>(PhiInsertBefore) &&
|
||||
"How could this loop have IV's without any phis?");
|
||||
PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
|
||||
assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
|
||||
"This loop isn't canonicalized right");
|
||||
BasicBlock *LatchBlock =
|
||||
SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
|
||||
|
||||
// FIXME: This loop needs increasing levels of intelligence.
|
||||
// STAGE 0: just emit everything as its own base. <-- We are here
|
||||
// STAGE 1: factor out common vars from bases, and try and push resulting
|
||||
// constants into Imm field.
|
||||
// STAGE 2: factor out large constants to try and make more constants
|
||||
// acceptable for target loads and stores.
|
||||
std::sort(UsersToProcess.begin(), UsersToProcess.end());
|
||||
|
||||
while (!UsersToProcess.empty()) {
|
||||
// Create a new Phi for this base, and stick it in the loop header.
|
||||
Value *Replaced = UsersToProcess.front().second.Op;
|
||||
const Type *ReplacedTy = Replaced->getType();
|
||||
PHINode *NewPHI = new PHINode(ReplacedTy, Replaced->getName()+".str",
|
||||
PhiInsertBefore);
|
||||
|
||||
// Emit the initial base value into the loop preheader, and add it to the
|
||||
// Phi node.
|
||||
Value *BaseV = Rewriter.expandCodeFor(UsersToProcess.front().first,
|
||||
PreInsertPt, ReplacedTy);
|
||||
NewPHI->addIncoming(BaseV, Preheader);
|
||||
|
||||
// Emit the increment of the base value before the terminator of the loop
|
||||
// latch block, and add it to the Phi node.
|
||||
SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
||||
SCEVUnknown::get(Stride));
|
||||
|
||||
Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(),
|
||||
ReplacedTy);
|
||||
IncV->setName(NewPHI->getName()+".inc");
|
||||
NewPHI->addIncoming(IncV, LatchBlock);
|
||||
|
||||
// Emit the code to add the immediate offset to the Phi value, just before
|
||||
// the instruction that we identified as using this stride and base.
|
||||
// First, empty the SCEVExpander's expression map so that we are guaranteed
|
||||
// to have the code emitted where we expect it.
|
||||
Rewriter.clear();
|
||||
SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
||||
UsersToProcess.front().second.Imm);
|
||||
Value *newVal = Rewriter.expandCodeFor(NewValSCEV,
|
||||
UsersToProcess.front().second.Inst,
|
||||
ReplacedTy);
|
||||
|
||||
// Replace the use of the operand Value with the new Phi we just created.
|
||||
DEBUG(std::cerr << "REPLACING: " << *Replaced << "IN: " <<
|
||||
*UsersToProcess.front().second.Inst << "WITH: "<< *newVal << '\n');
|
||||
UsersToProcess.front().second.Inst->replaceUsesOfWith(Replaced, newVal);
|
||||
|
||||
// Mark old value we replaced as possibly dead, so that it is elminated
|
||||
// if we just replaced the last use of that value.
|
||||
DeadInsts.insert(cast<Instruction>(Replaced));
|
||||
|
||||
UsersToProcess.erase(UsersToProcess.begin());
|
||||
++NumReduced;
|
||||
|
||||
// TODO: Next, find out which base index is the most common, pull it out.
|
||||
}
|
||||
|
||||
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
|
||||
// different starting values, into different PHIs.
|
||||
|
||||
// BEFORE writing this, it's probably useful to handle GEP's.
|
||||
|
||||
// NOTE: pull all constants together, for REG+IMM addressing, include &GV in
|
||||
// 'IMM' if the target supports it.
|
||||
}
|
||||
|
||||
|
||||
void LoopStrengthReduce::runOnLoop(Loop *L) {
|
||||
// First step, transform all loops nesting inside of this loop.
|
||||
for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
|
||||
runOnLoop(*I);
|
||||
|
||||
// Next, get the first PHINode since it is guaranteed to be the canonical
|
||||
// induction variable for the loop by the preceding IndVarSimplify pass.
|
||||
PHINode *PN = L->getCanonicalInductionVariable();
|
||||
if (0 == PN)
|
||||
return;
|
||||
// Next, find all uses of induction variables in this loop, and catagorize
|
||||
// them by stride. Start by finding all of the PHI nodes in the header for
|
||||
// this loop. If they are induction variables, inspect their uses.
|
||||
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
|
||||
AddUsersIfInteresting(I, L);
|
||||
|
||||
// FIXME: Need to use SCEV to detect GEP uses of the indvar, since indvars
|
||||
// pass creates code like this, which we can't currently detect:
|
||||
// %tmp.1 = sub uint 2000, %indvar
|
||||
// %tmp.8 = getelementptr int* %y, uint %tmp.1
|
||||
// If we have nothing to do, return.
|
||||
//if (IVUsesByStride.empty()) return;
|
||||
|
||||
// Strength reduce all GEPs in the Loop. Insert secondary PHI nodes for the
|
||||
// strength reduced pointers we'll be creating after the canonical induction
|
||||
// variable's PHI.
|
||||
std::set<Instruction*> DeadInsts;
|
||||
GEPCache Cache;
|
||||
for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
|
||||
UI != UE; ++UI)
|
||||
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
|
||||
strengthReduceGEP(GEPI, L, &Cache, PN->getNext(), DeadInsts);
|
||||
// FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of
|
||||
// doing computation in byte values, promote to 32-bit values if safe.
|
||||
|
||||
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
|
||||
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
|
||||
// codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need
|
||||
// to be careful that IV's are all the same type. Only works for intptr_t
|
||||
// indvars.
|
||||
|
||||
// If we only have one stride, we can more aggressively eliminate some things.
|
||||
bool HasOneStride = IVUsesByStride.size() == 1;
|
||||
|
||||
for (std::map<Value*, IVUse>::iterator SI = IVUsesByStride.begin(),
|
||||
E = IVUsesByStride.end(); SI != E; ++SI)
|
||||
StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
|
||||
|
||||
// Clean up after ourselves
|
||||
if (!DeadInsts.empty()) {
|
||||
DeleteTriviallyDeadInstructions(DeadInsts);
|
||||
|
||||
// At this point, we know that we have killed one or more GEP instructions.
|
||||
// It is worth checking to see if the cann indvar is also dead, so that we
|
||||
// can remove it as well. The requirements for the cann indvar to be
|
||||
// considered dead are:
|
||||
// 1. the cann indvar has one use
|
||||
// 2. the use is an add instruction
|
||||
// 3. the add has one use
|
||||
// 4. the add is used by the cann indvar
|
||||
// If all four cases above are true, then we can remove both the add and
|
||||
// the cann indvar.
|
||||
// FIXME: this needs to eliminate an induction variable even if it's being
|
||||
// compared against some value to decide loop termination.
|
||||
if (PN->hasOneUse()) {
|
||||
BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
|
||||
if (BO && BO->getOpcode() == Instruction::Add)
|
||||
if (BO->hasOneUse()) {
|
||||
if (PN == dyn_cast<PHINode>(*(BO->use_begin()))) {
|
||||
DeadInsts.insert(BO);
|
||||
// Break the cycle, then delete the PHI.
|
||||
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
|
||||
PN->eraseFromParent();
|
||||
DeleteTriviallyDeadInstructions(DeadInsts);
|
||||
BasicBlock::iterator I = L->getHeader()->begin();
|
||||
PHINode *PN;
|
||||
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
|
||||
// At this point, we know that we have killed one or more GEP instructions.
|
||||
// It is worth checking to see if the cann indvar is also dead, so that we
|
||||
// can remove it as well. The requirements for the cann indvar to be
|
||||
// considered dead are:
|
||||
// 1. the cann indvar has one use
|
||||
// 2. the use is an add instruction
|
||||
// 3. the add has one use
|
||||
// 4. the add is used by the cann indvar
|
||||
// If all four cases above are true, then we can remove both the add and
|
||||
// the cann indvar.
|
||||
// FIXME: this needs to eliminate an induction variable even if it's being
|
||||
// compared against some value to decide loop termination.
|
||||
if (PN->hasOneUse()) {
|
||||
BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
|
||||
if (BO && BO->getOpcode() == Instruction::Add)
|
||||
if (BO->hasOneUse()) {
|
||||
if (PN == dyn_cast<PHINode>(*(BO->use_begin()))) {
|
||||
DeadInsts.insert(BO);
|
||||
// Break the cycle, then delete the PHI.
|
||||
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
|
||||
PN->eraseFromParent();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
DeleteTriviallyDeadInstructions(DeadInsts);
|
||||
}
|
||||
|
||||
IVUsesByStride.clear();
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue