2004-10-19 05:08:22 +08:00
|
|
|
//===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2004-10-19 05:08:22 +08:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file was developed by Nate Begeman and is distributed under the
|
|
|
|
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2004-10-19 05:08:22 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This pass performs a strength reduction on array references inside loops that
|
|
|
|
// have as one or more of their components the loop induction variable. This is
|
|
|
|
// accomplished by creating a new Value to hold the initial value of the array
|
|
|
|
// access for the first iteration, and then creating a new GEP instruction in
|
|
|
|
// the loop to increment the value by the appropriate amount.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2005-08-04 07:30:08 +08:00
|
|
|
#define DEBUG_TYPE "loop-reduce"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
|
|
|
#include "llvm/Constants.h"
|
|
|
|
#include "llvm/Instructions.h"
|
|
|
|
#include "llvm/Type.h"
|
2005-03-04 12:04:26 +08:00
|
|
|
#include "llvm/DerivedTypes.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Analysis/Dominators.h"
|
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Support/CFG.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Support/GetElementPtrTypeIterator.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2005-03-04 12:04:26 +08:00
|
|
|
#include "llvm/Target/TargetData.h"
|
2004-10-19 05:08:22 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2005-07-30 08:15:07 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2005-07-31 02:22:27 +08:00
|
|
|
#include <algorithm>
|
2004-10-19 05:08:22 +08:00
|
|
|
#include <set>
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// NumReduced - Statistic registered under the "loop-reduce" name; its
/// description string says it counts the GEPs this pass strength reduced.
Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced");
|
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
/// IVStrideUse - Keep track of one use of a strided induction variable, where
|
|
|
|
/// the stride is stored externally. The Offset member keeps track of the
|
|
|
|
/// offset from the IV, User is the actual user of the operand, and 'Operand'
|
|
|
|
/// is the operand # of the User that is the use.
|
|
|
|
struct IVStrideUse {
|
|
|
|
SCEVHandle Offset;
|
|
|
|
Instruction *User;
|
|
|
|
Value *OperandValToReplace;
|
|
|
|
|
|
|
|
IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
|
|
|
|
: Offset(Offs), User(U), OperandValToReplace(O) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// IVUsersOfOneStride - This structure keeps track of all instructions that
|
|
|
|
/// have an operand that is based on the trip count multiplied by some stride.
|
|
|
|
/// The stride for all of these users is common and kept external to this
|
|
|
|
/// structure.
|
|
|
|
struct IVUsersOfOneStride {
|
2005-07-30 08:15:07 +08:00
|
|
|
/// Users - Keep track of all of the users of this stride as well as the
|
2005-08-04 06:21:05 +08:00
|
|
|
/// initial value and the operand that uses the IV.
|
|
|
|
std::vector<IVStrideUse> Users;
|
|
|
|
|
|
|
|
void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
|
|
|
|
Users.push_back(IVStrideUse(Offset, User, Operand));
|
2005-07-30 08:15:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
class LoopStrengthReduce : public FunctionPass {
|
|
|
|
LoopInfo *LI;
|
|
|
|
DominatorSet *DS;
|
2005-07-30 08:15:07 +08:00
|
|
|
ScalarEvolution *SE;
|
|
|
|
const TargetData *TD;
|
|
|
|
const Type *UIntPtrTy;
|
2004-10-19 05:08:22 +08:00
|
|
|
bool Changed;
|
2005-08-02 10:52:02 +08:00
|
|
|
|
|
|
|
/// MaxTargetAMSize - This is the maximum power-of-two scale value that the
|
|
|
|
/// target can handle for free with its addressing modes.
|
2005-03-04 12:04:26 +08:00
|
|
|
unsigned MaxTargetAMSize;
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
/// IVUsesByStride - Keep track of all uses of induction variables that we
|
|
|
|
/// are interested in. The key of the map is the stride of the access.
|
2005-08-04 06:21:05 +08:00
|
|
|
std::map<Value*, IVUsersOfOneStride> IVUsesByStride;
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-08-04 09:19:13 +08:00
|
|
|
/// CastedValues - As we need to cast values to uintptr_t, this keeps track
|
|
|
|
/// of the casted version of each value. This is accessed by
|
|
|
|
/// getCastedVersionOf.
|
|
|
|
std::map<Value*, Value*> CastedPointers;
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
/// DeadInsts - Keep track of instructions we may have made dead, so that
|
|
|
|
/// we can remove them after we are done working.
|
|
|
|
std::set<Instruction*> DeadInsts;
|
2004-10-19 05:08:22 +08:00
|
|
|
public:
|
2005-03-04 12:04:26 +08:00
|
|
|
LoopStrengthReduce(unsigned MTAMS = 1)
|
|
|
|
: MaxTargetAMSize(MTAMS) {
|
|
|
|
}
|
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
virtual bool runOnFunction(Function &) {
|
|
|
|
LI = &getAnalysis<LoopInfo>();
|
|
|
|
DS = &getAnalysis<DominatorSet>();
|
2005-07-30 08:15:07 +08:00
|
|
|
SE = &getAnalysis<ScalarEvolution>();
|
|
|
|
TD = &getAnalysis<TargetData>();
|
|
|
|
UIntPtrTy = TD->getIntPtrType();
|
2004-10-19 05:08:22 +08:00
|
|
|
Changed = false;
|
|
|
|
|
|
|
|
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
|
|
|
|
runOnLoop(*I);
|
2005-08-04 09:19:13 +08:00
|
|
|
|
|
|
|
CastedPointers.clear();
|
2004-10-19 05:08:22 +08:00
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
|
|
|
AU.setPreservesCFG();
|
2005-02-28 03:37:07 +08:00
|
|
|
AU.addRequiredID(LoopSimplifyID);
|
2004-10-19 05:08:22 +08:00
|
|
|
AU.addRequired<LoopInfo>();
|
|
|
|
AU.addRequired<DominatorSet>();
|
2005-03-04 12:04:26 +08:00
|
|
|
AU.addRequired<TargetData>();
|
2005-07-30 08:15:07 +08:00
|
|
|
AU.addRequired<ScalarEvolution>();
|
2004-10-19 05:08:22 +08:00
|
|
|
}
|
2005-08-04 09:19:13 +08:00
|
|
|
|
|
|
|
/// getCastedVersionOf - Return the specified value casted to uintptr_t.
|
|
|
|
///
|
|
|
|
Value *getCastedVersionOf(Value *V);
|
|
|
|
private:
|
2004-10-19 05:08:22 +08:00
|
|
|
void runOnLoop(Loop *L);
|
2005-08-05 01:40:30 +08:00
|
|
|
bool AddUsersIfInteresting(Instruction *I, Loop *L,
|
|
|
|
std::set<Instruction*> &Processed);
|
|
|
|
SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L);
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
void StrengthReduceStridedIVUsers(Value *Stride, IVUsersOfOneStride &Uses,
|
|
|
|
Loop *L, bool isOnlyStride);
|
2004-10-19 05:08:22 +08:00
|
|
|
void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
|
|
|
|
};
|
2005-04-22 07:48:37 +08:00
|
|
|
// Registers the LoopStrengthReduce pass under the "loop-reduce" name with the
// description string given below.
RegisterOpt<LoopStrengthReduce> X("loop-reduce",
|
2004-10-19 05:08:22 +08:00
|
|
|
"Strength Reduce GEP Uses of Ind. Vars");
|
|
|
|
}
|
|
|
|
|
2005-03-04 12:04:26 +08:00
|
|
|
/// createLoopStrengthReducePass - Return a newly allocated LoopStrengthReduce
/// pass constructed with the given MaxTargetAMSize.
FunctionPass *llvm::createLoopStrengthReducePass(unsigned MaxTargetAMSize) {
  FunctionPass *NewPass = new LoopStrengthReduce(MaxTargetAMSize);
  return NewPass;
}
|
|
|
|
|
2005-08-04 09:19:13 +08:00
|
|
|
/// getCastedVersionOf - Return the specified value casted to uintptr_t.
|
|
|
|
///
|
|
|
|
Value *LoopStrengthReduce::getCastedVersionOf(Value *V) {
|
|
|
|
if (V->getType() == UIntPtrTy) return V;
|
|
|
|
if (Constant *CB = dyn_cast<Constant>(V))
|
|
|
|
return ConstantExpr::getCast(CB, UIntPtrTy);
|
|
|
|
|
|
|
|
Value *&New = CastedPointers[V];
|
|
|
|
if (New) return New;
|
|
|
|
|
|
|
|
BasicBlock::iterator InsertPt;
|
|
|
|
if (Argument *Arg = dyn_cast<Argument>(V)) {
|
|
|
|
// Insert into the entry of the function, after any allocas.
|
|
|
|
InsertPt = Arg->getParent()->begin()->begin();
|
|
|
|
while (isa<AllocaInst>(InsertPt)) ++InsertPt;
|
|
|
|
} else {
|
|
|
|
if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
|
|
|
|
InsertPt = II->getNormalDest()->begin();
|
|
|
|
} else {
|
|
|
|
InsertPt = cast<Instruction>(V);
|
|
|
|
++InsertPt;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do not insert casts into the middle of PHI node blocks.
|
|
|
|
while (isa<PHINode>(InsertPt)) ++InsertPt;
|
|
|
|
}
|
2005-08-05 03:08:16 +08:00
|
|
|
|
|
|
|
New = new CastInst(V, UIntPtrTy, V->getName(), InsertPt);
|
|
|
|
DeadInsts.insert(cast<Instruction>(New));
|
|
|
|
return New;
|
2005-08-04 09:19:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
|
|
|
|
/// specified set are trivially dead, delete them and see if this makes any of
|
|
|
|
/// their operands subsequently dead.
|
|
|
|
void LoopStrengthReduce::
|
|
|
|
DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
|
|
|
|
while (!Insts.empty()) {
|
|
|
|
Instruction *I = *Insts.begin();
|
|
|
|
Insts.erase(Insts.begin());
|
|
|
|
if (isInstructionTriviallyDead(I)) {
|
2005-03-01 11:46:11 +08:00
|
|
|
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
|
|
|
|
if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
|
|
|
|
Insts.insert(U);
|
2005-08-04 05:36:09 +08:00
|
|
|
SE->deleteInstructionFromRecords(I);
|
|
|
|
I->eraseFromParent();
|
2004-10-19 05:08:22 +08:00
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-08-05 01:40:30 +08:00
|
|
|
/// GetExpressionSCEV - Compute and return the SCEV for the specified
|
|
|
|
/// instruction.
|
|
|
|
SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) {
|
|
|
|
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp);
|
|
|
|
if (!GEP)
|
|
|
|
return SE->getSCEV(Exp);
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// Analyze all of the subscripts of this getelementptr instruction, looking
|
|
|
|
// for uses that are determined by the trip count of L. First, skip all
|
|
|
|
// operands the are not dependent on the IV.
|
|
|
|
|
|
|
|
// Build up the base expression. Insert an LLVM cast of the pointer to
|
|
|
|
// uintptr_t first.
|
2005-08-05 01:40:30 +08:00
|
|
|
SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0)));
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
gep_type_iterator GTI = gep_type_begin(GEP);
|
2005-08-05 01:40:30 +08:00
|
|
|
|
|
|
|
for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
|
2005-07-30 08:15:07 +08:00
|
|
|
// If this is a use of a recurrence that we can analyze, and it comes before
|
|
|
|
// Op does in the GEP operand list, we will handle this when we process this
|
|
|
|
// operand.
|
|
|
|
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
|
|
|
|
const StructLayout *SL = TD->getStructLayout(STy);
|
|
|
|
unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
|
|
|
|
uint64_t Offset = SL->MemberOffsets[Idx];
|
2005-08-05 01:40:30 +08:00
|
|
|
GEPVal = SCEVAddExpr::get(GEPVal,
|
|
|
|
SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy));
|
2005-03-07 06:52:29 +08:00
|
|
|
} else {
|
2005-08-05 03:08:16 +08:00
|
|
|
Value *OpVal = getCastedVersionOf(GEP->getOperand(i));
|
|
|
|
SCEVHandle Idx = SE->getSCEV(OpVal);
|
|
|
|
|
2005-08-05 01:40:30 +08:00
|
|
|
uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType());
|
|
|
|
if (TypeSize != 1)
|
|
|
|
Idx = SCEVMulExpr::get(Idx,
|
|
|
|
SCEVConstant::get(ConstantUInt::get(UIntPtrTy,
|
|
|
|
TypeSize)));
|
|
|
|
GEPVal = SCEVAddExpr::get(GEPVal, Idx);
|
2005-03-07 06:52:29 +08:00
|
|
|
}
|
2004-10-19 05:08:22 +08:00
|
|
|
}
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-08-05 01:40:30 +08:00
|
|
|
return GEPVal;
|
2005-07-30 08:15:07 +08:00
|
|
|
}
|
|
|
|
|
2005-08-05 03:08:16 +08:00
|
|
|
/// getSCEVStartAndStride - Compute the start and stride of this expression,
|
|
|
|
/// returning false if the expression is not a start/stride pair, or true if it
|
|
|
|
/// is. The stride must be a loop invariant expression, but the start may be
|
|
|
|
/// a mix of loop invariant and loop variant expressions.
|
|
|
|
static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
|
|
|
|
SCEVHandle &Start, Value *&Stride) {
|
|
|
|
SCEVHandle TheAddRec = Start; // Initialize to zero.
|
|
|
|
|
|
|
|
// If the outer level is an AddExpr, the operands are all start values except
|
|
|
|
// for a nested AddRecExpr.
|
|
|
|
if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
|
|
|
|
for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
|
|
|
|
if (SCEVAddRecExpr *AddRec =
|
|
|
|
dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
|
|
|
|
if (AddRec->getLoop() == L)
|
|
|
|
TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec);
|
|
|
|
else
|
|
|
|
return false; // Nested IV of some sort?
|
|
|
|
} else {
|
|
|
|
Start = SCEVAddExpr::get(Start, AE->getOperand(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH)) {
|
|
|
|
TheAddRec = SH;
|
|
|
|
} else {
|
|
|
|
return false; // not analyzable.
|
|
|
|
}
|
|
|
|
|
|
|
|
SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
|
|
|
|
if (!AddRec || AddRec->getLoop() != L) return false;
|
|
|
|
|
|
|
|
// FIXME: Generalize to non-affine IV's.
|
|
|
|
if (!AddRec->isAffine()) return false;
|
|
|
|
|
|
|
|
Start = SCEVAddExpr::get(Start, AddRec->getOperand(0));
|
|
|
|
|
|
|
|
// FIXME: generalize to IV's with more complex strides (must emit stride
|
|
|
|
// expression outside of loop!)
|
|
|
|
if (!isa<SCEVConstant>(AddRec->getOperand(1)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
SCEVConstant *StrideC = cast<SCEVConstant>(AddRec->getOperand(1));
|
|
|
|
Stride = StrideC->getValue();
|
|
|
|
|
|
|
|
assert(Stride->getType()->isUnsigned() &&
|
|
|
|
"Constants should be canonicalized to unsigned!");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
|
|
|
|
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
|
|
|
|
/// return true. Otherwise, return false.
|
2005-08-05 01:40:30 +08:00
|
|
|
bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
|
|
|
|
std::set<Instruction*> &Processed) {
|
2005-07-30 08:21:31 +08:00
|
|
|
if (I->getType() == Type::VoidTy) return false;
|
2005-08-05 01:40:30 +08:00
|
|
|
if (!Processed.insert(I).second)
|
|
|
|
return true; // Instruction already handled.
|
|
|
|
|
2005-08-05 03:08:16 +08:00
|
|
|
// Get the symbolic expression for this instruction.
|
2005-08-05 01:40:30 +08:00
|
|
|
SCEVHandle ISE = GetExpressionSCEV(I, L);
|
2005-08-05 03:08:16 +08:00
|
|
|
if (isa<SCEVCouldNotCompute>(ISE)) return false;
|
|
|
|
|
|
|
|
// Get the start and stride for this expression.
|
|
|
|
SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType());
|
|
|
|
Value *Stride = 0;
|
|
|
|
if (!getSCEVStartAndStride(ISE, L, Start, Stride))
|
|
|
|
return false; // Non-reducible symbolic expression, bail out.
|
2005-08-05 01:40:30 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){
|
|
|
|
Instruction *User = cast<Instruction>(*UI);
|
|
|
|
|
|
|
|
// Do not infinitely recurse on PHI nodes.
|
|
|
|
if (isa<PHINode>(User) && User->getParent() == L->getHeader())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// If this is an instruction defined in a nested loop, or outside this loop,
|
When processing outer loops and we find uses of an IV in inner loops, make
sure to handle the use, just don't recurse into it.
This permits us to generate this code for a simple nested loop case:
.LBB_foo_0: ; entry
stwu r1, -48(r1)
stw r29, 44(r1)
stw r30, 40(r1)
mflr r11
stw r11, 56(r1)
lis r2, ha16(L_A$non_lazy_ptr)
lwz r30, lo16(L_A$non_lazy_ptr)(r2)
li r29, 1
.LBB_foo_1: ; no_exit.0
bl L_bar$stub
li r2, 1
or r3, r30, r30
.LBB_foo_2: ; no_exit.1
lfd f0, 8(r3)
stfd f0, 0(r3)
addi r4, r2, 1
addi r3, r3, 8
cmpwi cr0, r2, 100
or r2, r4, r4
bne .LBB_foo_2 ; no_exit.1
.LBB_foo_3: ; loopexit.1
addi r30, r30, 800
addi r2, r29, 1
cmpwi cr0, r29, 100
or r29, r2, r2
bne .LBB_foo_1 ; no_exit.0
.LBB_foo_4: ; return
lwz r11, 56(r1)
mtlr r11
lwz r30, 40(r1)
lwz r29, 44(r1)
lwz r1, 0(r1)
blr
instead of this:
_foo:
.LBB_foo_0: ; entry
stwu r1, -48(r1)
stw r28, 44(r1) ;; uses an extra register.
stw r29, 40(r1)
stw r30, 36(r1)
mflr r11
stw r11, 56(r1)
li r30, 1
li r29, 0
or r28, r29, r29
.LBB_foo_1: ; no_exit.0
bl L_bar$stub
mulli r2, r28, 800 ;; unstrength-reduced multiply
lis r3, ha16(L_A$non_lazy_ptr) ;; loop invariant address computation
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
mulli r4, r29, 800 ;; unstrength-reduced multiply
addi r3, r3, 8
add r3, r4, r3
li r4, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 0(r3)
stfd f0, 0(r2)
addi r5, r4, 1
addi r2, r2, 8 ;; multiple stride 8 IV's
addi r3, r3, 8
cmpwi cr0, r4, 100
or r4, r5, r5
bne .LBB_foo_2 ; no_exit.1
.LBB_foo_3: ; loopexit.1
addi r28, r28, 1 ;;; Many IV's with stride 1
addi r29, r29, 1
addi r2, r30, 1
cmpwi cr0, r30, 100
or r30, r2, r2
bne .LBB_foo_1 ; no_exit.0
.LBB_foo_4: ; return
lwz r11, 56(r1)
mtlr r11
lwz r30, 36(r1)
lwz r29, 40(r1)
lwz r28, 44(r1)
lwz r1, 0(r1)
blr
llvm-svn: 22640
2005-08-04 08:14:11 +08:00
|
|
|
// don't recurse into it.
|
2005-08-05 03:08:16 +08:00
|
|
|
bool AddUserToIVUsers = false;
|
When processing outer loops and we find uses of an IV in inner loops, make
sure to handle the use, just don't recurse into it.
This permits us to generate this code for a simple nested loop case:
.LBB_foo_0: ; entry
stwu r1, -48(r1)
stw r29, 44(r1)
stw r30, 40(r1)
mflr r11
stw r11, 56(r1)
lis r2, ha16(L_A$non_lazy_ptr)
lwz r30, lo16(L_A$non_lazy_ptr)(r2)
li r29, 1
.LBB_foo_1: ; no_exit.0
bl L_bar$stub
li r2, 1
or r3, r30, r30
.LBB_foo_2: ; no_exit.1
lfd f0, 8(r3)
stfd f0, 0(r3)
addi r4, r2, 1
addi r3, r3, 8
cmpwi cr0, r2, 100
or r2, r4, r4
bne .LBB_foo_2 ; no_exit.1
.LBB_foo_3: ; loopexit.1
addi r30, r30, 800
addi r2, r29, 1
cmpwi cr0, r29, 100
or r29, r2, r2
bne .LBB_foo_1 ; no_exit.0
.LBB_foo_4: ; return
lwz r11, 56(r1)
mtlr r11
lwz r30, 40(r1)
lwz r29, 44(r1)
lwz r1, 0(r1)
blr
instead of this:
_foo:
.LBB_foo_0: ; entry
stwu r1, -48(r1)
stw r28, 44(r1) ;; uses an extra register.
stw r29, 40(r1)
stw r30, 36(r1)
mflr r11
stw r11, 56(r1)
li r30, 1
li r29, 0
or r28, r29, r29
.LBB_foo_1: ; no_exit.0
bl L_bar$stub
mulli r2, r28, 800 ;; unstrength-reduced multiply
lis r3, ha16(L_A$non_lazy_ptr) ;; loop invariant address computation
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
mulli r4, r29, 800 ;; unstrength-reduced multiply
addi r3, r3, 8
add r3, r4, r3
li r4, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 0(r3)
stfd f0, 0(r2)
addi r5, r4, 1
addi r2, r2, 8 ;; multiple stride 8 IV's
addi r3, r3, 8
cmpwi cr0, r4, 100
or r4, r5, r5
bne .LBB_foo_2 ; no_exit.1
.LBB_foo_3: ; loopexit.1
addi r28, r28, 1 ;;; Many IV's with stride 1
addi r29, r29, 1
addi r2, r30, 1
cmpwi cr0, r30, 100
or r30, r2, r2
bne .LBB_foo_1 ; no_exit.0
.LBB_foo_4: ; return
lwz r11, 56(r1)
mtlr r11
lwz r30, 36(r1)
lwz r29, 40(r1)
lwz r28, 44(r1)
lwz r1, 0(r1)
blr
llvm-svn: 22640
2005-08-04 08:14:11 +08:00
|
|
|
if (LI->getLoopFor(User->getParent()) != L) {
|
|
|
|
DEBUG(std::cerr << "FOUND USER in nested loop: " << *User
|
|
|
|
<< " OF SCEV: " << *ISE << "\n");
|
2005-08-05 03:08:16 +08:00
|
|
|
AddUserToIVUsers = true;
|
2005-08-05 01:40:30 +08:00
|
|
|
} else if (!AddUsersIfInteresting(User, L, Processed)) {
|
2005-08-04 08:40:47 +08:00
|
|
|
DEBUG(std::cerr << "FOUND USER: " << *User
|
|
|
|
<< " OF SCEV: " << *ISE << "\n");
|
2005-08-05 03:08:16 +08:00
|
|
|
AddUserToIVUsers = true;
|
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2005-08-05 03:08:16 +08:00
|
|
|
if (AddUserToIVUsers) {
|
2005-08-04 08:40:47 +08:00
|
|
|
// Okay, we found a user that we cannot reduce. Analyze the instruction
|
|
|
|
// and decide what to do with it.
|
2005-08-05 03:08:16 +08:00
|
|
|
IVUsesByStride[Stride].addUser(Start, User, I);
|
2005-07-30 08:15:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// BasedUser - For a particular base value, keep information about how we've
|
|
|
|
/// partitioned the expression so far.
|
|
|
|
struct BasedUser {
|
|
|
|
/// Inst - The instruction using the induction variable.
|
|
|
|
Instruction *Inst;
|
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
/// OperandValToReplace - The operand value of Inst to replace with the
|
|
|
|
/// EmittedBase.
|
|
|
|
Value *OperandValToReplace;
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
/// Imm - The immediate value that should be added to the base immediately
|
|
|
|
/// before Inst, because it will be folded into the imm field of the
|
|
|
|
/// instruction.
|
|
|
|
SCEVHandle Imm;
|
|
|
|
|
|
|
|
/// EmittedBase - The actual value* to use for the base value of this
|
|
|
|
/// operation. This is null if we should just use zero so far.
|
|
|
|
Value *EmittedBase;
|
|
|
|
|
2005-08-04 06:21:05 +08:00
|
|
|
BasedUser(Instruction *I, Value *Op, const SCEVHandle &IMM)
|
|
|
|
: Inst(I), OperandValToReplace(Op), Imm(IMM), EmittedBase(0) {}
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
|
|
|
|
// No need to compare these.
|
|
|
|
bool operator<(const BasedUser &BU) const { return 0; }
|
|
|
|
|
|
|
|
void dump() const;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
void BasedUser::dump() const {
|
|
|
|
std::cerr << " Imm=" << *Imm;
|
|
|
|
if (EmittedBase)
|
|
|
|
std::cerr << " EB=" << *EmittedBase;
|
|
|
|
|
|
|
|
std::cerr << " Inst: " << *Inst;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// isTargetConstant - Return true if the following can be referenced by the
|
|
|
|
/// immediate field of a target instruction.
|
|
|
|
static bool isTargetConstant(const SCEVHandle &V) {
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// FIXME: Look at the target to decide if &GV is a legal constant immediate.
|
|
|
|
if (isa<SCEVConstant>(V)) return true;
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
return false; // ENABLE this for x86
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
|
|
|
|
if (CE->getOpcode() == Instruction::Cast)
|
|
|
|
if (isa<GlobalValue>(CE->getOperand(0)))
|
|
|
|
// FIXME: should check to see that the dest is uintptr_t!
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// GetImmediateValues - Look at Val, and pull out any additions of constants
|
|
|
|
/// that can fit into the immediate field of instructions in the target.
|
2005-08-05 03:08:16 +08:00
|
|
|
static SCEVHandle GetImmediateValues(SCEVHandle Val, bool isAddress, Loop *L) {
|
|
|
|
if (isAddress && isTargetConstant(Val))
|
2005-07-30 08:15:07 +08:00
|
|
|
return Val;
|
|
|
|
|
Teach loop-reduce to see into nested loops, to pull out immediate values
pushed down by SCEV.
In a nested loop case, this allows us to emit this:
lis r3, ha16(L_A$non_lazy_ptr)
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
li r3, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 8(r2) ;; Uses offset of 8 instead of 0
stfd f0, 0(r2)
addi r4, r3, 1
addi r2, r2, 8
cmpwi cr0, r3, 100
or r3, r4, r4
bne .LBB_foo_2 ; no_exit.1
instead of this:
lis r3, ha16(L_A$non_lazy_ptr)
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
addi r3, r3, 8
li r4, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 0(r3)
stfd f0, 0(r2)
addi r5, r4, 1
addi r2, r2, 8
addi r3, r3, 8
cmpwi cr0, r4, 100
or r4, r5, r5
bne .LBB_foo_2 ; no_exit.1
llvm-svn: 22639
2005-08-04 07:44:42 +08:00
|
|
|
if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
|
2005-07-30 08:15:07 +08:00
|
|
|
unsigned i = 0;
|
2005-08-05 03:08:16 +08:00
|
|
|
SCEVHandle Imm = SCEVUnknown::getIntegerSCEV(0, Val->getType());
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
for (; i != SAE->getNumOperands(); ++i)
|
2005-08-05 03:08:16 +08:00
|
|
|
if (isAddress && isTargetConstant(SAE->getOperand(i))) {
|
|
|
|
Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i));
|
|
|
|
} else if (!SAE->getOperand(i)->isLoopInvariant(L)) {
|
|
|
|
// If this is a loop-variant expression, it must stay in the immediate
|
|
|
|
// field of the expression.
|
|
|
|
Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i));
|
2005-07-30 08:15:07 +08:00
|
|
|
}
|
2005-08-05 03:08:16 +08:00
|
|
|
|
|
|
|
return Imm;
|
Teach loop-reduce to see into nested loops, to pull out immediate values
pushed down by SCEV.
In a nested loop case, this allows us to emit this:
lis r3, ha16(L_A$non_lazy_ptr)
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
li r3, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 8(r2) ;; Uses offset of 8 instead of 0
stfd f0, 0(r2)
addi r4, r3, 1
addi r2, r2, 8
cmpwi cr0, r3, 100
or r3, r4, r4
bne .LBB_foo_2 ; no_exit.1
instead of this:
lis r3, ha16(L_A$non_lazy_ptr)
lwz r3, lo16(L_A$non_lazy_ptr)(r3)
add r2, r2, r3
addi r3, r3, 8
li r4, 1
.LBB_foo_2: ; no_exit.1
lfd f0, 0(r3)
stfd f0, 0(r2)
addi r5, r4, 1
addi r2, r2, 8
addi r3, r3, 8
cmpwi cr0, r4, 100
or r4, r5, r5
bne .LBB_foo_2 ; no_exit.1
llvm-svn: 22639
2005-08-04 07:44:42 +08:00
|
|
|
} else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
|
|
|
|
// Try to pull immediates out of the start value of nested addrec's.
|
2005-08-05 03:08:16 +08:00
|
|
|
return GetImmediateValues(SARE->getStart(), isAddress, L);
|
2005-07-30 08:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return SCEVUnknown::getIntegerSCEV(0, Val->getType());
|
|
|
|
}
|
|
|
|
|
|
|
|
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
|
|
|
|
/// stride of IV. All of the users may have different starting values, and this
|
|
|
|
/// may not be the only stride (we know it is if isOnlyStride is true).
|
|
|
|
void LoopStrengthReduce::StrengthReduceStridedIVUsers(Value *Stride,
|
2005-08-04 06:21:05 +08:00
|
|
|
IVUsersOfOneStride &Uses,
|
|
|
|
Loop *L,
|
2005-07-30 08:15:07 +08:00
|
|
|
bool isOnlyStride) {
|
|
|
|
// Transform our list of users and offsets to a bit more complex table. In
|
|
|
|
// this new vector, the first entry for each element is the base of the
|
|
|
|
// strided access, and the second is the BasedUser object for the use. We
|
|
|
|
// progressively move information from the first to the second entry, until we
|
|
|
|
// eventually emit the object.
|
|
|
|
std::vector<std::pair<SCEVHandle, BasedUser> > UsersToProcess;
|
|
|
|
UsersToProcess.reserve(Uses.Users.size());
|
2005-07-31 02:33:25 +08:00
|
|
|
|
|
|
|
SCEVHandle ZeroBase = SCEVUnknown::getIntegerSCEV(0,
|
2005-08-04 06:21:05 +08:00
|
|
|
Uses.Users[0].Offset->getType());
|
2005-07-30 08:15:07 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i)
|
2005-08-04 06:21:05 +08:00
|
|
|
UsersToProcess.push_back(std::make_pair(Uses.Users[i].Offset,
|
|
|
|
BasedUser(Uses.Users[i].User,
|
|
|
|
Uses.Users[i].OperandValToReplace,
|
2005-07-30 08:15:07 +08:00
|
|
|
ZeroBase)));
|
|
|
|
|
|
|
|
// First pass, figure out what we can represent in the immediate fields of
|
|
|
|
// instructions. If we can represent anything there, move it to the imm
|
|
|
|
// fields of the BasedUsers.
|
|
|
|
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
|
2005-08-05 03:08:16 +08:00
|
|
|
// Addressing modes can be folded into loads and stores. Be careful that
|
|
|
|
// the store is through the expression, not of the expression though.
|
|
|
|
bool isAddress = isa<LoadInst>(UsersToProcess[i].second.Inst);
|
|
|
|
if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].second.Inst))
|
|
|
|
if (SI->getOperand(1) == UsersToProcess[i].second.OperandValToReplace)
|
|
|
|
isAddress = true;
|
|
|
|
|
|
|
|
UsersToProcess[i].second.Imm =
|
|
|
|
GetImmediateValues(UsersToProcess[i].first, isAddress, L);
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
UsersToProcess[i].first = SCEV::getMinusSCEV(UsersToProcess[i].first,
|
|
|
|
UsersToProcess[i].second.Imm);
|
|
|
|
|
|
|
|
DEBUG(std::cerr << "BASE: " << *UsersToProcess[i].first);
|
|
|
|
DEBUG(UsersToProcess[i].second.dump());
|
2005-03-07 05:58:22 +08:00
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
SCEVExpander Rewriter(*SE, *LI);
|
|
|
|
BasicBlock *Preheader = L->getLoopPreheader();
|
|
|
|
Instruction *PreInsertPt = Preheader->getTerminator();
|
|
|
|
Instruction *PhiInsertBefore = L->getHeader()->begin();
|
|
|
|
|
2005-07-31 02:33:25 +08:00
|
|
|
assert(isa<PHINode>(PhiInsertBefore) &&
|
2005-07-30 08:15:07 +08:00
|
|
|
"How could this loop have IV's without any phis?");
|
|
|
|
PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
|
|
|
|
assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
|
|
|
|
"This loop isn't canonicalized right");
|
|
|
|
BasicBlock *LatchBlock =
|
|
|
|
SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-08-04 07:30:08 +08:00
|
|
|
DEBUG(std::cerr << "INSERTING IVs of STRIDE " << *Stride << ":\n");
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// FIXME: This loop needs increasing levels of intelligence.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// STAGE 0: just emit everything as its own base.
|
2005-07-30 08:15:07 +08:00
|
|
|
// STAGE 1: factor out common vars from bases, and try and push resulting
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// constants into Imm field. <-- We are here
|
2005-07-30 08:15:07 +08:00
|
|
|
// STAGE 2: factor out large constants to try and make more constants
|
|
|
|
// acceptable for target loads and stores.
|
|
|
|
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// Sort by the base value, so that all IVs with identical bases are next to
|
|
|
|
// each other.
|
|
|
|
std::sort(UsersToProcess.begin(), UsersToProcess.end());
|
2005-07-30 08:15:07 +08:00
|
|
|
while (!UsersToProcess.empty()) {
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
SCEVHandle Base = UsersToProcess.front().first;
|
2005-08-04 07:30:08 +08:00
|
|
|
|
|
|
|
DEBUG(std::cerr << " INSERTING PHI with BASE = " << *Base << ":\n");
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// Create a new Phi for this base, and stick it in the loop header.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
const Type *ReplacedTy = Base->getType();
|
|
|
|
PHINode *NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2005-07-31 02:33:25 +08:00
|
|
|
// Emit the initial base value into the loop preheader, and add it to the
|
2005-07-30 08:15:07 +08:00
|
|
|
// Phi node.
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
Value *BaseV = Rewriter.expandCodeFor(Base, PreInsertPt, ReplacedTy);
|
2005-07-30 08:15:07 +08:00
|
|
|
NewPHI->addIncoming(BaseV, Preheader);
|
|
|
|
|
|
|
|
// Emit the increment of the base value before the terminator of the loop
|
|
|
|
// latch block, and add it to the Phi node.
|
|
|
|
SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
|
|
|
SCEVUnknown::get(Stride));
|
|
|
|
|
|
|
|
Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(),
|
|
|
|
ReplacedTy);
|
|
|
|
IncV->setName(NewPHI->getName()+".inc");
|
|
|
|
NewPHI->addIncoming(IncV, LatchBlock);
|
|
|
|
|
|
|
|
// Emit the code to add the immediate offset to the Phi value, just before
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
// the instructions that we identified as using this stride and base.
|
|
|
|
while (!UsersToProcess.empty() && UsersToProcess.front().first == Base) {
|
|
|
|
BasedUser &User = UsersToProcess.front().second;
|
|
|
|
|
|
|
|
// Clear the SCEVExpander's expression map so that we are guaranteed
|
|
|
|
// to have the code emitted where we expect it.
|
|
|
|
Rewriter.clear();
|
|
|
|
SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
|
|
|
|
User.Imm);
|
2005-08-04 07:30:08 +08:00
|
|
|
Value *Replaced = User.OperandValToReplace;
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
Value *newVal = Rewriter.expandCodeFor(NewValSCEV, User.Inst,
|
|
|
|
Replaced->getType());
|
|
|
|
|
|
|
|
// Replace the use of the operand Value with the new Phi we just created.
|
|
|
|
User.Inst->replaceUsesOfWith(Replaced, newVal);
|
2005-08-04 07:30:08 +08:00
|
|
|
DEBUG(std::cerr << " CHANGED: IMM =" << *User.Imm << " Inst = "
|
|
|
|
<< *User.Inst);
|
Move from Stage 0 to Stage 1.
Only emit one PHI node for IV uses with identical bases and strides (after
moving foldable immediates to the load/store instruction).
This implements LoopStrengthReduce/dont_insert_redundant_ops.ll, allowing
us to generate this PPC code for test1:
or r30, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r30)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
instead of this code:
or r30, r3, r3
or r29, r3, r3
.LBB_test1_1: ; Loop
li r2, 0
stw r2, 0(r29)
stw r2, 4(r30)
bl L_pred$stub
addi r30, r30, 8 ;; Two iv's with step of 8
addi r29, r29, 8
cmplwi cr0, r3, 0
bne .LBB_test1_1 ; Loop
llvm-svn: 22635
2005-08-04 06:51:21 +08:00
|
|
|
|
|
|
|
// Mark old value we replaced as possibly dead, so that it is eliminated
|
|
|
|
// if we just replaced the last use of that value.
|
|
|
|
DeadInsts.insert(cast<Instruction>(Replaced));
|
|
|
|
|
|
|
|
UsersToProcess.erase(UsersToProcess.begin());
|
|
|
|
++NumReduced;
|
|
|
|
}
|
2005-07-30 08:15:07 +08:00
|
|
|
// TODO: Next, find out which base index is the most common, pull it out.
|
|
|
|
}
|
|
|
|
|
|
|
|
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
|
|
|
|
// different starting values, into different PHIs.
|
2005-07-31 02:33:25 +08:00
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
// BEFORE writing this, it's probably useful to handle GEP's.
|
|
|
|
|
|
|
|
// NOTE: pull all constants together, for REG+IMM addressing, include &GV in
|
|
|
|
// 'IMM' if the target supports it.
|
2004-10-19 05:08:22 +08:00
|
|
|
}
|
|
|
|
|
2005-07-30 08:15:07 +08:00
|
|
|
|
2004-10-19 05:08:22 +08:00
|
|
|
/// runOnLoop - Strength reduce the induction-variable uses of loop L.  Loops
/// nested inside L are handled first (recursively); then the IV uses found in
/// L are grouped by stride, each stride group is handed to
/// StrengthReduceStridedIVUsers, and finally instructions queued in DeadInsts
/// are deleted.  IVUsesByStride is emptied before returning.
void LoopStrengthReduce::runOnLoop(Loop *L) {
  // Innermost loops first: recurse into every sub-loop before touching L.
  for (LoopInfo::iterator SubLoop = L->begin(), SubLoopEnd = L->end();
       SubLoop != SubLoopEnd; ++SubLoop)
    runOnLoop(*SubLoop);

  // Scan the PHI nodes at the top of the loop header.  Each one is passed to
  // AddUsersIfInteresting, which inspects its uses and categorizes the
  // induction variable uses by stride.  'Processed' remembers instructions
  // that were already visited so they are not reprocessed.
  std::set<Instruction*> Processed;
  for (BasicBlock::iterator HeaderInst = L->getHeader()->begin();
       isa<PHINode>(HeaderInst); ++HeaderInst)
    AddUsersIfInteresting(HeaderInst, L, Processed);

  // NOTE: the early exit for "nothing to do" is currently disabled:
  //if (IVUsesByStride.empty()) return;

  // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
  // doing computation in byte values, promote to 32-bit values if safe.

  // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
  // could have something like "for(i) { foo(i*8); bar(i*16) }", which should
  // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
  // Need to be careful that IV's are all the same type.  Only works for
  // intptr_t indvars.

  // With exactly one stride in play, the per-stride rewrite is told so and
  // can be more aggressive about eliminating things.
  const bool HasOneStride = (IVUsesByStride.size() == 1);

  typedef std::map<Value*, IVUsersOfOneStride>::iterator StrideIterator;
  for (StrideIterator Stride = IVUsesByStride.begin(),
                      StrideEnd = IVUsesByStride.end();
       Stride != StrideEnd; ++Stride)
    StrengthReduceStridedIVUsers(Stride->first, Stride->second, L,
                                 HasOneStride);

  // Clean up after ourselves.
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    // Something was queued for deletion above, so a header PHI may now be
    // kept alive only by its own increment.  A PHI is removed together with
    // that increment when all of the following hold:
    //   1. the PHI has exactly one use,
    //   2. that use is a binary operator,
    //   3. the binary operator has exactly one use,
    //   4. that use is the PHI itself.
    // NOTE(review): the previous comment said the use must be an "add", but
    // the check accepts any BinaryOperator.
    // FIXME: this needs to eliminate an induction variable even if it's being
    // compared against some value to decide loop termination.
    BasicBlock::iterator Cursor = L->getHeader()->begin();
    while (PHINode *Phi = dyn_cast<PHINode>(Cursor)) {
      // Step past the PHI now; erasing it below would invalidate the iterator.
      ++Cursor;

      if (!Phi->hasOneUse())
        continue;

      BinaryOperator *Step = dyn_cast<BinaryOperator>(*(Phi->use_begin()));
      if (!Step || !Step->hasOneUse())
        continue;

      if (Phi != *(Step->use_begin()))
        continue;

      // Queue the increment for deletion, break the PHI <-> increment cycle
      // by replacing the PHI's uses with undef, then delete the PHI.
      DeadInsts.insert(Step);
      PN_replace:
      Phi->replaceAllUsesWith(UndefValue::get(Phi->getType()));
      SE->deleteInstructionFromRecords(Phi);
      Phi->eraseFromParent();
    }

    // Remove whatever the PHI elimination above queued up.
    DeleteTriviallyDeadInstructions(DeadInsts);
  }

  IVUsesByStride.clear();
}
|