forked from OSchip/llvm-project
LoopVectorizer: Add initial support for pointer induction variables (for example: *dst++ = *src++).
At the moment we still require to have an integer induction variable (for example: i++). llvm-svn: 168231
This commit is contained in:
parent
ef83919b4c
commit
c3c07e62e8
|
@ -260,6 +260,10 @@ public:
|
|||
/// of the reductions that were found in the loop.
|
||||
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
|
||||
|
||||
/// InductionList saves induction variables and maps them to the initial
|
||||
/// value entring the loop.
|
||||
typedef DenseMap<PHINode*, Value*> InductionList;
|
||||
|
||||
/// Returns true if it is legal to vectorize this loop.
|
||||
/// This does not mean that it is profitable to vectorize this
|
||||
/// loop, only that it is legal to do so.
|
||||
|
@ -271,6 +275,9 @@ public:
|
|||
/// Returns the reduction variables found in the loop.
|
||||
ReductionList *getReductionVars() { return &Reductions; }
|
||||
|
||||
/// Returns the induction variables found in the loop.
|
||||
InductionList *getInductionVars() { return &Inductions; }
|
||||
|
||||
/// Check if the pointer returned by this GEP is consecutive
|
||||
/// when the index is vectorized. This happens when the last
|
||||
/// index of the GEP is consecutive, like the induction variable.
|
||||
|
@ -317,10 +324,16 @@ private:
|
|||
|
||||
// --- vectorization state --- //
|
||||
|
||||
/// Holds the induction variable.
|
||||
/// Holds the integer induction variable. This is the counter of the
|
||||
/// loop.
|
||||
PHINode *Induction;
|
||||
/// Holds the reduction variables.
|
||||
ReductionList Reductions;
|
||||
/// Holds all of the induction variables that we found in the loop.
|
||||
/// Notice that inductions don't need to start at zero and that induction
|
||||
/// variables can be pointers.
|
||||
InductionList Inductions;
|
||||
|
||||
/// Allowed outside users. This holds the reduction
|
||||
/// vars which can be accessed from outside the loop.
|
||||
SmallPtrSet<Value*, 4> AllowedExit;
|
||||
|
@ -791,14 +804,50 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||
Loc->eraseFromParent();
|
||||
|
||||
// We are going to resume the execution of the scalar loop.
|
||||
// This PHI decides on what number to start. If we come from the
|
||||
// vector loop then we need to start with the end index minus the
|
||||
// index modulo VF. If we come from a bypass edge then we need to start
|
||||
// from the real start.
|
||||
PHINode* ResumeIndex = PHINode::Create(IdxTy, 2, "resume.idx",
|
||||
// Go over all of the induction variables that we found and fix the
|
||||
// PHIs that are left in the scalar version of the loop.
|
||||
// The starting values of PHI nodes depend on the counter of the last
|
||||
// iteration in the vectorized loop.
|
||||
// If we come from a bypass edge then we need to start from the original start
|
||||
// value.
|
||||
|
||||
// This variable saves the new starting index for the scalar loop.
|
||||
Value *ResumeIndex = 0;
|
||||
LoopVectorizationLegality::InductionList::iterator I, E;
|
||||
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
|
||||
for (I = List->begin(), E = List->end(); I != E; ++I) {
|
||||
PHINode *OrigPhi = I->first;
|
||||
PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
|
||||
MiddleBlock->getTerminator());
|
||||
ResumeIndex->addIncoming(StartIdx, BypassBlock);
|
||||
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
|
||||
Value *EndValue = 0;
|
||||
if (OrigPhi->getType()->isIntegerTy()) {
|
||||
// Handle the integer induction counter:
|
||||
assert(OrigPhi == OldInduction && "Unknown integer PHI");
|
||||
// We know what the end value is.
|
||||
EndValue = IdxEndRoundDown;
|
||||
// We also know which PHI node holds it.
|
||||
ResumeIndex = ResumeVal;
|
||||
} else {
|
||||
// For pointer induction variables, calculate the offset using
|
||||
// the end index.
|
||||
EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown,
|
||||
"ptr.ind.end",
|
||||
BypassBlock->getTerminator());
|
||||
}
|
||||
|
||||
// The new PHI merges the original incoming value, in case of a bypass,
|
||||
// or the value at the end of the vectorized loop.
|
||||
ResumeVal->addIncoming(I->second, BypassBlock);
|
||||
ResumeVal->addIncoming(EndValue, VecBody);
|
||||
|
||||
// Fix the scalar body counter (PHI node).
|
||||
unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
|
||||
OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
|
||||
}
|
||||
|
||||
// Make sure that we found the index where scalar loop needs to continue.
|
||||
assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
|
||||
"Invalid resume Index");
|
||||
|
||||
// Add a check in the middle block to see if we have completed
|
||||
// all of the iterations in the first vector loop.
|
||||
|
@ -822,10 +871,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||
// Now we have two terminators. Remove the old one from the block.
|
||||
VecBody->getTerminator()->eraseFromParent();
|
||||
|
||||
// Fix the scalar body iteration count.
|
||||
unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
|
||||
OldInduction->setIncomingValue(BlockIdx, ResumeIndex);
|
||||
|
||||
// Get ready to start creating new instructions into the vectorized body.
|
||||
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
|
||||
|
||||
|
@ -895,7 +940,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
// add the new incoming edges to the PHI. At this point all of the
|
||||
// instructions in the basic block are vectorized, so we can use them to
|
||||
// construct the PHI.
|
||||
PhiVector PHIsToFix;
|
||||
PhiVector RdxPHIsToFix;
|
||||
|
||||
// For each instruction in the old loop.
|
||||
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
|
||||
|
@ -911,14 +956,41 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
// Special handling for the induction var.
|
||||
if (OldInduction == Inst)
|
||||
continue;
|
||||
|
||||
// Handle reduction variables:
|
||||
if (Legal->getReductionVars()->count(P)) {
|
||||
// This is phase one of vectorizing PHIs.
|
||||
// This has to be a reduction variable.
|
||||
assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
|
||||
Type *VecTy = VectorType::get(Inst->getType(), VF);
|
||||
WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
|
||||
PHIsToFix.push_back(P);
|
||||
RdxPHIsToFix.push_back(P);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle pointer inductions:
|
||||
if (Legal->getInductionVars()->count(P)) {
|
||||
Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction);
|
||||
Value *StartPtr = Legal->getInductionVars()->lookup(P);
|
||||
// This is the normalized GEP that starts counting at zero.
|
||||
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
|
||||
"normalized.idx");
|
||||
// This is the first GEP in the sequence.
|
||||
Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx,
|
||||
"induc.ptr");
|
||||
// This is the vector of results. Notice that we don't generate vector
|
||||
// geps because scalar geps result in better code.
|
||||
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
|
||||
for (unsigned int i = 0; i < VF; ++i) {
|
||||
Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i),
|
||||
"next.gep");
|
||||
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
|
||||
Builder.getInt32(i),
|
||||
"insert.gep");
|
||||
}
|
||||
|
||||
WidenMap[Inst] = VecVal;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
|
@ -1092,7 +1164,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
// Create the 'reduced' values for each of the induction vars.
|
||||
// The reduced values are the vector values that we scalarize and combine
|
||||
// after the loop is finished.
|
||||
for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
|
||||
for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
|
||||
it != e; ++it) {
|
||||
PHINode *RdxPhi = *it;
|
||||
PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
|
||||
|
@ -1124,7 +1196,6 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||
Value *VectorStart = Builder.CreateInsertElement(Identity,
|
||||
RdxDesc.StartValue, Zero);
|
||||
|
||||
|
||||
// Fix the vector-loop phi.
|
||||
// We created the induction variable so we know that the
|
||||
// preheader is the first entry.
|
||||
|
@ -1276,23 +1347,33 @@ bool LoopVectorizationLegality::canVectorize() {
|
|||
}
|
||||
|
||||
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
||||
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
|
||||
|
||||
// Scan the instructions in the block and look for hazards.
|
||||
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
|
||||
Instruction *I = it;
|
||||
|
||||
PHINode *Phi = dyn_cast<PHINode>(I);
|
||||
if (Phi) {
|
||||
if (PHINode *Phi = dyn_cast<PHINode>(I)) {
|
||||
// This should not happen because the loop should be normalized.
|
||||
if (Phi->getNumIncomingValues() != 2) {
|
||||
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
|
||||
return false;
|
||||
}
|
||||
// We only look at integer phi nodes.
|
||||
if (!Phi->getType()->isIntegerTy()) {
|
||||
DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
|
||||
|
||||
// This is the value coming from the preheader.
|
||||
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
|
||||
|
||||
// We only look at integer and pointer phi nodes.
|
||||
if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
|
||||
DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
|
||||
Inductions[Phi] = StartValue;
|
||||
continue;
|
||||
} else if (!Phi->getType()->isIntegerTy()) {
|
||||
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Handle integer PHIs:
|
||||
if (isInductionVariable(Phi)) {
|
||||
if (Induction) {
|
||||
DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
|
||||
|
@ -1300,6 +1381,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
|||
}
|
||||
DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
|
||||
Induction = Phi;
|
||||
Inductions[Phi] = StartValue;
|
||||
continue;
|
||||
}
|
||||
if (AddReductionVar(Phi, IntegerAdd)) {
|
||||
|
@ -1682,6 +1764,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
|
|||
}
|
||||
|
||||
bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
|
||||
Type *PhiTy = Phi->getType();
|
||||
// We only handle integer and pointer inductions variables.
|
||||
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
|
||||
return false;
|
||||
|
||||
// Check that the PHI is consecutive and starts at zero.
|
||||
const SCEV *PhiScev = SE->getSCEV(Phi);
|
||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
|
||||
|
@ -1691,11 +1778,17 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
|
|||
}
|
||||
const SCEV *Step = AR->getStepRecurrence(*SE);
|
||||
|
||||
if (!Step->isOne()) {
|
||||
DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
// Integer inductions need to have a stride of one.
|
||||
if (PhiTy->isIntegerTy())
|
||||
return Step->isOne();
|
||||
|
||||
// Calculate the pointer stride and check if it is consecutive.
|
||||
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
|
||||
if (!C) return false;
|
||||
|
||||
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
|
||||
uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
|
||||
return (C->getValue()->equalsInt(Size));
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
|
||||
|
|
|
@ -565,9 +565,8 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
|
|||
ret i32 %a.0.lcssa
|
||||
}
|
||||
|
||||
; Can't vectorize because there are multiple PHIs.
|
||||
;CHECK: @example23
|
||||
;CHECK-NOT: <4 x i32>
|
||||
;CHECK: <4 x i32>
|
||||
;CHECK: ret void
|
||||
define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
|
||||
br label %1
|
||||
|
|
Loading…
Reference in New Issue