forked from OSchip/llvm-project
Clang-format the SLP vectorizer. No functionality change.
llvm-svn: 184446
This commit is contained in:
parent
f81d036ea7
commit
b488beefeb
|
@ -41,14 +41,14 @@
|
|||
using namespace llvm;
|
||||
|
||||
static cl::opt<int>
|
||||
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
|
||||
cl::desc("Only vectorize trees if the gain is above this "
|
||||
"number. (gain = -cost of vectorization)"));
|
||||
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
|
||||
cl::desc("Only vectorize trees if the gain is above this "
|
||||
"number. (gain = -cost of vectorization)"));
|
||||
namespace {
|
||||
|
||||
/// The SLPVectorizer Pass.
|
||||
struct SLPVectorizer : public FunctionPass {
|
||||
typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap;
|
||||
typedef MapVector<Value *, BoUpSLP::StoreList> StoreListMap;
|
||||
|
||||
/// Pass identification, replacement for typeid
|
||||
static char ID;
|
||||
|
@ -78,7 +78,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
if (!DL)
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Analyzing blocks in " << F.getName() << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
|
||||
|
||||
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
|
||||
BasicBlock *BB = it;
|
||||
|
@ -94,7 +94,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
// Vectorize trees that end at stores.
|
||||
if (unsigned count = collectStores(BB, R)) {
|
||||
(void)count;
|
||||
DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
|
||||
DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");
|
||||
BBChanged |= vectorizeStoreChains(R);
|
||||
}
|
||||
|
||||
|
@ -108,7 +108,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
}
|
||||
|
||||
if (Changed) {
|
||||
DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
|
||||
DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
|
||||
DEBUG(verifyFunction(F));
|
||||
}
|
||||
return Changed;
|
||||
|
@ -131,7 +131,7 @@ private:
|
|||
unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
|
||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a list of operands. If \p NeedExtracts is true
|
||||
/// then we calculate the cost of extracting the scalars from the vector.
|
||||
|
@ -139,7 +139,7 @@ private:
|
|||
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool NeedExtracts);
|
||||
|
||||
/// \brief Try to vectorize a chain that may start at the operands of \V;
|
||||
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
|
||||
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
|
||||
|
||||
/// \brief Vectorize the stores that were collected in StoreRefs.
|
||||
bool vectorizeStoreChains(BoUpSLP &R);
|
||||
|
@ -188,8 +188,9 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
|
|||
return count;
|
||||
}
|
||||
|
||||
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||
if (!A || !B) return false;
|
||||
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||
if (!A || !B)
|
||||
return false;
|
||||
Value *VL[] = { A, B };
|
||||
return tryToVectorizeList(VL, R, true);
|
||||
}
|
||||
|
@ -199,11 +200,12 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
|||
if (VL.size() < 2)
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
|
||||
|
||||
// Check that all of the parts are scalar instructions of the same type.
|
||||
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
|
||||
if (!I0) return 0;
|
||||
if (!I0)
|
||||
return 0;
|
||||
|
||||
unsigned Opcode0 = I0->getOpcode();
|
||||
|
||||
|
@ -217,17 +219,20 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
|||
}
|
||||
|
||||
int Cost = R.getTreeCost(VL);
|
||||
int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
|
||||
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
|
||||
" Cost of extract:" << ExtrCost << ".\n");
|
||||
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
|
||||
DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
|
||||
int ExtrCost = NeedExtracts ? R.getScalarizationCost(VL) : 0;
|
||||
DEBUG(dbgs() << "SLP: Cost of pair:" << Cost
|
||||
<< " Cost of extract:" << ExtrCost << ".\n");
|
||||
if ((Cost + ExtrCost) >= -SLPCostThreshold)
|
||||
return false;
|
||||
DEBUG(dbgs() << "SLP: Vectorizing pair.\n");
|
||||
R.vectorizeArith(VL);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
||||
if (!V) return false;
|
||||
bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
||||
if (!V)
|
||||
return false;
|
||||
|
||||
// Try to vectorize V.
|
||||
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
|
||||
return true;
|
||||
|
@ -267,22 +272,27 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
|||
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
||||
bool Changed = false;
|
||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||
if (isa<DbgInfoIntrinsic>(it)) continue;
|
||||
if (isa<DbgInfoIntrinsic>(it))
|
||||
continue;
|
||||
|
||||
// Try to vectorize reductions that use PHINodes.
|
||||
if (PHINode *P = dyn_cast<PHINode>(it)) {
|
||||
// Check that the PHI is a reduction PHI.
|
||||
if (P->getNumIncomingValues() != 2) return Changed;
|
||||
Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
|
||||
(P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
|
||||
0));
|
||||
if (P->getNumIncomingValues() != 2)
|
||||
return Changed;
|
||||
Value *Rdx =
|
||||
(P->getIncomingBlock(0) == BB
|
||||
? (P->getIncomingValue(0))
|
||||
: (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
|
||||
// Check if this is a Binary Operator.
|
||||
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
|
||||
if (!BI)
|
||||
continue;
|
||||
|
||||
Value *Inst = BI->getOperand(0);
|
||||
if (Inst == P) Inst = BI->getOperand(1);
|
||||
if (Inst == P)
|
||||
Inst = BI->getOperand(1);
|
||||
|
||||
Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
|
||||
continue;
|
||||
}
|
||||
|
@ -295,7 +305,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||
}
|
||||
for (int i = 0; i < 2; ++i)
|
||||
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
|
||||
Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
|
||||
Changed |=
|
||||
tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -303,7 +314,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||
// Scan the PHINodes in our successors in search for pairing hints.
|
||||
for (succ_iterator it = succ_begin(BB), e = succ_end(BB); it != e; ++it) {
|
||||
BasicBlock *Succ = *it;
|
||||
SmallVector<Value*, 4> Incoming;
|
||||
SmallVector<Value *, 4> Incoming;
|
||||
|
||||
// Collect the incoming values from the PHIs.
|
||||
for (BasicBlock::iterator instr = Succ->begin(), ie = Succ->end();
|
||||
|
@ -322,7 +333,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||
if (Incoming.size() > 1)
|
||||
Changed |= tryToVectorizeList(Incoming, R, true);
|
||||
}
|
||||
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
@ -334,8 +345,8 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
|
|||
if (it->second.size() < 2)
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
|
||||
it->second.size() << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
|
||||
<< it->second.size() << ".\n");
|
||||
|
||||
Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
|
||||
}
|
||||
|
@ -343,7 +354,7 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
|
|||
}
|
||||
|
||||
bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
|
||||
SmallVector<Value*, 4> Seq;
|
||||
SmallVector<Value *, 4> Seq;
|
||||
bool Changed = false;
|
||||
for (int i = 0, e = Gathers.size(); i < e; ++i) {
|
||||
InsertElementInst *IEI = dyn_cast_or_null<InsertElementInst>(Gathers[i]);
|
||||
|
@ -359,13 +370,13 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
|
|||
Instruction *I = cast<Instruction>(Seq[0]);
|
||||
BasicBlock *BB = I->getParent();
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Inspecting a gather list of size " << Seq.size() <<
|
||||
" in " << BB->getName() << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Inspecting a gather list of size " << Seq.size()
|
||||
<< " in " << BB->getName() << ".\n");
|
||||
|
||||
// Check if the gathered values have multiple uses. If they only have one
|
||||
// user then we know that the insert/extract pair will go away.
|
||||
bool HasMultipleUsers = false;
|
||||
for (int i=0; e = Seq.size(), i < e; ++i) {
|
||||
for (int i = 0; e = Seq.size(), i < e; ++i) {
|
||||
if (!Seq[i]->hasOneUse()) {
|
||||
HasMultipleUsers = true;
|
||||
break;
|
||||
|
@ -375,8 +386,8 @@ bool SLPVectorizer::vectorizeUsingGatherHints(BoUpSLP::InstrList &Gathers) {
|
|||
BoUpSLP BO(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
|
||||
|
||||
if (tryToVectorizeList(Seq, BO, HasMultipleUsers)) {
|
||||
DEBUG(dbgs()<<"SLP: Vectorized a gather list of len " << Seq.size() <<
|
||||
" in " << BB->getName() << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Vectorized a gather list of len " << Seq.size()
|
||||
<< " in " << BB->getName() << ".\n");
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
|
@ -418,8 +429,10 @@ void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
|
|||
// hoist this instruction.
|
||||
Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
|
||||
Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
|
||||
if (CurrVec && L->contains(CurrVec)) continue;
|
||||
if (NewElem && L->contains(NewElem)) continue;
|
||||
if (CurrVec && L->contains(CurrVec))
|
||||
continue;
|
||||
if (NewElem && L->contains(NewElem))
|
||||
continue;
|
||||
|
||||
// We can hoist this instruction. Move it to the pre-header.
|
||||
Insert->moveBefore(Location);
|
||||
|
@ -438,8 +451,5 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
|||
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
|
||||
|
||||
namespace llvm {
|
||||
Pass *createSLPVectorizerPass() {
|
||||
return new SLPVectorizer();
|
||||
}
|
||||
Pass *createSLPVectorizerPass() { return new SLPVectorizer(); }
|
||||
}
|
||||
|
||||
|
|
|
@ -45,8 +45,8 @@ static const unsigned RecursionMaxDepth = 6;
|
|||
namespace llvm {
|
||||
|
||||
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
|
||||
Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp)
|
||||
: Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
|
||||
numberInstructions();
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ void BoUpSLP::numberInstructions() {
|
|||
InstrIdx.clear();
|
||||
InstrVec.clear();
|
||||
// Number the instructions in the block.
|
||||
for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
|
||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||
InstrIdx[it] = Loc++;
|
||||
InstrVec.push_back(it);
|
||||
assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
|
||||
|
@ -63,14 +63,18 @@ void BoUpSLP::numberInstructions() {
|
|||
}
|
||||
|
||||
Value *BoUpSLP::getPointerOperand(Value *I) {
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I))
|
||||
return LI->getPointerOperand();
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
return SI->getPointerOperand();
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
|
||||
if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
|
||||
if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
|
||||
if (LoadInst *L = dyn_cast<LoadInst>(I))
|
||||
return L->getPointerAddressSpace();
|
||||
if (StoreInst *S = dyn_cast<StoreInst>(I))
|
||||
return S->getPointerAddressSpace();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -81,10 +85,12 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
|
|||
unsigned ASB = getAddressSpaceOperand(B);
|
||||
|
||||
// Check that the address spaces match and that the pointers are valid.
|
||||
if (!PtrA || !PtrB || (ASA != ASB)) return false;
|
||||
if (!PtrA || !PtrB || (ASA != ASB))
|
||||
return false;
|
||||
|
||||
// Check that A and B are of the same type.
|
||||
if (PtrA->getType() != PtrB->getType()) return false;
|
||||
if (PtrA->getType() != PtrB->getType())
|
||||
return false;
|
||||
|
||||
// Calculate the distance.
|
||||
const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
|
||||
|
@ -93,7 +99,8 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
|
|||
const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
|
||||
|
||||
// Non constant distance.
|
||||
if (!ConstOffSCEV) return false;
|
||||
if (!ConstOffSCEV)
|
||||
return false;
|
||||
|
||||
int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
|
||||
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
|
||||
|
@ -105,25 +112,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
|
|||
|
||||
bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
|
||||
unsigned ChainLen = Chain.size();
|
||||
DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<ChainLen<< "\n");
|
||||
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
|
||||
<< "\n");
|
||||
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
|
||||
unsigned Sz = DL->getTypeSizeInBits(StoreTy);
|
||||
unsigned VF = MinVecRegSize / Sz;
|
||||
|
||||
if (!isPowerOf2_32(Sz) || VF < 2) return false;
|
||||
if (!isPowerOf2_32(Sz) || VF < 2)
|
||||
return false;
|
||||
|
||||
bool Changed = false;
|
||||
// Look for profitable vectorizable trees at all offsets, starting at zero.
|
||||
for (unsigned i = 0, e = ChainLen; i < e; ++i) {
|
||||
if (i + VF > e) break;
|
||||
DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
|
||||
if (i + VF > e)
|
||||
break;
|
||||
DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
|
||||
<< "\n");
|
||||
ArrayRef<Value *> Operands = Chain.slice(i, VF);
|
||||
|
||||
int Cost = getTreeCost(Operands);
|
||||
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
|
||||
if (Cost < CostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
|
||||
Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
|
||||
Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands, VF)));
|
||||
vectorizeTree(Operands, VF);
|
||||
i += VF - 1;
|
||||
Changed = true;
|
||||
|
@ -135,8 +146,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
|
|||
|
||||
int Cost = getTreeCost(Chain);
|
||||
if (Cost < CostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Found store chain cost = "<< Cost <<" for size = " <<
|
||||
ChainLen << "\n");
|
||||
DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost
|
||||
<< " for size = " << ChainLen << "\n");
|
||||
Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Chain, ChainLen)));
|
||||
vectorizeTree(Chain, ChainLen);
|
||||
return true;
|
||||
|
@ -146,8 +157,8 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
|
|||
}
|
||||
|
||||
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
||||
SetVector<Value*> Heads, Tails;
|
||||
SmallDenseMap<Value*, Value*> ConsecutiveChain;
|
||||
SetVector<Value *> Heads, Tails;
|
||||
SmallDenseMap<Value *, Value *> ConsecutiveChain;
|
||||
|
||||
// We may run into multiple chains that merge into a single chain. We mark the
|
||||
// stores that we vectorized so that we don't visit the same store twice.
|
||||
|
@ -158,7 +169,9 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
|||
// all of the pairs of loads that follow each other.
|
||||
for (unsigned i = 0, e = Stores.size(); i < e; ++i)
|
||||
for (unsigned j = 0; j < e; ++j) {
|
||||
if (i == j) continue;
|
||||
if (i == j)
|
||||
continue;
|
||||
|
||||
if (isConsecutiveAccess(Stores[i], Stores[j])) {
|
||||
Tails.insert(Stores[j]);
|
||||
Heads.insert(Stores[i]);
|
||||
|
@ -167,9 +180,10 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
|||
}
|
||||
|
||||
// For stores that start but don't end a link in the chain:
|
||||
for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
|
||||
for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
|
||||
it != e; ++it) {
|
||||
if (Tails.count(*it)) continue;
|
||||
if (Tails.count(*it))
|
||||
continue;
|
||||
|
||||
// We found a store instr that starts a chain. Now follow the chain and try
|
||||
// to vectorize it.
|
||||
|
@ -177,7 +191,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
|||
Value *I = *it;
|
||||
// Collect the chain into a list.
|
||||
while (Tails.count(I) || Heads.count(I)) {
|
||||
if (VectorizedStores.count(I)) break;
|
||||
if (VectorizedStores.count(I))
|
||||
break;
|
||||
Operands.push_back(I);
|
||||
// Move to the next value in the chain.
|
||||
I = ConsecutiveChain[I];
|
||||
|
@ -212,8 +227,10 @@ int BoUpSLP::getScalarizationCost(Type *Ty) {
|
|||
}
|
||||
|
||||
AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
return AA->getLocation(SI);
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I))
|
||||
return AA->getLocation(LI);
|
||||
return AliasAnalysis::Location();
|
||||
}
|
||||
|
||||
|
@ -224,11 +241,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
|
|||
/// the source may alias.
|
||||
for (++I; I != E; ++I) {
|
||||
// Ignore store instructions that are marked as 'ignore'.
|
||||
if (MemBarrierIgnoreList.count(I)) continue;
|
||||
if (MemBarrierIgnoreList.count(I))
|
||||
continue;
|
||||
if (Src->mayWriteToMemory()) /* Write */ {
|
||||
if (!I->mayReadOrWriteMemory()) continue;
|
||||
if (!I->mayReadOrWriteMemory())
|
||||
continue;
|
||||
} else /* Read */ {
|
||||
if (!I->mayWriteToMemory()) continue;
|
||||
if (!I->mayWriteToMemory())
|
||||
continue;
|
||||
}
|
||||
AliasAnalysis::Location A = getLocation(&*I);
|
||||
AliasAnalysis::Location B = getLocation(Src);
|
||||
|
@ -244,7 +264,7 @@ Value *BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
|
|||
Instruction *Loc = getInsertionPoint(LastIdx);
|
||||
Builder.SetInsertPoint(Loc);
|
||||
|
||||
assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
|
||||
assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
|
||||
"Vectorizing with in-tree users");
|
||||
|
||||
Value *Vec = vectorizeTree(Operands, Operands.size());
|
||||
|
@ -283,15 +303,16 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
|
|||
|
||||
// Check that instructions with multiple users can be vectorized. Mark unsafe
|
||||
// instructions.
|
||||
for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
|
||||
e = MultiUserVals.end(); it != e; ++it) {
|
||||
for (SetVector<Value *>::iterator it = MultiUserVals.begin(),
|
||||
e = MultiUserVals.end();
|
||||
it != e; ++it) {
|
||||
// Check that all of the users of this instr are within the tree
|
||||
// and that they are all from the same lane.
|
||||
int Lane = -1;
|
||||
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
|
||||
I != E; ++I) {
|
||||
if (LaneMap.find(*I) == LaneMap.end()) {
|
||||
DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
|
||||
DEBUG(dbgs() << "SLP: Instr " << **it << " has multiple users.\n");
|
||||
|
||||
// We don't have an ordering problem if the user is not in this basic
|
||||
// block.
|
||||
|
@ -305,23 +326,23 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
|
|||
int Idx = InstrIdx[Inst];
|
||||
if (Idx < LastRootIndex) {
|
||||
MustScalarize.insert(*it);
|
||||
DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
|
||||
"because of an unsafe out of tree usage.\n");
|
||||
DEBUG(dbgs() << "SLP: Adding to MustScalarize "
|
||||
"because of an unsafe out of tree usage.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Adding to MustExtract "
|
||||
"because of a safe out of tree usage.\n");
|
||||
DEBUG(dbgs() << "SLP: Adding to MustExtract "
|
||||
"because of a safe out of tree usage.\n");
|
||||
MustExtract.insert(*it);
|
||||
continue;
|
||||
}
|
||||
if (Lane == -1) Lane = LaneMap[*I];
|
||||
if (Lane == -1)
|
||||
Lane = LaneMap[*I];
|
||||
if (Lane != LaneMap[*I]) {
|
||||
MustScalarize.insert(*it);
|
||||
DEBUG(dbgs()<<"SLP: Adding " << **it <<
|
||||
" to MustScalarize because multiple lane use it: "
|
||||
<< Lane << " and " << LaneMap[*I] << ".\n");
|
||||
DEBUG(dbgs() << "SLP: Adding " << **it
|
||||
<< " to MustScalarize because multiple lane use it: "
|
||||
<< Lane << " and " << LaneMap[*I] << ".\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -360,12 +381,16 @@ static bool CanReuseExtract(ArrayRef<Value *> VL, unsigned VF,
|
|||
}
|
||||
|
||||
void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
if (Depth == RecursionMaxDepth) return;
|
||||
if (Depth == RecursionMaxDepth)
|
||||
return;
|
||||
|
||||
// Don't handle vectors.
|
||||
if (VL[0]->getType()->isVectorTy()) return;
|
||||
if (VL[0]->getType()->isVectorTy())
|
||||
return;
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
|
||||
if (SI->getValueOperand()->getType()->isVectorTy()) return;
|
||||
if (SI->getValueOperand()->getType()->isVectorTy())
|
||||
return;
|
||||
|
||||
// Check if all of the operands are constants.
|
||||
bool AllConst = true;
|
||||
|
@ -375,27 +400,32 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
AllSameScalar &= (VL[0] == VL[i]);
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// If one of the instructions is out of this BB, we need to scalarize all.
|
||||
if (I && I->getParent() != BB) return;
|
||||
if (I && I->getParent() != BB)
|
||||
return;
|
||||
}
|
||||
|
||||
// If all of the operands are identical or constant we have a simple solution.
|
||||
if (AllConst || AllSameScalar) return;
|
||||
if (AllConst || AllSameScalar)
|
||||
return;
|
||||
|
||||
// Scalarize unknown structures.
|
||||
Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
|
||||
if (!VL0) return;
|
||||
if (!VL0)
|
||||
return;
|
||||
|
||||
unsigned Opcode = VL0->getOpcode();
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// If not all of the instructions are identical then we have to scalarize.
|
||||
if (!I || Opcode != I->getOpcode()) return;
|
||||
if (!I || Opcode != I->getOpcode())
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0, e = VL.size(); i < e; ++i) {
|
||||
// Check that the instruction is only used within
|
||||
// one lane.
|
||||
if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
|
||||
if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i)
|
||||
return;
|
||||
// Make this instruction as 'seen' and remember the lane.
|
||||
LaneMap[VL[i]] = i;
|
||||
}
|
||||
|
@ -407,70 +437,71 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
// within our tree. At depth zero we have no local users, only external
|
||||
// users that we don't care about.
|
||||
if (Depth && I && I->getNumUses() > 1) {
|
||||
DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
|
||||
"because it has multiple users:" << *I << " \n");
|
||||
DEBUG(dbgs() << "SLP: Adding to MultiUserVals "
|
||||
"because it has multiple users:" << *I << " \n");
|
||||
MultiUserVals.insert(I);
|
||||
}
|
||||
}
|
||||
|
||||
switch (Opcode) {
|
||||
case Instruction::ExtractElement: {
|
||||
VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
|
||||
// No need to follow ExtractElements that are going to be optimized away.
|
||||
if (CanReuseExtract(VL, VL.size(), VecTy)) return;
|
||||
// Fall through.
|
||||
}
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
case Instruction::FPToSI:
|
||||
case Instruction::FPExt:
|
||||
case Instruction::PtrToInt:
|
||||
case Instruction::IntToPtr:
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::UIToFP:
|
||||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast:
|
||||
case Instruction::Select:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp:
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
||||
ValueList Operands;
|
||||
// Prepare the operand vector.
|
||||
for (unsigned j = 0; j < VL.size(); ++j)
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
|
||||
|
||||
getTreeUses_rec(Operands, Depth+1);
|
||||
}
|
||||
case Instruction::ExtractElement: {
|
||||
VectorType *VecTy = VectorType::get(VL[0]->getType(), VL.size());
|
||||
// No need to follow ExtractElements that are going to be optimized away.
|
||||
if (CanReuseExtract(VL, VL.size(), VecTy))
|
||||
return;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
// Fall through.
|
||||
}
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
case Instruction::FPToUI:
|
||||
case Instruction::FPToSI:
|
||||
case Instruction::FPExt:
|
||||
case Instruction::PtrToInt:
|
||||
case Instruction::IntToPtr:
|
||||
case Instruction::SIToFP:
|
||||
case Instruction::UIToFP:
|
||||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast:
|
||||
case Instruction::Select:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp:
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
case Instruction::FSub:
|
||||
case Instruction::Mul:
|
||||
case Instruction::FMul:
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
case Instruction::FRem:
|
||||
case Instruction::Shl:
|
||||
case Instruction::LShr:
|
||||
case Instruction::AShr:
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
||||
ValueList Operands;
|
||||
// Prepare the operand vector.
|
||||
for (unsigned j = 0; j < VL.size(); ++j)
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
|
||||
getTreeUses_rec(Operands, Depth+1);
|
||||
return;
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
|
||||
|
||||
getTreeUses_rec(Operands, Depth + 1);
|
||||
}
|
||||
default:
|
||||
return;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
ValueList Operands;
|
||||
for (unsigned j = 0; j < VL.size(); ++j)
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
|
||||
getTreeUses_rec(Operands, Depth + 1);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -482,10 +513,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
ScalarTy = SI->getValueOperand()->getType();
|
||||
|
||||
/// Don't mess with vectors.
|
||||
if (ScalarTy->isVectorTy()) return max_cost;
|
||||
if (ScalarTy->isVectorTy())
|
||||
return max_cost;
|
||||
|
||||
VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
|
||||
|
||||
if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
|
||||
if (Depth == RecursionMaxDepth)
|
||||
return getScalarizationCost(VecTy);
|
||||
|
||||
// Check if all of the operands are constants.
|
||||
bool AllConst = true;
|
||||
|
@ -503,7 +537,8 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
}
|
||||
|
||||
// Is this a simple vector constant.
|
||||
if (AllConst) return 0;
|
||||
if (AllConst)
|
||||
return 0;
|
||||
|
||||
// If all of the operands are identical we can broadcast them.
|
||||
Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
|
||||
|
@ -523,14 +558,16 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
if (MustScalarizeFlag)
|
||||
return getScalarizationCost(VecTy);
|
||||
|
||||
if (!VL0) return getScalarizationCost(VecTy);
|
||||
if (!VL0)
|
||||
return getScalarizationCost(VecTy);
|
||||
assert(VL0->getParent() == BB && "Wrong BB");
|
||||
|
||||
unsigned Opcode = VL0->getOpcode();
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// If not all of the instructions are identical then we have to scalarize.
|
||||
if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
|
||||
if (!I || Opcode != I->getOpcode())
|
||||
return getScalarizationCost(VecTy);
|
||||
}
|
||||
|
||||
// Check if it is safe to sink the loads or the stores.
|
||||
|
@ -538,12 +575,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
int MaxIdx = getLastIndex(VL, VL.size());
|
||||
Instruction *Last = InstrVec[MaxIdx];
|
||||
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
|
||||
if (VL[i] == Last) continue;
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
if (VL[i] == Last)
|
||||
continue;
|
||||
Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
|
||||
if (Barrier) {
|
||||
DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
|
||||
*Last << "\n because of " << *Barrier << "\n");
|
||||
DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last
|
||||
<< "\n because of " << *Barrier << "\n");
|
||||
return max_cost;
|
||||
}
|
||||
}
|
||||
|
@ -554,7 +592,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
for (unsigned i = 0, e = VL.size(); i < e; ++i)
|
||||
if (MustExtract.count(VL[i]))
|
||||
ExternalUserExtractCost +=
|
||||
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
|
||||
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
|
||||
|
||||
switch (Opcode) {
|
||||
case Instruction::ExtractElement: {
|
||||
|
@ -585,8 +623,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
return getScalarizationCost(VecTy);
|
||||
}
|
||||
|
||||
Cost += getTreeCost_rec(Operands, Depth+1);
|
||||
if (Cost >= max_cost) return max_cost;
|
||||
Cost += getTreeCost_rec(Operands, Depth + 1);
|
||||
if (Cost >= max_cost)
|
||||
return max_cost;
|
||||
|
||||
// Calculate the cost of this instruction.
|
||||
int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
|
||||
|
@ -635,8 +674,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
for (unsigned j = 0; j < VL.size(); ++j)
|
||||
Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
|
||||
|
||||
Cost += getTreeCost_rec(Operands, Depth+1);
|
||||
if (Cost >= max_cost) return max_cost;
|
||||
Cost += getTreeCost_rec(Operands, Depth + 1);
|
||||
if (Cost >= max_cost)
|
||||
return max_cost;
|
||||
}
|
||||
|
||||
// Calculate the cost of this instruction.
|
||||
|
@ -645,12 +685,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
|
||||
Opcode == Instruction::Select) {
|
||||
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
|
||||
ScalarCost = VecTy->getNumElements() *
|
||||
TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
|
||||
ScalarCost =
|
||||
VecTy->getNumElements() *
|
||||
TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
|
||||
VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
|
||||
} else {
|
||||
ScalarCost = VecTy->getNumElements() *
|
||||
TTI->getArithmeticInstrCost(Opcode, ScalarTy);
|
||||
TTI->getArithmeticInstrCost(Opcode, ScalarTy);
|
||||
VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
|
||||
}
|
||||
Cost += (VecCost - ScalarCost);
|
||||
|
@ -658,21 +699,21 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
}
|
||||
case Instruction::Load: {
|
||||
// If we are scalarize the loads, add the cost of forming the vector.
|
||||
for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
|
||||
if (!isConsecutiveAccess(VL[i], VL[i+1]))
|
||||
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
||||
if (!isConsecutiveAccess(VL[i], VL[i + 1]))
|
||||
return getScalarizationCost(VecTy);
|
||||
|
||||
// Cost of wide load - cost of scalar loads.
|
||||
int ScalarLdCost = VecTy->getNumElements() *
|
||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
|
||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
|
||||
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
|
||||
return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
// We know that we can merge the stores. Calculate the cost.
|
||||
int ScalarStCost = VecTy->getNumElements() *
|
||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
|
||||
int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
|
||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
|
||||
int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
|
||||
int StoreCost = VecStCost - ScalarStCost;
|
||||
|
||||
ValueList Operands;
|
||||
|
@ -692,7 +733,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
|
||||
int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
|
||||
int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
|
||||
for (unsigned i = 0; i < VF; ++i )
|
||||
for (unsigned i = 0; i < VF; ++i)
|
||||
MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
|
||||
return MaxIdx;
|
||||
}
|
||||
|
@ -716,7 +757,7 @@ int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
|
|||
int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
|
||||
assert(I->getParent() == BB && "Invalid parent for instruction I");
|
||||
assert(J->getParent() == BB && "Invalid parent for instruction J");
|
||||
return std::max(InstrIdx[I],InstrIdx[J]);
|
||||
return std::max(InstrIdx[I], InstrIdx[J]);
|
||||
}
|
||||
|
||||
Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
|
||||
|
@ -725,7 +766,7 @@ Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
|
|||
|
||||
Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
|
||||
Value *Vec = UndefValue::get(Ty);
|
||||
for (unsigned i=0; i < Ty->getNumElements(); ++i) {
|
||||
for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
|
||||
// Generate the 'InsertElement' instruction.
|
||||
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
|
||||
// Remember that this instruction is used as part of a 'gather' sequence.
|
||||
|
@ -748,8 +789,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
|
|||
Value *V = vectorizeTree_rec(VL, VF);
|
||||
|
||||
int LastInstrIdx = getLastIndex(VL, VL.size());
|
||||
for (SetVector<Value*>::iterator it = MustExtract.begin(),
|
||||
e = MustExtract.end(); it != e; ++it) {
|
||||
for (SetVector<Value *>::iterator it = MustExtract.begin(),
|
||||
e = MustExtract.end();
|
||||
it != e; ++it) {
|
||||
Instruction *I = cast<Instruction>(*it);
|
||||
|
||||
// This is a scalarized value, so we can use the original value.
|
||||
|
@ -770,7 +812,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
|
|||
++U) {
|
||||
Instruction *UI = cast<Instruction>(*U);
|
||||
if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
|
||||
UI->replaceUsesOfWith(I ,Extract);
|
||||
UI->replaceUsesOfWith(I, Extract);
|
||||
Replaced = true;
|
||||
}
|
||||
assert(Replaced && "Must replace at least one outside user");
|
||||
|
@ -814,7 +856,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||
return Scalarize(VL, VecTy);
|
||||
|
||||
if (VectorizedValues.count(VL0)) {
|
||||
Value * Vec = VectorizedValues[VL0];
|
||||
Value *Vec = VectorizedValues[VL0];
|
||||
for (int i = 0; i < VF; ++i)
|
||||
VectorizedValues[VL[i]] = Vec;
|
||||
return Vec;
|
||||
|
@ -886,7 +928,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||
VectorizedValues[VL[i]] = V;
|
||||
|
||||
return V;
|
||||
|
||||
}
|
||||
case Instruction::Select: {
|
||||
ValueList TrueVec, FalseVec, CondVec;
|
||||
|
@ -933,7 +974,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||
Value *LHS = vectorizeTree_rec(LHSVL, VF);
|
||||
Value *RHS = vectorizeTree_rec(RHSVL, VF);
|
||||
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
|
||||
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
|
||||
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
|
||||
|
||||
for (int i = 0; i < VF; ++i)
|
||||
VectorizedValues[VL[i]] = V;
|
||||
|
@ -946,7 +987,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||
|
||||
// Check if all of the loads are consecutive.
|
||||
for (unsigned i = 1, e = VF; i < e; ++i)
|
||||
if (!isConsecutiveAccess(VL[i-1], VL[i]))
|
||||
if (!isConsecutiveAccess(VL[i - 1], VL[i]))
|
||||
return Scalarize(VL, VecTy);
|
||||
|
||||
// Loads are inserted at the head of the tree because we don't want to sink
|
||||
|
@ -972,8 +1013,8 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
|||
ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
|
||||
|
||||
Value *VecValue = vectorizeTree_rec(ValueOp, VF);
|
||||
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
|
||||
VecTy->getPointerTo());
|
||||
Value *VecPtr =
|
||||
Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());
|
||||
Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
|
||||
|
||||
for (int i = 0; i < VF; ++i)
|
||||
|
|
|
@ -25,23 +25,29 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class BasicBlock; class Instruction; class Type;
|
||||
class VectorType; class StoreInst; class Value;
|
||||
class ScalarEvolution; class DataLayout;
|
||||
class TargetTransformInfo; class AliasAnalysis;
|
||||
class BasicBlock;
|
||||
class Instruction;
|
||||
class Type;
|
||||
class VectorType;
|
||||
class StoreInst;
|
||||
class Value;
|
||||
class ScalarEvolution;
|
||||
class DataLayout;
|
||||
class TargetTransformInfo;
|
||||
class AliasAnalysis;
|
||||
class Loop;
|
||||
|
||||
/// Bottom Up SLP vectorization utility class.
|
||||
struct BoUpSLP {
|
||||
typedef SmallVector<Value*, 8> ValueList;
|
||||
typedef SmallVector<Instruction*, 16> InstrList;
|
||||
typedef SmallPtrSet<Value*, 16> ValueSet;
|
||||
typedef SmallVector<StoreInst*, 8> StoreList;
|
||||
static const int max_cost = 1<<20;
|
||||
struct BoUpSLP {
|
||||
typedef SmallVector<Value *, 8> ValueList;
|
||||
typedef SmallVector<Instruction *, 16> InstrList;
|
||||
typedef SmallPtrSet<Value *, 16> ValueSet;
|
||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||
static const int max_cost = 1 << 20;
|
||||
|
||||
// \brief C'tor.
|
||||
BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
|
||||
|
||||
/// \brief Take the pointer operand from the Load/Store instruction.
|
||||
/// \returns NULL if this is not a valid Load/Store instruction.
|
||||
|
@ -73,13 +79,13 @@ struct BoUpSLP {
|
|||
bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
|
||||
|
||||
/// \brief Vectorize a group of scalars into a vector tree.
|
||||
/// \returns the vectorized value.
|
||||
/// \returns the vectorized value.
|
||||
Value *vectorizeArith(ArrayRef<Value *> Operands);
|
||||
|
||||
/// \returns the list of new instructions that were added in order to collect
|
||||
/// scalars into vectors. This list can be used to further optimize the gather
|
||||
/// sequences.
|
||||
InstrList &getGatherSeqInstructions() {return GatherInstructions; }
|
||||
InstrList &getGatherSeqInstructions() { return GatherInstructions; }
|
||||
|
||||
private:
|
||||
/// \brief This method contains the recursive part of getTreeCost.
|
||||
|
@ -130,9 +136,9 @@ private:
|
|||
|
||||
private:
|
||||
/// Maps instructions to numbers and back.
|
||||
SmallDenseMap<Value*, int> InstrIdx;
|
||||
SmallDenseMap<Value *, int> InstrIdx;
|
||||
/// Maps integers to Instructions.
|
||||
std::vector<Instruction*> InstrVec;
|
||||
std::vector<Instruction *> InstrVec;
|
||||
|
||||
// -- containers that are used during getTreeCost -- //
|
||||
|
||||
|
@ -144,14 +150,14 @@ private:
|
|||
/// Contains values that have users outside of the vectorized graph.
|
||||
/// We need to generate extract instructions for these values.
|
||||
/// NOTICE: The vectorization methods also use this set.
|
||||
SetVector<Value*> MustExtract;
|
||||
SetVector<Value *> MustExtract;
|
||||
|
||||
/// Contains a list of values that are used outside the current tree. This
|
||||
/// set must be reset between runs.
|
||||
SetVector<Value*> MultiUserVals;
|
||||
SetVector<Value *> MultiUserVals;
|
||||
/// Maps values in the tree to the vector lanes that uses them. This map must
|
||||
/// be reset between runs of getCost.
|
||||
std::map<Value*, int> LaneMap;
|
||||
std::map<Value *, int> LaneMap;
|
||||
/// A list of instructions to ignore while sinking
|
||||
/// memory instructions. This map must be reset between runs of getCost.
|
||||
ValueSet MemBarrierIgnoreList;
|
||||
|
@ -159,8 +165,8 @@ private:
|
|||
// -- Containers that are used during vectorizeTree -- //
|
||||
|
||||
/// Maps between the first scalar to the vector. This map must be reset
|
||||
///between runs.
|
||||
DenseMap<Value*, Value*> VectorizedValues;
|
||||
/// between runs.
|
||||
DenseMap<Value *, Value *> VectorizedValues;
|
||||
|
||||
// -- Containers that are used after vectorization by the caller -- //
|
||||
|
||||
|
@ -169,7 +175,7 @@ private:
|
|||
/// Iterating over this list is faster than calling LICM.
|
||||
/// Notice: We insert NULL ptrs to separate between the different gather
|
||||
/// sequences.
|
||||
InstrList GatherInstructions;
|
||||
InstrList GatherInstructions;
|
||||
|
||||
/// Instruction builder to construct the vectorized tree.
|
||||
IRBuilder<> Builder;
|
||||
|
|
Loading…
Reference in New Issue