forked from OSchip/llvm-project
Revert rL355906: [SLP] Remove redundancy of performing operand reordering twice: once in buildTree() and later in vectorizeTree().
This is a refactoring patch that removes the redundancy of performing operand reordering twice, once in buildTree() and later in vectorizeTree(). To achieve this we need to keep track of the operands within the TreeEntry struct while building the tree, and later in vectorizeTree() we are just accessing them from the TreeEntry in the right order.

This patch is the first in a series of patches that will allow for better operand reordering across chains of instructions (e.g., a chain of ADDs), as presented here: https://www.youtube.com/watch?v=gIEn34LvyNo

Patch by: @vporpo (Vasileios Porpodas)

Differential Revision: https://reviews.llvm.org/D59059

........

Reverted due to buildbot failures that I don't have time to track down.

llvm-svn: 355913
This commit is contained in:
parent
5db95efdbd
commit
d3a8fd8bfb
|
@ -603,23 +603,6 @@ public:
|
|||
|
||||
OptimizationRemarkEmitter *getORE() { return ORE; }
|
||||
|
||||
/// This structure holds any data we need about the edges being traversed
|
||||
/// during buildTree_rec(). We keep track of:
|
||||
/// (i) the user TreeEntry index, and
|
||||
/// (ii) the index of the edge.
|
||||
struct EdgeInfo {
|
||||
EdgeInfo() = default;
|
||||
/// The index of the user TreeEntry in VectorizableTree.
|
||||
int Idx = -1;
|
||||
/// The operand index of the use.
|
||||
unsigned EdgeIdx = UINT_MAX;
|
||||
|
||||
/// Debug print.
|
||||
void dump(raw_ostream &OS) const {
|
||||
OS << "{User:" << Idx << " EdgeIdx:" << EdgeIdx << "}";
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
struct TreeEntry;
|
||||
|
||||
|
@ -630,7 +613,7 @@ private:
|
|||
int getEntryCost(TreeEntry *E);
|
||||
|
||||
/// This is the recursive part of buildTree.
|
||||
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, EdgeInfo EI);
|
||||
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
|
||||
|
||||
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
|
||||
/// be vectorized to use the original vector (or aggregate "bitcast" to a
|
||||
|
@ -717,92 +700,11 @@ private:
|
|||
|
||||
/// The TreeEntry index containing the user of this entry. We can actually
|
||||
/// have multiple users so the data structure is not truly a tree.
|
||||
SmallVector<EdgeInfo, 1> UserTreeIndices;
|
||||
|
||||
private:
|
||||
/// The operands of each instruction in each lane Operands[op_index][lane].
|
||||
/// Note: This helps avoid the replication of the code that performs the
|
||||
/// reordering of operands during buildTree_rec() and vectorizeTree().
|
||||
SmallVector<ValueList, 2> Operands;
|
||||
|
||||
public:
|
||||
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
|
||||
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
|
||||
ArrayRef<unsigned> ReuseShuffleIndices) {
|
||||
if (Operands.size() < OpIdx + 1)
|
||||
Operands.resize(OpIdx + 1);
|
||||
assert(Operands[OpIdx].size() == 0 && "Already resized?");
|
||||
Operands[OpIdx].resize(Scalars.size());
|
||||
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
|
||||
Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
|
||||
? OpVL[ReuseShuffleIndices[Lane]]
|
||||
: OpVL[Lane];
|
||||
}
|
||||
|
||||
/// If there is a user TreeEntry, then set its operand.
|
||||
void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
|
||||
ArrayRef<Value *> OpVL,
|
||||
ArrayRef<unsigned> ReuseShuffleIndices) {
|
||||
if (UserTreeIdx.Idx >= 0) {
|
||||
auto &VectorizableTree = Container;
|
||||
VectorizableTree[UserTreeIdx.Idx].setOperand(UserTreeIdx.EdgeIdx, OpVL,
|
||||
ReuseShuffleIndices);
|
||||
}
|
||||
}
|
||||
|
||||
/// \returns the \p OpIdx operand of this TreeEntry.
|
||||
ValueList &getOperand(unsigned OpIdx) {
|
||||
assert(OpIdx < Operands.size() && "Off bounds");
|
||||
return Operands[OpIdx];
|
||||
}
|
||||
|
||||
/// \return the single \p OpIdx operand.
|
||||
Value *getSingleOperand(unsigned OpIdx) const {
|
||||
assert(OpIdx < Operands.size() && "Off bounds");
|
||||
assert(!Operands[OpIdx].empty() && "No operand availabe");
|
||||
return Operands[OpIdx][0];
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Debug printer.
|
||||
LLVM_DUMP_METHOD void dump() const {
|
||||
for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
|
||||
dbgs() << "Operand " << OpI << ":\n";
|
||||
for (const Value *V : Operands[OpI])
|
||||
dbgs().indent(2) << *V << "\n";
|
||||
}
|
||||
dbgs() << "Scalars: \n";
|
||||
for (Value *V : Scalars)
|
||||
dbgs().indent(2) << *V << "\n";
|
||||
dbgs() << "NeedToGather: " << NeedToGather << "\n";
|
||||
dbgs() << "VectorizedValue: ";
|
||||
if (VectorizedValue)
|
||||
dbgs() << *VectorizedValue;
|
||||
else
|
||||
dbgs() << "NULL";
|
||||
dbgs() << "\n";
|
||||
dbgs() << "ReuseShuffleIndices: ";
|
||||
if (ReuseShuffleIndices.empty())
|
||||
dbgs() << "Emtpy";
|
||||
else
|
||||
for (unsigned Idx : ReuseShuffleIndices)
|
||||
dbgs() << Idx << ", ";
|
||||
dbgs() << "\n";
|
||||
dbgs() << "ReorderIndices: ";
|
||||
for (unsigned Idx : ReorderIndices)
|
||||
dbgs() << Idx << ", ";
|
||||
dbgs() << "\n";
|
||||
dbgs() << "UserTreeIndices: ";
|
||||
for (const auto &EInfo : UserTreeIndices)
|
||||
dbgs() << EInfo << ", ";
|
||||
dbgs() << "\n";
|
||||
}
|
||||
#endif
|
||||
SmallVector<int, 1> UserTreeIndices;
|
||||
};
|
||||
|
||||
/// Create a new VectorizableTree entry.
|
||||
void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
|
||||
EdgeInfo &UserTreeIdx,
|
||||
void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
|
||||
ArrayRef<unsigned> ReuseShuffleIndices = None,
|
||||
ArrayRef<unsigned> ReorderIndices = None) {
|
||||
VectorizableTree.emplace_back(VectorizableTree);
|
||||
|
@ -822,29 +724,15 @@ private:
|
|||
MustGather.insert(VL.begin(), VL.end());
|
||||
}
|
||||
|
||||
if (UserTreeIdx.Idx >= 0)
|
||||
if (UserTreeIdx >= 0)
|
||||
Last->UserTreeIndices.push_back(UserTreeIdx);
|
||||
|
||||
Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
|
||||
|
||||
UserTreeIdx.Idx = idx;
|
||||
UserTreeIdx = idx;
|
||||
}
|
||||
|
||||
/// -- Vectorization State --
|
||||
/// Holds all of the tree entries.
|
||||
std::vector<TreeEntry> VectorizableTree;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Debug printer.
|
||||
LLVM_DUMP_METHOD void dumpVectorizableTree() const {
|
||||
for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
|
||||
dbgs() << Id << ".\n";
|
||||
VectorizableTree[Id].dump();
|
||||
dbgs() << "\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TreeEntry *getTreeEntry(Value *V) {
|
||||
auto I = ScalarToTreeEntry.find(V);
|
||||
if (I != ScalarToTreeEntry.end())
|
||||
|
@ -1068,11 +956,6 @@ private:
|
|||
SD.dump(os);
|
||||
return os;
|
||||
}
|
||||
friend inline raw_ostream &operator<<(raw_ostream &OS,
|
||||
const BoUpSLP::EdgeInfo &EI) {
|
||||
EI.dump(OS);
|
||||
return OS;
|
||||
}
|
||||
#endif
|
||||
|
||||
friend struct GraphTraits<BoUpSLP *>;
|
||||
|
@ -1366,15 +1249,15 @@ template <> struct GraphTraits<BoUpSLP *> {
|
|||
/// Add the VectorizableTree to the index iterator to be able to return
|
||||
/// TreeEntry pointers.
|
||||
struct ChildIteratorType
|
||||
: public iterator_adaptor_base<
|
||||
ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
|
||||
: public iterator_adaptor_base<ChildIteratorType,
|
||||
SmallVector<int, 1>::iterator> {
|
||||
std::vector<TreeEntry> &VectorizableTree;
|
||||
|
||||
ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
|
||||
ChildIteratorType(SmallVector<int, 1>::iterator W,
|
||||
std::vector<TreeEntry> &VT)
|
||||
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
|
||||
|
||||
NodeRef operator*() { return &VectorizableTree[I->Idx]; }
|
||||
NodeRef operator*() { return &VectorizableTree[*I]; }
|
||||
};
|
||||
|
||||
static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
|
||||
|
@ -1448,7 +1331,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
|||
UserIgnoreList = UserIgnoreLst;
|
||||
if (!allSameType(Roots))
|
||||
return;
|
||||
buildTree_rec(Roots, 0, EdgeInfo());
|
||||
buildTree_rec(Roots, 0, -1);
|
||||
|
||||
// Collect the values that we need to extract from the tree.
|
||||
for (TreeEntry &EIdx : VectorizableTree) {
|
||||
|
@ -1510,7 +1393,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
|||
}
|
||||
|
||||
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
EdgeInfo UserTreeIdx) {
|
||||
int UserTreeIdx) {
|
||||
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
|
||||
|
||||
InstructionsState S = getSameOpcode(VL);
|
||||
|
@ -1567,7 +1450,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
E->UserTreeIndices.push_back(UserTreeIdx);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
|
||||
<< ".\n");
|
||||
E->trySetUserTEOperand(UserTreeIdx, VL, None);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1677,7 +1559,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
|
||||
PH->getIncomingBlock(i)));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -1691,11 +1572,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
++NumOpsWantToKeepOriginalOrder;
|
||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
// This is a special case, as it does not gather, but at the same time
|
||||
// we are not extending buildTree_rec() towards the operands.
|
||||
ValueList Op0;
|
||||
Op0.assign(VL.size(), VL0->getOperand(0));
|
||||
VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
if (!CurrentOrder.empty()) {
|
||||
|
@ -1713,11 +1589,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
++StoredCurrentOrderAndNum->getSecond();
|
||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
|
||||
StoredCurrentOrderAndNum->getFirst());
|
||||
// This is a special case, as it does not gather, but at the same time
|
||||
// we are not extending buildTree_rec() towards the operands.
|
||||
ValueList Op0;
|
||||
Op0.assign(VL.size(), VL0->getOperand(0));
|
||||
VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
|
||||
|
@ -1832,7 +1703,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(i));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -1863,7 +1733,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(i));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -1895,9 +1764,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
||||
ValueList Left, Right;
|
||||
reorderInputsAccordingToOpcode(S.getOpcode(), VL, Left, Right);
|
||||
UserTreeIdx.EdgeIdx = 0;
|
||||
buildTree_rec(Left, Depth + 1, UserTreeIdx);
|
||||
UserTreeIdx.EdgeIdx = 1;
|
||||
buildTree_rec(Right, Depth + 1, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
@ -1908,7 +1775,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(i));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -1958,7 +1824,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(i));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -1980,7 +1845,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(0));
|
||||
|
||||
UserTreeIdx.EdgeIdx = 0;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
@ -2049,7 +1913,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
CallInst *CI2 = dyn_cast<CallInst>(j);
|
||||
Operands.push_back(CI2->getArgOperand(i));
|
||||
}
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -2070,9 +1933,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
if (isa<BinaryOperator>(VL0)) {
|
||||
ValueList Left, Right;
|
||||
reorderAltShuffleOperands(S, VL, Left, Right);
|
||||
UserTreeIdx.EdgeIdx = 0;
|
||||
buildTree_rec(Left, Depth + 1, UserTreeIdx);
|
||||
UserTreeIdx.EdgeIdx = 1;
|
||||
buildTree_rec(Right, Depth + 1, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
@ -2083,7 +1944,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
for (Value *j : VL)
|
||||
Operands.push_back(cast<Instruction>(j)->getOperand(i));
|
||||
|
||||
UserTreeIdx.EdgeIdx = i;
|
||||
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
|
||||
}
|
||||
return;
|
||||
|
@ -3228,9 +3088,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Prepare the operand vector.
|
||||
for (Value *V : E->Scalars)
|
||||
Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
|
||||
|
||||
Builder.SetInsertPoint(IBB->getTerminator());
|
||||
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
|
||||
Value *Vec = vectorizeTree(E->getOperand(i));
|
||||
Value *Vec = vectorizeTree(Operands);
|
||||
NewPhi->addIncoming(Vec, IBB);
|
||||
}
|
||||
|
||||
|
@ -3241,7 +3105,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
|
||||
case Instruction::ExtractElement: {
|
||||
if (!E->NeedToGather) {
|
||||
Value *V = E->getSingleOperand(0);
|
||||
Value *V = VL0->getOperand(0);
|
||||
if (!E->ReorderIndices.empty()) {
|
||||
OrdersType Mask;
|
||||
inversePermutation(E->ReorderIndices, Mask);
|
||||
|
@ -3274,7 +3138,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
}
|
||||
case Instruction::ExtractValue: {
|
||||
if (!E->NeedToGather) {
|
||||
LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
|
||||
LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
|
||||
Builder.SetInsertPoint(LI);
|
||||
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
|
||||
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
|
||||
|
@ -3319,9 +3183,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast: {
|
||||
ValueList INVL;
|
||||
for (Value *V : E->Scalars)
|
||||
INVL.push_back(cast<Instruction>(V)->getOperand(0));
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *InVec = vectorizeTree(E->getOperand(0));
|
||||
Value *InVec = vectorizeTree(INVL);
|
||||
|
||||
if (E->VectorizedValue) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
|
||||
|
@ -3340,10 +3208,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
}
|
||||
case Instruction::FCmp:
|
||||
case Instruction::ICmp: {
|
||||
ValueList LHSV, RHSV;
|
||||
for (Value *V : E->Scalars) {
|
||||
LHSV.push_back(cast<Instruction>(V)->getOperand(0));
|
||||
RHSV.push_back(cast<Instruction>(V)->getOperand(1));
|
||||
}
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *L = vectorizeTree(E->getOperand(0));
|
||||
Value *R = vectorizeTree(E->getOperand(1));
|
||||
Value *L = vectorizeTree(LHSV);
|
||||
Value *R = vectorizeTree(RHSV);
|
||||
|
||||
if (E->VectorizedValue) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
|
||||
|
@ -3367,11 +3241,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
return V;
|
||||
}
|
||||
case Instruction::Select: {
|
||||
ValueList TrueVec, FalseVec, CondVec;
|
||||
for (Value *V : E->Scalars) {
|
||||
CondVec.push_back(cast<Instruction>(V)->getOperand(0));
|
||||
TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
|
||||
FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
|
||||
}
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *Cond = vectorizeTree(E->getOperand(0));
|
||||
Value *True = vectorizeTree(E->getOperand(1));
|
||||
Value *False = vectorizeTree(E->getOperand(2));
|
||||
Value *Cond = vectorizeTree(CondVec);
|
||||
Value *True = vectorizeTree(TrueVec);
|
||||
Value *False = vectorizeTree(FalseVec);
|
||||
|
||||
if (E->VectorizedValue) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
|
||||
|
@ -3405,10 +3286,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
ValueList LHSVL, RHSVL;
|
||||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
|
||||
reorderInputsAccordingToOpcode(S.getOpcode(), E->Scalars, LHSVL,
|
||||
RHSVL);
|
||||
else
|
||||
for (Value *V : E->Scalars) {
|
||||
auto *I = cast<Instruction>(V);
|
||||
LHSVL.push_back(I->getOperand(0));
|
||||
RHSVL.push_back(I->getOperand(1));
|
||||
}
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *LHS = vectorizeTree(E->getOperand(0));
|
||||
Value *RHS = vectorizeTree(E->getOperand(1));
|
||||
Value *LHS = vectorizeTree(LHSVL);
|
||||
Value *RHS = vectorizeTree(RHSVL);
|
||||
|
||||
if (E->VectorizedValue) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
|
||||
|
@ -3481,9 +3373,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
unsigned Alignment = SI->getAlignment();
|
||||
unsigned AS = SI->getPointerAddressSpace();
|
||||
|
||||
ValueList ScalarStoreValues;
|
||||
for (Value *V : E->Scalars)
|
||||
ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *VecValue = vectorizeTree(E->getOperand(0));
|
||||
Value *VecValue = vectorizeTree(ScalarStoreValues);
|
||||
Value *ScalarPtr = SI->getPointerOperand();
|
||||
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
|
||||
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
|
||||
|
@ -3510,12 +3406,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
case Instruction::GetElementPtr: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
|
||||
Value *Op0 = vectorizeTree(E->getOperand(0));
|
||||
ValueList Op0VL;
|
||||
for (Value *V : E->Scalars)
|
||||
Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
|
||||
|
||||
Value *Op0 = vectorizeTree(Op0VL);
|
||||
|
||||
std::vector<Value *> OpVecs;
|
||||
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
|
||||
++j) {
|
||||
Value *OpVec = vectorizeTree(E->getOperand(j));
|
||||
ValueList OpVL;
|
||||
for (Value *V : E->Scalars)
|
||||
OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
|
||||
|
||||
Value *OpVec = vectorizeTree(OpVL);
|
||||
OpVecs.push_back(OpVec);
|
||||
}
|
||||
|
||||
|
@ -3553,8 +3457,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
OpVecs.push_back(CEI->getArgOperand(j));
|
||||
continue;
|
||||
}
|
||||
for (Value *V : E->Scalars) {
|
||||
CallInst *CEI = cast<CallInst>(V);
|
||||
OpVL.push_back(CEI->getArgOperand(j));
|
||||
}
|
||||
|
||||
Value *OpVec = vectorizeTree(E->getOperand(j));
|
||||
Value *OpVec = vectorizeTree(OpVL);
|
||||
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
|
||||
OpVecs.push_back(OpVec);
|
||||
}
|
||||
|
@ -3583,6 +3491,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
return V;
|
||||
}
|
||||
case Instruction::ShuffleVector: {
|
||||
ValueList LHSVL, RHSVL;
|
||||
assert(S.isAltShuffle() &&
|
||||
((Instruction::isBinaryOp(S.getOpcode()) &&
|
||||
Instruction::isBinaryOp(S.getAltOpcode())) ||
|
||||
|
@ -3592,12 +3501,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
|
||||
Value *LHS, *RHS;
|
||||
if (Instruction::isBinaryOp(S.getOpcode())) {
|
||||
reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
LHS = vectorizeTree(E->getOperand(0));
|
||||
RHS = vectorizeTree(E->getOperand(1));
|
||||
LHS = vectorizeTree(LHSVL);
|
||||
RHS = vectorizeTree(RHSVL);
|
||||
} else {
|
||||
ValueList INVL;
|
||||
for (Value *V : E->Scalars)
|
||||
INVL.push_back(cast<Instruction>(V)->getOperand(0));
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
LHS = vectorizeTree(E->getOperand(0));
|
||||
LHS = vectorizeTree(INVL);
|
||||
}
|
||||
|
||||
if (E->VectorizedValue) {
|
||||
|
|
Loading…
Reference in New Issue