[SLP] Visualize SLP trees with -view-slp-tree

Analyzing larger trees is extremely difficult with the current debug output so
this adds GraphTraits and DOTGraphTraits on top of the VectorizableTree data
structure.  We can now display the SLP trees with Graphviz as in
https://reviews.llvm.org/F3132765.

I decorated the graph where a value needs to be gathered for one reason or
another.  These are the red nodes.

There are other improvement I am planning to make as I work through my case
here.  For example, I would also like to mark nodes that need to be extracted.

Differential Revision: https://reviews.llvm.org/D30731

llvm-svn: 297303
This commit is contained in:
Adam Nemet 2017-03-08 18:47:50 +00:00
parent 3388de1349
commit 95da05c3f5
1 changed files with 167 additions and 62 deletions

View File

@ -39,6 +39,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
@ -90,6 +91,10 @@ static cl::opt<unsigned> MinTreeSize(
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@ -417,7 +422,7 @@ private:
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
/// \returns True if the ExtractElement/ExtractValue instructions in VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
@ -466,8 +471,9 @@ private:
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(nullptr),
NeedToGather(0), NeedToShuffle(0) {}
TreeEntry(std::vector<TreeEntry> &Container)
: Scalars(), VectorizedValue(nullptr), NeedToGather(0),
NeedToShuffle(0), Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
@ -497,12 +503,24 @@ private:
/// Do we need to shuffle the load ?
bool NeedToShuffle;
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
/// to be a pointer and needs to be able to initialize the child iterator.
/// Thus we need a reference back to the container to translate the indices
/// to entries.
std::vector<TreeEntry> &Container;
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
SmallVector<int, 1> UserTreeIndices;
};
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
bool NeedToShuffle) {
VectorizableTree.emplace_back();
bool NeedToShuffle, int &UserTreeIdx) {
VectorizableTree.emplace_back(VectorizableTree);
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
@ -516,6 +534,10 @@ private:
} else {
MustGather.insert(VL.begin(), VL.end());
}
if (UserTreeIdx >= 0)
Last->UserTreeIndices.push_back(UserTreeIdx);
UserTreeIdx = idx;
return Last;
}
@ -739,6 +761,8 @@ private:
return os;
}
#endif
friend struct GraphTraits<BoUpSLP *>;
friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
///
@ -949,9 +973,78 @@ private:
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace slpvectorizer
template <> struct GraphTraits<BoUpSLP *> {
typedef BoUpSLP::TreeEntry TreeEntry;
/// NodeRef has to be a pointer per the GraphWriter.
typedef TreeEntry *NodeRef;
/// \brief Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
: public iterator_adaptor_base<ChildIteratorType,
SmallVector<int, 1>::iterator> {
std::vector<TreeEntry> &VectorizableTree;
ChildIteratorType(SmallVector<int, 1>::iterator W,
std::vector<TreeEntry> &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
NodeRef operator*() { return &VectorizableTree[*I]; }
};
static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->UserTreeIndices.end(), N->Container};
}
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
typedef pointer_iterator<std::vector<TreeEntry>::iterator> nodes_iterator;
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
}
static nodes_iterator nodes_end(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.end());
}
static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
};
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
typedef BoUpSLP::TreeEntry TreeEntry;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *) {
std::string Str;
raw_string_ostream OS(Str);
if (isSplat(Entry->Scalars)) {
OS << "<splat> " << *Entry->Scalars[0];
return Str;
}
for (auto V : Entry->Scalars)
OS << *V << "\n";
return Str;
}
static std::string getNodeAttributes(const TreeEntry *Entry,
const BoUpSLP *) {
if (Entry->NeedToGather)
return "color=red";
return "";
}
};
} // end namespace llvm
} // end namespace slpvectorizer
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
@ -965,7 +1058,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0);
buildTree_rec(Roots, 0, -1);
// Collect the values that we need to extract from the tree.
for (TreeEntry &EIdx : VectorizableTree) {
@ -1023,28 +1116,28 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
int UserTreeIdx) {
bool isAltShuffle = false;
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
if (Depth == RecursionMaxDepth) {
DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
// Don't handle vectors.
if (VL[0]->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
if (SI->getValueOperand()->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
unsigned Opcode = getSameOpcode(VL);
@ -1061,7 +1154,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
@ -1073,7 +1166,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (EphValues.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is ephemeral.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
@ -1086,10 +1179,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
// Record the reuse of the tree node. FIXME, currently this is only used to
// properly draw the graph rather than for the actual vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);
DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
return;
}
@ -1099,7 +1195,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (ScalarToTreeEntry.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is already in tree.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
@ -1109,7 +1205,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i])) {
DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
@ -1123,7 +1219,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
@ -1132,7 +1228,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned j = i+1; j < e; ++j)
if (VL[i] == VL[j]) {
DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
@ -1147,7 +1243,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
assert((!BS.getScheduleData(VL[0]) ||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@ -1164,12 +1260,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@ -1179,7 +1275,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
buildTree_rec(Operands, Depth + 1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1191,7 +1287,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
} else {
BS.cancelScheduling(VL);
}
newTreeEntry(VL, Reuse, false);
newTreeEntry(VL, Reuse, false, UserTreeIdx);
return;
}
case Instruction::Load: {
@ -1207,7 +1303,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
@ -1218,7 +1314,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
@ -1238,7 +1334,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Consecutive) {
++NumLoadsWantToKeepOrder;
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
}
@ -1264,14 +1360,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
if (ShuffledLoads) {
newTreeEntry(NewVL, true, true);
newTreeEntry(NewVL, true, true, UserTreeIdx);
return;
}
}
}
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
if (ReverseConsecutive) {
++NumLoadsWantToChangeOrder;
@ -1298,12 +1394,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Type *Ty = cast<Instruction>(Val)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
}
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@ -1312,7 +1408,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
buildTree_rec(Operands, Depth+1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1326,13 +1422,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
}
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of compares.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@ -1341,7 +1437,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
buildTree_rec(Operands, Depth+1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1364,7 +1460,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
// Sort operands of the instructions so that each side is more likely to
@ -1372,8 +1468,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
buildTree_rec(Left, Depth + 1);
buildTree_rec(Right, Depth + 1);
buildTree_rec(Left, Depth + 1, UserTreeIdx);
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
@ -1383,7 +1479,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
buildTree_rec(Operands, Depth+1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1393,7 +1489,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (cast<Instruction>(Val)->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
@ -1406,7 +1502,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
@ -1418,12 +1514,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
@ -1431,7 +1527,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
buildTree_rec(Operands, Depth + 1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1440,19 +1536,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
buildTree_rec(Operands, Depth + 1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
return;
}
case Instruction::Call: {
@ -1463,7 +1559,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
@ -1477,7 +1573,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
@ -1488,7 +1584,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
<< "\n");
@ -1501,14 +1597,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
<< *VL[i] << '\n');
return;
}
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@ -1516,7 +1612,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
buildTree_rec(Operands, Depth + 1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@ -1525,19 +1621,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// then do not vectorize this instruction.
if (!isAltShuffle) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
newTreeEntry(VL, true, false);
newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderAltShuffleOperands(VL, Left, Right);
buildTree_rec(Left, Depth + 1);
buildTree_rec(Right, Depth + 1);
buildTree_rec(Left, Depth + 1, UserTreeIdx);
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
@ -1547,13 +1643,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
buildTree_rec(Operands, Depth + 1);
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
default:
BS.cancelScheduling(VL);
newTreeEntry(VL, false, false);
newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
@ -2020,9 +2116,18 @@ int BoUpSLP::getTreeCost() {
int SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n"
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n");
std::string Str;
{
raw_string_ostream OS(Str);
OS << "SLP: Spill Cost = " << SpillCost << ".\n"
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
}
DEBUG(dbgs() << Str);
if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);
return Cost;
}