forked from OSchip/llvm-project
[SLPVectorizer] Make the scheduler aware of the TreeEntry operands.
Summary: The scheduler's dependence graph gets the use-def dependencies by accessing the operands of the instructions in a bundle. However, buildTree_rec() may change the order of the operands in TreeEntry, and the scheduler is currently not aware of this. This is not causing any functional issues currently, because reordering is restricted to the operands of a single instruction. Once we support operand reordering across multiple TreeEntries, as shown here: http://www.llvm.org/devmtg/2019-04/slides/Poster-Porpodas-Supernode_SLP.pdf , the scheduler will need to get the correct operands from TreeEntry and not from the individual instructions. In short, this patch: - Connects the scheduler's bundle with the corresponding TreeEntry. It introduces new TE and Lane fields in ScheduleData. - Moves the location where the operands of the TreeEntry are initialized. This used to take place in newTreeEntry() setting one operand at a time, but is now moved pre-order just before the recursion of buildTree_rec(). This is required because the scheduler needs to access both operands of the TreeEntry in tryScheduleBundle(). - Updates the scheduler to access the instruction operands through the TreeEntry operands instead of accessing the instruction operands directly. Reviewers: ABataev, RKSimon, dtemirbulatov, Ayal, dorit, hfinkel Reviewed By: ABataev Subscribers: hiraditya, llvm-commits, lebedev.ri, rcorcs Tags: #llvm Differential Revision: https://reviews.llvm.org/D62432 llvm-svn: 369131
This commit is contained in:
parent
7e106445ef
commit
1d254f3dae
|
@ -486,6 +486,7 @@ namespace slpvectorizer {
|
||||||
/// Bottom Up SLP Vectorizer.
|
/// Bottom Up SLP Vectorizer.
|
||||||
class BoUpSLP {
|
class BoUpSLP {
|
||||||
struct TreeEntry;
|
struct TreeEntry;
|
||||||
|
struct ScheduleData;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using ValueList = SmallVector<Value *, 8>;
|
using ValueList = SmallVector<Value *, 8>;
|
||||||
|
@ -1222,25 +1223,31 @@ private:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
|
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
|
||||||
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
|
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
|
||||||
ArrayRef<unsigned> ReuseShuffleIndices) {
|
|
||||||
if (Operands.size() < OpIdx + 1)
|
if (Operands.size() < OpIdx + 1)
|
||||||
Operands.resize(OpIdx + 1);
|
Operands.resize(OpIdx + 1);
|
||||||
assert(Operands[OpIdx].size() == 0 && "Already resized?");
|
assert(Operands[OpIdx].size() == 0 && "Already resized?");
|
||||||
Operands[OpIdx].resize(Scalars.size());
|
Operands[OpIdx].resize(Scalars.size());
|
||||||
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
|
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
|
||||||
Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
|
Operands[OpIdx][Lane] = OpVL[Lane];
|
||||||
? OpVL[ReuseShuffleIndices[Lane]]
|
|
||||||
: OpVL[Lane];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If there is a user TreeEntry, then set its operand.
|
/// Set the operands of this bundle in their original order.
|
||||||
void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
|
void setOperandsInOrder() {
|
||||||
ArrayRef<Value *> OpVL,
|
assert(Operands.empty() && "Already initialized?");
|
||||||
ArrayRef<unsigned> ReuseShuffleIndices) {
|
auto *I0 = cast<Instruction>(Scalars[0]);
|
||||||
if (UserTreeIdx.UserTE)
|
Operands.resize(I0->getNumOperands());
|
||||||
UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
|
unsigned NumLanes = Scalars.size();
|
||||||
ReuseShuffleIndices);
|
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
|
||||||
|
OpIdx != NumOperands; ++OpIdx) {
|
||||||
|
Operands[OpIdx].resize(NumLanes);
|
||||||
|
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
|
||||||
|
auto *I = cast<Instruction>(Scalars[Lane]);
|
||||||
|
assert(I->getNumOperands() == NumOperands &&
|
||||||
|
"Expected same number of operands");
|
||||||
|
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns the \p OpIdx operand of this TreeEntry.
|
/// \returns the \p OpIdx operand of this TreeEntry.
|
||||||
|
@ -1249,6 +1256,9 @@ private:
|
||||||
return Operands[OpIdx];
|
return Operands[OpIdx];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \returns the number of operands.
|
||||||
|
unsigned getNumOperands() const { return Operands.size(); }
|
||||||
|
|
||||||
/// \return the single \p OpIdx operand.
|
/// \return the single \p OpIdx operand.
|
||||||
Value *getSingleOperand(unsigned OpIdx) const {
|
Value *getSingleOperand(unsigned OpIdx) const {
|
||||||
assert(OpIdx < Operands.size() && "Off bounds");
|
assert(OpIdx < Operands.size() && "Off bounds");
|
||||||
|
@ -1295,10 +1305,12 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Create a new VectorizableTree entry.
|
/// Create a new VectorizableTree entry.
|
||||||
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
|
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
|
||||||
|
Optional<ScheduleData *> Bundle,
|
||||||
const EdgeInfo &UserTreeIdx,
|
const EdgeInfo &UserTreeIdx,
|
||||||
ArrayRef<unsigned> ReuseShuffleIndices = None,
|
ArrayRef<unsigned> ReuseShuffleIndices = None,
|
||||||
ArrayRef<unsigned> ReorderIndices = None) {
|
ArrayRef<unsigned> ReorderIndices = None) {
|
||||||
|
bool Vectorized = (bool)Bundle;
|
||||||
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
|
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
|
||||||
TreeEntry *Last = VectorizableTree.back().get();
|
TreeEntry *Last = VectorizableTree.back().get();
|
||||||
Last->Idx = VectorizableTree.size() - 1;
|
Last->Idx = VectorizableTree.size() - 1;
|
||||||
|
@ -1312,6 +1324,16 @@ private:
|
||||||
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
|
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
|
||||||
ScalarToTreeEntry[VL[i]] = Last;
|
ScalarToTreeEntry[VL[i]] = Last;
|
||||||
}
|
}
|
||||||
|
// Update the scheduler bundle to point to this TreeEntry.
|
||||||
|
unsigned Lane = 0;
|
||||||
|
for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
|
||||||
|
BundleMember = BundleMember->NextInBundle) {
|
||||||
|
BundleMember->TE = Last;
|
||||||
|
BundleMember->Lane = Lane;
|
||||||
|
++Lane;
|
||||||
|
}
|
||||||
|
assert((!Bundle.getValue() || Lane == VL.size()) &&
|
||||||
|
"Bundle and VL out of sync");
|
||||||
} else {
|
} else {
|
||||||
MustGather.insert(VL.begin(), VL.end());
|
MustGather.insert(VL.begin(), VL.end());
|
||||||
}
|
}
|
||||||
|
@ -1319,7 +1341,6 @@ private:
|
||||||
if (UserTreeIdx.UserTE)
|
if (UserTreeIdx.UserTE)
|
||||||
Last->UserTreeIndices.push_back(UserTreeIdx);
|
Last->UserTreeIndices.push_back(UserTreeIdx);
|
||||||
|
|
||||||
Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
|
|
||||||
return Last;
|
return Last;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1453,6 +1474,8 @@ private:
|
||||||
UnscheduledDepsInBundle = UnscheduledDeps;
|
UnscheduledDepsInBundle = UnscheduledDeps;
|
||||||
clearDependencies();
|
clearDependencies();
|
||||||
OpValue = OpVal;
|
OpValue = OpVal;
|
||||||
|
TE = nullptr;
|
||||||
|
Lane = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if the dependency information has been calculated.
|
/// Returns true if the dependency information has been calculated.
|
||||||
|
@ -1559,6 +1582,12 @@ private:
|
||||||
|
|
||||||
/// Opcode of the current instruction in the schedule data.
|
/// Opcode of the current instruction in the schedule data.
|
||||||
Value *OpValue = nullptr;
|
Value *OpValue = nullptr;
|
||||||
|
|
||||||
|
/// The TreeEntry that this instruction corresponds to.
|
||||||
|
TreeEntry *TE = nullptr;
|
||||||
|
|
||||||
|
/// The lane of this node in the TreeEntry.
|
||||||
|
int Lane = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
@ -1633,10 +1662,9 @@ private:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Handle the def-use chain dependencies.
|
// Handle the def-use chain dependencies.
|
||||||
for (Use &U : BundleMember->Inst->operands()) {
|
|
||||||
auto *I = dyn_cast<Instruction>(U.get());
|
// Decrement the unscheduled counter and insert to ready list if ready.
|
||||||
if (!I)
|
auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
|
||||||
continue;
|
|
||||||
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
|
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
|
||||||
if (OpDef && OpDef->hasValidDependencies() &&
|
if (OpDef && OpDef->hasValidDependencies() &&
|
||||||
OpDef->incrementUnscheduledDeps(-1) == 0) {
|
OpDef->incrementUnscheduledDeps(-1) == 0) {
|
||||||
|
@ -1651,6 +1679,24 @@ private:
|
||||||
<< "SLP: gets ready (def): " << *DepBundle << "\n");
|
<< "SLP: gets ready (def): " << *DepBundle << "\n");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// If BundleMember is a vector bundle, its operands may have been
|
||||||
|
// reordered during buildTree(). We therefore need to get its operands
|
||||||
|
// through the TreeEntry.
|
||||||
|
if (TreeEntry *TE = BundleMember->TE) {
|
||||||
|
int Lane = BundleMember->Lane;
|
||||||
|
assert(Lane >= 0 && "Lane not set");
|
||||||
|
for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
|
||||||
|
OpIdx != NumOperands; ++OpIdx)
|
||||||
|
if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
|
||||||
|
DecrUnsched(I);
|
||||||
|
} else {
|
||||||
|
// If BundleMember is a stand-alone instruction, no operand reordering
|
||||||
|
// has taken place, so we directly access its operands.
|
||||||
|
for (Use &U : BundleMember->Inst->operands())
|
||||||
|
if (auto *I = dyn_cast<Instruction>(U.get()))
|
||||||
|
DecrUnsched(I);
|
||||||
}
|
}
|
||||||
// Handle the memory dependencies.
|
// Handle the memory dependencies.
|
||||||
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
|
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
|
||||||
|
@ -1697,8 +1743,11 @@ private:
|
||||||
/// Checks if a bundle of instructions can be scheduled, i.e. has no
|
/// Checks if a bundle of instructions can be scheduled, i.e. has no
|
||||||
/// cyclic dependencies. This is only a dry-run, no instructions are
|
/// cyclic dependencies. This is only a dry-run, no instructions are
|
||||||
/// actually moved at this stage.
|
/// actually moved at this stage.
|
||||||
bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
|
/// \returns the scheduling bundle. The returned Optional value is non-None
|
||||||
const InstructionsState &S);
|
/// if \p VL is allowed to be scheduled.
|
||||||
|
Optional<ScheduleData *>
|
||||||
|
tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
|
||||||
|
const InstructionsState &S);
|
||||||
|
|
||||||
/// Un-bundles a group of instructions.
|
/// Un-bundles a group of instructions.
|
||||||
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
|
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
|
||||||
|
@ -2026,28 +2075,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
InstructionsState S = getSameOpcode(VL);
|
InstructionsState S = getSameOpcode(VL);
|
||||||
if (Depth == RecursionMaxDepth) {
|
if (Depth == RecursionMaxDepth) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't handle vectors.
|
// Don't handle vectors.
|
||||||
if (S.OpValue->getType()->isVectorTy()) {
|
if (S.OpValue->getType()->isVectorTy()) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
|
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
|
||||||
if (SI->getValueOperand()->getType()->isVectorTy()) {
|
if (SI->getValueOperand()->getType()->isVectorTy()) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If all of the operands are identical or constant we have a simple solution.
|
// If all of the operands are identical or constant we have a simple solution.
|
||||||
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
|
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2059,7 +2108,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (EphValues.count(VL[i])) {
|
if (EphValues.count(VL[i])) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
||||||
<< ") is ephemeral.\n");
|
<< ") is ephemeral.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2069,7 +2118,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
|
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
|
||||||
if (!E->isSame(VL)) {
|
if (!E->isSame(VL)) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Record the reuse of the tree node. FIXME, currently this is only used to
|
// Record the reuse of the tree node. FIXME, currently this is only used to
|
||||||
|
@ -2077,7 +2126,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
E->UserTreeIndices.push_back(UserTreeIdx);
|
E->UserTreeIndices.push_back(UserTreeIdx);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
|
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
|
||||||
<< ".\n");
|
<< ".\n");
|
||||||
E->trySetUserTEOperand(UserTreeIdx, VL, None);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2089,7 +2137,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (getTreeEntry(I)) {
|
if (getTreeEntry(I)) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
||||||
<< ") is already in tree.\n");
|
<< ") is already in tree.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2100,7 +2148,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
|
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
|
||||||
if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
|
if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2114,7 +2162,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
// Don't go into unreachable blocks. They may contain instructions with
|
// Don't go into unreachable blocks. They may contain instructions with
|
||||||
// dependency cycles which confuse the final scheduling.
|
// dependency cycles which confuse the final scheduling.
|
||||||
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
|
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2134,7 +2182,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
|
||||||
if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
|
if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
|
||||||
newTreeEntry(VL, false, UserTreeIdx);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
VL = UniqueValues;
|
VL = UniqueValues;
|
||||||
|
@ -2146,12 +2194,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
|
|
||||||
BlockScheduling &BS = *BSRef.get();
|
BlockScheduling &BS = *BSRef.get();
|
||||||
|
|
||||||
if (!BS.tryScheduleBundle(VL, this, S)) {
|
Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
|
||||||
|
if (!Bundle) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
|
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
|
||||||
assert((!BS.getScheduleData(VL0) ||
|
assert((!BS.getScheduleData(VL0) ||
|
||||||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
|
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
|
||||||
"tryScheduleBundle should cancelScheduling on failure");
|
"tryScheduleBundle should cancelScheduling on failure");
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
|
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
|
||||||
|
@ -2172,23 +2222,29 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs()
|
||||||
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
|
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE =
|
||||||
|
newTreeEntry(VL, Bundle, UserTreeIdx, ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
|
||||||
|
|
||||||
|
// Keeps the reordered operands to avoid code duplication.
|
||||||
|
SmallVector<ValueList, 2> OperandsVec;
|
||||||
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
|
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
for (Value *j : VL)
|
for (Value *j : VL)
|
||||||
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
|
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
|
||||||
PH->getIncomingBlock(i)));
|
PH->getIncomingBlock(i)));
|
||||||
|
TE->setOperand(i, Operands);
|
||||||
buildTree_rec(Operands, Depth + 1, {TE, i});
|
OperandsVec.push_back(Operands);
|
||||||
}
|
}
|
||||||
|
for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
|
||||||
|
buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case Instruction::ExtractValue:
|
case Instruction::ExtractValue:
|
||||||
|
@ -2198,13 +2254,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (Reuse) {
|
if (Reuse) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
|
||||||
++NumOpsWantToKeepOriginalOrder;
|
++NumOpsWantToKeepOriginalOrder;
|
||||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
|
newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
ReuseShuffleIndicies);
|
ReuseShuffleIndicies);
|
||||||
// This is a special case, as it does not gather, but at the same time
|
// This is a special case, as it does not gather, but at the same time
|
||||||
// we are not extending buildTree_rec() towards the operands.
|
// we are not extending buildTree_rec() towards the operands.
|
||||||
ValueList Op0;
|
ValueList Op0;
|
||||||
Op0.assign(VL.size(), VL0->getOperand(0));
|
Op0.assign(VL.size(), VL0->getOperand(0));
|
||||||
VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
|
VectorizableTree.back()->setOperand(0, Op0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!CurrentOrder.empty()) {
|
if (!CurrentOrder.empty()) {
|
||||||
|
@ -2220,17 +2276,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
auto StoredCurrentOrderAndNum =
|
auto StoredCurrentOrderAndNum =
|
||||||
NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
||||||
++StoredCurrentOrderAndNum->getSecond();
|
++StoredCurrentOrderAndNum->getSecond();
|
||||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
|
newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies,
|
||||||
StoredCurrentOrderAndNum->getFirst());
|
StoredCurrentOrderAndNum->getFirst());
|
||||||
// This is a special case, as it does not gather, but at the same time
|
// This is a special case, as it does not gather, but at the same time
|
||||||
// we are not extending buildTree_rec() towards the operands.
|
// we are not extending buildTree_rec() towards the operands.
|
||||||
ValueList Op0;
|
ValueList Op0;
|
||||||
Op0.assign(VL.size(), VL0->getOperand(0));
|
Op0.assign(VL.size(), VL0->getOperand(0));
|
||||||
VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
|
VectorizableTree.back()->setOperand(0, Op0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
|
||||||
newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2246,7 +2304,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (DL->getTypeSizeInBits(ScalarTy) !=
|
if (DL->getTypeSizeInBits(ScalarTy) !=
|
||||||
DL->getTypeAllocSizeInBits(ScalarTy)) {
|
DL->getTypeAllocSizeInBits(ScalarTy)) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2259,7 +2318,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
auto *L = cast<LoadInst>(V);
|
auto *L = cast<LoadInst>(V);
|
||||||
if (!L->isSimple()) {
|
if (!L->isSimple()) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2289,15 +2349,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (CurrentOrder.empty()) {
|
if (CurrentOrder.empty()) {
|
||||||
// Original loads are consecutive and do not require reordering.
|
// Original loads are consecutive and do not require reordering.
|
||||||
++NumOpsWantToKeepOriginalOrder;
|
++NumOpsWantToKeepOriginalOrder;
|
||||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
ReuseShuffleIndicies);
|
ReuseShuffleIndicies);
|
||||||
|
TE->setOperandsInOrder();
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
|
||||||
} else {
|
} else {
|
||||||
// Need to reorder.
|
// Need to reorder.
|
||||||
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
||||||
++I->getSecond();
|
++I->getSecond();
|
||||||
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
ReuseShuffleIndicies, I->getFirst());
|
ReuseShuffleIndicies, I->getFirst());
|
||||||
|
TE->setOperandsInOrder();
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -2306,7 +2368,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case Instruction::ZExt:
|
case Instruction::ZExt:
|
||||||
|
@ -2326,15 +2389,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
|
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
|
||||||
if (Ty != SrcTy || !isValidElementType(Ty)) {
|
if (Ty != SrcTy || !isValidElementType(Ty)) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs()
|
||||||
<< "SLP: Gathering casts with different src types.\n");
|
<< "SLP: Gathering casts with different src types.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
|
||||||
|
|
||||||
|
TE->setOperandsInOrder();
|
||||||
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
|
@ -2356,14 +2422,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
|
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
|
||||||
Cmp->getOperand(0)->getType() != ComparedTy) {
|
Cmp->getOperand(0)->getType() != ComparedTy) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs()
|
||||||
<< "SLP: Gathering cmp with different predicate.\n");
|
<< "SLP: Gathering cmp with different predicate.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
|
||||||
|
|
||||||
ValueList Left, Right;
|
ValueList Left, Right;
|
||||||
|
@ -2384,7 +2452,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
Right.push_back(RHS);
|
Right.push_back(RHS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
TE->setOperand(0, Left);
|
||||||
|
TE->setOperand(1, Right);
|
||||||
buildTree_rec(Left, Depth + 1, {TE, 0});
|
buildTree_rec(Left, Depth + 1, {TE, 0});
|
||||||
buildTree_rec(Right, Depth + 1, {TE, 1});
|
buildTree_rec(Right, Depth + 1, {TE, 1});
|
||||||
return;
|
return;
|
||||||
|
@ -2409,7 +2478,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
case Instruction::And:
|
case Instruction::And:
|
||||||
case Instruction::Or:
|
case Instruction::Or:
|
||||||
case Instruction::Xor: {
|
case Instruction::Xor: {
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
|
||||||
|
|
||||||
// Sort operands of the instructions so that each side is more likely to
|
// Sort operands of the instructions so that each side is more likely to
|
||||||
|
@ -2417,11 +2487,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
|
||||||
ValueList Left, Right;
|
ValueList Left, Right;
|
||||||
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
|
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
|
||||||
|
TE->setOperand(0, Left);
|
||||||
|
TE->setOperand(1, Right);
|
||||||
buildTree_rec(Left, Depth + 1, {TE, 0});
|
buildTree_rec(Left, Depth + 1, {TE, 0});
|
||||||
buildTree_rec(Right, Depth + 1, {TE, 1});
|
buildTree_rec(Right, Depth + 1, {TE, 1});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TE->setOperandsInOrder();
|
||||||
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
|
@ -2438,7 +2511,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
|
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
|
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2452,7 +2526,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs()
|
||||||
<< "SLP: not-vectorizable GEP (different types).\n");
|
<< "SLP: not-vectorizable GEP (different types).\n");
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2464,13 +2539,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs()
|
||||||
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
|
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
|
||||||
|
TE->setOperandsInOrder();
|
||||||
for (unsigned i = 0, e = 2; i < e; ++i) {
|
for (unsigned i = 0, e = 2; i < e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
|
@ -2486,18 +2564,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
||||||
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
|
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
|
||||||
|
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
for (Value *j : VL)
|
for (Value *j : VL)
|
||||||
Operands.push_back(cast<Instruction>(j)->getOperand(0));
|
Operands.push_back(cast<Instruction>(j)->getOperand(0));
|
||||||
|
TE->setOperandsInOrder();
|
||||||
buildTree_rec(Operands, Depth + 1, {TE, 0});
|
buildTree_rec(Operands, Depth + 1, {TE, 0});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2509,7 +2589,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
|
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
|
||||||
if (!isTriviallyVectorizable(ID)) {
|
if (!isTriviallyVectorizable(ID)) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2525,7 +2606,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
|
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
|
||||||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
|
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
|
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
|
||||||
<< "\n");
|
<< "\n");
|
||||||
return;
|
return;
|
||||||
|
@ -2537,7 +2619,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
Value *A1J = CI2->getArgOperand(j);
|
Value *A1J = CI2->getArgOperand(j);
|
||||||
if (ScalarArgs[j] != A1J) {
|
if (ScalarArgs[j] != A1J) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
|
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
|
||||||
<< " argument " << ScalarArgs[j] << "!=" << A1J
|
<< " argument " << ScalarArgs[j] << "!=" << A1J
|
||||||
<< "\n");
|
<< "\n");
|
||||||
|
@ -2551,14 +2634,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
CI->op_begin() + CI->getBundleOperandsEndIndex(),
|
CI->op_begin() + CI->getBundleOperandsEndIndex(),
|
||||||
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
|
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
|
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
|
||||||
<< *CI << "!=" << *VL[i] << '\n');
|
<< *CI << "!=" << *VL[i] << '\n');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
|
TE->setOperandsInOrder();
|
||||||
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
|
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
|
@ -2575,22 +2661,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
// then do not vectorize this instruction.
|
// then do not vectorize this instruction.
|
||||||
if (!S.isAltShuffle()) {
|
if (!S.isAltShuffle()) {
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
|
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
|
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
|
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
|
||||||
|
|
||||||
// Reorder operands if reordering would enable vectorization.
|
// Reorder operands if reordering would enable vectorization.
|
||||||
if (isa<BinaryOperator>(VL0)) {
|
if (isa<BinaryOperator>(VL0)) {
|
||||||
ValueList Left, Right;
|
ValueList Left, Right;
|
||||||
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
|
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
|
||||||
|
TE->setOperand(0, Left);
|
||||||
|
TE->setOperand(1, Right);
|
||||||
buildTree_rec(Left, Depth + 1, {TE, 0});
|
buildTree_rec(Left, Depth + 1, {TE, 0});
|
||||||
buildTree_rec(Right, Depth + 1, {TE, 1});
|
buildTree_rec(Right, Depth + 1, {TE, 1});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TE->setOperandsInOrder();
|
||||||
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
|
||||||
ValueList Operands;
|
ValueList Operands;
|
||||||
// Prepare the operand vector.
|
// Prepare the operand vector.
|
||||||
|
@ -2603,7 +2694,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
BS.cancelScheduling(VL, VL0);
|
BS.cancelScheduling(VL, VL0);
|
||||||
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
|
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||||
|
ReuseShuffleIndicies);
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
|
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -4245,11 +4337,11 @@ void BoUpSLP::optimizeGatherSequence() {
|
||||||
|
|
||||||
// Groups the instructions to a bundle (which is then a single scheduling entity)
|
// Groups the instructions to a bundle (which is then a single scheduling entity)
|
||||||
// and schedules instructions until the bundle gets ready.
|
// and schedules instructions until the bundle gets ready.
|
||||||
bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
Optional<BoUpSLP::ScheduleData *>
|
||||||
BoUpSLP *SLP,
|
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
|
||||||
const InstructionsState &S) {
|
const InstructionsState &S) {
|
||||||
if (isa<PHINode>(S.OpValue))
|
if (isa<PHINode>(S.OpValue))
|
||||||
return true;
|
return nullptr;
|
||||||
|
|
||||||
// Initialize the instruction bundle.
|
// Initialize the instruction bundle.
|
||||||
Instruction *OldScheduleEnd = ScheduleEnd;
|
Instruction *OldScheduleEnd = ScheduleEnd;
|
||||||
|
@ -4262,7 +4354,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
||||||
// instructions of the bundle.
|
// instructions of the bundle.
|
||||||
for (Value *V : VL) {
|
for (Value *V : VL) {
|
||||||
if (!extendSchedulingRegion(V, S))
|
if (!extendSchedulingRegion(V, S))
|
||||||
return false;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Value *V : VL) {
|
for (Value *V : VL) {
|
||||||
|
@ -4330,9 +4422,9 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
||||||
}
|
}
|
||||||
if (!Bundle->isReady()) {
|
if (!Bundle->isReady()) {
|
||||||
cancelScheduling(VL, S.OpValue);
|
cancelScheduling(VL, S.OpValue);
|
||||||
return false;
|
return None;
|
||||||
}
|
}
|
||||||
return true;
|
return Bundle;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
|
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
|
||||||
|
|
Loading…
Reference in New Issue