forked from OSchip/llvm-project
Revert r205965, which essentially reverts r205018 for the second time.
=[ Turns out that this was the root cause of PR19621. We found a crasher only recently (likely due to improvements elsewhere in the SLP vectorizer) but the reduced test case failed all the way back to here. I've confirmed that reverting this patch both fixes the reduced test case in PR19621 and the actual source file that led to it, so it seems to really be rooted here. I've replied to the commit thread with discussion of my (feeble) attempts to debug this. Didn't make it very far, so reverting now that we have a good test case so that things can get back to healthy while the debugging carries on. llvm-svn: 207746
This commit is contained in:
parent
4e480de0cc
commit
18c2fbb143
|
@ -366,13 +366,13 @@ public:
|
|||
/// A negative number means that this is profitable.
|
||||
int getTreeCost();
|
||||
|
||||
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
|
||||
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
|
||||
void buildTree(ArrayRef<Value *> Roots,
|
||||
ArrayRef<Value *> UserIgnoreLst = None);
|
||||
/// Construct a vectorizable tree that starts at \p Roots and is possibly
|
||||
/// used by a reduction of \p RdxOps.
|
||||
void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
|
||||
|
||||
/// Clear the internal data structures that are created by 'buildTree'.
|
||||
void deleteTree() {
|
||||
RdxOps = 0;
|
||||
VectorizableTree.clear();
|
||||
ScalarToTreeEntry.clear();
|
||||
MustGather.clear();
|
||||
|
@ -528,8 +528,8 @@ private:
|
|||
/// Numbers instructions in different blocks.
|
||||
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
|
||||
|
||||
/// List of users to ignore during scheduling and that don't need extracting.
|
||||
ArrayRef<Value *> UserIgnoreList;
|
||||
/// Reduction operators.
|
||||
ValueSet *RdxOps;
|
||||
|
||||
// Analysis and block reference.
|
||||
Function *F;
|
||||
|
@ -543,10 +543,9 @@ private:
|
|||
IRBuilder<> Builder;
|
||||
};
|
||||
|
||||
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||
ArrayRef<Value *> UserIgnoreLst) {
|
||||
void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
|
||||
deleteTree();
|
||||
UserIgnoreList = UserIgnoreLst;
|
||||
RdxOps = Rdx;
|
||||
if (!getSameType(Roots))
|
||||
return;
|
||||
buildTree_rec(Roots, 0);
|
||||
|
@ -578,9 +577,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
|||
if (!UserInst)
|
||||
continue;
|
||||
|
||||
// Ignore users in the user ignore list.
|
||||
if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
|
||||
UserIgnoreList.end())
|
||||
// Ignore uses that are part of the reduction.
|
||||
if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
|
||||
|
@ -711,9 +709,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Ignore users in the user ignore list.
|
||||
if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
|
||||
UserIgnoreList.end())
|
||||
// This user is part of the reduction.
|
||||
if (RdxOps && RdxOps->count(UI))
|
||||
continue;
|
||||
|
||||
// Make sure that we can schedule this unknown user.
|
||||
|
@ -1752,9 +1749,8 @@ Value *BoUpSLP::vectorizeTree() {
|
|||
DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
|
||||
|
||||
assert((ScalarToTreeEntry.count(U) ||
|
||||
// It is legal to replace users in the ignorelist by undef.
|
||||
(std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
|
||||
UserIgnoreList.end())) &&
|
||||
// It is legal to replace the reduction users by undef.
|
||||
(RdxOps && RdxOps->count(U))) &&
|
||||
"Replacing out-of-tree value with undef");
|
||||
}
|
||||
#endif
|
||||
|
@ -1958,11 +1954,8 @@ private:
|
|||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a list of operands.
|
||||
/// \@param BuildVector A list of users to ignore for the purpose of
|
||||
/// scheduling and that don't need extracting.
|
||||
/// \returns true if a value was vectorized.
|
||||
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
ArrayRef<Value *> BuildVector = None);
|
||||
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a chain that may start at the operands of \V;
|
||||
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
|
||||
|
@ -2135,8 +2128,7 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
|||
return tryToVectorizeList(VL, R);
|
||||
}
|
||||
|
||||
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
ArrayRef<Value *> BuildVector) {
|
||||
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
|
||||
if (VL.size() < 2)
|
||||
return false;
|
||||
|
||||
|
@ -2186,33 +2178,13 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
|||
<< "\n");
|
||||
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
|
||||
|
||||
ArrayRef<Value *> BuildVectorSlice;
|
||||
if (!BuildVector.empty())
|
||||
BuildVectorSlice = BuildVector.slice(i, OpsWidth);
|
||||
|
||||
R.buildTree(Ops, BuildVectorSlice);
|
||||
R.buildTree(Ops);
|
||||
int Cost = R.getTreeCost();
|
||||
|
||||
if (Cost < -SLPCostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
|
||||
Value *VectorizedRoot = R.vectorizeTree();
|
||||
R.vectorizeTree();
|
||||
|
||||
// Reconstruct the build vector by extracting the vectorized root. This
|
||||
// way we handle the case where some elements of the vector are undefined.
|
||||
// (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
|
||||
if (!BuildVectorSlice.empty()) {
|
||||
Instruction *InsertAfter = cast<Instruction>(VectorizedRoot);
|
||||
for (auto &V : BuildVectorSlice) {
|
||||
InsertElementInst *IE = cast<InsertElementInst>(V);
|
||||
IRBuilder<> Builder(++BasicBlock::iterator(InsertAfter));
|
||||
Instruction *Extract = cast<Instruction>(
|
||||
Builder.CreateExtractElement(VectorizedRoot, IE->getOperand(2)));
|
||||
IE->setOperand(1, Extract);
|
||||
IE->removeFromParent();
|
||||
IE->insertAfter(Extract);
|
||||
InsertAfter = IE;
|
||||
}
|
||||
}
|
||||
// Move to the next bundle.
|
||||
i += VF - 1;
|
||||
Changed = true;
|
||||
|
@ -2321,7 +2293,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
|
|||
/// *p =
|
||||
///
|
||||
class HorizontalReduction {
|
||||
SmallVector<Value *, 16> ReductionOps;
|
||||
SmallPtrSet<Value *, 16> ReductionOps;
|
||||
SmallVector<Value *, 32> ReducedVals;
|
||||
|
||||
BinaryOperator *ReductionRoot;
|
||||
|
@ -2415,7 +2387,7 @@ public:
|
|||
// We need to be able to reassociate the adds.
|
||||
if (!TreeN->isAssociative())
|
||||
return false;
|
||||
ReductionOps.push_back(TreeN);
|
||||
ReductionOps.insert(TreeN);
|
||||
}
|
||||
// Retract.
|
||||
Stack.pop_back();
|
||||
|
@ -2452,7 +2424,7 @@ public:
|
|||
|
||||
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
|
||||
ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
|
||||
V.buildTree(ValsToReduce, ReductionOps);
|
||||
V.buildTree(ValsToReduce, &ReductionOps);
|
||||
|
||||
// Estimate cost.
|
||||
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
|
||||
|
@ -2571,16 +2543,13 @@ private:
|
|||
///
|
||||
/// Returns true if it matches
|
||||
///
|
||||
static bool findBuildVector(InsertElementInst *FirstInsertElem,
|
||||
SmallVectorImpl<Value *> &BuildVector,
|
||||
SmallVectorImpl<Value *> &BuildVectorOpds) {
|
||||
if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
|
||||
static bool findBuildVector(InsertElementInst *IE,
|
||||
SmallVectorImpl<Value *> &Ops) {
|
||||
if (!isa<UndefValue>(IE->getOperand(0)))
|
||||
return false;
|
||||
|
||||
InsertElementInst *IE = FirstInsertElem;
|
||||
while (true) {
|
||||
BuildVector.push_back(IE);
|
||||
BuildVectorOpds.push_back(IE->getOperand(1));
|
||||
Ops.push_back(IE->getOperand(1));
|
||||
|
||||
if (IE->use_empty())
|
||||
return false;
|
||||
|
@ -2751,16 +2720,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||
}
|
||||
|
||||
// Try to vectorize trees that start at insertelement instructions.
|
||||
if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
|
||||
SmallVector<Value *, 16> BuildVector;
|
||||
SmallVector<Value *, 16> BuildVectorOpds;
|
||||
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
|
||||
if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
|
||||
SmallVector<Value *, 8> Ops;
|
||||
if (!findBuildVector(IE, Ops))
|
||||
continue;
|
||||
|
||||
// Vectorize starting with the build vector operands ignoring the
|
||||
// BuildVector instructions for the purpose of scheduling and user
|
||||
// extraction.
|
||||
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
|
||||
if (tryToVectorizeList(Ops, R)) {
|
||||
Changed = true;
|
||||
it = BB->begin();
|
||||
e = BB->end();
|
||||
|
|
|
@ -195,30 +195,6 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
|
|||
ret <4 x float> %rb
|
||||
}
|
||||
|
||||
; Make sure that vectorization happens even if insertelements operations
|
||||
; must be rescheduled. The case here is from compiling Julia.
|
||||
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @reschedule_extract(
|
||||
; CHECK: %1 = fadd <4 x float> %a, %b
|
||||
%a0 = extractelement <4 x float> %a, i32 0
|
||||
%b0 = extractelement <4 x float> %b, i32 0
|
||||
%c0 = fadd float %a0, %b0
|
||||
%v0 = insertelement <4 x float> undef, float %c0, i32 0
|
||||
%a1 = extractelement <4 x float> %a, i32 1
|
||||
%b1 = extractelement <4 x float> %b, i32 1
|
||||
%c1 = fadd float %a1, %b1
|
||||
%v1 = insertelement <4 x float> %v0, float %c1, i32 1
|
||||
%a2 = extractelement <4 x float> %a, i32 2
|
||||
%b2 = extractelement <4 x float> %b, i32 2
|
||||
%c2 = fadd float %a2, %b2
|
||||
%v2 = insertelement <4 x float> %v1, float %c2, i32 2
|
||||
%a3 = extractelement <4 x float> %a, i32 3
|
||||
%b3 = extractelement <4 x float> %b, i32 3
|
||||
%c3 = fadd float %a3, %b3
|
||||
%v3 = insertelement <4 x float> %v2, float %c3, i32 3
|
||||
ret <4 x float> %v3
|
||||
}
|
||||
|
||||
; Check that cost model for vectorization takes credit for
|
||||
; instructions that are erased.
|
||||
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
|
||||
|
|
Loading…
Reference in New Issue