From 18c2fbb143d7f65556cb65fa037492990a4e13b5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 1 May 2014 11:24:11 +0000 Subject: [PATCH] Revert r205965, which essentially reverts r205018 for the second time. =[ Turns out that this was the root cause of PR19621. We found a crasher only recently (likely due to improvements elsewhere in the SLP vectorizer) but the reduced test case failed all the way back to here. I've confirmed that reverting this patch both fixes the reduced test case in PR19621 and the actual source file that led to it, so it seems to really be rooted here. I've replied to the commit thread with discussion of my (feeble) attempts to debug this. Didn't make it very far, so reverting now that we have a good test case so that things can get back to healthy while the debugging carries on. llvm-svn: 207746 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 93 ++++++------------- .../X86/insert-element-build-vector.ll | 24 ----- 2 files changed, 29 insertions(+), 88 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 53feb959c80c..b49b1b0ff5ca 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -366,13 +366,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots, ignoring users for - /// the purpose of scheduling and extraction in the \p UserIgnoreLst. - void buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst = None); + /// Construct a vectorizable tree that starts at \p Roots and is possibly + /// used by a reduction of \p RdxOps. + void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { + RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -528,8 +528,8 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; - /// List of users to ignore during scheduling and that don't need extracting. - ArrayRef UserIgnoreList; + /// Reduction operators. + ValueSet *RdxOps; // Analysis and block reference. Function *F; @@ -543,10 +543,9 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst) { +void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { deleteTree(); - UserIgnoreList = UserIgnoreLst; + RdxOps = Rdx; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -578,9 +577,8 @@ void BoUpSLP::buildTree(ArrayRef Roots, if (!UserInst) continue; - // Ignore users in the user ignore list. - if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != - UserIgnoreList.end()) + // Ignore uses that are part of the reduction. + if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << @@ -711,9 +709,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } - // Ignore users in the user ignore list. - if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) != - UserIgnoreList.end()) + // This user is part of the reduction. + if (RdxOps && RdxOps->count(UI)) continue; // Make sure that we can schedule this unknown user. @@ -1752,9 +1749,8 @@ Value *BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); assert((ScalarToTreeEntry.count(U) || - // It is legal to replace users in the ignorelist by undef. - (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != - UserIgnoreList.end())) && + // It is legal to replace the reduction users by undef. + (RdxOps && RdxOps->count(U))) && "Replacing out-of-tree value with undef"); } #endif @@ -1958,11 +1954,8 @@ private: bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); /// \brief Try to vectorize a list of operands. - /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. /// \returns true if a value was vectorized. - bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector = None); + bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -2135,8 +2128,7 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { return tryToVectorizeList(VL, R); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { if (VL.size() < 2) return false; @@ -2186,33 +2178,13 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << "\n"); ArrayRef Ops = VL.slice(i, OpsWidth); - ArrayRef BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(i, OpsWidth); - - R.buildTree(Ops, BuildVectorSlice); + R.buildTree(Ops); int Cost = R.getTreeCost(); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - Value *VectorizedRoot = R.vectorizeTree(); + R.vectorizeTree(); - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - Instruction *InsertAfter = cast(VectorizedRoot); - for (auto &V : BuildVectorSlice) { - InsertElementInst *IE = cast(V); - IRBuilder<> Builder(++BasicBlock::iterator(InsertAfter)); - Instruction *Extract = cast( - Builder.CreateExtractElement(VectorizedRoot, IE->getOperand(2))); - IE->setOperand(1, Extract); - IE->removeFromParent(); - IE->insertAfter(Extract); - InsertAfter = IE; - } - } // Move to the next bundle. i += VF - 1; Changed = true; @@ -2321,7 +2293,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, /// *p = /// class HorizontalReduction { - SmallVector ReductionOps; + SmallPtrSet ReductionOps; SmallVector ReducedVals; BinaryOperator *ReductionRoot; @@ -2415,7 +2387,7 @@ public: // We need to be able to reassociate the adds. if (!TreeN->isAssociative()) return false; - ReductionOps.push_back(TreeN); + ReductionOps.insert(TreeN); } // Retract. Stack.pop_back(); @@ -2452,7 +2424,7 @@ public: for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, ReductionOps); + V.buildTree(ValsToReduce, &ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -2571,16 +2543,13 @@ private: /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *FirstInsertElem, - SmallVectorImpl &BuildVector, - SmallVectorImpl &BuildVectorOpds) { - if (!isa(FirstInsertElem->getOperand(0))) +static bool findBuildVector(InsertElementInst *IE, + SmallVectorImpl &Ops) { + if (!isa(IE->getOperand(0))) return false; - InsertElementInst *IE = FirstInsertElem; while (true) { - BuildVector.push_back(IE); - BuildVectorOpds.push_back(IE->getOperand(1)); + Ops.push_back(IE->getOperand(1)); if (IE->use_empty()) return false; @@ -2751,16 +2720,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize trees that start at insertelement instructions. - if (InsertElementInst *FirstInsertElem = dyn_cast(it)) { - SmallVector BuildVector; - SmallVector BuildVectorOpds; - if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) + if (InsertElementInst *IE = dyn_cast(it)) { + SmallVector Ops; + if (!findBuildVector(IE, Ops)) continue; - // Vectorize starting with the build vector operands ignoring the - // BuildVector instructions for the purpose of scheduling and user - // extraction. - if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { + if (tryToVectorizeList(Ops, R)) { Changed = true; it = BB->begin(); e = BB->end(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 620292bdaf38..7537ea3b053c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -195,30 +195,6 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, ret <4 x float> %rb } -; Make sure that vectorization happens even if insertelements operations -; must be rescheduled. The case here is from compiling Julia. -define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @reschedule_extract( -; CHECK: %1 = fadd <4 x float> %a, %b - %a0 = extractelement <4 x float> %a, i32 0 - %b0 = extractelement <4 x float> %b, i32 0 - %c0 = fadd float %a0, %b0 - %v0 = insertelement <4 x float> undef, float %c0, i32 0 - %a1 = extractelement <4 x float> %a, i32 1 - %b1 = extractelement <4 x float> %b, i32 1 - %c1 = fadd float %a1, %b1 - %v1 = insertelement <4 x float> %v0, float %c1, i32 1 - %a2 = extractelement <4 x float> %a, i32 2 - %b2 = extractelement <4 x float> %b, i32 2 - %c2 = fadd float %a2, %b2 - %v2 = insertelement <4 x float> %v1, float %c2, i32 2 - %a3 = extractelement <4 x float> %a, i32 3 - %b3 = extractelement <4 x float> %b, i32 3 - %c3 = fadd float %a3, %b3 - %v3 = insertelement <4 x float> %v2, float %c3, i32 3 - ret <4 x float> %v3 -} - ; Check that cost model for vectorization takes credit for ; instructions that are erased. define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {