From fa250cad3715a5df114cc5916e3b6f70560f9116 Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Fri, 25 Mar 2016 21:06:30 +0000 Subject: [PATCH] Prevent construction of cycle in DAG store merge When merging stores in DAGCombiner, add check to ensure that no dependenices exist that would cause the construction of a cycle in our DAG. This may happen if one store has a data dependence on another instruction (e.g. a load) which itself has a (chain) dependence on another store being merged. These stores cannot be merged safely and doing so results in a cycle that is discovered in LegalizeDAG. This test is only done in cases where Antialias analysis is used (UseAA) as non-AA store merge candidates will be merged logically after all loads which have been checked to not alias. Reviewers: ahatanak, spatel, niravd, arsenm, hfinkel, tstellarAMD, jyknight Subscribers: llvm-commits, tberghammer, danalbert, srhines Differential Revision: http://reviews.llvm.org/D18336 llvm-svn: 264461 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 38 +++++++++++------ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 ++++++++++++++++++- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7 ++-- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 +--------------- .../CodeGen/AArch64/vector_merge_dep_check.ll | 41 +++++++++++++++++++ 5 files changed, 111 insertions(+), 52 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 3eede2240c06..a352aae2a7d4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -625,18 +625,32 @@ public: /// NOTE: This is an expensive method. Use it carefully. bool hasPredecessor(const SDNode *N) const; - /// Return true if N is a predecessor of this node. - /// N is either an operand of this node, or can be reached by recursively - /// traversing up the operands. - /// In this helper the Visited and worklist sets are held externally to - /// cache predecessors over multiple invocations. If you want to test for - /// multiple predecessors this method is preferable to multiple calls to - /// hasPredecessor. Be sure to clear Visited and Worklist if the DAG - /// changes. - /// NOTE: This is still very expensive. Use carefully. - bool hasPredecessorHelper(const SDNode *N, - SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist) const; + /// Returns true if N is a predecessor of any node in Worklist. This + /// helper keeps Visited and Worklist sets externally to allow unions + /// searches to be performed in parallel, caching of results across + /// queries and incremental addition to Worklist. Stops early if N is + /// found but will resume. Remember to clear Visited and Worklists + /// if DAG changes. + static bool hasPredecessorHelper(const SDNode *N, + SmallPtrSetImpl &Visited, + SmallVectorImpl &Worklist) { + if (Visited.count(N)) + return true; + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + bool Found = false; + for (const SDValue &OpV : M->op_values()) { + SDNode *Op = OpV.getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); + if (Op == N) + Found = true; + } + if (Found) + return true; + } + return false; + } /// Return the number of values used by this operation. unsigned getNumOperands() const { return NumOperands; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0aecaa440053..09c7971c97db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -448,6 +448,12 @@ namespace { StoreSDNode* St, SmallVectorImpl &StoreNodes, SmallVectorImpl &AliasLoadNodes); + /// Helper function for MergeConsecutiveStores. Checks if + /// Candidate stores have indirect dependency through their + /// operands. \return True if safe to merge + bool checkMergeStoreCandidatesForDependencies( + SmallVectorImpl &StoreNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -9636,6 +9642,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Caches for hasPredecessorHelper. SmallPtrSet Visited; SmallVector Worklist; + Worklist.push_back(N); // If the offset is a constant, there may be other adds of constants that // can be folded with this one. We should do this to avoid having to keep @@ -9651,7 +9658,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (N->hasPredecessorHelper(Use.getUser(), Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && @@ -9684,7 +9691,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; - if (N->hasPredecessorHelper(Use, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) return false; // If Ptr may be folded in addressing mode of other use, then it's @@ -11365,6 +11372,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( } } +// We need to check that merging these stores does not cause a loop +// in the DAG. Any store candidate may depend on another candidate +// indirectly through its operand (we already consider dependencies +// through the chain). Check in parallel by searching up from +// non-chain operands of candidates. +bool DAGCombiner::checkMergeStoreCandidatesForDependencies( + SmallVectorImpl &StoreNodes) { + SmallPtrSet Visited; + SmallVector Worklist; + // search ops of store candidates + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + SDNode *n = StoreNodes[i].MemNode; + // Potential loops may happen only through non-chain operands + for (unsigned j = 1; j < n->getNumOperands(); ++j) + Worklist.push_back(n->getOperand(j).getNode()); + } + // search through DAG. We can stop early if we find a storenode + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) + return false; + } + return true; +} + bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (OptLevel == CodeGenOpt::None) return false; @@ -11418,6 +11449,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (StoreNodes.size() < 2) return false; + // only do dep endence check in AA case + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + return false; + // Sort the memory operands according to their distance from the // base pointer. As a secondary criteria: make sure stores coming // later in the code come first in the list. This is important for diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c24999b06dd3..9357581ed6a1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -20,6 +19,8 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -1471,7 +1472,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Caches for hasPredecessorHelper SmallPtrSet Visited; SmallVector Worklist; - + Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1489,7 +1490,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). - if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(ST, Visited, Worklist)) continue; StackPtr = ST->getBasePtr(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2daae135045f..ebf49b985a3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6866,47 +6866,13 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// hasPredecessor - Return true if N is a predecessor of this node. -/// N is either an operand of this node, or can be reached by recursively -/// traversing up the operands. -/// NOTE: This is an expensive method. Use it carefully. bool SDNode::hasPredecessor(const SDNode *N) const { SmallPtrSet Visited; SmallVector Worklist; + Worklist.push_back(this); return hasPredecessorHelper(N, Visited, Worklist); } -bool -SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist) const { - if (Visited.empty()) { - Worklist.push_back(this); - } else { - // Take a look in the visited set. If we've already encountered this node - // we needn't search further. - if (Visited.count(N)) - return true; - } - - // Haven't visited N yet. Continue the search. - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); - bool Found = false; - for (const SDValue &OpV : M->op_values()) { - SDNode *Op = OpV.getNode(); - if (Visited.insert(Op).second) - Worklist.push_back(Op); - if (Op == N) - Found = true; - } - if (Found) - return true; - } - - return false; -} - uint64_t SDNode::getConstantOperandVal(unsigned Num) const { assert(Num < NumOperands && "Invalid child # of SDNode!"); return cast(OperandList[Num])->getZExtValue(); diff --git a/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll new file mode 100644 index 000000000000..9220947e8362 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll @@ -0,0 +1,41 @@ +; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s +; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s + +; This test checks that we do not merge stores together which have +; dependencies through their non-chain operands (e.g. one store is the +; chain ancestor of a load whose value is used in as the data for the +; other store). Merging in such cases creates a loop in the DAG. + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +%"class.std::__1::complex.0.20.56.60.64.72.76.88.92.112.140.248" = type { float, float } + +; Function Attrs: noinline norecurse nounwind ssp uwtable +define void @fn(<2 x i64>* %argA, <2 x i64>* %argB, i64* %a) #0 align 2 { + %_p_vec_full = load <2 x i64>, <2 x i64>* %argA, align 4, !alias.scope !1, !noalias !3 + %x = extractelement <2 x i64> %_p_vec_full, i32 1 + store i64 %x, i64* %a, align 8, !alias.scope !4, !noalias !9 + %_p_vec_full155 = load <2 x i64>, <2 x i64>* %argB, align 4, !alias.scope !1, !noalias !3 + %y = extractelement <2 x i64> %_p_vec_full155, i32 0 + %scevgep41 = getelementptr i64, i64* %a, i64 -1 + store i64 %y, i64* %scevgep41, align 8, !alias.scope !4, !noalias !9 + ret void +} + +; CHECK: ret + +attributes #0 = { noinline norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"Snapdragon LLVM ARM Compiler 3.8.0 (based on LLVM 3.8.0)"} +!1 = distinct !{!1, !2, !"polly.alias.scope.rhs"} +!2 = distinct !{!2, !"polly.alias.scope.domain"} +!3 = !{!4, !5, !6, !7, !8} +!4 = distinct !{!4, !2, !"polly.alias.scope.blockB"} +!5 = distinct !{!5, !2, !"polly.alias.scope.add28.lcssa.reg2mem"} +!6 = distinct !{!6, !2, !"polly.alias.scope.count.0.lcssa.reg2mem"} +!7 = distinct !{!7, !2, !"polly.alias.scope.mul"} +!8 = distinct !{!8, !2, !"polly.alias.scope.add28.us.lcssa.reg2mem"} +!9 = !{!1, !5, !6, !7, !8}