forked from OSchip/llvm-project
Prevent construction of cycle in DAG store merge
When merging stores in DAGCombiner, add check to ensure that no dependenices exist that would cause the construction of a cycle in our DAG. This may happen if one store has a data dependence on another instruction (e.g. a load) which itself has a (chain) dependence on another store being merged. These stores cannot be merged safely and doing so results in a cycle that is discovered in LegalizeDAG. This test is only done in cases where Antialias analysis is used (UseAA) as non-AA store merge candidates will be merged logically after all loads which have been checked to not alias. Reviewers: ahatanak, spatel, niravd, arsenm, hfinkel, tstellarAMD, jyknight Subscribers: llvm-commits, tberghammer, danalbert, srhines Differential Revision: http://reviews.llvm.org/D18336 llvm-svn: 264461
This commit is contained in:
parent
7dcae2e14a
commit
fa250cad37
|
@ -625,18 +625,32 @@ public:
|
||||||
/// NOTE: This is an expensive method. Use it carefully.
|
/// NOTE: This is an expensive method. Use it carefully.
|
||||||
bool hasPredecessor(const SDNode *N) const;
|
bool hasPredecessor(const SDNode *N) const;
|
||||||
|
|
||||||
/// Return true if N is a predecessor of this node.
|
/// Returns true if N is a predecessor of any node in Worklist. This
|
||||||
/// N is either an operand of this node, or can be reached by recursively
|
/// helper keeps Visited and Worklist sets externally to allow unions
|
||||||
/// traversing up the operands.
|
/// searches to be performed in parallel, caching of results across
|
||||||
/// In this helper the Visited and worklist sets are held externally to
|
/// queries and incremental addition to Worklist. Stops early if N is
|
||||||
/// cache predecessors over multiple invocations. If you want to test for
|
/// found but will resume. Remember to clear Visited and Worklists
|
||||||
/// multiple predecessors this method is preferable to multiple calls to
|
/// if DAG changes.
|
||||||
/// hasPredecessor. Be sure to clear Visited and Worklist if the DAG
|
static bool hasPredecessorHelper(const SDNode *N,
|
||||||
/// changes.
|
SmallPtrSetImpl<const SDNode *> &Visited,
|
||||||
/// NOTE: This is still very expensive. Use carefully.
|
SmallVectorImpl<const SDNode *> &Worklist) {
|
||||||
bool hasPredecessorHelper(const SDNode *N,
|
if (Visited.count(N))
|
||||||
SmallPtrSetImpl<const SDNode *> &Visited,
|
return true;
|
||||||
SmallVectorImpl<const SDNode *> &Worklist) const;
|
while (!Worklist.empty()) {
|
||||||
|
const SDNode *M = Worklist.pop_back_val();
|
||||||
|
bool Found = false;
|
||||||
|
for (const SDValue &OpV : M->op_values()) {
|
||||||
|
SDNode *Op = OpV.getNode();
|
||||||
|
if (Visited.insert(Op).second)
|
||||||
|
Worklist.push_back(Op);
|
||||||
|
if (Op == N)
|
||||||
|
Found = true;
|
||||||
|
}
|
||||||
|
if (Found)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the number of values used by this operation.
|
/// Return the number of values used by this operation.
|
||||||
unsigned getNumOperands() const { return NumOperands; }
|
unsigned getNumOperands() const { return NumOperands; }
|
||||||
|
|
|
@ -448,6 +448,12 @@ namespace {
|
||||||
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
|
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
|
||||||
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
|
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
|
||||||
|
|
||||||
|
/// Helper function for MergeConsecutiveStores. Checks if
|
||||||
|
/// Candidate stores have indirect dependency through their
|
||||||
|
/// operands. \return True if safe to merge
|
||||||
|
bool checkMergeStoreCandidatesForDependencies(
|
||||||
|
SmallVectorImpl<MemOpLink> &StoreNodes);
|
||||||
|
|
||||||
/// Merge consecutive store operations into a wide store.
|
/// Merge consecutive store operations into a wide store.
|
||||||
/// This optimization uses wide integers or vectors when possible.
|
/// This optimization uses wide integers or vectors when possible.
|
||||||
/// \return True if some memory operations were changed.
|
/// \return True if some memory operations were changed.
|
||||||
|
@ -9636,6 +9642,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
|
||||||
// Caches for hasPredecessorHelper.
|
// Caches for hasPredecessorHelper.
|
||||||
SmallPtrSet<const SDNode *, 32> Visited;
|
SmallPtrSet<const SDNode *, 32> Visited;
|
||||||
SmallVector<const SDNode *, 16> Worklist;
|
SmallVector<const SDNode *, 16> Worklist;
|
||||||
|
Worklist.push_back(N);
|
||||||
|
|
||||||
// If the offset is a constant, there may be other adds of constants that
|
// If the offset is a constant, there may be other adds of constants that
|
||||||
// can be folded with this one. We should do this to avoid having to keep
|
// can be folded with this one. We should do this to avoid having to keep
|
||||||
|
@ -9651,7 +9658,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
|
||||||
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
|
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (N->hasPredecessorHelper(Use.getUser(), Visited, Worklist))
|
if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (Use.getUser()->getOpcode() != ISD::ADD &&
|
if (Use.getUser()->getOpcode() != ISD::ADD &&
|
||||||
|
@ -9684,7 +9691,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
|
||||||
for (SDNode *Use : Ptr.getNode()->uses()) {
|
for (SDNode *Use : Ptr.getNode()->uses()) {
|
||||||
if (Use == N)
|
if (Use == N)
|
||||||
continue;
|
continue;
|
||||||
if (N->hasPredecessorHelper(Use, Visited, Worklist))
|
if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// If Ptr may be folded in addressing mode of other use, then it's
|
// If Ptr may be folded in addressing mode of other use, then it's
|
||||||
|
@ -11365,6 +11372,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We need to check that merging these stores does not cause a loop
|
||||||
|
// in the DAG. Any store candidate may depend on another candidate
|
||||||
|
// indirectly through its operand (we already consider dependencies
|
||||||
|
// through the chain). Check in parallel by searching up from
|
||||||
|
// non-chain operands of candidates.
|
||||||
|
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
|
||||||
|
SmallVectorImpl<MemOpLink> &StoreNodes) {
|
||||||
|
SmallPtrSet<const SDNode *, 16> Visited;
|
||||||
|
SmallVector<const SDNode *, 8> Worklist;
|
||||||
|
// search ops of store candidates
|
||||||
|
for (unsigned i = 0; i < StoreNodes.size(); ++i) {
|
||||||
|
SDNode *n = StoreNodes[i].MemNode;
|
||||||
|
// Potential loops may happen only through non-chain operands
|
||||||
|
for (unsigned j = 1; j < n->getNumOperands(); ++j)
|
||||||
|
Worklist.push_back(n->getOperand(j).getNode());
|
||||||
|
}
|
||||||
|
// search through DAG. We can stop early if we find a storenode
|
||||||
|
for (unsigned i = 0; i < StoreNodes.size(); ++i) {
|
||||||
|
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (OptLevel == CodeGenOpt::None)
|
if (OptLevel == CodeGenOpt::None)
|
||||||
return false;
|
return false;
|
||||||
|
@ -11418,6 +11449,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (StoreNodes.size() < 2)
|
if (StoreNodes.size() < 2)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// only do dep endence check in AA case
|
||||||
|
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
|
||||||
|
: DAG.getSubtarget().useAA();
|
||||||
|
if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
|
||||||
|
return false;
|
||||||
|
|
||||||
// Sort the memory operands according to their distance from the
|
// Sort the memory operands according to their distance from the
|
||||||
// base pointer. As a secondary criteria: make sure stores coming
|
// base pointer. As a secondary criteria: make sure stores coming
|
||||||
// later in the code come first in the list. This is important for
|
// later in the code come first in the list. This is important for
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
#include "llvm/ADT/SetVector.h"
|
#include "llvm/ADT/SetVector.h"
|
||||||
#include "llvm/ADT/SmallPtrSet.h"
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
#include "llvm/ADT/SmallSet.h"
|
#include "llvm/ADT/SmallSet.h"
|
||||||
|
@ -20,6 +19,8 @@
|
||||||
#include "llvm/CodeGen/Analysis.h"
|
#include "llvm/CodeGen/Analysis.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
||||||
|
#include "llvm/CodeGen/SelectionDAG.h"
|
||||||
|
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||||
#include "llvm/IR/CallingConv.h"
|
#include "llvm/IR/CallingConv.h"
|
||||||
#include "llvm/IR/Constants.h"
|
#include "llvm/IR/Constants.h"
|
||||||
#include "llvm/IR/DataLayout.h"
|
#include "llvm/IR/DataLayout.h"
|
||||||
|
@ -1471,7 +1472,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
|
||||||
// Caches for hasPredecessorHelper
|
// Caches for hasPredecessorHelper
|
||||||
SmallPtrSet<const SDNode *, 32> Visited;
|
SmallPtrSet<const SDNode *, 32> Visited;
|
||||||
SmallVector<const SDNode *, 16> Worklist;
|
SmallVector<const SDNode *, 16> Worklist;
|
||||||
|
Worklist.push_back(Idx.getNode());
|
||||||
SDValue StackPtr, Ch;
|
SDValue StackPtr, Ch;
|
||||||
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
|
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
|
||||||
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
|
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
|
||||||
|
@ -1489,7 +1490,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
|
||||||
// If the index is dependent on the store we will introduce a cycle when
|
// If the index is dependent on the store we will introduce a cycle when
|
||||||
// creating the load (the load uses the index, and by replacing the chain
|
// creating the load (the load uses the index, and by replacing the chain
|
||||||
// we will make the index dependent on the load).
|
// we will make the index dependent on the load).
|
||||||
if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist))
|
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
StackPtr = ST->getBasePtr();
|
StackPtr = ST->getBasePtr();
|
||||||
|
|
|
@ -6866,47 +6866,13 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// hasPredecessor - Return true if N is a predecessor of this node.
|
|
||||||
/// N is either an operand of this node, or can be reached by recursively
|
|
||||||
/// traversing up the operands.
|
|
||||||
/// NOTE: This is an expensive method. Use it carefully.
|
|
||||||
bool SDNode::hasPredecessor(const SDNode *N) const {
|
bool SDNode::hasPredecessor(const SDNode *N) const {
|
||||||
SmallPtrSet<const SDNode *, 32> Visited;
|
SmallPtrSet<const SDNode *, 32> Visited;
|
||||||
SmallVector<const SDNode *, 16> Worklist;
|
SmallVector<const SDNode *, 16> Worklist;
|
||||||
|
Worklist.push_back(this);
|
||||||
return hasPredecessorHelper(N, Visited, Worklist);
|
return hasPredecessorHelper(N, Visited, Worklist);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
|
||||||
SDNode::hasPredecessorHelper(const SDNode *N,
|
|
||||||
SmallPtrSetImpl<const SDNode *> &Visited,
|
|
||||||
SmallVectorImpl<const SDNode *> &Worklist) const {
|
|
||||||
if (Visited.empty()) {
|
|
||||||
Worklist.push_back(this);
|
|
||||||
} else {
|
|
||||||
// Take a look in the visited set. If we've already encountered this node
|
|
||||||
// we needn't search further.
|
|
||||||
if (Visited.count(N))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Haven't visited N yet. Continue the search.
|
|
||||||
while (!Worklist.empty()) {
|
|
||||||
const SDNode *M = Worklist.pop_back_val();
|
|
||||||
bool Found = false;
|
|
||||||
for (const SDValue &OpV : M->op_values()) {
|
|
||||||
SDNode *Op = OpV.getNode();
|
|
||||||
if (Visited.insert(Op).second)
|
|
||||||
Worklist.push_back(Op);
|
|
||||||
if (Op == N)
|
|
||||||
Found = true;
|
|
||||||
}
|
|
||||||
if (Found)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
|
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
|
||||||
assert(Num < NumOperands && "Invalid child # of SDNode!");
|
assert(Num < NumOperands && "Invalid child # of SDNode!");
|
||||||
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
|
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s
|
||||||
|
; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s
|
||||||
|
|
||||||
|
; This test checks that we do not merge stores together which have
|
||||||
|
; dependencies through their non-chain operands (e.g. one store is the
|
||||||
|
; chain ancestor of a load whose value is used in as the data for the
|
||||||
|
; other store). Merging in such cases creates a loop in the DAG.
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "aarch64--linux-android"
|
||||||
|
|
||||||
|
%"class.std::__1::complex.0.20.56.60.64.72.76.88.92.112.140.248" = type { float, float }
|
||||||
|
|
||||||
|
; Function Attrs: noinline norecurse nounwind ssp uwtable
|
||||||
|
define void @fn(<2 x i64>* %argA, <2 x i64>* %argB, i64* %a) #0 align 2 {
|
||||||
|
%_p_vec_full = load <2 x i64>, <2 x i64>* %argA, align 4, !alias.scope !1, !noalias !3
|
||||||
|
%x = extractelement <2 x i64> %_p_vec_full, i32 1
|
||||||
|
store i64 %x, i64* %a, align 8, !alias.scope !4, !noalias !9
|
||||||
|
%_p_vec_full155 = load <2 x i64>, <2 x i64>* %argB, align 4, !alias.scope !1, !noalias !3
|
||||||
|
%y = extractelement <2 x i64> %_p_vec_full155, i32 0
|
||||||
|
%scevgep41 = getelementptr i64, i64* %a, i64 -1
|
||||||
|
store i64 %y, i64* %scevgep41, align 8, !alias.scope !4, !noalias !9
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
attributes #0 = { noinline norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
|
||||||
|
!llvm.ident = !{!0}
|
||||||
|
|
||||||
|
!0 = !{!"Snapdragon LLVM ARM Compiler 3.8.0 (based on LLVM 3.8.0)"}
|
||||||
|
!1 = distinct !{!1, !2, !"polly.alias.scope.rhs"}
|
||||||
|
!2 = distinct !{!2, !"polly.alias.scope.domain"}
|
||||||
|
!3 = !{!4, !5, !6, !7, !8}
|
||||||
|
!4 = distinct !{!4, !2, !"polly.alias.scope.blockB"}
|
||||||
|
!5 = distinct !{!5, !2, !"polly.alias.scope.add28.lcssa.reg2mem"}
|
||||||
|
!6 = distinct !{!6, !2, !"polly.alias.scope.count.0.lcssa.reg2mem"}
|
||||||
|
!7 = distinct !{!7, !2, !"polly.alias.scope.mul"}
|
||||||
|
!8 = distinct !{!8, !2, !"polly.alias.scope.add28.us.lcssa.reg2mem"}
|
||||||
|
!9 = !{!1, !5, !6, !7, !8}
|
Loading…
Reference in New Issue