forked from OSchip/llvm-project
[SelectionDAG] CSE nodes with differing SDNodeFlags
In the optimizer (GVN etc.) when eliminating redundant nodes with different flags, the flags are ignored for the purposes of testing for congruence, and then intersected for the purposes of producing a result that supports the union of all the uses. This commit makes SelectionDAG's CSE do the same thing, allowing it to CSE nodes in more cases. This fixes PR26063. Differential Revision: http://reviews.llvm.org/D15957 llvm-svn: 257940
This commit is contained in:
parent
5a9259caa9
commit
4e9b2a60ab
|
@ -369,6 +369,18 @@ public:
|
|||
(UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
|
||||
(NoSignedZeros << 6) | (AllowReciprocal << 7);
|
||||
}
|
||||
|
||||
/// Clear any flags in this flag set that aren't also set in Flags.
///
/// Every flag is AND-ed with the corresponding flag of \p Flags, so a flag
/// stays set only when it is set in both flag sets. \p Flags is dereferenced
/// unconditionally, so it must not be null.
|
||||
void intersectWith(const SDNodeFlags *Flags) {
|
||||
NoUnsignedWrap &= Flags->NoUnsignedWrap;
|
||||
NoSignedWrap &= Flags->NoSignedWrap;
|
||||
Exact &= Flags->Exact;
|
||||
UnsafeAlgebra &= Flags->UnsafeAlgebra;
|
||||
NoNaNs &= Flags->NoNaNs;
|
||||
NoInfs &= Flags->NoInfs;
|
||||
NoSignedZeros &= Flags->NoSignedZeros;
|
||||
AllowReciprocal &= Flags->AllowReciprocal;
|
||||
}
|
||||
};
|
||||
|
||||
/// Represents one node in the SelectionDAG.
|
||||
|
@ -682,6 +694,9 @@ public:
|
|||
/// and directly, but it is not to avoid creating a vtable for this class.
|
||||
const SDNodeFlags *getFlags() const;
|
||||
|
||||
/// Clear any flags in this node that aren't also set in Flags.
|
||||
void intersectFlagsWith(const SDNodeFlags *Flags);
|
||||
|
||||
/// Return the number of values defined/returned by this operator.
|
||||
unsigned getNumValues() const { return NumValues; }
|
||||
|
||||
|
|
|
@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
|
|||
}
|
||||
}
|
||||
|
||||
/// Add logical or fast math flag values to FoldingSetNodeID value.
///
/// Nothing is added to \p ID when isBinOpWithFlags(Opcode) is false. For the
/// remaining opcodes exactly one integer is added: the raw flag bits of
/// \p Flags, or 0 when \p Flags is null.
|
||||
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
|
||||
const SDNodeFlags *Flags) {
|
||||
if (!isBinOpWithFlags(Opcode))
|
||||
return;
|
||||
|
||||
// A null Flags pointer is treated the same as a flag set with no bits set.
unsigned RawFlags = 0;
|
||||
if (Flags)
|
||||
RawFlags = Flags->getRawFlags();
|
||||
ID.AddInteger(RawFlags);
|
||||
}
|
||||
|
||||
/// Convenience overload: add the flag values of node \p N to \p ID by
/// forwarding N's opcode and the result of N->getFlags().
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
|
||||
AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
|
||||
}
|
||||
|
||||
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
|
||||
SDVTList VTList, ArrayRef<SDValue> OpList) {
|
||||
AddNodeIDOpcode(ID, OpC);
|
||||
|
@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
|
|||
}
|
||||
} // end switch (N->getOpcode())
|
||||
|
||||
AddNodeIDFlags(ID, N);
|
||||
|
||||
// Target specific memory nodes could also have address spaces to check.
|
||||
if (N->isTargetMemoryOpcode())
|
||||
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
|
||||
|
@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
|
|||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
|
@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
|
|||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
|
@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
|
|||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
|
@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
|
|||
SDValue Ops[] = {N1, N2};
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, Opcode, VTs, Ops);
|
||||
AddNodeIDFlags(ID, Opcode, Flags);
|
||||
void *IP = nullptr;
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
|
||||
if (Flags)
|
||||
E->intersectFlagsWith(Flags);
|
||||
return SDValue(E, 0);
|
||||
}
|
||||
|
||||
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
|
||||
|
||||
|
@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
|
|||
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, Opcode, VTList, Ops);
|
||||
AddNodeIDFlags(ID, Opcode, Flags);
|
||||
void *IP = nullptr;
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) {
|
||||
if (Flags)
|
||||
E->intersectFlagsWith(Flags);
|
||||
return E;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// Clear any flags in this node that aren't also set in \p Flags.
///
/// Only nodes that are BinaryWithFlagsSDNode are affected; for any other node
/// this is a no-op. \p Flags is passed straight through to intersectWith on
/// the node's Flags member.
void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
|
||||
if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
|
||||
FlagsNode->Flags.intersectWith(Flags);
|
||||
}
|
||||
|
||||
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
|
||||
assert(N->getNumValues() == 1 &&
|
||||
"Can't unroll a vector with multiple results!");
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
; RUN: llc -asm-verbose=false < %s | FileCheck %s
|
||||
; PR26063
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
; CHECK: .LBB0_1:
|
||||
; CHECK-NEXT: bl f{{$}}
|
||||
; CHECK-NEXT: ldrb r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}}
|
||||
; CHECK-NEXT: cmp r{{[0-9]+}}, #1{{$}}
|
||||
; CHECK-NEXT: cmpne r[[T0]], #0{{$}}
|
||||
; CHECK-NEXT: bne .LBB0_1{{$}}
|
||||
; Walks the bytes starting at %a. The entry guard requires %b != 1 and a
; non-zero first byte; the loop then calls @f, advances the pointer by one
; byte, and repeats while the call result is not 1 and the newly loaded byte
; is non-zero. Returns %a when the guard fails, otherwise the pointer at which
; the loop stopped. %c is not used.
define i8* @h(i8* readonly %a, i32 %b, i32 %c) {
|
||||
entry:
|
||||
%0 = load i8, i8* %a, align 1
|
||||
%tobool4 = icmp ne i8 %0, 0
|
||||
%cmp5 = icmp ne i32 %b, 1
|
||||
%1 = and i1 %cmp5, %tobool4
|
||||
br i1 %1, label %while.body.preheader, label %while.end
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ]
|
||||
%call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)()
|
||||
%incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1
|
||||
%2 = load i8, i8* %incdec.ptr, align 1
|
||||
%tobool = icmp ne i8 %2, 0
|
||||
%cmp = icmp ne i32 %call, 1
|
||||
%3 = and i1 %cmp, %tobool
|
||||
br i1 %3, label %while.body, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
%incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ]
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ]
|
||||
ret i8* %a.addr.0.lcssa
|
||||
}
|
||||
|
||||
declare i32 @f(...)
|
|
@ -1,20 +0,0 @@
|
|||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
; CHECK: addl
|
||||
|
||||
; The two additions are the same, but have different flags.
|
||||
; In theory this code should never be generated by the frontend, but this
|
||||
; tries to test that two identical instructions with two different flags
|
||||
; actually generate two different nodes.
|
||||
;
|
||||
; Normally the combiner would see this condition without the flags
|
||||
; and optimize the result of the sub into a register clear
|
||||
; (the final result would be 0). With the different flags though the combiner
|
||||
; needs to keep the add + sub nodes, because the two nodes result as different
|
||||
; nodes and so cannot assume that the subtraction of the two nodes
|
||||
; generates 0 as result
|
||||
; Returns (%a + %b) - (%a +nsw %b): the two adds take the same operands and
; differ only in the nsw flag on the second one.
define i32 @foo(i32 %a, i32 %b) {
|
||||
%1 = add i32 %a, %b
|
||||
%2 = add nsw i32 %a, %b
|
||||
%3 = sub i32 %1, %2
|
||||
ret i32 %3
|
||||
}
|
Loading…
Reference in New Issue