forked from OSchip/llvm-project
[X86] Merge negated ISD::SUB nodes into X86ISD::SUB equivalent (PR40483)
Follow-up to D58597, where it was noted that the commuted ISD::SUB variant suffered from a lack of combines. See also D63958, where we untangled setcc/sub pairs. Differential Revision: https://reviews.llvm.org/D58875. llvm-svn: 365791
This commit is contained in:
parent
aeb3826228
commit
5cc7c9ab93
|
@@ -42653,6 +42653,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
|
|||
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
|
||||
"Expected X86ISD::ADD or X86ISD::SUB");
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
MVT VT = LHS.getSimpleValueType();
|
||||
|
@@ -42660,21 +42661,20 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
// If we don't use the flag result, simplify back to a generic ADD/SUB.
|
||||
if (!N->hasAnyUseOfValue(1)) {
|
||||
SDLoc DL(N);
|
||||
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
|
||||
return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
|
||||
}
|
||||
|
||||
// Fold any similar generic ADD/SUB opcodes to reuse this node.
|
||||
auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
|
||||
// TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
|
||||
// currently causes regressions as we don't have broad x86sub combines.
|
||||
if (Negate)
|
||||
return;
|
||||
SDValue Ops[] = {N0, N1};
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0));
|
||||
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
|
||||
DCI.CombineTo(GenericAddSub, SDValue(N, 0));
|
||||
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
|
||||
SDValue Op(N, 0);
|
||||
if (Negate)
|
||||
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
|
||||
DCI.CombineTo(GenericAddSub, Op);
|
||||
}
|
||||
};
|
||||
MatchGeneric(LHS, RHS, false);
|
||||
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
|
||||
|
|
|
@@ -199,26 +199,24 @@ define i32 @PR40483_sub2(i32*, i32) nounwind {
|
|||
define i32 @PR40483_sub3(i32*, i32) nounwind {
|
||||
; X86-LABEL: PR40483_sub3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl (%eax), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl (%esi), %edx
|
||||
; X86-NEXT: movl %edx, %eax
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: subl %ecx, %edi
|
||||
; X86-NEXT: movl %edi, (%esi)
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: subl %esi, %ecx
|
||||
; X86-NEXT: subl %esi, %edx
|
||||
; X86-NEXT: movl %edx, (%eax)
|
||||
; X86-NEXT: jae .LBB5_1
|
||||
; X86-NEXT: # %bb.2:
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: jmp .LBB5_3
|
||||
; X86-NEXT: .LBB5_1:
|
||||
; X86-NEXT: subl %edx, %ecx
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: .LBB5_3:
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: retl
|
||||
; X86-NEXT: .LBB5_1:
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: negl %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR40483_sub3:
|
||||
|
@@ -226,8 +224,8 @@ define i32 @PR40483_sub3(i32*, i32) nounwind {
|
|||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: subl %esi, %eax
|
||||
; X64-NEXT: movl %esi, %edx
|
||||
; X64-NEXT: subl %ecx, %edx
|
||||
; X64-NEXT: movl %eax, %edx
|
||||
; X64-NEXT: negl %edx
|
||||
; X64-NEXT: orl %eax, %edx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: subl %esi, %ecx
|
||||
|
|
|
@@ -303,18 +303,16 @@ define i32 @func_p(i32 %a, i32 %b) nounwind {
|
|||
}
|
||||
|
||||
; PR13475
|
||||
; If we have sub a, b and cmp b, a and the result of cmp is used
|
||||
; by sbb, we should not optimize cmp away.
|
||||
; We don't need an explicit cmp here. A sub/neg combo will do.
|
||||
|
||||
define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: func_q:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl %ecx, %edx
|
||||
; CHECK-NEXT: subl %eax, %edx
|
||||
; CHECK-NEXT: cmpl %ecx, %eax
|
||||
; CHECK-NEXT: sbbl %eax, %eax
|
||||
; CHECK-NEXT: xorl %edx, %eax
|
||||
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: sbbl %ecx, %ecx
|
||||
; CHECK-NEXT: negl %eax
|
||||
; CHECK-NEXT: xorl %ecx, %eax
|
||||
; CHECK-NEXT: retl
|
||||
%t1 = icmp ult i32 %a0, %a1
|
||||
%t2 = sub i32 %a1, %a0
|
||||
|
|
Loading…
Reference in New Issue