From 5cc7c9ab93996d9e52d8c40cb0ef1f835f4f7e7c Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 11 Jul 2019 15:56:33 +0000
Subject: [PATCH] [X86] Merge negated ISD::SUB nodes into X86ISD::SUB
 equivalent (PR40483)

Follow-up to D58597, where it was noted that the commuted ISD::SUB variant
was having problems due to a lack of combines.

See also D63958, where we untangled setcc/sub pairs.

Differential Revision: https://reviews.llvm.org/D58875

llvm-svn: 365791
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++------
 llvm/test/CodeGen/X86/combine-sbb.ll    | 30 ++++++++++++-------------
 llvm/test/CodeGen/X86/jump_sign.ll      | 14 +++++-------
 3 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 351b0cf4e2ab..6b152fe9d7ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42653,6 +42653,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
          "Expected X86ISD::ADD or X86ISD::SUB");
 
+  SDLoc DL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   MVT VT = LHS.getSimpleValueType();
@@ -42660,21 +42661,20 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
 
   // If we don't use the flag result, simplify back to a generic ADD/SUB.
   if (!N->hasAnyUseOfValue(1)) {
-    SDLoc DL(N);
     SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
     return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
   }
 
   // Fold any similar generic ADD/SUB opcodes to reuse this node.
   auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
-    // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
-    // currently causes regressions as we don't have broad x86sub combines.
-    if (Negate)
-      return;
     SDValue Ops[] = {N0, N1};
     SDVTList VTs = DAG.getVTList(N->getValueType(0));
-    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
-      DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+      SDValue Op(N, 0);
+      if (Negate)
+        Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+      DCI.CombineTo(GenericAddSub, Op);
+    }
   };
   MatchGeneric(LHS, RHS, false);
   MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());

diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 43011b033be2..8a86c4dbb955 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -199,26 +199,24 @@ define i32 @PR40483_sub2(i32*, i32) nounwind {
 define i32 @PR40483_sub3(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub3:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    movl %edx, %edi
-; X86-NEXT:    subl %ecx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    subl %esi, %ecx
+; X86-NEXT:    subl %esi, %edx
+; X86-NEXT:    movl %edx, (%eax)
 ; X86-NEXT:    jae .LBB5_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    jmp .LBB5_3
-; X86-NEXT:  .LBB5_1:
-; X86-NEXT:    subl %edx, %ecx
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:  .LBB5_3:
 ; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB5_1:
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub3:
@@ -226,8 +224,8 @@ define i32 @PR40483_sub3(i32*, i32) nounwind {
 ; X64-NEXT:    movl (%rdi), %ecx
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    movl %esi, %edx
-; X64-NEXT:    subl %ecx, %edx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    negl %edx
 ; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    subl %esi, %ecx

diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index f31d012ede31..78a322004380 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -303,18 +303,16 @@ define i32 @func_p(i32 %a, i32 %b) nounwind {
 }
 
 ; PR13475
-; If we have sub a, b and cmp b, a and the result of cmp is used
-; by sbb, we should not optimize cmp away.
+; We don't need an explicit cmp here. A sub/neg combo will do.
+
 define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    subl %eax, %edx
-; CHECK-NEXT:    cmpl %ecx, %eax
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    xorl %edx, %eax
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    sbbl %ecx, %ecx
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    xorl %ecx, %eax
 ; CHECK-NEXT:    retl
   %t1 = icmp ult i32 %a0, %a1
   %t2 = sub i32 %a1, %a0
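
A minimal IR sketch of the kind of input that exercises the new Negate path
(the function and value names here are hypothetical illustration in the spirit
of the PR40483 tests, not part of the patch): once the compare and subtract of
the same operands have been merged into one flag-producing X86ISD::SUB, a
generic ISD::SUB of the commuted operands is rewritten as SUB(0, SUB(LHS, RHS))
per the fold above, so the second subtraction lowers to a negl of the existing
result instead of a second subl:

define i32 @commuted_sub_reuse(i32 %a, i32 %b) {
  %ab = sub i32 %a, %b         ; becomes the flag-producing X86ISD::SUB
  %ba = sub i32 %b, %a         ; commuted copy, now folded to 0 - %ab (negl)
  %uge = icmp uge i32 %a, %b   ; consumes the flags of the same subtraction
  %r = select i1 %uge, i32 %ab, i32 %ba
  ret i32 %r
}

This is the case the removed TODO in MatchGeneric described: the Negate path
used to bail out because x86 lacked broad sub combines, and with the setcc/sub
pairs untangled in D63958 the negated form no longer regresses.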