From 803912ea5709b9f6d72d8e9ba74502a7d261abbd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 19 Aug 2018 04:26:31 +0000 Subject: [PATCH] [X86] Fix an issue in the matching for ADDUS. We were basically assuming only one operand of the compare could be an ADD node and using that to swap operands. But we can have a normal add followed by a saturating add. This rewrites the canonicalization to just be based on the condition code. llvm-svn: 340134 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 23 +++++++++---------- .../CodeGen/X86/sse2-intrinsics-canonical.ll | 21 +++-------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 853f15161a69..1b5fea186042 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33111,12 +33111,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, SDValue CondLHS = Cond->getOperand(0); SDValue CondRHS = Cond->getOperand(1); - // Canonicalize ADD to CondRHS to simplify the logic below. - if (CondLHS.getOpcode() == ISD::ADD) { - std::swap(CondLHS, CondRHS); - CC = ISD::getSetCCSwappedOperands(CC); - } - // Check if one of the arms of the VSELECT is vector with all bits set. // If it's on the left side invert the predicate to simplify logic below. SDValue Other; @@ -33127,10 +33121,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, Other = LHS; } - // We can test against either of the addition operands. 
- if (Other.getNode() && Other.getNumOperands() == 2 && - (Other.getOperand(0) == CondLHS || - Other.getOperand(1) == CondLHS)) { + if (Other.getNode() && Other.getOpcode() == ISD::ADD) { SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, @@ -33138,9 +33129,17 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops); }; + // Canonicalize condition operands. + if (CC == ISD::SETUGE) { + std::swap(CondLHS, CondRHS); + CC = ISD::SETULE; + } + + // We can test against either of the addition operands. // x <= x+y ? x+y : ~0 --> addus x, y - if ((CC == ISD::SETULE) && - Other.getOpcode() == ISD::ADD && Other == CondRHS) + // x+y >= x ? x+y : ~0 --> addus x, y + if (CC == ISD::SETULE && Other == CondRHS && + (OpLHS == CondLHS || OpRHS == CondLHS)) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, ADDUSBuilder); } diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-canonical.ll index dc4ae2e46975..de89e7af7958 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-canonical.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-canonical.ll @@ -278,34 +278,19 @@ define <8 x i16> @add_addusw(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { ; SSE-LABEL: add_addusw: ; SSE: ## %bb.0: ; SSE-NEXT: paddw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xfd,0xca] -; SSE-NEXT: paddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xfd,0xc1] -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4 -; SSE-NEXT: pxor %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xef,0xca] -; SSE-NEXT: pxor %xmm0, %xmm2 ## encoding: [0x66,0x0f,0xef,0xd0] -; SSE-NEXT: pcmpgtw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0x65,0xca] -; SSE-NEXT: por %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xeb,0xc1] +; 
SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1] ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: add_addusw: ; AVX2: ## %bb.0: ; AVX2-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xfd,0xca] -; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] -; AVX2-NEXT: vpminuw %xmm0, %xmm1, %xmm2 ## encoding: [0xc4,0xe2,0x71,0x3a,0xd0] -; AVX2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x75,0xca] -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x76,0xd2] -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xca] -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xeb,0xc0] +; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1] ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: add_addusw: ; SKX: ## %bb.0: ; SKX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] -; SKX-NEXT: vpcmpnleuw %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3e,0xc8,0x06] -; SKX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9] -; SKX-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] +; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %a = add <8 x i16> %y, %z %b = add <8 x i16> %x, %a