forked from OSchip/llvm-project
[X86][SSE] combineSubToSubus - support v8i32 handling from SSSE3 (not SSE41)
Now that UMIN etc. are Legal/Custom for SSE2+, we can efficiently match SUBUS v8i32 cases from SSSE3 onwards, which can perform efficient truncation with PSHUFB. llvm-svn: 326033
This commit is contained in:
parent
17ad62be94
commit
8ad91261e8
|
@ -37635,10 +37635,10 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
|
|||
SDValue Op1 = N->getOperand(1);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// PSUBUS is supported, starting from SSE2, but special preprocessing
|
||||
// for v8i32 requires umin, which appears in SSE41.
|
||||
// PSUBUS is supported, starting from SSE2, but truncation for v8i32
|
||||
// is only worth it with SSSE3 (PSHUFB).
|
||||
if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
|
||||
!(Subtarget.hasSSE41() && (VT == MVT::v8i32)) &&
|
||||
!(Subtarget.hasSSSE3() && (VT == MVT::v8i32)) &&
|
||||
!(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
|
||||
!(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
|
||||
VT == MVT::v16i32 || VT == MVT::v8i64)))
|
||||
|
|
|
@ -1337,32 +1337,26 @@ define <8 x i16> @psubus_8i32_max(<8 x i16> %x, <8 x i32> %y) nounwind {
|
|||
;
|
||||
; SSSE3-LABEL: psubus_8i32_max:
|
||||
; SSSE3: # %bb.0: # %vector.ph
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm4
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm5
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm6
|
||||
; SSSE3-NEXT: por %xmm4, %xmm6
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm6, %xmm3
|
||||
; SSSE3-NEXT: pandn %xmm2, %xmm6
|
||||
; SSSE3-NEXT: por %xmm3, %xmm6
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm3
|
||||
; SSSE3-NEXT: por %xmm0, %xmm4
|
||||
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
|
||||
; SSSE3-NEXT: pand %xmm4, %xmm0
|
||||
; SSSE3-NEXT: pandn %xmm1, %xmm4
|
||||
; SSSE3-NEXT: por %xmm4, %xmm0
|
||||
; SSSE3-NEXT: psubd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: psubd %xmm2, %xmm6
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm6
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183,2147549183,2147549183]
|
||||
; SSSE3-NEXT: movdqa %xmm6, %xmm7
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,65535]
|
||||
; SSSE3-NEXT: pand %xmm7, %xmm2
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm7
|
||||
; SSSE3-NEXT: por %xmm2, %xmm7
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm7
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm4
|
||||
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm6, %xmm1
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm6
|
||||
; SSSE3-NEXT: por %xmm1, %xmm6
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm6
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
|
||||
; SSSE3-NEXT: psubusw %xmm6, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: psubus_8i32_max:
|
||||
|
@ -2012,34 +2006,26 @@ define <8 x i16> @psubus_i16_i32_max_swapped(<8 x i16> %x, <8 x i32> %y) nounwin
|
|||
;
|
||||
; SSSE3-LABEL: psubus_i16_i32_max_swapped:
|
||||
; SSSE3: # %bb.0: # %vector.ph
|
||||
; SSSE3-NEXT: pxor %xmm3, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pxor %xmm5, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm6
|
||||
; SSSE3-NEXT: por %xmm5, %xmm6
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm3, %xmm6
|
||||
; SSSE3-NEXT: pandn %xmm0, %xmm3
|
||||
; SSSE3-NEXT: por %xmm6, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm5, %xmm0
|
||||
; SSSE3-NEXT: por %xmm4, %xmm5
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm5
|
||||
; SSSE3-NEXT: pand %xmm0, %xmm5
|
||||
; SSSE3-NEXT: pandn %xmm4, %xmm0
|
||||
; SSSE3-NEXT: por %xmm5, %xmm0
|
||||
; SSSE3-NEXT: psubd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: psubd %xmm2, %xmm3
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm3
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm5
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183,2147549183,2147549183]
|
||||
; SSSE3-NEXT: movdqa %xmm6, %xmm7
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,65535]
|
||||
; SSSE3-NEXT: pand %xmm7, %xmm2
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm7
|
||||
; SSSE3-NEXT: por %xmm2, %xmm7
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm7
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm4
|
||||
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm6, %xmm1
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm6
|
||||
; SSSE3-NEXT: por %xmm1, %xmm6
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm6
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
|
||||
; SSSE3-NEXT: psubusw %xmm6, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: psubus_i16_i32_max_swapped:
|
||||
|
@ -2124,35 +2110,26 @@ define <8 x i16> @psubus_i16_i32_min(<8 x i16> %x, <8 x i32> %y) nounwind {
|
|||
;
|
||||
; SSSE3-LABEL: psubus_i16_i32_min:
|
||||
; SSSE3: # %bb.0: # %vector.ph
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm4
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm5
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm6
|
||||
; SSSE3-NEXT: por %xmm4, %xmm6
|
||||
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm5, %xmm6
|
||||
; SSSE3-NEXT: pandn %xmm2, %xmm5
|
||||
; SSSE3-NEXT: por %xmm6, %xmm5
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: pxor %xmm4, %xmm2
|
||||
; SSSE3-NEXT: por %xmm3, %xmm4
|
||||
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm4
|
||||
; SSSE3-NEXT: pand %xmm2, %xmm4
|
||||
; SSSE3-NEXT: pandn %xmm1, %xmm2
|
||||
; SSSE3-NEXT: por %xmm4, %xmm2
|
||||
; SSSE3-NEXT: psubd %xmm2, %xmm3
|
||||
; SSSE3-NEXT: psubd %xmm5, %xmm0
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm3
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183,2147549183,2147549183]
|
||||
; SSSE3-NEXT: movdqa %xmm6, %xmm7
|
||||
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,65535]
|
||||
; SSSE3-NEXT: pand %xmm7, %xmm2
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm7
|
||||
; SSSE3-NEXT: por %xmm2, %xmm7
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm7
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm4
|
||||
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
|
||||
; SSSE3-NEXT: pand %xmm6, %xmm1
|
||||
; SSSE3-NEXT: pandn %xmm5, %xmm6
|
||||
; SSSE3-NEXT: por %xmm1, %xmm6
|
||||
; SSSE3-NEXT: pshufb %xmm3, %xmm6
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
|
||||
; SSSE3-NEXT: psubusw %xmm6, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: psubus_i16_i32_min:
|
||||
|
|
Loading…
Reference in New Issue