forked from OSchip/llvm-project
[X86][SSE] Don't use LowerVSETCCWithSUBUS for unsigned compare with non-negative operands (PR47448)
We already simplify unsigned comparisons to signed comparisons when both operands are known to be non-negative (so no sign-bit flip is needed), but we were still calling LowerVSETCCWithSUBUS in that case, which resulted in the PR47448 regressions.
This commit is contained in:
parent
7993431dad
commit
9de0a3da6a
|
@ -22731,8 +22731,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
}
|
||||
|
||||
// Try to use SUBUS and PCMPEQ.
|
||||
if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
|
||||
return V;
|
||||
if (FlipSigns)
|
||||
if (SDValue V =
|
||||
LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// We are handling one of the integer comparisons here. Since SSE only has
|
||||
// GT and EQ comparisons for integer, swapping operands and multiple
|
||||
|
|
|
@ -285,9 +285,9 @@ define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
|
|||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: psrlw $1, %xmm1
|
||||
; SSE2-NEXT: psubusw %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: uge_v8i16:
|
||||
|
@ -316,9 +316,9 @@ define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
|
|||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: psrlw $1, %xmm1
|
||||
; SSE2-NEXT: psubusw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: ule_v8i16:
|
||||
|
@ -459,9 +459,9 @@ define <8 x i16> @PR47448_uge(i16 signext %0) {
|
|||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
|
||||
; SSE2-NEXT: psubusw %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pcmpgtw %xmm0, %xmm1
|
||||
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: PR47448_uge:
|
||||
|
@ -508,10 +508,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
|
|||
; SSE-NEXT: movd %edi, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
|
||||
; SSE-NEXT: psubusw %xmm0, %xmm1
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE-NEXT: pcmpeqw %xmm1, %xmm0
|
||||
; SSE-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: PR47448_ugt:
|
||||
|
@ -520,10 +517,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
|
|||
; AVX1-NEXT: vmovd %edi, %xmm0
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
|
||||
; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: PR47448_ugt:
|
||||
|
@ -531,10 +525,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) {
|
|||
; AVX2-NEXT: andl $7, %edi
|
||||
; AVX2-NEXT: vmovd %edi, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8]
|
||||
; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%2 = and i16 %0, 7
|
||||
%3 = insertelement <8 x i16> undef, i16 %2, i32 0
|
||||
|
|
Loading…
Reference in New Issue