[X86][SSE] Fold 128-bit PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors

This pattern arises from the trunc(ext()) of sub-128-bit vector comparison results.

Fixes pr46585.ll regression in D66004
This commit is contained in:
Simon Pilgrim 2020-08-05 18:09:22 +01:00
parent 3c39db0c44
commit b60f998859
5 changed files with 47 additions and 71 deletions

View File

@ -42129,6 +42129,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
}
}
// Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors.
// Only attempted when the PACK result type VT is a 128-bit vector. Each
// operand must be either an extend (of the kind selected below) from a 64-bit
// vector whose element width equals DstBitsPerElt, or undef.
// NOTE(review): presumably sound because packing a value that was just
// extended from the destination element width cannot saturate - confirm
// against the PACKSS/PACKUS semantics handled earlier in this function.
if (VT.is128BitVector()) {
// Pick the single extension opcode to match, based on IsSigned (defined
// outside this hunk): sign-extend when set, zero-extend otherwise.
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// Src0/Src1 are left empty unless the corresponding operand matches; the
// boolean tests below rely on that.
SDValue Src0, Src1;
// First operand: matching extend from a 64-bit vector with elements of the
// packed width.
if (N0.getOpcode() == ExtOpc &&
N0.getOperand(0).getValueType().is64BitVector() &&
N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
Src0 = N0.getOperand(0);
}
// Second operand: same check as for the first.
if (N1.getOpcode() == ExtOpc &&
N1.getOperand(0).getValueType().is64BitVector() &&
N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
Src1 = N1.getOperand(0);
}
// Fold only if every operand is either a matched extend source or undef.
if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
// At least one real source is required: the undef half created below takes
// its value type from the other source.
assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
// Substitute an undef of the matched source's type for an undef operand.
// After the first line Src0 is always set, so the second line can safely
// read Src0's type.
Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
// Concatenate the two 64-bit sources into the 128-bit result type.
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
}
}
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))

View File

@ -420,8 +420,6 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
@ -429,8 +427,6 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
; AVX12-LABEL: v8i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq

View File

@ -966,34 +966,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
}
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
; SSE2-LABEL: icmp_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
; SSE41-NEXT: packsswb %xmm0, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: cmpb $-1, %al
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
; SSE-LABEL: icmp_v8i16_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: cmpb $-1, %al
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: cmpb $-1, %al
; AVX-NEXT: sete %al

View File

@ -950,33 +950,21 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
}
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
; SSE2-LABEL: icmp_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testw %ax, %ax
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
; SSE-LABEL: icmp_v8i16_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testb %al, %al
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testw %ax, %ax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;

View File

@ -1034,34 +1034,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
}
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
; SSE2-LABEL: icmp_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: setnp %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
; SSE41-NEXT: packsswb %xmm0, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testb %al, %al
; SSE41-NEXT: setnp %al
; SSE41-NEXT: retq
; SSE-LABEL: icmp_v8i16_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testb %al, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: setnp %al