forked from OSchip/llvm-project
[X86][SSE] Fold 128-bit PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors
This is seen in the sub-128-bit vector trunc(ext()) of comparison results. Fixes the pr46585.ll regression in D66004.
This commit is contained in:
parent
3c39db0c44
commit
b60f998859
|
@ -42129,6 +42129,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
}
|
||||
|
||||
// Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors.
|
||||
if (VT.is128BitVector()) {
|
||||
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
SDValue Src0, Src1;
|
||||
if (N0.getOpcode() == ExtOpc &&
|
||||
N0.getOperand(0).getValueType().is64BitVector() &&
|
||||
N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
|
||||
Src0 = N0.getOperand(0);
|
||||
}
|
||||
if (N1.getOpcode() == ExtOpc &&
|
||||
N1.getOperand(0).getValueType().is64BitVector() &&
|
||||
N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
|
||||
Src1 = N1.getOperand(0);
|
||||
}
|
||||
if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
|
||||
assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
|
||||
Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
|
||||
Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to combine as shuffle.
|
||||
SDValue Op(N, 0);
|
||||
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
|
||||
|
|
|
@ -420,8 +420,6 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
|
|||
; SSE2-SSSE3-LABEL: v8i8:
|
||||
; SSE2-SSSE3: # %bb.0:
|
||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
|
||||
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
|
||||
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE2-SSSE3-NEXT: retq
|
||||
|
@ -429,8 +427,6 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
|
|||
; AVX12-LABEL: v8i8:
|
||||
; AVX12: # %bb.0:
|
||||
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX12-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX12-NEXT: retq
|
||||
|
|
|
@ -966,34 +966,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
|
|||
}
|
||||
|
||||
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
|
||||
; SSE2-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
||||
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE2-NEXT: cmpb $-1, %al
|
||||
; SSE2-NEXT: sete %al
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
|
||||
; SSE41-NEXT: packsswb %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE41-NEXT: cmpb $-1, %al
|
||||
; SSE41-NEXT: sete %al
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE-NEXT: cmpb $-1, %al
|
||||
; SSE-NEXT: sete %al
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: icmp_v8i16_v8i1:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: cmpb $-1, %al
|
||||
; AVX-NEXT: sete %al
|
||||
|
|
|
@ -950,33 +950,21 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
|
|||
}
|
||||
|
||||
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
|
||||
; SSE2-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
|
||||
; SSE2-NEXT: setne %al
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE41-NEXT: testw %ax, %ax
|
||||
; SSE41-NEXT: setne %al
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE-NEXT: testb %al, %al
|
||||
; SSE-NEXT: setne %al
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: icmp_v8i16_v8i1:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: testw %ax, %ax
|
||||
; AVX-NEXT: testb %al, %al
|
||||
; AVX-NEXT: setne %al
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -1034,34 +1034,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
|
|||
}
|
||||
|
||||
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
|
||||
; SSE2-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: packsswb %xmm0, %xmm0
|
||||
; SSE2-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE2-NEXT: testb %al, %al
|
||||
; SSE2-NEXT: setnp %al
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
|
||||
; SSE41-NEXT: packsswb %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovmskb %xmm0, %eax
|
||||
; SSE41-NEXT: testb %al, %al
|
||||
; SSE41-NEXT: setnp %al
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: icmp_v8i16_v8i1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
|
||||
; SSE-NEXT: pmovmskb %xmm1, %eax
|
||||
; SSE-NEXT: testb %al, %al
|
||||
; SSE-NEXT: setnp %al
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: icmp_v8i16_v8i1:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
|
||||
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovmskb %xmm0, %eax
|
||||
; AVX-NEXT: testb %al, %al
|
||||
; AVX-NEXT: setnp %al
|
||||
|
|
Loading…
Reference in New Issue