[X86] Combine zext(packus(x),packus(y)) -> concat(x,y) (PR39637)
It's proving tricky to combine shuffles across multiple vector sizes, so for now I'm adding this more specific combine - the pattern is common enough to be worth it as a first step.

llvm-svn: 354757
commit cfaf663a35
parent 3fe4bd464c
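For readers unfamiliar with the pattern: X86ISD::PACKUS saturates each source element to the narrower unsigned type, so when the upper half of every source element is already known to be zero the pack is lossless, and zero-extending it back simply reproduces the two source vectors laid end to end - which is exactly concat(x,y). The sketch below is an illustrative scalar model in plain C++ (not LLVM code, no LLVM APIs); the helper names packus and zextPackus are invented for the sketch, and the lane widths mirror the v8i16 -> v16i8 -> v16i16 case the tests exercise.

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

// Unsigned-saturating pack of two v8i16 halves into one v16i8, a scalar
// stand-in for X86ISD::PACKUS / vpackuswb.
static std::array<uint8_t, 16> packus(const std::array<uint16_t, 8> &X,
                                      const std::array<uint16_t, 8> &Y) {
  std::array<uint8_t, 16> R{};
  for (int I = 0; I < 8; ++I) {
    R[I] = (uint8_t)std::min<uint16_t>(X[I], 255);
    R[I + 8] = (uint8_t)std::min<uint16_t>(Y[I], 255);
  }
  return R;
}

// zext(packus(X, Y)) back to sixteen 16-bit lanes.
static std::array<uint16_t, 16> zextPackus(const std::array<uint16_t, 8> &X,
                                           const std::array<uint16_t, 8> &Y) {
  std::array<uint8_t, 16> P = packus(X, Y);
  std::array<uint16_t, 16> R{};
  for (int I = 0; I < 16; ++I)
    R[I] = P[I]; // zero extension of each byte
  return R;
}

int main() {
  // Both inputs have their upper byte clear (the MaskedValueIsZero case),
  // e.g. because they were produced by a logical right shift by 8.
  std::array<uint16_t, 8> X = {0, 1, 2, 127, 128, 200, 254, 255};
  std::array<uint16_t, 8> Y = {255, 254, 200, 128, 127, 2, 1, 0};

  // concat(X, Y): the two source vectors laid end to end.
  std::array<uint16_t, 16> Concat{};
  std::copy(X.begin(), X.end(), Concat.begin());
  std::copy(Y.begin(), Y.end(), Concat.begin() + 8);

  // With the high bits known zero, the pack/zero-extend round trip is exact.
  assert(zextPackus(X, Y) == Concat);
  return 0;
}

Any C++11 compiler can build and run this; the assert holds for every input whose lanes fit in 8 bits, which is precisely the precondition the combine checks.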
@@ -40328,6 +40328,20 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
     return R;
 
+  // TODO: Combine with any target/faux shuffle.
+  if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
+      VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    unsigned NumSrcElts = N00.getValueType().getVectorNumElements();
+    unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
+    APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
+    if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
+        (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
+      return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128);
+    }
+  }
+
   return SDValue();
 }
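The ZeroMask guard above is what makes the fold sound: APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2) selects the upper half of each source element, and MaskedValueIsZero must prove those bits are zero because PACKUS saturates rather than truncates. A minimal counterexample, again as a hedged plain-C++ sketch rather than anything from the patch:

#include <cstdint>
#include <cstdio>

int main() {
  // A 16-bit lane whose upper byte is set, i.e. a lane that would fail the
  // MaskedValueIsZero(N00, ZeroMask) test in the combine above.
  uint16_t Lane = 0x1234;

  // PACKUS saturates to the unsigned 8-bit maximum instead of truncating...
  uint8_t Packed = Lane > 0xFF ? 0xFF : (uint8_t)Lane; // 0xFF, not 0x34
  // ...so zero-extending the packed byte does not give the lane back.
  uint16_t RoundTrip = Packed; // 0x00FF != 0x1234

  std::printf("lane=0x%04X round-trip=0x%04X\n", (unsigned)Lane,
              (unsigned)RoundTrip);
  return 0;
}

The isUndef() checks simply let an undefined PACKUS operand through, since an undef operand can be assumed to take any convenient value.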
@@ -702,9 +702,6 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 ; AVX2-NEXT: vmovd %eax, %xmm2
 ; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
 ; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm1
@@ -387,9 +387,6 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -910,9 +907,6 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -927,9 +921,6 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -1840,27 +1840,21 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
@@ -1795,27 +1795,21 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm2
+; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax