[X86][AVX] Fold extract_subvector(truncate(x),0) -> truncate(extract_subvector(x),0)

This is currently only supported for VLX targets where the op should be legal.
This commit is contained in:
Simon Pilgrim 2020-07-27 14:32:32 +01:00
parent f720c9c68c
commit ab4ffa52f0
2 changed files with 18 additions and 10 deletions

View File

@ -48435,6 +48435,14 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
}
if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
(VT.is128BitVector() || VT.is256BitVector())) {
SDLoc DL(N);
SDValue InVecSrc = InVec.getOperand(0);
unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits;
SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext);
}
}
return SDValue();

View File

@ -2008,24 +2008,24 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm1
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vmovd %xmm0, %eax
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512BWVL-NEXT: vzeroupper