forked from OSchip/llvm-project
[X86][AVX512BW] AVX512BW can sign-extend v32i8 to v32i16 for simpler v32i8 multiplies.
Only pre-AVX512BW targets need to split v32i8 vectors. llvm-svn: 264509
This commit is contained in:
parent
b549ab02b4
commit
7379a70677
|
@@ -18910,8 +18910,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
|
||||||
if (VT == MVT::v16i8 || VT == MVT::v32i8) {
|
if (VT == MVT::v16i8 || VT == MVT::v32i8) {
|
||||||
if (Subtarget.hasInt256()) {
|
if (Subtarget.hasInt256()) {
|
||||||
// For 256-bit vectors, split into 128-bit vectors to allow the
|
// For 256-bit vectors, split into 128-bit vectors to allow the
|
||||||
// sign-extension to occur.
|
// sign-extension to occur. We don't need this on AVX512BW as we can
|
||||||
if (VT == MVT::v32i8)
|
// safely sign-extend to v32i16.
|
||||||
|
if (VT == MVT::v32i8 && !Subtarget.hasBWI())
|
||||||
return Lower256IntArith(Op, DAG);
|
return Lower256IntArith(Op, DAG);
|
||||||
|
|
||||||
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
|
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
|
||||||
|
|
|
@@ -473,15 +473,11 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BW-LABEL: mul_v32i8c:
|
; AVX512BW-LABEL: mul_v32i8c:
|
||||||
; AVX512BW: # BB#0: # %entry
|
; AVX512BW: # BB#0: # %entry
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1
|
; AVX512BW-NEXT: vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
|
||||||
; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
|
; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpmullw %ymm2, %ymm1, %ymm1
|
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||||
; AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%A = mul <32 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
|
%A = mul <32 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
|
||||||
|
@@ -678,17 +674,10 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BW-LABEL: mul_v32i8:
|
; AVX512BW-LABEL: mul_v32i8:
|
||||||
; AVX512BW: # BB#0: # %entry
|
; AVX512BW: # BB#0: # %entry
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm2
|
; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm3
|
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
|
|
||||||
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1
|
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm1
|
|
||||||
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
||||||
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||||
; AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%A = mul <32 x i8> %i, %j
|
%A = mul <32 x i8> %i, %j
|
||||||
|
|
Loading…
Reference in New Issue