forked from OSchip/llvm-project
[X86] When lowering truncates to vXi1, don't sign extend i16/i8 types to 512-bit if we have VLX.
This should only affect what we do for v8i16. Previously we sign extended to v8i64, but if we have VLX we only need to extend to v8i32, which avoids unnecessary zmm register usage. llvm-svn: 321303
This commit is contained in:
parent
df898cc5ed
commit
742ac98d01
|
@@ -16449,7 +16449,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
|
|||
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
|
||||
"Unexpected vector type.");
|
||||
unsigned NumElts = InVT.getVectorNumElements();
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
|
||||
MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
|
||||
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
|
||||
InVT = ExtVT;
|
||||
ShiftInx = InVT.getScalarSizeInBits() - 1;
|
||||
|
|
|
@@ -30,13 +30,13 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
|
|||
; AVX512F-LABEL: v8i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@@ -943,12 +943,12 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
|
|||
; AVX512F-NEXT: vpsllw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm2, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
|
|
@@ -26,9 +26,9 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
|
|||
; AVX512F-LABEL: v8i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@@ -640,9 +640,9 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
|
|||
; AVX512F-NEXT: vpsllw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
|
|
@@ -537,9 +537,9 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
|
|||
;
|
||||
; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
|
|
Loading…
Reference in New Issue