forked from OSchip/llvm-project
[X86] Improve legalization of vXi16/vXi8 selects.
Extend vXi1 conditions of vXi8/vXi16 selects even before type legalization gets a chance to split wide vectors. Previously we would only extend 128-bit and 256-bit vectors, so if we started with a vector of 512 bits or wider that needed to be split, we wouldn't extend until after the split had taken place. By extending early we improve the results of type legalization. Don't widen the condition of 128/256-bit vXi16/vXi8 selects when we have BWI but not VLX. We can still use a mask register by widening the select to 512 bits instead. This is similar to what we already do for compares. llvm-svn: 322450
This commit is contained in:
parent
7a3b10184b
commit
e9fc0cd920
|
@ -31508,14 +31508,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
|||
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
|
||||
// lowering on KNL. In this case we convert it to
|
||||
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
|
||||
// The same situation for all 128 and 256-bit vectors of i8 and i16.
|
||||
// The same situation for all vectors of i8 and i16 without BWI.
|
||||
// Make sure we extend these even before type legalization gets a chance to
|
||||
// split wide vectors.
|
||||
// Since SKX these selects have a proper lowering.
|
||||
if (Subtarget.hasAVX512() && CondVT.isVector() &&
|
||||
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
|
||||
CondVT.getVectorElementType() == MVT::i1 &&
|
||||
(VT.is128BitVector() || VT.is256BitVector()) &&
|
||||
VT.getVectorNumElements() > 4 &&
|
||||
(VT.getVectorElementType() == MVT::i8 ||
|
||||
VT.getVectorElementType() == MVT::i16) &&
|
||||
!(Subtarget.hasBWI() && Subtarget.hasVLX())) {
|
||||
VT.getVectorElementType() == MVT::i16)) {
|
||||
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
|
||||
DCI.AddToWorklist(Cond.getNode());
|
||||
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
|
||||
|
|
|
@ -3437,6 +3437,14 @@ let Predicates = [HasAVX512, NoVLX] in {
|
|||
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
|
||||
}
|
||||
|
||||
let Predicates = [HasBWI, NoVLX] in {
|
||||
defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
|
||||
defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
|
||||
|
||||
defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
|
||||
defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
// 512-bit store.
|
||||
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
|
||||
|
|
|
@ -139,20 +139,11 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64
|
|||
; AVX512F-NEXT: movq %rsp, %rbp
|
||||
; AVX512F-NEXT: andq $-32, %rsp
|
||||
; AVX512F-NEXT: subq $64, %rsp
|
||||
; AVX512F-NEXT: movq %rdi, %rax
|
||||
; AVX512F-NEXT: shrq $32, %rax
|
||||
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movl %edi, (%rsp)
|
||||
; AVX512F-NEXT: shrq $32, %rdi
|
||||
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
|
||||
; AVX512F-NEXT: kmovw (%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
|
@ -161,6 +152,14 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64
|
|||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
|
||||
; AVX512F-NEXT: movq %rbp, %rsp
|
||||
; AVX512F-NEXT: popq %rbp
|
||||
; AVX512F-NEXT: retq
|
||||
|
@ -189,20 +188,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
|
|||
; AVX512F-NEXT: movq %rsp, %rbp
|
||||
; AVX512F-NEXT: andq $-32, %rsp
|
||||
; AVX512F-NEXT: subq $64, %rsp
|
||||
; AVX512F-NEXT: movq %rdi, %rax
|
||||
; AVX512F-NEXT: shrq $32, %rax
|
||||
; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movl %edi, (%rsp)
|
||||
; AVX512F-NEXT: shrq $32, %rdi
|
||||
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: kmovw (%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
|
@ -211,6 +201,14 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
|
|||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
||||
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: movq %rbp, %rsp
|
||||
; AVX512F-NEXT: popq %rbp
|
||||
; AVX512F-NEXT: retq
|
||||
|
|
|
@ -152,18 +152,17 @@ define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwi
|
|||
define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
|
||||
; KNL-LABEL: zext_32x8mem_to_32x16:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
|
||||
; KNL-NEXT: vmovdqa %ymm2, %ymm0
|
||||
; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_32x8mem_to_32x16:
|
||||
|
@ -181,18 +180,17 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
|
|||
define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
|
||||
; KNL-LABEL: sext_32x8mem_to_32x16:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm1
|
||||
; KNL-NEXT: vpmovsxbw (%rdi), %ymm2
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
|
||||
; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
|
||||
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
|
||||
; KNL-NEXT: vmovdqa %ymm2, %ymm0
|
||||
; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: sext_32x8mem_to_32x16:
|
||||
|
@ -227,18 +225,18 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
|
|||
define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
|
||||
; KNL-LABEL: zext_32x8_to_32x16_mask:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_32x8_to_32x16_mask:
|
||||
|
@ -272,18 +270,18 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
|
|||
define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
|
||||
; KNL-LABEL: sext_32x8_to_32x16_mask:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
|
||||
; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||
; KNL-NEXT: vpmovsxbw %xmm3, %ymm3
|
||||
; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
|
||||
; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: sext_32x8_to_32x16_mask:
|
||||
|
|
|
@ -1281,14 +1281,14 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
|||
define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
|
||||
; KNL-LABEL: test21:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
|
||||
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
|
||||
; KNL-NEXT: vpsllw $15, %ymm3, %ymm2
|
||||
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
|
@ -1308,14 +1308,14 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
|
|||
;
|
||||
; AVX512DQ-LABEL: test21:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpsraw $15, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
||||
; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0
|
||||
; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2
|
||||
; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1
|
||||
; AVX512DQ-NEXT: retq
|
||||
%ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
|
||||
|
|
|
@ -961,10 +961,12 @@ define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
|
|||
;
|
||||
; AVX512BW-LABEL: test47:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
|
||||
; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k0
|
||||
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
|
||||
; AVX512BW-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpblendmb %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
|
@ -992,10 +994,12 @@ define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
|
|||
;
|
||||
; AVX512BW-LABEL: test48:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2
|
||||
; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k0
|
||||
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
|
||||
; AVX512BW-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test48:
|
||||
|
@ -1022,10 +1026,12 @@ define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
|
|||
;
|
||||
; AVX512BW-LABEL: test49:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2
|
||||
; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm0, %k0
|
||||
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
|
||||
; AVX512BW-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm0, %k1
|
||||
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue