From eab2d4665fdb6a28664c6936d58adc6b35796db1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 14 Dec 2017 08:25:58 +0000 Subject: [PATCH] [SelectionDAG][X86] Improve legalization of v32i1 CONCAT_VECTORS of v16i1 for AVX512F. A v32i1 CONCAT_VECTORS of v16i1 uses promotion to v32i8 to legalize the v32i1. This results in a bunch of extract_vector_elts and a build_vector that ultimately gets scalarized. This patch checks to see if v16i8 is legal and inserts a any_extend to that so that we can concat v16i8 to v32i8 and avoid creating the extracts. llvm-svn: 320674 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 15 + llvm/test/CodeGen/X86/avx512-mask-op.ll | 816 +-- llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 264 +- .../CodeGen/X86/avx512vl-vec-masked-cmp.ll | 5332 ++--------------- .../test/CodeGen/X86/bitcast-and-setcc-512.ll | 276 +- llvm/test/CodeGen/X86/bitcast-setcc-512.ll | 132 +- .../CodeGen/X86/vector-compare-results.ll | 544 +- 7 files changed, 526 insertions(+), 6853 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8db0d043dd19..29f0bb475b08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3497,6 +3497,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); + // If the input type is legal and we can promote it to a legal type with the + // same element size, go ahead do that to create a new concat. + if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeLegal) { + EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); + if (TLI.isTypeLegal(InPromotedTy)) { + SmallVector Ops(NumOperands); + for (unsigned i = 0; i < NumOperands; ++i) { + Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, + N->getOperand(i)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); + } + } + // Take the elements from the first vector. SmallVector Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 3a4075b9d86d..9b58ee5935d2 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1802,138 +1802,137 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; KNL-NEXT: subq $32, %rsp ; KNL-NEXT: vmovups (%rdi), %zmm2 ; KNL-NEXT: vmovups 64(%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1 ; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $15, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vmovd %ecx, %xmm3 -; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: vmovd %ecx, %xmm2 +; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $12, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $11, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $10, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $9, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $8, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $7, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $6, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $5, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $4, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $3, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $2, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftlw $1, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; KNL-NEXT: kshiftrw $15, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2 ; KNL-NEXT: kshiftlw $14, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $15, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vmovd %ecx, %xmm2 -; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; KNL-NEXT: vmovd %ecx, %xmm3 +; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $13, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $12, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $11, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $10, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $9, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $8, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $7, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $6, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $5, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $4, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $3, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $2, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftlw $1, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k2, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} -; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} -; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 +; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; KNL-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z} +; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z} +; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax @@ -1997,77 +1996,76 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 +; KNL-NEXT: vcmpltps %zmm5, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm3 -; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: vmovd %ecx, %xmm5 +; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 -; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2 -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 +; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 +; KNL-NEXT: vpor %xmm5, %xmm3, %xmm3 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: vpor %xmm4, %xmm2, %xmm2 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -2150,138 +2148,137 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; AVX512DQ-NEXT: subq $32, %rsp ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3 -; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 ; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm3 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vmovd %ecx, %xmm2 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2 ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm2 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vmovd %ecx, %xmm3 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} -; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} -; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0 +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z} +; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z} +; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm0, %k0 ; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax @@ -2345,77 +2342,76 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0 +; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm1, %k0 ; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm3 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vmovd %ecx, %xmm5 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 ; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpor %xmm5, %xmm3, %xmm3 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 @@ -2909,310 +2905,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: pushq %r15 -; KNL-NEXT: .cfi_def_cfa_offset 24 -; KNL-NEXT: pushq %r14 -; KNL-NEXT: .cfi_def_cfa_offset 32 -; KNL-NEXT: pushq %r13 -; KNL-NEXT: .cfi_def_cfa_offset 40 -; KNL-NEXT: pushq %r12 -; KNL-NEXT: .cfi_def_cfa_offset 48 -; KNL-NEXT: pushq %rbx -; KNL-NEXT: .cfi_def_cfa_offset 56 -; KNL-NEXT: .cfi_offset %rbx, -56 -; KNL-NEXT: .cfi_offset %r12, -48 -; KNL-NEXT: .cfi_offset %r13, -40 -; KNL-NEXT: .cfi_offset %r14, -32 -; KNL-NEXT: .cfi_offset %r15, -24 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r8d -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r10d -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r11d -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r14d -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r15d -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r12d -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r13d -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebx -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebp -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %esi -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %r9d, %xmm3 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 -; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: kmovw %k0, 6(%rdi) ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, 6(%rdi) -; KNL-NEXT: kshiftlw $14, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r8d -; KNL-NEXT: kshiftlw $15, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: kshiftlw $13, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r9d -; KNL-NEXT: kshiftlw $12, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r11d -; KNL-NEXT: kshiftlw $11, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r14d -; KNL-NEXT: kshiftlw $10, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r15d -; KNL-NEXT: kshiftlw $9, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r12d -; KNL-NEXT: kshiftlw $8, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r13d -; KNL-NEXT: kshiftlw $7, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $6, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: kshiftlw $5, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebp -; KNL-NEXT: kshiftlw $4, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebx -; KNL-NEXT: kshiftlw $3, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $2, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: kshiftlw $1, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %r10d, %xmm2 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL-NEXT: kshiftrw $15, %k2, %k0 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 -; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k0, 4(%rdi) ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 4(%rdi) -; KNL-NEXT: kshiftlw $14, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r8d -; KNL-NEXT: kshiftlw $15, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: kshiftlw $13, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r9d -; KNL-NEXT: kshiftlw $12, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r11d -; KNL-NEXT: kshiftlw $11, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r14d -; KNL-NEXT: kshiftlw $10, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r15d -; KNL-NEXT: kshiftlw $9, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r12d -; KNL-NEXT: kshiftlw $8, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r13d -; KNL-NEXT: kshiftlw $7, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $6, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: kshiftlw $5, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebp -; KNL-NEXT: kshiftlw $4, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebx -; KNL-NEXT: kshiftlw $3, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $2, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: kshiftlw $1, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %r10d, %xmm1 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 -; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: kmovw %k1, 2(%rdi) -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r8d -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r10d -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r11d -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r14d -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r15d -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r12d -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r13d -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %esi -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebp -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebx -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %r9d, %xmm0 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k0, 2(%rdi) ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) -; KNL-NEXT: popq %rbx -; KNL-NEXT: popq %r12 -; KNL-NEXT: popq %r13 -; KNL-NEXT: popq %r14 -; KNL-NEXT: popq %r15 -; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -3234,310 +2942,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: pushq %r15 -; AVX512DQ-NEXT: .cfi_def_cfa_offset 24 -; AVX512DQ-NEXT: pushq %r14 -; AVX512DQ-NEXT: .cfi_def_cfa_offset 32 -; AVX512DQ-NEXT: pushq %r13 -; AVX512DQ-NEXT: .cfi_def_cfa_offset 40 -; AVX512DQ-NEXT: pushq %r12 -; AVX512DQ-NEXT: .cfi_def_cfa_offset 48 -; AVX512DQ-NEXT: pushq %rbx -; AVX512DQ-NEXT: .cfi_def_cfa_offset 56 -; AVX512DQ-NEXT: .cfi_offset %rbx, -56 -; AVX512DQ-NEXT: .cfi_offset %r12, -48 -; AVX512DQ-NEXT: .cfi_offset %r13, -40 -; AVX512DQ-NEXT: .cfi_offset %r14, -32 -; AVX512DQ-NEXT: .cfi_offset %r15, -24 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r8d -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r9d -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r10d -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r11d -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r14d -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r15d -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r12d -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r13d -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ebx -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ebp -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %edx -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %esi -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: vmovd %r9d, %xmm3 -; AVX512DQ-NEXT: kmovw %k1, %r9d -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k2 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 -; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) -; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r8d -; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r10d -; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r9d -; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r11d -; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r14d -; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r15d -; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r12d -; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r13d -; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ecx -; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %esi -; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ebp -; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ebx -; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %edx -; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: vmovd %r10d, %xmm2 -; AVX512DQ-NEXT: kmovw %k0, %r10d -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0 -; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 -; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r8d -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r10d -; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r9d -; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r11d -; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r14d -; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r15d -; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r12d -; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %r13d -; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ecx -; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %esi -; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ebp -; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %ebx -; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %edx -; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: vmovd %r10d, %xmm1 -; AVX512DQ-NEXT: kmovw %k0, %r10d -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 -; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r8d -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r9d -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r10d -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r11d -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r14d -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r15d -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r12d -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %r13d -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %edx -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %esi -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ebp -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ebx -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: vmovd %r9d, %xmm0 -; AVX512DQ-NEXT: kmovw %k1, %r9d -; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) -; AVX512DQ-NEXT: popq %rbx -; AVX512DQ-NEXT: popq %r12 -; AVX512DQ-NEXT: popq %r13 -; AVX512DQ-NEXT: popq %r14 -; AVX512DQ-NEXT: popq %r15 -; AVX512DQ-NEXT: popq %rbp ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq store <64 x i1> %v, <64 x i1>* %a diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index 9c25ba6c5b6a..8c1fbd645393 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -355,284 +355,28 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; KNL-NEXT: subq $64, %rsp ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rsp) ; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index 82dba2993e79..de47780651ba 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -16,98 +16,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -134,98 +53,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -499,103 +337,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -622,103 +379,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -1963,98 +1639,19 @@ define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -2082,98 +1679,19 @@ define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -2450,89 +1968,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -2541,12 +1985,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -2574,89 +2013,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -2665,12 +2030,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -3132,141 +2492,13 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__ ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3395,142 +2627,14 @@ define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64> ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3731,15 +2835,15 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -3752,145 +2856,17 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3999,174 +2975,46 @@ define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12251,98 +11099,17 @@ define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -12369,98 +11136,17 @@ define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -12734,103 +11420,22 @@ define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -12857,103 +11462,22 @@ define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -14198,98 +12722,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -14317,98 +12762,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -14685,89 +13051,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -14776,12 +13068,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -14809,89 +13096,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -14900,12 +13113,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -15367,141 +13575,13 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_ ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15630,142 +13710,14 @@ define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15966,15 +13918,15 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -15987,145 +13939,17 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16234,174 +14058,46 @@ define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24486,100 +22182,19 @@ define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -24606,101 +22221,20 @@ define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -24979,105 +22513,24 @@ define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -25104,106 +22557,25 @@ define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -26493,100 +23865,21 @@ define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -26614,101 +23907,22 @@ define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -26990,91 +24204,17 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -27083,12 +24223,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -27116,92 +24251,18 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -27210,12 +24271,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -27680,146 +24736,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -27949,146 +24877,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -28289,15 +25089,15 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -28310,148 +25110,20 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -28577,162 +25249,34 @@ define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm4 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -36945,18 +33489,10 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -36964,82 +33500,9 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -37066,18 +33529,10 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 @@ -37085,82 +33540,9 @@ define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -37440,18 +33822,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -37459,87 +33835,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %_ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -37566,18 +33867,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 @@ -37585,87 +33880,12 @@ define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -38964,101 +35184,22 @@ define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -39086,101 +35227,22 @@ define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -39463,92 +35525,18 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -39557,12 +35545,7 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %_ ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -39590,92 +35573,18 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: pushq %r15 -; NoVLX-NEXT: pushq %r14 -; NoVLX-NEXT: pushq %r13 -; NoVLX-NEXT: pushq %r12 -; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: .cfi_offset %rbx, -56 -; NoVLX-NEXT: .cfi_offset %r12, -48 -; NoVLX-NEXT: .cfi_offset %r13, -40 -; NoVLX-NEXT: .cfi_offset %r14, -32 -; NoVLX-NEXT: .cfi_offset %r15, -24 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r11d -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r14d -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r15d -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r12d -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %r13d -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ebx -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %r10d, %xmm0 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -39684,12 +35593,7 @@ define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64 ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: leaq -40(%rbp), %rsp -; NoVLX-NEXT: popq %rbx -; NoVLX-NEXT: popq %r12 -; NoVLX-NEXT: popq %r13 -; NoVLX-NEXT: popq %r14 -; NoVLX-NEXT: popq %r15 +; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq @@ -40153,150 +36057,22 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %_ ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 +; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40422,150 +36198,22 @@ define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40765,15 +36413,15 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8 +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -40787,150 +36435,22 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3 +; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 -; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2 -; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40999,219 +36519,91 @@ define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} +; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 +; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} +; NoVLX-NEXT: vpmovdb %zmm5, %xmm5 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3 -; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 -; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 -; NoVLX-NEXT: kshiftlw $14, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftlw $15, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $13, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $11, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $10, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $9, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $8, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $7, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $6, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $5, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $4, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $3, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $2, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftlw $1, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 +; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll index 4180bc22cce5..dfda374aa52f 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -287,278 +287,20 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) { ; AVX512F-NEXT: .cfi_def_cfa_register %rbp ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm1 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm0 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm1 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2 ; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm2 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2 +; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 +; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll index 371aef00c8d0..9914f0b93434 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll @@ -60,142 +60,14 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512F-NEXT: subq $32, %rsp ; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm1 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm0 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll b/llvm/test/CodeGen/X86/vector-compare-results.ll index 029217cc32d6..47ed70ce0ed5 100644 --- a/llvm/test/CodeGen/X86/vector-compare-results.ll +++ b/llvm/test/CodeGen/X86/vector-compare-results.ll @@ -4259,280 +4259,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; ; AVX512F-LABEL: test_cmp_v64i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 -; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3 -; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm3 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 ; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm2 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 +; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3 +; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2 -; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm1 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $14, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: kshiftlw $15, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm0 -; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $13, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $12, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $11, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $10, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $9, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $7, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $6, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $5, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $4, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $3, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $2, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftlw $1, %k0, %k1 -; AVX512F-NEXT: kshiftrw $15, %k1, %k1 -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 @@ -4546,280 +4290,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; ; AVX512DQ-LABEL: test_cmp_v64i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3 -; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm3 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm2 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3 +; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2 -; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm1 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %ecx -; AVX512DQ-NEXT: vmovd %ecx, %xmm0 -; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 -; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 -; AVX512DQ-NEXT: kmovw %k1, %eax -; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0