[SelectionDAG][X86] Improve legalization of v32i1 CONCAT_VECTORS of v16i1 for AVX512F.

A v32i1 CONCAT_VECTORS of v16i1 uses promotion to v32i8 to legalize the v32i1. This results in a bunch of extract_vector_elts and a build_vector that ultimately gets scalarized.

This patch checks to see if v16i8 is legal and inserts a any_extend to that so that we can concat v16i8 to v32i8 and avoid creating the extracts.

llvm-svn: 320674
This commit is contained in:
Craig Topper 2017-12-14 08:25:58 +00:00
parent 271a5c72a0
commit eab2d4665f
7 changed files with 526 additions and 6853 deletions

View File

@ -3497,6 +3497,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
// If the input type is legal and we can promote it to a legal type with the
// same element size, go ahead do that to create a new concat.
if (getTypeAction(N->getOperand(0).getValueType()) ==
TargetLowering::TypeLegal) {
EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
if (TLI.isTypeLegal(InPromotedTy)) {
SmallVector<SDValue, 8> Ops(NumOperands);
for (unsigned i = 0; i < NumOperands; ++i) {
Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
N->getOperand(i));
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
}
}
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {

File diff suppressed because it is too large Load Diff

View File

@ -355,284 +355,28 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0

File diff suppressed because it is too large Load Diff

View File

@ -287,278 +287,20 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0

View File

@ -60,142 +60,14 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0

View File

@ -4259,280 +4259,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX512F-LABEL: test_cmp_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm3
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
@ -4546,280 +4290,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX512DQ-LABEL: test_cmp_v64i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
; AVX512DQ-NEXT: vmovd %ecx, %xmm3
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
; AVX512DQ-NEXT: vmovd %ecx, %xmm2
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
; AVX512DQ-NEXT: vmovd %ecx, %xmm1
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0