[X86] Rewrite vXi1 element insertion by using a v1i1 scalar_to_vector and inserting it into a vXi1 vector.

The existing code was already doing something very similar to subvector insertion so this allows us to remove the nearly duplicate code.

This patch is a little larger than it should be because the element-insertion and subvector-insertion paths currently handle DQI differently.

llvm-svn: 323212
This commit is contained in:
Craig Topper 2018-01-23 15:56:36 +00:00
parent 0c9f77a9f9
commit c58c2b5c9b
7 changed files with 519 additions and 619 deletions

View File

@ -1198,6 +1198,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
@ -14924,74 +14925,11 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
unsigned NumElems = VecVT.getVectorNumElements();
// Copy into a k-register, extract to v1i1 and insert_subvector.
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
// Need to promote to v16i1, do the insert, then extract back.
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), Vec,
DAG.getIntPtrConstant(0, dl));
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
DAG.getIntPtrConstant(0, dl));
}
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
if (Vec.isUndef()) {
if (IdxVal)
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
// Insertion of one bit into first position
if (IdxVal == 0) {
// Clean top bits of vector.
EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
EltInVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
getZeroVector(VecVT, Subtarget, DAG, dl),
EltInVec, DAG.getIntPtrConstant(0, dl));
// Clean the first bit in source vector.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into last position
if (IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Move the current value of the bit to be replaced to bit 0.
SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Xor with the new bit.
Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec);
// Shift to MSB, filling bottom bits with 0.
Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged,
DAG.getConstant(NumElems - 1, dl, MVT::i8));
// Shift to the final position, filling upper bits with 0.
Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged,
DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8));
// Xor with original vector to cancel out the original bit value that's still
// present.
return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec,
Op.getOperand(2));
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

View File

@ -2161,20 +2161,14 @@ define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
; KNL-LABEL: test_2f64toub:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; KNL-NEXT: vcvttsd2si %xmm2, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: vcvttsd2si %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; KNL-NEXT: vcvttsd2si %xmm0, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $1, %k0, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; KNL-NEXT: vzeroupper
@ -2194,17 +2188,12 @@ define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax
; AVX512DQ-NEXT: kmovw %eax, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQ-NEXT: korw %k1, %k2, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQ-NEXT: kxorw %k1, %k0, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
; AVX512DQ-NEXT: korb %k0, %k1, %k1
; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512DQ-NEXT: vzeroupper
@ -2213,20 +2202,14 @@ define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
; AVX512BW-LABEL: test_2f64toub:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: kmovw %eax, %k1
; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
; AVX512BW-NEXT: kshiftlw $1, %k2, %k2
; AVX512BW-NEXT: korw %k1, %k2, %k1
; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
; AVX512BW-NEXT: kxorw %k0, %k2, %k0
; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: kmovw %eax, %k0
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k1
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@ -2365,20 +2348,14 @@ define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
; KNL-LABEL: test_2f64tosb:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; KNL-NEXT: vcvttsd2si %xmm2, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: vcvttsd2si %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; KNL-NEXT: vcvttsd2si %xmm0, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $1, %k0, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; KNL-NEXT: vzeroupper
@ -2398,17 +2375,12 @@ define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax
; AVX512DQ-NEXT: kmovw %eax, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQ-NEXT: korw %k1, %k2, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQ-NEXT: kxorw %k1, %k0, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
; AVX512DQ-NEXT: korb %k0, %k1, %k1
; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512DQ-NEXT: vzeroupper
@ -2417,20 +2389,14 @@ define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
; AVX512BW-LABEL: test_2f64tosb:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax
; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: kmovw %eax, %k1
; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
; AVX512BW-NEXT: kshiftlw $1, %k2, %k2
; AVX512BW-NEXT: korw %k1, %k2, %k1
; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
; AVX512BW-NEXT: kxorw %k0, %k2, %k0
; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: kmovw %eax, %k0
; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k1
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper

View File

@ -307,7 +307,7 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
; KNL-NEXT: retq
@ -320,7 +320,7 @@ define i16 @test16(i1 *%addr, i16 %a) {
; SKX-NEXT: kxorw %k0, %k2, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
; SKX-NEXT: retq
@ -341,7 +341,7 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: retq
@ -354,7 +354,7 @@ define i8 @test17(i1 *%addr, i8 %a) {
; SKX-NEXT: kxorb %k0, %k2, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k0
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kxorb %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
@ -793,7 +793,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: orl %ecx, %eax
; KNL-NEXT: vzeroupper
@ -811,7 +811,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; SKX-NEXT: kxord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k1, %k1
; SKX-NEXT: kshiftrd $27, %k1, %k1
; SKX-NEXT: kxord %k0, %k1, %k0
; SKX-NEXT: kxord %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@ -835,7 +835,7 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: vzeroupper
@ -846,12 +846,12 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: kshiftrb $2, %k0, %k1
; SKX-NEXT: kmovd %eax, %k2
; SKX-NEXT: kxorw %k2, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $13, %k1, %k1
; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kxorb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
@ -871,12 +871,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: vzeroupper
@ -887,12 +886,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftrw $1, %k0, %k1
; SKX-NEXT: kmovd %eax, %k2
; SKX-NEXT: kxorw %k2, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $14, %k1, %k1
; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftlb $1, %k1, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq

View File

@ -990,7 +990,7 @@ define <64 x i8> @test16(i64 %x) {
; KNL-NEXT: kxorw %k4, %k5, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k0, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k4
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
@ -1012,7 +1012,7 @@ define <64 x i8> @test16(i64 %x) {
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: kxorq %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@ -1025,7 +1025,7 @@ define <64 x i8> @test16(i64 %x) {
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: kxorq %k1, %k0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@ -1046,7 +1046,7 @@ define <64 x i8> @test16(i64 %x) {
; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k0, %k4, %k0
; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
@ -1079,12 +1079,12 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: cmpl %edx, %esi
; KNL-NEXT: setg %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftrw $5, %k0, %k5
; KNL-NEXT: kxorw %k4, %k5, %k4
; KNL-NEXT: kshiftrw $5, %k0, %k4
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k0, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k4
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
@ -1107,7 +1107,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: kxorq %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@ -1121,7 +1121,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: kxorq %k1, %k0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@ -1138,12 +1138,12 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; AVX512DQ-NEXT: kmovw %edi, %k3
; AVX512DQ-NEXT: cmpl %edx, %esi
; AVX512DQ-NEXT: setg %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5
; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k0, %k4, %k0
; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
@ -1165,20 +1165,19 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k2
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %esi, %k2
; KNL-NEXT: kshiftrw $8, %k2, %k0
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kshiftrw $6, %k1, %k3
; KNL-NEXT: kxorw %k2, %k3, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k1
; KNL-NEXT: kshiftlw $9, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
@ -1187,38 +1186,37 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; SKX-LABEL: test18:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: kshiftrw $9, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k0, %k3
; SKX-NEXT: kxorb %k1, %k3, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: kmovd %esi, %k2
; SKX-NEXT: kshiftrw $8, %k2, %k0
; SKX-NEXT: kshiftrw $9, %k2, %k2
; SKX-NEXT: kshiftrb $6, %k1, %k3
; SKX-NEXT: kxorb %k2, %k3, %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $1, %k2, %k2
; SKX-NEXT: kxorb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $1, %k1, %k1
; SKX-NEXT: kshiftrb $1, %k1, %k1
; SKX-NEXT: kxorb %k0, %k1, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k2, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: korb %k0, %k1, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: kshiftrw $7, %k0, %k1
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: kmovd %esi, %k2
; AVX512BW-NEXT: kshiftrw $8, %k2, %k0
; AVX512BW-NEXT: kshiftrw $9, %k2, %k2
; AVX512BW-NEXT: kshiftrw $6, %k1, %k3
; AVX512BW-NEXT: kxorw %k2, %k3, %k2
; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
; AVX512BW-NEXT: kshiftrw $9, %k2, %k2
; AVX512BW-NEXT: kxorw %k2, %k1, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: kshiftlw $9, %k1, %k1
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftlw $7, %k0, %k0
; AVX512BW-NEXT: korw %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@ -1226,19 +1224,19 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: kmovw %esi, %k2
; AVX512DQ-NEXT: kshiftrw $8, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
; AVX512DQ-NEXT: kshiftrb $6, %k1, %k3
; AVX512DQ-NEXT: kxorb %k2, %k3, %k2
; AVX512DQ-NEXT: kshiftlb $7, %k2, %k2
; AVX512DQ-NEXT: kshiftrb $1, %k2, %k2
; AVX512DQ-NEXT: kxorb %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
; AVX512DQ-NEXT: kxorb %k0, %k1, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT: korb %k0, %k1, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0

View File

@ -7305,7 +7305,7 @@ define <64 x i8> @vmov_test16(i64 %x) {
; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -7318,7 +7318,7 @@ define <64 x i8> @vmov_test16(i64 %x) {
; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@ -7339,7 +7339,7 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -7353,7 +7353,7 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@ -7366,37 +7366,37 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
; GENERIC-LABEL: vmov_test18:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kshiftrw $8, %k1, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $9, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test18:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftrw $8, %k1, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $9, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00]
; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00]
; SKX-NEXT: kshiftrw $8, %k2, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $9, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $6, %k1, %k3 # sched: [3:1.00]
; SKX-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $7, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00]
; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = bitcast i8 %a to <8 x i1>

File diff suppressed because it is too large Load Diff

View File

@ -1844,7 +1844,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k7
; AVX512F-32-NEXT: kxorq %k1, %k5, %k7
; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
; AVX512F-32-NEXT: kmovd %ecx, %k5
@ -1855,7 +1855,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2
; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -1863,7 +1863,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0
; AVX512F-32-NEXT: kxorq %k7, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k3
@ -1872,7 +1872,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0
; AVX512F-32-NEXT: kxorq %k4, %k0, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -1881,7 +1881,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4
; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -1890,7 +1890,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %bl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %ebx, %k5
@ -1898,7 +1898,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k6, %k7
; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kmovd %edx, %k6
@ -1906,7 +1906,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1
; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -1914,7 +1914,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2
; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -1927,12 +1927,12 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $12, %edx
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0
; AVX512F-32-NEXT: kmovd %edx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
@ -1943,24 +1943,24 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $14, %edi
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4
; AVX512F-32-NEXT: kmovd %edi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4
; AVX512F-32-NEXT: kmovd %esi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4
; AVX512F-32-NEXT: kmovd %eax, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
@ -1971,14 +1971,14 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4
; AVX512F-32-NEXT: kxorq %k5, %k4, %k4
; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k4
; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3
; AVX512F-32-NEXT: kxorq %k6, %k3, %k6
; AVX512F-32-NEXT: kmovd %edx, %k3
@ -1988,7 +1988,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6
; AVX512F-32-NEXT: kxorq %k4, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kmovd %eax, %k4
@ -1996,7 +1996,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6
; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
; AVX512F-32-NEXT: kmovd %edx, %k2
@ -2005,7 +2005,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k1, %k6
; AVX512F-32-NEXT: kshiftrq $21, %k6, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -2014,7 +2014,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $42, %k0, %k0
; AVX512F-32-NEXT: kxorq %k6, %k0, %k6
; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $22, %k6, %k0
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -2023,7 +2023,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k5
@ -2031,7 +2031,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k6, %k7
; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6
; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
; AVX512F-32-NEXT: kmovd %eax, %k6
@ -2040,7 +2040,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k3
@ -2048,7 +2048,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kshiftrq $26, %k4, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -2059,12 +2059,12 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $28, %edx
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $37, %k7, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $27, %k4, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $36, %k1, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kshiftrq $28, %k1, %k4
; AVX512F-32-NEXT: kmovd %edx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
@ -2077,24 +2077,24 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $30, %esi
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $29, %k1, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $34, %k0, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftrq $30, %k0, %k1
; AVX512F-32-NEXT: kmovd %esi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $33, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $31, %k0, %k1
; AVX512F-32-NEXT: kmovd %ecx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $32, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
; AVX512F-32-NEXT: kmovd %ebx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
@ -2103,12 +2103,12 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $7, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $31, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $33, %k0, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $30, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $34, %k0, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k6
@ -2118,7 +2118,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5
; AVX512F-32-NEXT: kxorq %k0, %k5, %k5
; AVX512F-32-NEXT: kxorq %k5, %k0, %k5
; AVX512F-32-NEXT: kshiftrq $35, %k5, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -2126,7 +2126,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %al
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3
; AVX512F-32-NEXT: kxorq %k5, %k3, %k5
; AVX512F-32-NEXT: kxorq %k3, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3
; AVX512F-32-NEXT: kxorq %k2, %k3, %k2
; AVX512F-32-NEXT: kmovd %eax, %k3
@ -2135,7 +2135,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $27, %k2, %k2
; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
; AVX512F-32-NEXT: kxorq %k2, %k5, %k2
; AVX512F-32-NEXT: kshiftrq $37, %k2, %k5
; AVX512F-32-NEXT: kxorq %k4, %k5, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -2144,7 +2144,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $26, %k5, %k5
; AVX512F-32-NEXT: kxorq %k2, %k5, %k2
; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $38, %k2, %k5
; AVX512F-32-NEXT: kxorq %k7, %k5, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k5
@ -2153,7 +2153,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kshiftrq $39, %k7, %k2
; AVX512F-32-NEXT: kxorq %k6, %k2, %k6
; AVX512F-32-NEXT: kmovd %edx, %k2
@ -2163,7 +2163,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -2174,28 +2174,28 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $12, %ecx
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: shrl $14, %edi
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $22, %k0, %k0
; AVX512F-32-NEXT: kxorq %k6, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k6, %k0
; AVX512F-32-NEXT: kshiftrq $42, %k0, %k6
; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
; AVX512F-32-NEXT: kmovd %edi, %k7
; AVX512F-32-NEXT: shrl $15, %esi
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $43, %k0, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kmovd %esi, %k6
; AVX512F-32-NEXT: shrb $3, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k3
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kshiftrq $44, %k3, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k1
; AVX512F-32-NEXT: kmovd %edx, %k0
@ -2203,7 +2203,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $19, %k1, %k1
; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k3, %k1
; AVX512F-32-NEXT: kshiftrq $45, %k1, %k3
; AVX512F-32-NEXT: kxorq %k5, %k3, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k3
@ -2212,7 +2212,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $46, %k1, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -2220,12 +2220,12 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $6, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5
; AVX512F-32-NEXT: kxorq %k1, %k5, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $47, %k1, %k5
; AVX512F-32-NEXT: kxorq %k6, %k5, %k5
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5
; AVX512F-32-NEXT: kxorq %k1, %k5, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $48, %k1, %k5
; AVX512F-32-NEXT: kmovd %eax, %k6
; AVX512F-32-NEXT: kxorq %k6, %k5, %k6
@ -2236,14 +2236,14 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $49, %k1, %k6
; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $14, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k1, %k6
; AVX512F-32-NEXT: kshiftrq $50, %k6, %k1
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
@ -2254,7 +2254,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k0
@ -2262,14 +2262,14 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: shrb $3, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k4
@ -2278,40 +2278,40 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $8, %k2, %k2
; AVX512F-32-NEXT: kxorq %k6, %k2, %k2
; AVX512F-32-NEXT: kxorq %k2, %k6, %k2
; AVX512F-32-NEXT: kshiftrq $56, %k2, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
; AVX512F-32-NEXT: kshiftrq $57, %k1, %k2
; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $6, %k0, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1
; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1
; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
@ -2321,18 +2321,18 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: shrl $30, %ecx
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1
; AVX512F-32-NEXT: kmovd %ecx, %k2
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovd %eax, %k1
@ -2544,7 +2544,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k7
; AVX512F-32-NEXT: kxorq %k1, %k5, %k7
; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
; AVX512F-32-NEXT: kmovd %ecx, %k5
@ -2555,7 +2555,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2
; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -2563,7 +2563,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0
; AVX512F-32-NEXT: kxorq %k7, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k3
@ -2572,7 +2572,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0
; AVX512F-32-NEXT: kxorq %k4, %k0, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -2581,7 +2581,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4
; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -2590,7 +2590,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %bl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %ebx, %k5
@ -2598,7 +2598,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k6, %k7
; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kmovd %edx, %k6
@ -2606,7 +2606,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1
; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -2614,7 +2614,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2
; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -2627,12 +2627,12 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $12, %edx
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0
; AVX512F-32-NEXT: kmovd %edx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
@ -2643,24 +2643,24 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $14, %edi
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4
; AVX512F-32-NEXT: kmovd %edi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4
; AVX512F-32-NEXT: kmovd %esi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4
; AVX512F-32-NEXT: kmovd %eax, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
@ -2671,14 +2671,14 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4
; AVX512F-32-NEXT: kxorq %k5, %k4, %k4
; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4
; AVX512F-32-NEXT: kxorq %k3, %k4, %k4
; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3
; AVX512F-32-NEXT: kxorq %k6, %k3, %k6
; AVX512F-32-NEXT: kmovd %edx, %k3
@ -2688,7 +2688,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6
; AVX512F-32-NEXT: kxorq %k4, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kmovd %eax, %k4
@ -2696,7 +2696,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6
; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
; AVX512F-32-NEXT: kmovd %edx, %k2
@ -2705,7 +2705,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k1, %k6
; AVX512F-32-NEXT: kshiftrq $21, %k6, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -2714,7 +2714,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $42, %k0, %k0
; AVX512F-32-NEXT: kxorq %k6, %k0, %k6
; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $22, %k6, %k0
; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -2723,7 +2723,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k5
@ -2731,7 +2731,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k6, %k7
; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6
; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
; AVX512F-32-NEXT: kmovd %eax, %k6
@ -2740,7 +2740,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3
; AVX512F-32-NEXT: kxorq %k7, %k3, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k3
@ -2748,7 +2748,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kshiftrq $26, %k4, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k2
@ -2759,12 +2759,12 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $28, %edx
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $37, %k7, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $27, %k4, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $36, %k1, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kshiftrq $28, %k1, %k4
; AVX512F-32-NEXT: kmovd %edx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
@ -2777,24 +2777,24 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $30, %esi
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $29, %k1, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $34, %k0, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftrq $30, %k0, %k1
; AVX512F-32-NEXT: kmovd %esi, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $33, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $31, %k0, %k1
; AVX512F-32-NEXT: kmovd %ecx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $32, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
; AVX512F-32-NEXT: kmovd %ebx, %k7
; AVX512F-32-NEXT: kxorq %k7, %k1, %k1
@ -2803,12 +2803,12 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $7, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $31, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $33, %k0, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $30, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $34, %k0, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k6
@ -2818,7 +2818,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5
; AVX512F-32-NEXT: kxorq %k0, %k5, %k5
; AVX512F-32-NEXT: kxorq %k5, %k0, %k5
; AVX512F-32-NEXT: kshiftrq $35, %k5, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kmovd %ecx, %k0
@ -2826,7 +2826,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %al
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3
; AVX512F-32-NEXT: kxorq %k5, %k3, %k5
; AVX512F-32-NEXT: kxorq %k3, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3
; AVX512F-32-NEXT: kxorq %k2, %k3, %k2
; AVX512F-32-NEXT: kmovd %eax, %k3
@ -2835,7 +2835,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $3, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $27, %k2, %k2
; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
; AVX512F-32-NEXT: kxorq %k2, %k5, %k2
; AVX512F-32-NEXT: kshiftrq $37, %k2, %k5
; AVX512F-32-NEXT: kxorq %k4, %k5, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -2844,7 +2844,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $26, %k5, %k5
; AVX512F-32-NEXT: kxorq %k2, %k5, %k2
; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $38, %k2, %k5
; AVX512F-32-NEXT: kxorq %k7, %k5, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k5
@ -2853,7 +2853,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
; AVX512F-32-NEXT: kshiftrq $39, %k7, %k2
; AVX512F-32-NEXT: kxorq %k6, %k2, %k6
; AVX512F-32-NEXT: kmovd %edx, %k2
@ -2863,7 +2863,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7
; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kmovd %ecx, %k1
@ -2874,28 +2874,28 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $12, %ecx
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: shrl $14, %edi
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $22, %k0, %k0
; AVX512F-32-NEXT: kxorq %k6, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k6, %k0
; AVX512F-32-NEXT: kshiftrq $42, %k0, %k6
; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
; AVX512F-32-NEXT: kmovd %edi, %k7
; AVX512F-32-NEXT: shrl $15, %esi
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k0
; AVX512F-32-NEXT: kxorq %k3, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $43, %k0, %k3
; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
; AVX512F-32-NEXT: kmovd %esi, %k6
; AVX512F-32-NEXT: shrb $3, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3
; AVX512F-32-NEXT: kxorq %k0, %k3, %k3
; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
; AVX512F-32-NEXT: kshiftrq $44, %k3, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k1
; AVX512F-32-NEXT: kmovd %edx, %k0
@ -2903,7 +2903,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $4, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $19, %k1, %k1
; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k3, %k1
; AVX512F-32-NEXT: kshiftrq $45, %k1, %k3
; AVX512F-32-NEXT: kxorq %k5, %k3, %k4
; AVX512F-32-NEXT: kmovd %ecx, %k3
@ -2912,7 +2912,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: andb $1, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4
; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $46, %k1, %k4
; AVX512F-32-NEXT: kxorq %k7, %k4, %k5
; AVX512F-32-NEXT: kmovd %ecx, %k4
@ -2920,12 +2920,12 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $6, %cl
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5
; AVX512F-32-NEXT: kxorq %k1, %k5, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $47, %k1, %k5
; AVX512F-32-NEXT: kxorq %k6, %k5, %k5
; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5
; AVX512F-32-NEXT: kxorq %k1, %k5, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $48, %k1, %k5
; AVX512F-32-NEXT: kmovd %eax, %k6
; AVX512F-32-NEXT: kxorq %k6, %k5, %k6
@ -2936,14 +2936,14 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $49, %k1, %k6
; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $14, %k6, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
; AVX512F-32-NEXT: kxorq %k6, %k1, %k6
; AVX512F-32-NEXT: kshiftrq $50, %k6, %k1
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
@ -2954,7 +2954,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7
; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k0
@ -2962,14 +2962,14 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrb $2, %dl
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7
; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: shrb $3, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7
; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k4
@ -2978,40 +2978,40 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7
; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7
; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7
; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
; AVX512F-32-NEXT: kshiftrq $8, %k2, %k2
; AVX512F-32-NEXT: kxorq %k6, %k2, %k2
; AVX512F-32-NEXT: kxorq %k2, %k6, %k2
; AVX512F-32-NEXT: kshiftrq $56, %k2, %k6
; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
; AVX512F-32-NEXT: kshiftrq $57, %k1, %k2
; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $6, %k0, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1
; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1
; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1
; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
@ -3021,18 +3021,18 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: shrl $30, %ecx
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1
; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1
; AVX512F-32-NEXT: kmovd %ecx, %k2
; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1
; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kxorq %k1, %k0, %k0
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovd %eax, %k1