[X86] Call SimplifyDemandedVectorElts on KSHIFTL/KSHIFTR nodes during DAG combine.

llvm-svn: 372154
This commit is contained in:
Craig Topper 2019-09-17 18:02:52 +00:00
parent b50894b9c3
commit f1ba94ade0
5 changed files with 721 additions and 851 deletions

View File

@ -45107,6 +45107,20 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// DAG combine hook for the X86ISD::KSHIFTL / X86ISD::KSHIFTR mask-shift nodes.
///
/// Asks the target lowering to run SimplifyDemandedVectorElts on result 0 of
/// \p N with every element of the result vector marked as demanded.
///
/// \param N   The node being combined.
/// \param DAG The selection DAG that owns \p N.
/// \param DCI Combiner state forwarded to SimplifyDemandedVectorElts.
/// \returns Result 0 of \p N when SimplifyDemandedVectorElts returns true,
///          otherwise an empty SDValue.
static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Result(N, 0);
  const unsigned NumElts = N->getValueType(0).getVectorNumElements();

  // Demand every lane of the result; the undef/zero masks are outputs of the
  // call and are not used afterwards.
  const APInt AllElts = APInt::getAllOnesValue(NumElts);
  APInt UndefElts, ZeroElts;

  const TargetLowering &Lowering = DAG.getTargetLoweringInfo();
  const bool Simplified = Lowering.SimplifyDemandedVectorElts(
      Result, AllElts, UndefElts, ZeroElts, DCI);
  if (!Simplified)
    return SDValue();

  return Result;
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@ -45247,6 +45261,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
}
return SDValue();

View File

@ -1886,332 +1886,311 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kmovw %edx, %k1
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %edx, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kxorw %k0, %k2, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %r8d, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k1
; KNL-NEXT: kmovw %r9d, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k3
; KNL-NEXT: kxorw %k0, %k3, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k2
; KNL-NEXT: kmovw %ecx, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k2
; KNL-NEXT: kmovw %r8d, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k2
; KNL-NEXT: kmovw %r9d, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $8, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $7, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $6, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $5, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $4, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $3, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: korw %k2, %k0, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k4
; KNL-NEXT: kxorw %k0, %k4, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k3
; KNL-NEXT: kxorw %k0, %k3, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k4
; KNL-NEXT: kxorw %k2, %k4, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: kxorw %k2, %k3, %k2
; KNL-NEXT: kshiftrw $3, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $12, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $4, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $11, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $5, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $10, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $6, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $9, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $7, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $8, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $8, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $7, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $6, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $5, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $4, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $3, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: kxorw %k3, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: korw %k3, %k0, %k3
; KNL-NEXT: korw %k3, %k2, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k5
; KNL-NEXT: kxorw %k0, %k5, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k4
; KNL-NEXT: kxorw %k0, %k4, %k4
; KNL-NEXT: kshiftrw $2, %k4, %k5
; KNL-NEXT: kxorw %k3, %k5, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k3
; KNL-NEXT: kxorw %k3, %k4, %k3
; KNL-NEXT: kshiftrw $3, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $12, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $4, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $11, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $5, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $6, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $9, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $7, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $8, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $8, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $7, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $9, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $6, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $10, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $5, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $11, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $4, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $12, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $3, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $2, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $14, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $1, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: korw %k4, %k3, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k5
; KNL-NEXT: kxorw %k4, %k5, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $13, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k4
@ -2329,228 +2308,113 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
;
; AVX512DQNOBW-LABEL: test21:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: kmovw %esi, %k0
; AVX512DQNOBW-NEXT: kmovw %edi, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k0, %k2, %k0
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQNOBW-NEXT: kxorw %k0, %k1, %k0
; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k1
; AVX512DQNOBW-NEXT: kmovw %edx, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k1
; AVX512DQNOBW-NEXT: kmovw %ecx, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k1
; AVX512DQNOBW-NEXT: kmovw %r8d, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k1
; AVX512DQNOBW-NEXT: kmovw %r9d, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k3
; AVX512DQNOBW-NEXT: kxorw %k1, %k3, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %edx, %k0
; AVX512DQNOBW-NEXT: kmovw %edi, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k1, %k2, %k1
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k1, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k3
; AVX512DQNOBW-NEXT: kxorw %k0, %k3, %k0
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0
; AVX512DQNOBW-NEXT: kxorw %k0, %k2, %k0
; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k2
; AVX512DQNOBW-NEXT: kmovw %ecx, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k2
; AVX512DQNOBW-NEXT: kmovw %r8d, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k2
; AVX512DQNOBW-NEXT: kmovw %r9d, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k4
; AVX512DQNOBW-NEXT: kxorw %k1, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k4
; AVX512DQNOBW-NEXT: kxorw %k2, %k4, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $14, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k3, %k2
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k3
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $13, %k3, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k3
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
@ -2645,18 +2509,12 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k5
; AVX512DQNOBW-NEXT: kxorw %k1, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $2, %k4, %k5
; AVX512DQNOBW-NEXT: kxorw %k3, %k5, %k3
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $14, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $13, %k3, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k4, %k3
; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $13, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $3, %k3, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
@ -2747,13 +2605,113 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: vpmovm2d %k3, %zmm4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k1, %k5, %k1
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k5
; AVX512DQNOBW-NEXT: kxorw %k4, %k5, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $13, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $12, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $11, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $9, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $8, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $7, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $6, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $5, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $4, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $2, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k4
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: korw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1
; AVX512DQNOBW-NEXT: vpmovm2d %k2, %zmm4
; AVX512DQNOBW-NEXT: vpmovm2d %k3, %zmm4
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4
; AVX512DQNOBW-NEXT: vpmovm2d %k2, %zmm4
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4

View File

@ -2753,229 +2753,114 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; KNL-LABEL: store_64i1:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edx, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %r8d, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %r9d, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kmovw %esi, %k2
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k1, %k2, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k3
; KNL-NEXT: kxorw %k0, %k3, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: kmovw %r8d, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kmovw %r9d, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $8, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $7, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $6, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $5, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $4, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $3, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k4
; KNL-NEXT: kxorw %k1, %k3, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k4
; KNL-NEXT: kxorw %k2, %k4, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: kxorw %k2, %k3, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftrw $3, %k2, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
@ -3070,18 +2955,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k5
; KNL-NEXT: kxorw %k1, %k4, %k4
; KNL-NEXT: kshiftrw $2, %k4, %k5
; KNL-NEXT: kxorw %k3, %k5, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $14, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k3
; KNL-NEXT: kxorw %k3, %k4, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $13, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftrw $3, %k3, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
@ -3172,9 +3051,109 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: korw %k4, %k3, %k3
; KNL-NEXT: kmovw %k3, 6(%rdi)
; KNL-NEXT: kmovw %k2, 4(%rdi)
; KNL-NEXT: kmovw %k1, 2(%rdi)
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k1, %k5, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k5
; KNL-NEXT: kxorw %k4, %k5, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $13, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $12, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $11, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $9, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $8, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $7, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $6, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $5, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $4, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $3, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $2, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: korw %k4, %k1, %k1
; KNL-NEXT: kmovw %k1, 6(%rdi)
; KNL-NEXT: kmovw %k3, 4(%rdi)
; KNL-NEXT: kmovw %k2, 2(%rdi)
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
@ -3196,229 +3175,114 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; AVX512DQ-LABEL: store_64i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edx, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQ-NEXT: kxorw %k0, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $2, %k0, %k1
; AVX512DQ-NEXT: kmovw %ecx, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $3, %k0, %k1
; AVX512DQ-NEXT: kmovw %r8d, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $4, %k0, %k1
; AVX512DQ-NEXT: kmovw %r9d, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $6, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $7, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $9, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $10, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $11, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: korw %k1, %k0, %k0
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k3
; AVX512DQ-NEXT: kxorw %k1, %k3, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: kmovw %esi, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1
; AVX512DQ-NEXT: kxorw %k1, %k2, %k1
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k1, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k3
; AVX512DQ-NEXT: kxorw %k0, %k3, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $3, %k0, %k2
; AVX512DQ-NEXT: kmovw %r8d, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $12, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $4, %k0, %k2
; AVX512DQ-NEXT: kmovw %r9d, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $11, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $10, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $6, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $7, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $8, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $7, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $9, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $6, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $10, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $5, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $11, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $4, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: korw %k2, %k1, %k1
; AVX512DQ-NEXT: korw %k2, %k0, %k0
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k4
; AVX512DQ-NEXT: kxorw %k1, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $2, %k3, %k4
; AVX512DQ-NEXT: kxorw %k2, %k4, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $14, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k3, %k2
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k3
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $13, %k3, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k3
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
@ -3513,18 +3377,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k5
; AVX512DQ-NEXT: kxorw %k1, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $2, %k4, %k5
; AVX512DQ-NEXT: kxorw %k3, %k5, %k3
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $14, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $13, %k3, %k3
; AVX512DQ-NEXT: kxorw %k3, %k4, %k3
; AVX512DQ-NEXT: kshiftrw $2, %k3, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $13, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $3, %k3, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
@ -3615,9 +3473,109 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: korw %k4, %k3, %k3
; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k1, %k5, %k1
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k5
; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $13, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $12, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $11, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $9, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $8, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $7, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $6, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $5, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $4, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $2, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k4
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: korw %k4, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, 6(%rdi)
; AVX512DQ-NEXT: kmovw %k3, 4(%rdi)
; AVX512DQ-NEXT: kmovw %k2, 2(%rdi)
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: retq
;

View File

@ -1730,24 +1730,20 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
;
; AVX512-LABEL: smulo_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
; AVX512-NEXT: vmovq %xmm1, %rdx
; AVX512-NEXT: vmovq %xmm0, %rsi
; AVX512-NEXT: vmovq %xmm1, %rax
; AVX512-NEXT: vmovq %xmm0, %rcx
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
; AVX512-NEXT: vpextrq $1, %xmm0, %rsi
; AVX512-NEXT: imulq %rdx, %rsi
; AVX512-NEXT: seto %dl
; AVX512-NEXT: vmovq %rsi, %xmm0
; AVX512-NEXT: imulq %rax, %rcx
; AVX512-NEXT: vmovq %rcx, %xmm0
; AVX512-NEXT: vmovq %rsi, %xmm1
; AVX512-NEXT: vmovq %rcx, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kmovd %edx, %k1
; AVX512-NEXT: kshiftrw $1, %k1, %k2
; AVX512-NEXT: kxorw %k0, %k2, %k0
; AVX512-NEXT: kshiftlw $15, %k0, %k0
; AVX512-NEXT: kshiftrw $14, %k0, %k0
; AVX512-NEXT: kxorw %k0, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k0, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
@ -2201,73 +2197,46 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
;
; AVX512-LABEL: smulo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: kshiftrw $3, %k0, %k1
; AVX512-NEXT: kmovd %k1, %r9d
; AVX512-NEXT: andb $1, %r9b
; AVX512-NEXT: negb %r9b
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512-NEXT: kshiftrw $3, %k1, %k2
; AVX512-NEXT: kmovd %k2, %r10d
; AVX512-NEXT: andb $1, %r10b
; AVX512-NEXT: negb %r10b
; AVX512-NEXT: kshiftrw $2, %k1, %k2
; AVX512-NEXT: kmovd %k2, %r11d
; AVX512-NEXT: andb $1, %r11b
; AVX512-NEXT: negb %r11b
; AVX512-NEXT: kshiftrw $2, %k0, %k2
; AVX512-NEXT: kmovd %k2, %ebx
; AVX512-NEXT: andb $1, %bl
; AVX512-NEXT: negb %bl
; AVX512-NEXT: kshiftrw $1, %k0, %k2
; AVX512-NEXT: kmovd %k2, %esi
; AVX512-NEXT: kmovd %k1, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: negb %cl
; AVX512-NEXT: kshiftrw $2, %k0, %k1
; AVX512-NEXT: kmovd %k0, %esi
; AVX512-NEXT: andb $1, %sil
; AVX512-NEXT: negb %sil
; AVX512-NEXT: kshiftrw $1, %k1, %k2
; AVX512-NEXT: kmovd %k2, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: negb %dl
; AVX512-NEXT: kmovd %k1, %eax
; AVX512-NEXT: andb $1, %al
; AVX512-NEXT: negb %al
; AVX512-NEXT: kmovd %k0, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: negb %cl
; AVX512-NEXT: kmovd %k2, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: negb %dl
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: imulb %cl
; AVX512-NEXT: imulb %dl
; AVX512-NEXT: movl %eax, %r8d
; AVX512-NEXT: seto %al
; AVX512-NEXT: movl %r8d, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: negb %cl
; AVX512-NEXT: cmpb %r8b, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: movl %r8d, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: negb %dl
; AVX512-NEXT: cmpb %r8b, %dl
; AVX512-NEXT: setne %dl
; AVX512-NEXT: orb %al, %dl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kshiftrw $1, %k0, %k1
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: imulb %sil
; AVX512-NEXT: movl %eax, %edx
; AVX512-NEXT: seto %al
; AVX512-NEXT: movl %edx, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: negb %cl
; AVX512-NEXT: cmpb %dl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
; AVX512-NEXT: movl %r11d, %eax
; AVX512-NEXT: imulb %bl
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: imulb %cl
; AVX512-NEXT: movl %eax, %esi
; AVX512-NEXT: seto %al
; AVX512-NEXT: movl %esi, %ecx
@ -2278,38 +2247,37 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k0, %k0
; AVX512-NEXT: kshiftrw $14, %k0, %k0
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftlw $13, %k0, %k0
; AVX512-NEXT: kshiftrw $13, %k0, %k0
; AVX512-NEXT: movl %r10d, %eax
; AVX512-NEXT: imulb %r9b
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: movl %r9d, %eax
; AVX512-NEXT: imulb %r10b
; AVX512-NEXT: # kill: def $al killed $al def $eax
; AVX512-NEXT: seto %cl
; AVX512-NEXT: movl %eax, %ebx
; AVX512-NEXT: andb $1, %bl
; AVX512-NEXT: negb %bl
; AVX512-NEXT: cmpb %al, %bl
; AVX512-NEXT: setne %bl
; AVX512-NEXT: orb %cl, %bl
; AVX512-NEXT: movl %eax, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: negb %dl
; AVX512-NEXT: cmpb %al, %dl
; AVX512-NEXT: setne %dl
; AVX512-NEXT: orb %cl, %dl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kmovd %ecx, %k1
; AVX512-NEXT: kshiftlw $3, %k1, %k1
; AVX512-NEXT: korw %k1, %k0, %k1
; AVX512-NEXT: kmovd %ecx, %k2
; AVX512-NEXT: kshiftlw $3, %k2, %k2
; AVX512-NEXT: korw %k2, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kmovd %r8d, %k0
; AVX512-NEXT: kshiftrw $1, %k0, %k1
; AVX512-NEXT: kmovd %edx, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
; AVX512-NEXT: kmovd %r8d, %k1
; AVX512-NEXT: kmovd %esi, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kxorw %k0, %k2, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k2
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
@ -2321,7 +2289,6 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: retq
%t = call {<4 x i1>, <4 x i1>} @llvm.smul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0

View File

@ -1532,26 +1532,21 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
;
; AVX512-LABEL: umulo_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
; AVX512-NEXT: vpextrq $1, %xmm1, %r8
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vmovq %xmm1, %rdx
; AVX512-NEXT: vmovq %xmm0, %rcx
; AVX512-NEXT: vmovq %xmm1, %rsi
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
; AVX512-NEXT: mulq %rdx
; AVX512-NEXT: movq %rax, %rsi
; AVX512-NEXT: seto %r9b
; AVX512-NEXT: movq %rcx, %rax
; AVX512-NEXT: mulq %r8
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vmovq %rsi, %xmm1
; AVX512-NEXT: movq %rcx, %rax
; AVX512-NEXT: mulq %rsi
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kmovd %r9d, %k1
; AVX512-NEXT: kshiftrw $1, %k1, %k2
; AVX512-NEXT: kxorw %k0, %k2, %k0
; AVX512-NEXT: kshiftlw $15, %k0, %k0
; AVX512-NEXT: kshiftrw $14, %k0, %k0
; AVX512-NEXT: kxorw %k0, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k0, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
@ -1950,7 +1945,6 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
;
; AVX512-LABEL: umulo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: kshiftrw $3, %k0, %k1
@ -1962,47 +1956,26 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: kmovd %k2, %r10d
; AVX512-NEXT: andb $1, %r10b
; AVX512-NEXT: kshiftrw $2, %k0, %k2
; AVX512-NEXT: kmovd %k2, %r11d
; AVX512-NEXT: andb $1, %r11b
; AVX512-NEXT: kshiftrw $2, %k1, %k2
; AVX512-NEXT: kmovd %k2, %ebx
; AVX512-NEXT: andb $1, %bl
; AVX512-NEXT: kshiftrw $1, %k0, %k2
; AVX512-NEXT: kmovd %k2, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: kshiftrw $1, %k1, %k2
; AVX512-NEXT: kmovd %k2, %esi
; AVX512-NEXT: kmovd %k0, %esi
; AVX512-NEXT: andb $1, %sil
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $1, %al
; AVX512-NEXT: kshiftrw $2, %k1, %k0
; AVX512-NEXT: kmovd %k1, %ecx
; AVX512-NEXT: andb $1, %cl
; AVX512-NEXT: kmovd %k2, %eax
; AVX512-NEXT: andb $1, %al
; AVX512-NEXT: kmovd %k0, %edx
; AVX512-NEXT: andb $1, %dl
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: mulb %cl
; AVX512-NEXT: mulb %dl
; AVX512-NEXT: movl %eax, %r8d
; AVX512-NEXT: seto %al
; AVX512-NEXT: testb $-2, %r8b
; AVX512-NEXT: setne %cl
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: setne %dl
; AVX512-NEXT: orb %al, %dl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kshiftrw $1, %k0, %k1
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: mulb %sil
; AVX512-NEXT: movl %eax, %edx
; AVX512-NEXT: seto %al
; AVX512-NEXT: testb $-2, %dl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
; AVX512-NEXT: movl %r11d, %eax
; AVX512-NEXT: mulb %bl
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: mulb %cl
; AVX512-NEXT: movl %eax, %esi
; AVX512-NEXT: seto %al
; AVX512-NEXT: testb $-2, %sil
@ -2010,35 +1983,34 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: orb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: kmovd %eax, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k0, %k0
; AVX512-NEXT: kshiftrw $14, %k0, %k0
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftlw $13, %k0, %k0
; AVX512-NEXT: kshiftrw $13, %k0, %k0
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: movl %r9d, %eax
; AVX512-NEXT: mulb %r10b
; AVX512-NEXT: # kill: def $al killed $al def $eax
; AVX512-NEXT: seto %cl
; AVX512-NEXT: testb $-2, %al
; AVX512-NEXT: setne %bl
; AVX512-NEXT: orb %cl, %bl
; AVX512-NEXT: setne %dl
; AVX512-NEXT: orb %cl, %dl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kmovd %ecx, %k1
; AVX512-NEXT: kshiftlw $3, %k1, %k1
; AVX512-NEXT: korw %k1, %k0, %k1
; AVX512-NEXT: kmovd %ecx, %k2
; AVX512-NEXT: kshiftlw $3, %k2, %k2
; AVX512-NEXT: korw %k2, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kmovd %r8d, %k0
; AVX512-NEXT: kshiftrw $1, %k0, %k1
; AVX512-NEXT: kmovd %edx, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $14, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k1
; AVX512-NEXT: kmovd %r8d, %k1
; AVX512-NEXT: kmovd %esi, %k2
; AVX512-NEXT: kxorw %k2, %k1, %k1
; AVX512-NEXT: kxorw %k0, %k2, %k0
; AVX512-NEXT: kshiftrw $2, %k0, %k2
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
@ -2050,7 +2022,6 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: retq
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0