forked from OSchip/llvm-project
[X86] Call SimplifyDemandedVectorElts on KSHIFTL/KSHIFTR nodes during DAG combine.
llvm-svn: 372154
This commit is contained in:
parent
b50894b9c3
commit
f1ba94ade0
|
@ -45107,6 +45107,20 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DAG-combine hook for mask-register shift nodes; PerformDAGCombine routes
// X86ISD::KSHIFTL and X86ISD::KSHIFTR here.
//
// N   - the node being combined.
// DAG - the selection DAG, used here only to obtain the TargetLowering.
// DCI - combiner state forwarded to SimplifyDemandedVectorElts.
//
// Returns SDValue(N, 0) when SimplifyDemandedVectorElts reports true, and an
// empty SDValue otherwise.
static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI) {
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
|
||||||
|
APInt KnownUndef, KnownZero;
|
||||||
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
// Demand every element of the result: one set bit per vector element of VT.
APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
|
||||||
|
// KnownUndef/KnownZero are passed as outputs and are not read afterwards;
// only the boolean result is used.
// NOTE(review): presumably "true" means the DAG was changed - confirm against
// the TargetLowering::SimplifyDemandedVectorElts documentation.
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
|
||||||
|
KnownZero, DCI))
|
||||||
|
// Hand back N itself rather than a new replacement value.
return SDValue(N, 0);
|
||||||
|
|
||||||
|
// Empty SDValue: this hook has nothing to report for N.
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
DAGCombinerInfo &DCI) const {
|
DAGCombinerInfo &DCI) const {
|
||||||
SelectionDAG &DAG = DCI.DAG;
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
|
@ -45247,6 +45261,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
|
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
|
||||||
case X86ISD::PMULDQ:
|
case X86ISD::PMULDQ:
|
||||||
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
|
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
|
||||||
|
case X86ISD::KSHIFTL:
|
||||||
|
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
|
@ -1886,332 +1886,311 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
|
||||||
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; KNL-LABEL: test21:
|
; KNL-LABEL: test21:
|
||||||
; KNL: # %bb.0:
|
; KNL: # %bb.0:
|
||||||
; KNL-NEXT: kmovw %esi, %k0
|
; KNL-NEXT: kmovw %edx, %k1
|
||||||
; KNL-NEXT: kmovw %edi, %k1
|
; KNL-NEXT: kmovw %edi, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k2
|
|
||||||
; KNL-NEXT: kxorw %k0, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
||||||
; KNL-NEXT: kxorw %k0, %k1, %k0
|
; KNL-NEXT: kxorw %k0, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
; KNL-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; KNL-NEXT: kmovw %edx, %k2
|
; KNL-NEXT: kxorw %k1, %k3, %k1
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $13, %k1, %k1
|
; KNL-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
; KNL-NEXT: kxorw %k1, %k2, %k1
|
||||||
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
; KNL-NEXT: kshiftrw $3, %k1, %k2
|
||||||
; KNL-NEXT: kmovw %ecx, %k2
|
; KNL-NEXT: kmovw %ecx, %k3
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $12, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $4, %k0, %k1
|
|
||||||
; KNL-NEXT: kmovw %r8d, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $11, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $5, %k0, %k1
|
|
||||||
; KNL-NEXT: kmovw %r9d, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $10, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $6, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $7, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $8, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $8, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $7, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $9, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $6, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $10, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $5, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $11, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $4, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $12, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $2, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: korw %k1, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k0
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k3
|
|
||||||
; KNL-NEXT: kxorw %k0, %k3, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; KNL-NEXT: kxorw %k0, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $2, %k0, %k2
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k2, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k0, %k2
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $12, %k2, %k2
|
; KNL-NEXT: kshiftrw $12, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $4, %k0, %k2
|
; KNL-NEXT: kshiftrw $4, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: kmovw %r8d, %k3
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $11, %k2, %k2
|
; KNL-NEXT: kshiftrw $11, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $5, %k0, %k2
|
; KNL-NEXT: kshiftrw $5, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: kmovw %r9d, %k3
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $10, %k2, %k2
|
; KNL-NEXT: kshiftrw $10, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $6, %k0, %k2
|
; KNL-NEXT: kshiftrw $6, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $9, %k2, %k2
|
; KNL-NEXT: kshiftrw $9, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $7, %k0, %k2
|
; KNL-NEXT: kshiftrw $7, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $8, %k2, %k2
|
; KNL-NEXT: kshiftrw $8, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $8, %k0, %k2
|
; KNL-NEXT: kshiftrw $8, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $7, %k2, %k2
|
; KNL-NEXT: kshiftrw $7, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $9, %k0, %k2
|
; KNL-NEXT: kshiftrw $9, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $6, %k2, %k2
|
; KNL-NEXT: kshiftrw $6, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $10, %k0, %k2
|
; KNL-NEXT: kshiftrw $10, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $5, %k2, %k2
|
; KNL-NEXT: kshiftrw $5, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $11, %k0, %k2
|
; KNL-NEXT: kshiftrw $11, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $4, %k2, %k2
|
; KNL-NEXT: kshiftrw $4, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $12, %k0, %k2
|
; KNL-NEXT: kshiftrw $12, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $3, %k2, %k2
|
; KNL-NEXT: kshiftrw $3, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $13, %k0, %k2
|
; KNL-NEXT: kshiftrw $13, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k2, %k2
|
; KNL-NEXT: kshiftrw $2, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k2
|
; KNL-NEXT: kshiftrw $14, %k1, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
; KNL-NEXT: kmovw %eax, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: korw %k2, %k0, %k2
|
; KNL-NEXT: korw %k2, %k1, %k1
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k0
|
; KNL-NEXT: kmovw %eax, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k4
|
; KNL-NEXT: kxorw %k0, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k0, %k4, %k0
|
; KNL-NEXT: kshiftrw $2, %k3, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k4, %k2
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k0, %k3, %k0
|
; KNL-NEXT: kshiftrw $13, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k0, %k3
|
; KNL-NEXT: kxorw %k2, %k3, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: kshiftrw $3, %k2, %k3
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k3, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k0, %k3
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $12, %k3, %k3
|
; KNL-NEXT: kshiftrw $12, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $4, %k0, %k3
|
; KNL-NEXT: kshiftrw $4, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $11, %k3, %k3
|
; KNL-NEXT: kshiftrw $11, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $5, %k0, %k3
|
; KNL-NEXT: kshiftrw $5, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $10, %k3, %k3
|
; KNL-NEXT: kshiftrw $10, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $6, %k0, %k3
|
; KNL-NEXT: kshiftrw $6, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $9, %k3, %k3
|
; KNL-NEXT: kshiftrw $9, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $7, %k0, %k3
|
; KNL-NEXT: kshiftrw $7, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $8, %k3, %k3
|
; KNL-NEXT: kshiftrw $8, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $8, %k0, %k3
|
; KNL-NEXT: kshiftrw $8, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $7, %k3, %k3
|
; KNL-NEXT: kshiftrw $7, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $9, %k0, %k3
|
; KNL-NEXT: kshiftrw $9, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $6, %k3, %k3
|
; KNL-NEXT: kshiftrw $6, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $10, %k0, %k3
|
; KNL-NEXT: kshiftrw $10, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $5, %k3, %k3
|
; KNL-NEXT: kshiftrw $5, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $11, %k0, %k3
|
; KNL-NEXT: kshiftrw $11, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $4, %k3, %k3
|
; KNL-NEXT: kshiftrw $4, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $12, %k0, %k3
|
; KNL-NEXT: kshiftrw $12, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $3, %k3, %k3
|
; KNL-NEXT: kshiftrw $3, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $13, %k0, %k3
|
; KNL-NEXT: kshiftrw $13, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $2, %k3, %k3
|
; KNL-NEXT: kshiftrw $2, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k3
|
; KNL-NEXT: kshiftrw $14, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: korw %k3, %k0, %k3
|
; KNL-NEXT: korw %k3, %k2, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k0
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k5
|
; KNL-NEXT: kxorw %k0, %k4, %k4
|
||||||
; KNL-NEXT: kxorw %k0, %k5, %k0
|
; KNL-NEXT: kshiftrw $2, %k4, %k5
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
; KNL-NEXT: kxorw %k3, %k5, %k3
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k0, %k4, %k0
|
; KNL-NEXT: kshiftrw $13, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $2, %k0, %k4
|
; KNL-NEXT: kxorw %k3, %k4, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $3, %k3, %k4
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $12, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $4, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $11, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $5, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $10, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $6, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $9, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $7, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $8, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $8, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $7, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $9, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $6, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $10, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $5, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $11, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $4, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $12, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $3, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $13, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $14, %k3, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: korw %k4, %k3, %k3
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k0, %k5, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k0, %k5
|
||||||
|
; KNL-NEXT: kxorw %k4, %k5, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
; KNL-NEXT: kshiftrw $13, %k4, %k4
|
; KNL-NEXT: kshiftrw $13, %k4, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k0, %k0
|
; KNL-NEXT: kxorw %k4, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $3, %k0, %k4
|
; KNL-NEXT: kshiftrw $3, %k0, %k4
|
||||||
|
@ -2329,228 +2308,113 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
;
|
;
|
||||||
; AVX512DQNOBW-LABEL: test21:
|
; AVX512DQNOBW-LABEL: test21:
|
||||||
; AVX512DQNOBW: # %bb.0:
|
; AVX512DQNOBW: # %bb.0:
|
||||||
; AVX512DQNOBW-NEXT: kmovw %esi, %k0
|
; AVX512DQNOBW-NEXT: kmovw %edx, %k0
|
||||||
; AVX512DQNOBW-NEXT: kmovw %edi, %k1
|
; AVX512DQNOBW-NEXT: kmovw %edi, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k0, %k2, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k0, %k1, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %edx, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %ecx, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %r8d, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %r9d, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k3, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
|
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k1, %k2, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k1, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: kxorw %k0, %k3, %k0
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k0, %k2, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kmovw %ecx, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $12, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $4, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: kmovw %r8d, %k3
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $11, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $5, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: kmovw %r9d, %k3
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $10, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $6, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $9, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $7, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $8, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $8, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $7, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $9, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $6, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $10, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $5, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $11, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $4, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $12, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
|
; AVX512DQNOBW-NEXT: korw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k4
|
; AVX512DQNOBW-NEXT: kxorw %k1, %k3, %k3
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k4
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k4, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k4, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k3, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k3, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k2, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k3, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k3
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k2, %k3
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
|
@ -2645,18 +2509,12 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k5
|
; AVX512DQNOBW-NEXT: kxorw %k1, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k4, %k5
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k5, %k3
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k5, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $14, %k3, %k3
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k4, %k3
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k4, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $13, %k4, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $3, %k3, %k4
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k3, %k4
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
@ -2747,13 +2605,113 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
|
; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: vpmovm2d %k3, %zmm4
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k1, %k5, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k5, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $12, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $11, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $10, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $9, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $8, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $7, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $6, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $4, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $2, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQNOBW-NEXT: korw %k4, %k1, %k1
|
||||||
|
; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4
|
||||||
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
||||||
; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1
|
; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1
|
||||||
; AVX512DQNOBW-NEXT: vpmovm2d %k2, %zmm4
|
; AVX512DQNOBW-NEXT: vpmovm2d %k3, %zmm4
|
||||||
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
||||||
; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2
|
; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2
|
||||||
; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm4
|
; AVX512DQNOBW-NEXT: vpmovm2d %k2, %zmm4
|
||||||
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm4
|
||||||
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3
|
; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3
|
||||||
; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4
|
; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4
|
||||||
|
|
|
@ -2753,229 +2753,114 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
;
|
;
|
||||||
; KNL-LABEL: store_64i1:
|
; KNL-LABEL: store_64i1:
|
||||||
; KNL: ## %bb.0:
|
; KNL: ## %bb.0:
|
||||||
; KNL-NEXT: kmovw %edx, %k0
|
; KNL-NEXT: kmovw %ecx, %k0
|
||||||
; KNL-NEXT: kmovw %esi, %k1
|
; KNL-NEXT: kmovw %esi, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k2
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; KNL-NEXT: kxorw %k0, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; KNL-NEXT: kxorw %k0, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; KNL-NEXT: kmovw %ecx, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
|
||||||
; KNL-NEXT: kmovw %r8d, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $12, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $4, %k0, %k1
|
|
||||||
; KNL-NEXT: kmovw %r9d, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $11, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $5, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $10, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $6, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $7, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $8, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $8, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $7, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $9, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $6, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $10, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $5, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $11, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $4, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $12, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $2, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $14, %k0, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: korw %k1, %k0, %k0
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k1
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k3
|
|
||||||
; KNL-NEXT: kxorw %k1, %k3, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $14, %k1, %k1
|
; KNL-NEXT: kshiftrw $14, %k1, %k1
|
||||||
; KNL-NEXT: kxorw %k1, %k2, %k1
|
; KNL-NEXT: kxorw %k1, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k1, %k2
|
; KNL-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: kxorw %k0, %k3, %k0
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kshiftrw $13, %k0, %k0
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kxorw %k0, %k2, %k0
|
||||||
; KNL-NEXT: kshiftrw $13, %k2, %k2
|
; KNL-NEXT: kshiftrw $3, %k0, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kmovw %r8d, %k3
|
||||||
; KNL-NEXT: kshiftrw $3, %k1, %k2
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $12, %k2, %k2
|
; KNL-NEXT: kshiftrw $12, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $4, %k1, %k2
|
; KNL-NEXT: kshiftrw $4, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: kmovw %r9d, %k3
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $11, %k2, %k2
|
; KNL-NEXT: kshiftrw $11, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $5, %k1, %k2
|
; KNL-NEXT: kshiftrw $5, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $10, %k2, %k2
|
; KNL-NEXT: kshiftrw $10, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $6, %k1, %k2
|
; KNL-NEXT: kshiftrw $6, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $9, %k2, %k2
|
; KNL-NEXT: kshiftrw $9, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $7, %k1, %k2
|
; KNL-NEXT: kshiftrw $7, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $8, %k2, %k2
|
; KNL-NEXT: kshiftrw $8, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $8, %k1, %k2
|
; KNL-NEXT: kshiftrw $8, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $7, %k2, %k2
|
; KNL-NEXT: kshiftrw $7, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $9, %k1, %k2
|
; KNL-NEXT: kshiftrw $9, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $6, %k2, %k2
|
; KNL-NEXT: kshiftrw $6, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $10, %k1, %k2
|
; KNL-NEXT: kshiftrw $10, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $5, %k2, %k2
|
; KNL-NEXT: kshiftrw $5, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $11, %k1, %k2
|
; KNL-NEXT: kshiftrw $11, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $4, %k2, %k2
|
; KNL-NEXT: kshiftrw $4, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $12, %k1, %k2
|
; KNL-NEXT: kshiftrw $12, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $3, %k2, %k2
|
; KNL-NEXT: kshiftrw $3, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $13, %k1, %k2
|
; KNL-NEXT: kshiftrw $13, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k2, %k2
|
; KNL-NEXT: kshiftrw $2, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $14, %k1, %k2
|
; KNL-NEXT: kshiftrw $14, %k0, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
; KNL-NEXT: kmovw %eax, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: korw %k2, %k1, %k1
|
; KNL-NEXT: korw %k2, %k0, %k0
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k2
|
; KNL-NEXT: kmovw %eax, %k2
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k4
|
; KNL-NEXT: kxorw %k1, %k3, %k3
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k3, %k4
|
||||||
; KNL-NEXT: kxorw %k2, %k4, %k2
|
; KNL-NEXT: kxorw %k2, %k4, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $14, %k2, %k2
|
; KNL-NEXT: kshiftrw $13, %k2, %k2
|
||||||
; KNL-NEXT: kxorw %k2, %k3, %k2
|
; KNL-NEXT: kxorw %k2, %k3, %k2
|
||||||
; KNL-NEXT: kshiftrw $2, %k2, %k3
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k3, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k2, %k3
|
; KNL-NEXT: kshiftrw $3, %k2, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
|
@ -3070,18 +2955,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k5
|
; KNL-NEXT: kxorw %k1, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k4, %k5
|
||||||
; KNL-NEXT: kxorw %k3, %k5, %k3
|
; KNL-NEXT: kxorw %k3, %k5, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $14, %k3, %k3
|
; KNL-NEXT: kshiftrw $13, %k3, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k4, %k3
|
; KNL-NEXT: kxorw %k3, %k4, %k3
|
||||||
; KNL-NEXT: kshiftrw $2, %k3, %k4
|
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
|
||||||
; KNL-NEXT: kshiftrw $13, %k4, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; KNL-NEXT: kshiftrw $3, %k3, %k4
|
; KNL-NEXT: kshiftrw $3, %k3, %k4
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
@ -3172,9 +3051,109 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
; KNL-NEXT: korw %k4, %k3, %k3
|
; KNL-NEXT: korw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kmovw %k3, 6(%rdi)
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %k2, 4(%rdi)
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kmovw %k1, 2(%rdi)
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k1, %k5, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k1, %k5
|
||||||
|
; KNL-NEXT: kxorw %k4, %k5, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $13, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $3, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $12, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $4, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $11, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $5, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $10, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $6, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $9, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $7, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $8, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $8, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $7, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $9, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $6, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $10, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $5, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $11, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $4, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $12, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $3, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $13, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $2, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $14, %k1, %k4
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
||||||
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; KNL-NEXT: korw %k4, %k1, %k1
|
||||||
|
; KNL-NEXT: kmovw %k1, 6(%rdi)
|
||||||
|
; KNL-NEXT: kmovw %k3, 4(%rdi)
|
||||||
|
; KNL-NEXT: kmovw %k2, 2(%rdi)
|
||||||
; KNL-NEXT: kmovw %k0, (%rdi)
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -3196,229 +3175,114 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: store_64i1:
|
; AVX512DQ-LABEL: store_64i1:
|
||||||
; AVX512DQ: ## %bb.0:
|
; AVX512DQ: ## %bb.0:
|
||||||
; AVX512DQ-NEXT: kmovw %edx, %k0
|
; AVX512DQ-NEXT: kmovw %ecx, %k0
|
||||||
; AVX512DQ-NEXT: kmovw %esi, %k1
|
; AVX512DQ-NEXT: kmovw %esi, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kxorw %k0, %k1, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: kmovw %ecx, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: kmovw %r8d, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $4, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: kmovw %r9d, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $6, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $7, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $9, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $10, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $11, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: korw %k1, %k0, %k0
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k1
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k3
|
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k3, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1
|
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k1
|
||||||
; AVX512DQ-NEXT: kxorw %k1, %k2, %k1
|
; AVX512DQ-NEXT: kxorw %k1, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: kxorw %k0, %k3, %k0
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k0, %k2, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $3, %k0, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kmovw %r8d, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k2
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $12, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $12, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $4, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: kmovw %r9d, %k3
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $11, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $11, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $10, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $10, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $6, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $7, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $8, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $8, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $7, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $7, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $9, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $6, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $6, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $10, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $5, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $5, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $11, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $4, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $4, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $13, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k2
|
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
|
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
; AVX512DQ-NEXT: kmovw %eax, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: korw %k2, %k1, %k1
|
; AVX512DQ-NEXT: korw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k2
|
; AVX512DQ-NEXT: kmovw %eax, %k2
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k4
|
; AVX512DQ-NEXT: kxorw %k1, %k3, %k3
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $2, %k3, %k4
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k4, %k2
|
; AVX512DQ-NEXT: kxorw %k2, %k4, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $13, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k3, %k2
|
; AVX512DQ-NEXT: kxorw %k2, %k3, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k2, %k3
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k4
|
|
||||||
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k3, %k3
|
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k3
|
; AVX512DQ-NEXT: kshiftrw $3, %k2, %k3
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k4
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
|
@ -3513,18 +3377,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k4
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k5
|
; AVX512DQ-NEXT: kxorw %k1, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $2, %k4, %k5
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k5, %k3
|
; AVX512DQ-NEXT: kxorw %k3, %k5, %k3
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
|
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $14, %k3, %k3
|
; AVX512DQ-NEXT: kshiftrw $13, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k4, %k3
|
; AVX512DQ-NEXT: kxorw %k3, %k4, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $2, %k3, %k4
|
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k5
|
|
||||||
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $13, %k4, %k4
|
|
||||||
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
|
||||||
; AVX512DQ-NEXT: kshiftrw $3, %k3, %k4
|
; AVX512DQ-NEXT: kshiftrw $3, %k3, %k4
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k5
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
@ -3615,9 +3473,109 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k4
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
; AVX512DQ-NEXT: korw %k4, %k3, %k3
|
; AVX512DQ-NEXT: korw %k4, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k1, %k5, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $13, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $12, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $11, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $9, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $8, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $7, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $6, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $11, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $4, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $12, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $13, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $2, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $14, %k1, %k4
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
|
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
||||||
|
; AVX512DQ-NEXT: korw %k4, %k1, %k1
|
||||||
|
; AVX512DQ-NEXT: kmovw %k1, 6(%rdi)
|
||||||
|
; AVX512DQ-NEXT: kmovw %k3, 4(%rdi)
|
||||||
|
; AVX512DQ-NEXT: kmovw %k2, 2(%rdi)
|
||||||
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
||||||
; AVX512DQ-NEXT: retq
|
; AVX512DQ-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
|
@ -1730,24 +1730,20 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: smulo_v2i64:
|
; AVX512-LABEL: smulo_v2i64:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
|
; AVX512-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
|
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||||
; AVX512-NEXT: vmovq %xmm1, %rdx
|
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
|
||||||
; AVX512-NEXT: vmovq %xmm0, %rsi
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rsi
|
||||||
; AVX512-NEXT: imulq %rdx, %rsi
|
; AVX512-NEXT: imulq %rdx, %rsi
|
||||||
; AVX512-NEXT: seto %dl
|
; AVX512-NEXT: vmovq %rsi, %xmm0
|
||||||
; AVX512-NEXT: imulq %rax, %rcx
|
; AVX512-NEXT: imulq %rax, %rcx
|
||||||
; AVX512-NEXT: vmovq %rcx, %xmm0
|
; AVX512-NEXT: vmovq %rcx, %xmm1
|
||||||
; AVX512-NEXT: vmovq %rsi, %xmm1
|
|
||||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k0
|
; AVX512-NEXT: kmovd %eax, %k0
|
||||||
; AVX512-NEXT: kmovd %edx, %k1
|
; AVX512-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; AVX512-NEXT: kxorw %k0, %k1, %k1
|
|
||||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||||
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
||||||
|
@ -2201,73 +2197,46 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: smulo_v4i1:
|
; AVX512-LABEL: smulo_v4i1:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: pushq %rbx
|
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
|
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||||
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||||
; AVX512-NEXT: kmovd %k1, %r9d
|
; AVX512-NEXT: kmovd %k1, %r9d
|
||||||
; AVX512-NEXT: andb $1, %r9b
|
; AVX512-NEXT: andb $1, %r9b
|
||||||
; AVX512-NEXT: negb %r9b
|
; AVX512-NEXT: negb %r9b
|
||||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
|
||||||
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
|
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||||
; AVX512-NEXT: kshiftrw $3, %k1, %k2
|
; AVX512-NEXT: kshiftrw $3, %k1, %k2
|
||||||
; AVX512-NEXT: kmovd %k2, %r10d
|
; AVX512-NEXT: kmovd %k2, %r10d
|
||||||
; AVX512-NEXT: andb $1, %r10b
|
; AVX512-NEXT: andb $1, %r10b
|
||||||
; AVX512-NEXT: negb %r10b
|
; AVX512-NEXT: negb %r10b
|
||||||
; AVX512-NEXT: kshiftrw $2, %k1, %k2
|
; AVX512-NEXT: kshiftrw $2, %k1, %k2
|
||||||
; AVX512-NEXT: kmovd %k2, %r11d
|
; AVX512-NEXT: kmovd %k1, %ecx
|
||||||
; AVX512-NEXT: andb $1, %r11b
|
; AVX512-NEXT: andb $1, %cl
|
||||||
; AVX512-NEXT: negb %r11b
|
; AVX512-NEXT: negb %cl
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
||||||
; AVX512-NEXT: kmovd %k2, %ebx
|
; AVX512-NEXT: kmovd %k0, %esi
|
||||||
; AVX512-NEXT: andb $1, %bl
|
|
||||||
; AVX512-NEXT: negb %bl
|
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k2
|
|
||||||
; AVX512-NEXT: kmovd %k2, %esi
|
|
||||||
; AVX512-NEXT: andb $1, %sil
|
; AVX512-NEXT: andb $1, %sil
|
||||||
; AVX512-NEXT: negb %sil
|
; AVX512-NEXT: negb %sil
|
||||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
|
||||||
; AVX512-NEXT: kmovd %k2, %edx
|
|
||||||
; AVX512-NEXT: andb $1, %dl
|
|
||||||
; AVX512-NEXT: negb %dl
|
|
||||||
; AVX512-NEXT: kmovd %k1, %eax
|
; AVX512-NEXT: kmovd %k1, %eax
|
||||||
; AVX512-NEXT: andb $1, %al
|
; AVX512-NEXT: andb $1, %al
|
||||||
; AVX512-NEXT: negb %al
|
; AVX512-NEXT: negb %al
|
||||||
; AVX512-NEXT: kmovd %k0, %ecx
|
; AVX512-NEXT: kmovd %k2, %edx
|
||||||
; AVX512-NEXT: andb $1, %cl
|
; AVX512-NEXT: andb $1, %dl
|
||||||
; AVX512-NEXT: negb %cl
|
; AVX512-NEXT: negb %dl
|
||||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512-NEXT: imulb %cl
|
; AVX512-NEXT: imulb %dl
|
||||||
; AVX512-NEXT: movl %eax, %r8d
|
; AVX512-NEXT: movl %eax, %r8d
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: movl %r8d, %ecx
|
; AVX512-NEXT: movl %r8d, %edx
|
||||||
; AVX512-NEXT: andb $1, %cl
|
; AVX512-NEXT: andb $1, %dl
|
||||||
; AVX512-NEXT: negb %cl
|
; AVX512-NEXT: negb %dl
|
||||||
; AVX512-NEXT: cmpb %r8b, %cl
|
; AVX512-NEXT: cmpb %r8b, %dl
|
||||||
; AVX512-NEXT: setne %cl
|
; AVX512-NEXT: setne %dl
|
||||||
; AVX512-NEXT: orb %al, %cl
|
; AVX512-NEXT: orb %al, %dl
|
||||||
; AVX512-NEXT: setne %al
|
; AVX512-NEXT: setne %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k0
|
; AVX512-NEXT: kmovd %eax, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k1
|
; AVX512-NEXT: movl %esi, %eax
|
||||||
; AVX512-NEXT: movl %edx, %eax
|
; AVX512-NEXT: imulb %cl
|
||||||
; AVX512-NEXT: imulb %sil
|
|
||||||
; AVX512-NEXT: movl %eax, %edx
|
|
||||||
; AVX512-NEXT: seto %al
|
|
||||||
; AVX512-NEXT: movl %edx, %ecx
|
|
||||||
; AVX512-NEXT: andb $1, %cl
|
|
||||||
; AVX512-NEXT: negb %cl
|
|
||||||
; AVX512-NEXT: cmpb %dl, %cl
|
|
||||||
; AVX512-NEXT: setne %cl
|
|
||||||
; AVX512-NEXT: orb %al, %cl
|
|
||||||
; AVX512-NEXT: setne %al
|
|
||||||
; AVX512-NEXT: kmovd %eax, %k2
|
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512-NEXT: movl %r11d, %eax
|
|
||||||
; AVX512-NEXT: imulb %bl
|
|
||||||
; AVX512-NEXT: movl %eax, %esi
|
; AVX512-NEXT: movl %eax, %esi
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: movl %esi, %ecx
|
; AVX512-NEXT: movl %esi, %ecx
|
||||||
|
@ -2278,38 +2247,37 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: orb %al, %cl
|
; AVX512-NEXT: orb %al, %cl
|
||||||
; AVX512-NEXT: setne %al
|
; AVX512-NEXT: setne %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k2
|
; AVX512-NEXT: kmovd %eax, %k2
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||||
|
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||||
|
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||||
|
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||||
|
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $13, %k0, %k0
|
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k0, %k0
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: movl %r10d, %eax
|
; AVX512-NEXT: movl %r9d, %eax
|
||||||
; AVX512-NEXT: imulb %r9b
|
; AVX512-NEXT: imulb %r10b
|
||||||
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
||||||
; AVX512-NEXT: seto %cl
|
; AVX512-NEXT: seto %cl
|
||||||
; AVX512-NEXT: movl %eax, %ebx
|
; AVX512-NEXT: movl %eax, %edx
|
||||||
; AVX512-NEXT: andb $1, %bl
|
; AVX512-NEXT: andb $1, %dl
|
||||||
; AVX512-NEXT: negb %bl
|
; AVX512-NEXT: negb %dl
|
||||||
; AVX512-NEXT: cmpb %al, %bl
|
; AVX512-NEXT: cmpb %al, %dl
|
||||||
; AVX512-NEXT: setne %bl
|
; AVX512-NEXT: setne %dl
|
||||||
; AVX512-NEXT: orb %cl, %bl
|
; AVX512-NEXT: orb %cl, %dl
|
||||||
; AVX512-NEXT: setne %cl
|
; AVX512-NEXT: setne %cl
|
||||||
; AVX512-NEXT: kmovd %ecx, %k1
|
; AVX512-NEXT: kmovd %ecx, %k2
|
||||||
; AVX512-NEXT: kshiftlw $3, %k1, %k1
|
; AVX512-NEXT: kshiftlw $3, %k2, %k2
|
||||||
; AVX512-NEXT: korw %k1, %k0, %k1
|
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||||
; AVX512-NEXT: kmovd %r8d, %k0
|
; AVX512-NEXT: kmovd %r8d, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k1
|
|
||||||
; AVX512-NEXT: kmovd %edx, %k2
|
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512-NEXT: kmovd %esi, %k2
|
; AVX512-NEXT: kmovd %esi, %k2
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
||||||
|
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||||
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||||
|
@ -2321,7 +2289,6 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||||
; AVX512-NEXT: kmovd %k0, %eax
|
; AVX512-NEXT: kmovd %k0, %eax
|
||||||
; AVX512-NEXT: movb %al, (%rdi)
|
; AVX512-NEXT: movb %al, (%rdi)
|
||||||
; AVX512-NEXT: popq %rbx
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call {<4 x i1>, <4 x i1>} @llvm.smul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
%t = call {<4 x i1>, <4 x i1>} @llvm.smul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
||||||
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
||||||
|
|
|
@ -1532,26 +1532,21 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: umulo_v2i64:
|
; AVX512-LABEL: umulo_v2i64:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
|
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||||
; AVX512-NEXT: vpextrq $1, %xmm1, %r8
|
; AVX512-NEXT: vmovq %xmm1, %rsi
|
||||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; AVX512-NEXT: vmovq %xmm1, %rdx
|
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
|
||||||
; AVX512-NEXT: mulq %rdx
|
; AVX512-NEXT: mulq %rdx
|
||||||
; AVX512-NEXT: movq %rax, %rsi
|
|
||||||
; AVX512-NEXT: seto %r9b
|
|
||||||
; AVX512-NEXT: movq %rcx, %rax
|
|
||||||
; AVX512-NEXT: mulq %r8
|
|
||||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||||
; AVX512-NEXT: vmovq %rsi, %xmm1
|
; AVX512-NEXT: movq %rcx, %rax
|
||||||
|
; AVX512-NEXT: mulq %rsi
|
||||||
|
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k0
|
; AVX512-NEXT: kmovd %eax, %k0
|
||||||
; AVX512-NEXT: kmovd %r9d, %k1
|
; AVX512-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
|
||||||
; AVX512-NEXT: kxorw %k0, %k1, %k1
|
|
||||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||||
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
||||||
|
@ -1950,7 +1945,6 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: umulo_v4i1:
|
; AVX512-LABEL: umulo_v4i1:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: pushq %rbx
|
|
||||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
|
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||||
|
@ -1962,47 +1956,26 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: kmovd %k2, %r10d
|
; AVX512-NEXT: kmovd %k2, %r10d
|
||||||
; AVX512-NEXT: andb $1, %r10b
|
; AVX512-NEXT: andb $1, %r10b
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||||
; AVX512-NEXT: kmovd %k2, %r11d
|
; AVX512-NEXT: kmovd %k0, %esi
|
||||||
; AVX512-NEXT: andb $1, %r11b
|
|
||||||
; AVX512-NEXT: kshiftrw $2, %k1, %k2
|
|
||||||
; AVX512-NEXT: kmovd %k2, %ebx
|
|
||||||
; AVX512-NEXT: andb $1, %bl
|
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k2
|
|
||||||
; AVX512-NEXT: kmovd %k2, %edx
|
|
||||||
; AVX512-NEXT: andb $1, %dl
|
|
||||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
|
||||||
; AVX512-NEXT: kmovd %k2, %esi
|
|
||||||
; AVX512-NEXT: andb $1, %sil
|
; AVX512-NEXT: andb $1, %sil
|
||||||
; AVX512-NEXT: kmovd %k0, %eax
|
; AVX512-NEXT: kshiftrw $2, %k1, %k0
|
||||||
; AVX512-NEXT: andb $1, %al
|
|
||||||
; AVX512-NEXT: kmovd %k1, %ecx
|
; AVX512-NEXT: kmovd %k1, %ecx
|
||||||
; AVX512-NEXT: andb $1, %cl
|
; AVX512-NEXT: andb $1, %cl
|
||||||
|
; AVX512-NEXT: kmovd %k2, %eax
|
||||||
|
; AVX512-NEXT: andb $1, %al
|
||||||
|
; AVX512-NEXT: kmovd %k0, %edx
|
||||||
|
; AVX512-NEXT: andb $1, %dl
|
||||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512-NEXT: mulb %cl
|
; AVX512-NEXT: mulb %dl
|
||||||
; AVX512-NEXT: movl %eax, %r8d
|
; AVX512-NEXT: movl %eax, %r8d
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: testb $-2, %r8b
|
; AVX512-NEXT: testb $-2, %r8b
|
||||||
; AVX512-NEXT: setne %cl
|
; AVX512-NEXT: setne %dl
|
||||||
; AVX512-NEXT: orb %al, %cl
|
; AVX512-NEXT: orb %al, %dl
|
||||||
; AVX512-NEXT: setne %al
|
; AVX512-NEXT: setne %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k0
|
; AVX512-NEXT: kmovd %eax, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k1
|
; AVX512-NEXT: movl %esi, %eax
|
||||||
; AVX512-NEXT: movl %edx, %eax
|
; AVX512-NEXT: mulb %cl
|
||||||
; AVX512-NEXT: mulb %sil
|
|
||||||
; AVX512-NEXT: movl %eax, %edx
|
|
||||||
; AVX512-NEXT: seto %al
|
|
||||||
; AVX512-NEXT: testb $-2, %dl
|
|
||||||
; AVX512-NEXT: setne %cl
|
|
||||||
; AVX512-NEXT: orb %al, %cl
|
|
||||||
; AVX512-NEXT: setne %al
|
|
||||||
; AVX512-NEXT: kmovd %eax, %k2
|
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512-NEXT: movl %r11d, %eax
|
|
||||||
; AVX512-NEXT: mulb %bl
|
|
||||||
; AVX512-NEXT: movl %eax, %esi
|
; AVX512-NEXT: movl %eax, %esi
|
||||||
; AVX512-NEXT: seto %al
|
; AVX512-NEXT: seto %al
|
||||||
; AVX512-NEXT: testb $-2, %sil
|
; AVX512-NEXT: testb $-2, %sil
|
||||||
|
@ -2010,35 +1983,34 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: orb %al, %cl
|
; AVX512-NEXT: orb %al, %cl
|
||||||
; AVX512-NEXT: setne %al
|
; AVX512-NEXT: setne %al
|
||||||
; AVX512-NEXT: kmovd %eax, %k2
|
; AVX512-NEXT: kmovd %eax, %k2
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||||
|
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||||
|
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||||
|
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||||
|
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $13, %k0, %k0
|
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k0, %k0
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: movl %r9d, %eax
|
; AVX512-NEXT: movl %r9d, %eax
|
||||||
; AVX512-NEXT: mulb %r10b
|
; AVX512-NEXT: mulb %r10b
|
||||||
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
||||||
; AVX512-NEXT: seto %cl
|
; AVX512-NEXT: seto %cl
|
||||||
; AVX512-NEXT: testb $-2, %al
|
; AVX512-NEXT: testb $-2, %al
|
||||||
; AVX512-NEXT: setne %bl
|
; AVX512-NEXT: setne %dl
|
||||||
; AVX512-NEXT: orb %cl, %bl
|
; AVX512-NEXT: orb %cl, %dl
|
||||||
; AVX512-NEXT: setne %cl
|
; AVX512-NEXT: setne %cl
|
||||||
; AVX512-NEXT: kmovd %ecx, %k1
|
; AVX512-NEXT: kmovd %ecx, %k2
|
||||||
; AVX512-NEXT: kshiftlw $3, %k1, %k1
|
; AVX512-NEXT: kshiftlw $3, %k2, %k2
|
||||||
; AVX512-NEXT: korw %k1, %k0, %k1
|
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||||
; AVX512-NEXT: kmovd %r8d, %k0
|
; AVX512-NEXT: kmovd %r8d, %k1
|
||||||
; AVX512-NEXT: kshiftrw $1, %k0, %k1
|
|
||||||
; AVX512-NEXT: kmovd %edx, %k2
|
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
|
||||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
|
||||||
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
|
||||||
; AVX512-NEXT: kmovd %esi, %k2
|
; AVX512-NEXT: kmovd %esi, %k2
|
||||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
||||||
|
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||||
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||||
|
@ -2050,7 +2022,6 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||||
; AVX512-NEXT: kmovd %k0, %eax
|
; AVX512-NEXT: kmovd %k0, %eax
|
||||||
; AVX512-NEXT: movb %al, (%rdi)
|
; AVX512-NEXT: movb %al, (%rdi)
|
||||||
; AVX512-NEXT: popq %rbx
|
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
||||||
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
||||||
|
|
Loading…
Reference in New Issue