forked from OSchip/llvm-project
[X86] Rewrite the vXi1 subvector insertion code to not rely on the value of bits that might be undef
The previous code tried to do a trick where we would extract the old subvector from the location we were inserting into, then xor that with the new value. We would take the xored value and clear out the bits above the subvector size, then shift that xored subvector to the insert location, and finally xor that with the original vector. Since the old subvector was used in both xors, this would leave just the new subvector at the insert location. Since the surrounding bits had been zeroed, no other bits of the original vector would be modified. Unfortunately, if the old subvector came from undef, we might aggressively propagate the undef. Then we end up with the XORs not cancelling, because they aren't using the same value for the two uses of the old subvector. @bkramer gave me a case that demonstrated this, but we haven't reduced it enough to make it easy to see what's happening. This patch uses a safer, but more costly, approach. It isolates the bits above the insertion point and the bits below the insertion point and ORs those together, leaving 0s at the insertion location. Then it widens the subvector with 0s in the upper bits and shifts it into position with 0s in the lower bits. Finally, it ORs the shifted subvector with the other two pieces. Differential Revision: https://reviews.llvm.org/D68311 llvm-svn: 373495
This commit is contained in:
parent
0cacf136fc
commit
74c7d6be28
|
@ -5769,23 +5769,35 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// Widen the vector if needed.
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
|
||||
// Move the current value of the bit to be replace to the lsbs.
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
|
||||
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
|
||||
// Xor with the new bit.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
|
||||
// Shift to MSB, filling bottom bits with 0.
|
||||
|
||||
// Clear the upper bits of the subvector and move it to its insert position.
|
||||
unsigned ShiftLeft = NumElems - SubVecNumElems;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
|
||||
// Shift to the final position, filling upper bits with 0.
|
||||
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
|
||||
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
|
||||
// Xor with original vector leaving the new value.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
|
||||
|
||||
// Isolate the bits below the insertion point.
|
||||
unsigned LowShift = NumElems - IdxVal;
|
||||
SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
|
||||
DAG.getTargetConstant(LowShift, dl, MVT::i8));
|
||||
Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
|
||||
DAG.getTargetConstant(LowShift, dl, MVT::i8));
|
||||
|
||||
// Isolate the bits after the last inserted bit.
|
||||
unsigned HighShift = IdxVal + SubVecNumElems;
|
||||
SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
|
||||
DAG.getTargetConstant(HighShift, dl, MVT::i8));
|
||||
High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
|
||||
DAG.getTargetConstant(HighShift, dl, MVT::i8));
|
||||
|
||||
// Now OR all 3 pieces together.
|
||||
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
|
||||
SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);
|
||||
|
||||
// Reduce to original width if needed.
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
|
||||
}
|
||||
|
||||
static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -302,12 +302,15 @@ define i16 @test16(i1 *%addr, i16 %a) {
|
|||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: movb (%rdi), %al
|
||||
; KNL-NEXT: kmovw %esi, %k0
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: kshiftrw $10, %k0, %k2
|
||||
; KNL-NEXT: kxorw %k1, %k2, %k1
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $5, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $11, %k0, %k1
|
||||
; KNL-NEXT: kshiftlw $11, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $6, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $6, %k0, %k0
|
||||
; KNL-NEXT: kmovw %eax, %k2
|
||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||
; KNL-NEXT: kshiftrw $5, %k2, %k2
|
||||
; KNL-NEXT: korw %k2, %k1, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; KNL-NEXT: retq
|
||||
|
@ -316,11 +319,14 @@ define i16 @test16(i1 *%addr, i16 %a) {
|
|||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: kmovb (%rdi), %k0
|
||||
; SKX-NEXT: kmovd %esi, %k1
|
||||
; SKX-NEXT: kshiftrw $10, %k1, %k2
|
||||
; SKX-NEXT: kxorw %k0, %k2, %k0
|
||||
; SKX-NEXT: kshiftrw $11, %k1, %k2
|
||||
; SKX-NEXT: kshiftlw $11, %k2, %k2
|
||||
; SKX-NEXT: kshiftlw $6, %k1, %k1
|
||||
; SKX-NEXT: kshiftrw $6, %k1, %k1
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0
|
||||
; SKX-NEXT: kxorw %k0, %k1, %k0
|
||||
; SKX-NEXT: korw %k0, %k2, %k0
|
||||
; SKX-NEXT: korw %k0, %k1, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; SKX-NEXT: retq
|
||||
|
@ -336,12 +342,15 @@ define i8 @test17(i1 *%addr, i8 %a) {
|
|||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: movb (%rdi), %al
|
||||
; KNL-NEXT: kmovw %esi, %k0
|
||||
; KNL-NEXT: kshiftrw $5, %k0, %k1
|
||||
; KNL-NEXT: kshiftlw $5, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $12, %k0, %k0
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: kshiftrw $4, %k0, %k2
|
||||
; KNL-NEXT: kxorw %k1, %k2, %k1
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $11, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: retq
|
||||
|
@ -350,11 +359,14 @@ define i8 @test17(i1 *%addr, i8 %a) {
|
|||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: kmovb (%rdi), %k0
|
||||
; SKX-NEXT: kmovd %esi, %k1
|
||||
; SKX-NEXT: kshiftrb $4, %k1, %k2
|
||||
; SKX-NEXT: kxorb %k0, %k2, %k0
|
||||
; SKX-NEXT: kshiftrb $5, %k1, %k2
|
||||
; SKX-NEXT: kshiftlb $5, %k2, %k2
|
||||
; SKX-NEXT: kshiftlb $4, %k1, %k1
|
||||
; SKX-NEXT: kshiftrb $4, %k1, %k1
|
||||
; SKX-NEXT: kshiftlb $7, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $3, %k0, %k0
|
||||
; SKX-NEXT: kxorb %k0, %k1, %k0
|
||||
; SKX-NEXT: korb %k0, %k2, %k0
|
||||
; SKX-NEXT: korb %k0, %k1, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SKX-NEXT: retq
|
||||
|
@ -790,12 +802,15 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
|
|||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: shll $16, %ecx
|
||||
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftrw $4, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $5, %k0, %k1
|
||||
; KNL-NEXT: kshiftlw $5, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $12, %k0, %k0
|
||||
; KNL-NEXT: kmovw %eax, %k2
|
||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $11, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
||||
; KNL-NEXT: kshiftrw $11, %k2, %k2
|
||||
; KNL-NEXT: korw %k2, %k1, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: orl %ecx, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -808,12 +823,15 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
|
|||
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0
|
||||
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1
|
||||
; SKX-NEXT: kunpckwd %k0, %k1, %k0
|
||||
; SKX-NEXT: kshiftrd $4, %k0, %k1
|
||||
; SKX-NEXT: kshiftrd $5, %k0, %k1
|
||||
; SKX-NEXT: kshiftld $5, %k1, %k1
|
||||
; SKX-NEXT: kshiftld $28, %k0, %k0
|
||||
; SKX-NEXT: kshiftrd $28, %k0, %k0
|
||||
; SKX-NEXT: kmovd %eax, %k2
|
||||
; SKX-NEXT: kxord %k2, %k1, %k1
|
||||
; SKX-NEXT: kshiftld $31, %k1, %k1
|
||||
; SKX-NEXT: kshiftrd $27, %k1, %k1
|
||||
; SKX-NEXT: kxord %k1, %k0, %k0
|
||||
; SKX-NEXT: kshiftld $31, %k2, %k2
|
||||
; SKX-NEXT: kshiftrd $27, %k2, %k2
|
||||
; SKX-NEXT: kord %k2, %k1, %k1
|
||||
; SKX-NEXT: kord %k1, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
|
@ -832,12 +850,15 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
|
|||
; KNL-NEXT: cmpl %esi, %edi
|
||||
; KNL-NEXT: setb %al
|
||||
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
||||
; KNL-NEXT: kmovw %eax, %k2
|
||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
||||
; KNL-NEXT: kshiftlw $3, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $14, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $13, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -848,12 +869,15 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
|
|||
; SKX-NEXT: cmpl %esi, %edi
|
||||
; SKX-NEXT: setb %al
|
||||
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftrb $2, %k0, %k1
|
||||
; SKX-NEXT: kmovd %eax, %k2
|
||||
; SKX-NEXT: kxorb %k2, %k1, %k1
|
||||
; SKX-NEXT: kshiftrb $3, %k0, %k1
|
||||
; SKX-NEXT: kshiftlb $3, %k1, %k1
|
||||
; SKX-NEXT: kshiftlb $6, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $6, %k0, %k0
|
||||
; SKX-NEXT: korw %k1, %k0, %k0
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: kshiftlb $7, %k1, %k1
|
||||
; SKX-NEXT: kshiftrb $5, %k1, %k1
|
||||
; SKX-NEXT: kxorw %k1, %k0, %k0
|
||||
; SKX-NEXT: korw %k0, %k1, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SKX-NEXT: retq
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4913,24 +4913,30 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
|
|||
; AVX512F-LABEL: widen_masked_store:
|
||||
; AVX512F: ## %bb.0:
|
||||
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: kmovw %edx, %k0
|
||||
; AVX512F-NEXT: andl $1, %esi
|
||||
; AVX512F-NEXT: kmovw %esi, %k1
|
||||
; AVX512F-NEXT: kxorw %k0, %k0, %k2
|
||||
; AVX512F-NEXT: kshiftrw $1, %k2, %k2
|
||||
; AVX512F-NEXT: kshiftlw $1, %k2, %k2
|
||||
; AVX512F-NEXT: korw %k1, %k2, %k1
|
||||
; AVX512F-NEXT: kshiftrw $1, %k1, %k2
|
||||
; AVX512F-NEXT: kxorw %k0, %k2, %k0
|
||||
; AVX512F-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512F-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512F-NEXT: kxorw %k0, %k1, %k0
|
||||
; AVX512F-NEXT: kmovw %esi, %k0
|
||||
; AVX512F-NEXT: kxorw %k0, %k0, %k1
|
||||
; AVX512F-NEXT: kshiftrw $1, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftlw $1, %k1, %k1
|
||||
; AVX512F-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512F-NEXT: kmovw %ecx, %k2
|
||||
; AVX512F-NEXT: kxorw %k2, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftlw $2, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512F-NEXT: kmovw %edx, %k2
|
||||
; AVX512F-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512F-NEXT: kshiftrw $14, %k2, %k2
|
||||
; AVX512F-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512F-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512F-NEXT: kshiftrw $3, %k0, %k1
|
||||
; AVX512F-NEXT: kshiftlw $3, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
|
||||
; AVX512F-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512F-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512F-NEXT: kmovw %ecx, %k1
|
||||
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512F-NEXT: kxorw %k1, %k0, %k0
|
||||
; AVX512F-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512F-NEXT: kshiftlw $12, %k0, %k0
|
||||
; AVX512F-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
|
||||
|
@ -4939,48 +4945,60 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
|
|||
;
|
||||
; AVX512VLDQ-LABEL: widen_masked_store:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: kmovw %edx, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %esi, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kxorw %k0, %k0, %k2
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k2, %k2
|
||||
; AVX512VLDQ-NEXT: kshiftlb $1, %k2, %k2
|
||||
; AVX512VLDQ-NEXT: korb %k1, %k2, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k2
|
||||
; AVX512VLDQ-NEXT: kxorb %k0, %k2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %esi, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kxorw %k0, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $1, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: korb %k0, %k1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %ecx, %k2
|
||||
; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $2, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %edx, %k2
|
||||
; AVX512VLDQ-NEXT: kshiftlb $7, %k2, %k2
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k2, %k2
|
||||
; AVX512VLDQ-NEXT: korb %k2, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: korb %k1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $3, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $6, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %ecx, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: widen_masked_store:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: kmovd %edx, %k0
|
||||
; AVX512VLBW-NEXT: andl $1, %esi
|
||||
; AVX512VLBW-NEXT: kmovw %esi, %k1
|
||||
; AVX512VLBW-NEXT: kxorw %k0, %k0, %k2
|
||||
; AVX512VLBW-NEXT: kshiftrw $1, %k2, %k2
|
||||
; AVX512VLBW-NEXT: kshiftlw $1, %k2, %k2
|
||||
; AVX512VLBW-NEXT: korw %k1, %k2, %k1
|
||||
; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k2
|
||||
; AVX512VLBW-NEXT: kxorw %k0, %k2, %k0
|
||||
; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0
|
||||
; AVX512VLBW-NEXT: kmovw %esi, %k0
|
||||
; AVX512VLBW-NEXT: kxorw %k0, %k0, %k1
|
||||
; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $1, %k1, %k1
|
||||
; AVX512VLBW-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLBW-NEXT: kmovd %ecx, %k2
|
||||
; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $2, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kmovd %edx, %k2
|
||||
; AVX512VLBW-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512VLBW-NEXT: kshiftrw $14, %k2, %k2
|
||||
; AVX512VLBW-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512VLBW-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $3, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512VLBW-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512VLBW-NEXT: kmovd %ecx, %k1
|
||||
; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512VLBW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512VLBW-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask)
|
||||
|
|
|
@ -1730,20 +1730,26 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
|||
;
|
||||
; AVX512-LABEL: smulo_v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rsi
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512-NEXT: vmovq %xmm0, %rsi
|
||||
; AVX512-NEXT: imulq %rdx, %rsi
|
||||
; AVX512-NEXT: vmovq %rsi, %xmm0
|
||||
; AVX512-NEXT: seto %dl
|
||||
; AVX512-NEXT: imulq %rax, %rcx
|
||||
; AVX512-NEXT: vmovq %rcx, %xmm1
|
||||
; AVX512-NEXT: vmovq %rcx, %xmm0
|
||||
; AVX512-NEXT: vmovq %rsi, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k1
|
||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %edx, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k0, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: korw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
|
@ -2197,46 +2203,76 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
;
|
||||
; AVX512-LABEL: smulo_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k0
|
||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||
; AVX512-NEXT: kmovd %k1, %r9d
|
||||
; AVX512-NEXT: andb $1, %r9b
|
||||
; AVX512-NEXT: negb %r9b
|
||||
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; AVX512-NEXT: kshiftrw $3, %k1, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %r10d
|
||||
; AVX512-NEXT: andb $1, %r10b
|
||||
; AVX512-NEXT: negb %r10b
|
||||
; AVX512-NEXT: kshiftrw $2, %k1, %k2
|
||||
; AVX512-NEXT: kmovd %k1, %ecx
|
||||
; AVX512-NEXT: andb $1, %cl
|
||||
; AVX512-NEXT: negb %cl
|
||||
; AVX512-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512-NEXT: kmovd %k0, %esi
|
||||
; AVX512-NEXT: kmovd %k2, %r11d
|
||||
; AVX512-NEXT: andb $1, %r11b
|
||||
; AVX512-NEXT: negb %r11b
|
||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %ebx
|
||||
; AVX512-NEXT: andb $1, %bl
|
||||
; AVX512-NEXT: negb %bl
|
||||
; AVX512-NEXT: kshiftrw $1, %k0, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %esi
|
||||
; AVX512-NEXT: andb $1, %sil
|
||||
; AVX512-NEXT: negb %sil
|
||||
; AVX512-NEXT: kmovd %k1, %eax
|
||||
; AVX512-NEXT: andb $1, %al
|
||||
; AVX512-NEXT: negb %al
|
||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %edx
|
||||
; AVX512-NEXT: andb $1, %dl
|
||||
; AVX512-NEXT: negb %dl
|
||||
; AVX512-NEXT: kmovd %k1, %eax
|
||||
; AVX512-NEXT: andb $1, %al
|
||||
; AVX512-NEXT: negb %al
|
||||
; AVX512-NEXT: kmovd %k0, %ecx
|
||||
; AVX512-NEXT: andb $1, %cl
|
||||
; AVX512-NEXT: negb %cl
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: imulb %dl
|
||||
; AVX512-NEXT: imulb %cl
|
||||
; AVX512-NEXT: movl %eax, %r8d
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: movl %r8d, %edx
|
||||
; AVX512-NEXT: andb $1, %dl
|
||||
; AVX512-NEXT: negb %dl
|
||||
; AVX512-NEXT: cmpb %r8b, %dl
|
||||
; AVX512-NEXT: setne %dl
|
||||
; AVX512-NEXT: orb %al, %dl
|
||||
; AVX512-NEXT: movl %r8d, %ecx
|
||||
; AVX512-NEXT: andb $1, %cl
|
||||
; AVX512-NEXT: negb %cl
|
||||
; AVX512-NEXT: cmpb %r8b, %cl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: movl %esi, %eax
|
||||
; AVX512-NEXT: imulb %cl
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $15, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k0, %k0
|
||||
; AVX512-NEXT: movl %edx, %eax
|
||||
; AVX512-NEXT: imulb %sil
|
||||
; AVX512-NEXT: movl %eax, %edx
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: movl %edx, %ecx
|
||||
; AVX512-NEXT: andb $1, %cl
|
||||
; AVX512-NEXT: negb %cl
|
||||
; AVX512-NEXT: cmpb %dl, %cl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kshiftlw $1, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k0, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $14, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $3, %k0, %k2
|
||||
; AVX512-NEXT: movl %r11d, %eax
|
||||
; AVX512-NEXT: imulb %bl
|
||||
; AVX512-NEXT: movl %eax, %esi
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: movl %esi, %ecx
|
||||
|
@ -2246,26 +2282,22 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||
; AVX512-NEXT: kmovd %eax, %k3
|
||||
; AVX512-NEXT: kshiftlw $2, %k3, %k3
|
||||
; AVX512-NEXT: korw %k3, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512-NEXT: movl %r9d, %eax
|
||||
; AVX512-NEXT: imulb %r10b
|
||||
; AVX512-NEXT: movl %r10d, %eax
|
||||
; AVX512-NEXT: imulb %r9b
|
||||
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
||||
; AVX512-NEXT: seto %cl
|
||||
; AVX512-NEXT: movl %eax, %edx
|
||||
; AVX512-NEXT: andb $1, %dl
|
||||
; AVX512-NEXT: negb %dl
|
||||
; AVX512-NEXT: cmpb %al, %dl
|
||||
; AVX512-NEXT: setne %dl
|
||||
; AVX512-NEXT: orb %cl, %dl
|
||||
; AVX512-NEXT: movl %eax, %ebx
|
||||
; AVX512-NEXT: andb $1, %bl
|
||||
; AVX512-NEXT: negb %bl
|
||||
; AVX512-NEXT: cmpb %al, %bl
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: orb %cl, %bl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: kmovd %ecx, %k2
|
||||
; AVX512-NEXT: kshiftlw $3, %k2, %k2
|
||||
|
@ -2273,21 +2305,34 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: kmovd %r8d, %k1
|
||||
; AVX512-NEXT: kmovd %esi, %k2
|
||||
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512-NEXT: kmovd %edx, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $14, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $3, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $14, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %esi, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $13, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $4, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $13, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $12, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: movb %al, (%rdi)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<4 x i1>, <4 x i1>} @llvm.smul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
||||
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
||||
|
|
|
@ -1532,21 +1532,28 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
|
|||
;
|
||||
; AVX512-LABEL: umulo_v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX512-NEXT: vmovq %xmm1, %rsi
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
|
||||
; AVX512-NEXT: vpextrq $1, %xmm1, %r8
|
||||
; AVX512-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512-NEXT: mulq %rdx
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: movq %rax, %rsi
|
||||
; AVX512-NEXT: seto %r9b
|
||||
; AVX512-NEXT: movq %rcx, %rax
|
||||
; AVX512-NEXT: mulq %rsi
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: mulq %r8
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vmovq %rsi, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k1
|
||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %r9d, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k0, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: korw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
|
||||
|
@ -1945,6 +1952,7 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
;
|
||||
; AVX512-LABEL: umulo_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||
|
@ -1956,40 +1964,60 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
; AVX512-NEXT: kmovd %k2, %r10d
|
||||
; AVX512-NEXT: andb $1, %r10b
|
||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||
; AVX512-NEXT: kmovd %k0, %esi
|
||||
; AVX512-NEXT: kmovd %k2, %r11d
|
||||
; AVX512-NEXT: andb $1, %r11b
|
||||
; AVX512-NEXT: kshiftrw $2, %k1, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %ebx
|
||||
; AVX512-NEXT: andb $1, %bl
|
||||
; AVX512-NEXT: kshiftrw $1, %k0, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %edx
|
||||
; AVX512-NEXT: andb $1, %dl
|
||||
; AVX512-NEXT: kshiftrw $1, %k1, %k2
|
||||
; AVX512-NEXT: kmovd %k2, %esi
|
||||
; AVX512-NEXT: andb $1, %sil
|
||||
; AVX512-NEXT: kshiftrw $2, %k1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: andb $1, %al
|
||||
; AVX512-NEXT: kmovd %k1, %ecx
|
||||
; AVX512-NEXT: andb $1, %cl
|
||||
; AVX512-NEXT: kmovd %k2, %eax
|
||||
; AVX512-NEXT: andb $1, %al
|
||||
; AVX512-NEXT: kmovd %k0, %edx
|
||||
; AVX512-NEXT: andb $1, %dl
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: mulb %dl
|
||||
; AVX512-NEXT: mulb %cl
|
||||
; AVX512-NEXT: movl %eax, %r8d
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: testb $-2, %r8b
|
||||
; AVX512-NEXT: setne %dl
|
||||
; AVX512-NEXT: orb %al, %dl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: movl %esi, %eax
|
||||
; AVX512-NEXT: mulb %cl
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $15, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k0, %k0
|
||||
; AVX512-NEXT: movl %edx, %eax
|
||||
; AVX512-NEXT: mulb %sil
|
||||
; AVX512-NEXT: movl %eax, %edx
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: testb $-2, %dl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kshiftlw $1, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k0, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $14, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $14, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $3, %k0, %k2
|
||||
; AVX512-NEXT: movl %r11d, %eax
|
||||
; AVX512-NEXT: mulb %bl
|
||||
; AVX512-NEXT: movl %eax, %esi
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: testb $-2, %sil
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: orb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||
; AVX512-NEXT: kshiftlw $2, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||
; AVX512-NEXT: kmovd %eax, %k3
|
||||
; AVX512-NEXT: kshiftlw $2, %k3, %k3
|
||||
; AVX512-NEXT: korw %k3, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512-NEXT: movl %r9d, %eax
|
||||
|
@ -1997,8 +2025,8 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
; AVX512-NEXT: # kill: def $al killed $al def $eax
|
||||
; AVX512-NEXT: seto %cl
|
||||
; AVX512-NEXT: testb $-2, %al
|
||||
; AVX512-NEXT: setne %dl
|
||||
; AVX512-NEXT: orb %cl, %dl
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: orb %cl, %bl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: kmovd %ecx, %k2
|
||||
; AVX512-NEXT: kshiftlw $3, %k2, %k2
|
||||
|
@ -2006,21 +2034,34 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
|||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: kmovd %r8d, %k1
|
||||
; AVX512-NEXT: kmovd %esi, %k2
|
||||
; AVX512-NEXT: kxorw %k0, %k2, %k0
|
||||
; AVX512-NEXT: kshiftrw $2, %k0, %k2
|
||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512-NEXT: kmovd %edx, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $14, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: kshiftrw $3, %k0, %k1
|
||||
; AVX512-NEXT: kmovd %eax, %k2
|
||||
; AVX512-NEXT: kxorw %k2, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $3, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $14, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %esi, %k2
|
||||
; AVX512-NEXT: kshiftlw $15, %k2, %k2
|
||||
; AVX512-NEXT: kshiftrw $13, %k2, %k2
|
||||
; AVX512-NEXT: korw %k2, %k1, %k1
|
||||
; AVX512-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512-NEXT: kshiftlw $4, %k1, %k1
|
||||
; AVX512-NEXT: kshiftlw $13, %k0, %k0
|
||||
; AVX512-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512-NEXT: kshiftrw $12, %k1, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: kmovd %k0, %eax
|
||||
; AVX512-NEXT: movb %al, (%rdi)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
|
||||
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0
|
||||
|
|
Loading…
Reference in New Issue