forked from OSchip/llvm-project
[DAG] SimplifyDemandedBits - fold AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
If all the demanded bits of the AND mask covering the inserted subvector 'X' are known to be one, then the mask isn't affecting the subvector at all. In which case, if the base vector 'C' is undef/constant, then move the AND mask up to just (constant) fold it directly. Addresses some of the regressions from D129150, particularly the cases where we're attempting to zero the upper elements of a widened vector. Differential Revision: https://reviews.llvm.org/D129290
This commit is contained in:
parent
fca79b78c4
commit
b53046122f
|
@@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||
}
|
||||
}
|
||||
|
||||
// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
|
||||
// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
|
||||
if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
(Op0.getOperand(0).isUndef() ||
|
||||
ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
|
||||
Op0->hasOneUse()) {
|
||||
unsigned NumSubElts =
|
||||
Op0.getOperand(1).getValueType().getVectorNumElements();
|
||||
unsigned SubIdx = Op0.getConstantOperandVal(2);
|
||||
APInt DemandedSub =
|
||||
APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
|
||||
KnownBits KnownSubMask =
|
||||
TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
|
||||
if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
|
||||
SDValue NewAnd =
|
||||
TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
|
||||
SDValue NewInsert =
|
||||
TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
|
||||
Op0.getOperand(1), Op0.getOperand(2));
|
||||
return TLO.CombineTo(Op, NewInsert);
|
||||
}
|
||||
}
|
||||
|
||||
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
|
||||
Depth + 1))
|
||||
return true;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@@ -1130,11 +1130,10 @@ define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-ve
|
|||
; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0
|
||||
; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k0
|
||||
; CHECK-NEXT: kshiftrd $1, %k0, %k1
|
||||
; CHECK-NEXT: movq $-3, %rax
|
||||
; CHECK-NEXT: kmovq %rax, %k2
|
||||
; CHECK-NEXT: kandq %k2, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlq $63, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrq $62, %k2, %k2
|
||||
; CHECK-NEXT: kshiftlq $63, %k1, %k1
|
||||
; CHECK-NEXT: kshiftrq $63, %k1, %k1
|
||||
; CHECK-NEXT: korq %k2, %k1, %k1
|
||||
; CHECK-NEXT: movq $-5, %rax
|
||||
; CHECK-NEXT: kmovq %rax, %k2
|
||||
|
|
|
@@ -709,15 +709,14 @@ define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
|
|||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: movb $-3, %al
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: vucomish 8(%ebp), %xmm2
|
||||
; X86-NEXT: setnp %al
|
||||
; X86-NEXT: sete %cl
|
||||
; X86-NEXT: testb %al, %cl
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: kandb %k0, %k1, %k0
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X86-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X86-NEXT: vpsrld $16, %xmm2, %xmm2
|
||||
; X86-NEXT: vucomish 10(%ebp), %xmm2
|
||||
; X86-NEXT: setnp %al
|
||||
|
@@ -741,9 +740,8 @@ define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
|
|||
; X64-NEXT: testb %al, %cl
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: movb $-3, %al
|
||||
; X64-NEXT: kmovd %eax, %k1
|
||||
; X64-NEXT: kandb %k1, %k0, %k0
|
||||
; X64-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X64-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X64-NEXT: vpsrld $16, %xmm3, %xmm3
|
||||
; X64-NEXT: vpsrld $16, %xmm2, %xmm2
|
||||
; X64-NEXT: vucomish %xmm3, %xmm2
|
||||
|
@@ -771,12 +769,11 @@ define <2 x i16> @test_v2f16_ogt_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
|
|||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: movb $-3, %al
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: vcomish 8(%ebp), %xmm2
|
||||
; X86-NEXT: seta %al
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: kandb %k0, %k1, %k0
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X86-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X86-NEXT: vpsrld $16, %xmm2, %xmm2
|
||||
; X86-NEXT: vcomish 10(%ebp), %xmm2
|
||||
; X86-NEXT: seta %al
|
||||
|
@@ -791,12 +788,11 @@ define <2 x i16> @test_v2f16_ogt_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
|
|||
;
|
||||
; X64-LABEL: test_v2f16_ogt_q:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movb $-3, %al
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: vcomish %xmm3, %xmm2
|
||||
; X64-NEXT: seta %al
|
||||
; X64-NEXT: kmovd %eax, %k1
|
||||
; X64-NEXT: kandb %k0, %k1, %k0
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X64-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X64-NEXT: vpsrld $16, %xmm3, %xmm3
|
||||
; X64-NEXT: vpsrld $16, %xmm2, %xmm2
|
||||
; X64-NEXT: vcomish %xmm3, %xmm2
|
||||
|
@@ -821,12 +817,11 @@ define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
|
|||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: movb $-3, %al
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: vucomish 8(%ebp), %xmm2
|
||||
; X86-NEXT: setae %al
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: kandb %k0, %k1, %k0
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X86-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X86-NEXT: vpsrld $16, %xmm2, %xmm3
|
||||
; X86-NEXT: vucomish 10(%ebp), %xmm3
|
||||
; X86-NEXT: setae %al
|
||||
|
@@ -861,12 +856,11 @@ define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
|
|||
;
|
||||
; X64-LABEL: test_v4f16_oge_q:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movb $-3, %al
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: vucomish %xmm3, %xmm2
|
||||
; X64-NEXT: setae %al
|
||||
; X64-NEXT: kmovd %eax, %k1
|
||||
; X64-NEXT: kandb %k0, %k1, %k0
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X64-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X64-NEXT: vpsrld $16, %xmm3, %xmm4
|
||||
; X64-NEXT: vpsrld $16, %xmm2, %xmm5
|
||||
; X64-NEXT: vucomish %xmm4, %xmm5
|
||||
|
@@ -913,13 +907,12 @@ define <4 x i16> @test_v4f16_olt_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
|
|||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: movb $-3, %al
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: vmovsh 8(%ebp), %xmm3
|
||||
; X86-NEXT: vcomish %xmm2, %xmm3
|
||||
; X86-NEXT: seta %al
|
||||
; X86-NEXT: kmovd %eax, %k1
|
||||
; X86-NEXT: kandb %k0, %k1, %k0
|
||||
; X86-NEXT: kmovd %eax, %k0
|
||||
; X86-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X86-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X86-NEXT: vpsrld $16, %xmm2, %xmm3
|
||||
; X86-NEXT: vmovsh 10(%ebp), %xmm4
|
||||
; X86-NEXT: vcomish %xmm3, %xmm4
|
||||
|
@@ -957,12 +950,11 @@ define <4 x i16> @test_v4f16_olt_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
|
|||
;
|
||||
; X64-LABEL: test_v4f16_olt_q:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movb $-3, %al
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: vcomish %xmm2, %xmm3
|
||||
; X64-NEXT: seta %al
|
||||
; X64-NEXT: kmovd %eax, %k1
|
||||
; X64-NEXT: kandb %k0, %k1, %k0
|
||||
; X64-NEXT: kmovd %eax, %k0
|
||||
; X64-NEXT: kshiftlb $7, %k0, %k0
|
||||
; X64-NEXT: kshiftrb $7, %k0, %k0
|
||||
; X64-NEXT: vpsrld $16, %xmm2, %xmm4
|
||||
; X64-NEXT: vpsrld $16, %xmm3, %xmm5
|
||||
; X64-NEXT: vcomish %xmm4, %xmm5
|
||||
|
|
|
@@ -97,12 +97,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512-32-NEXT: movl %esp, %ebp
|
||||
; AVX512-32-NEXT: andl $-16, %esp
|
||||
; AVX512-32-NEXT: subl $16, %esp
|
||||
; AVX512-32-NEXT: movw $-3, %ax
|
||||
; AVX512-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512-32-NEXT: vcomiss 8(%ebp), %xmm2
|
||||
; AVX512-32-NEXT: seta %al
|
||||
; AVX512-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512-32-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512-32-NEXT: andl $1, %eax
|
||||
; AVX512-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512-32-NEXT: vcomiss 12(%ebp), %xmm2
|
||||
; AVX512-32-NEXT: seta %al
|
||||
|
@@ -117,12 +115,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
;
|
||||
; AVX512-64-LABEL: test_v2f32_ogt_s:
|
||||
; AVX512-64: # %bb.0:
|
||||
; AVX512-64-NEXT: movw $-3, %ax
|
||||
; AVX512-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
|
||||
; AVX512-64-NEXT: seta %al
|
||||
; AVX512-64-NEXT: kmovw %eax, %k1
|
||||
; AVX512-64-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512-64-NEXT: andl $1, %eax
|
||||
; AVX512-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
||||
; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
|
||||
|
@@ -142,12 +138,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512F-32-NEXT: subl $16, %esp
|
||||
; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-32-NEXT: movw $-3, %ax
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-32-NEXT: vcomiss 8(%ebp), %xmm2
|
||||
; AVX512F-32-NEXT: seta %al
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-32-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512F-32-NEXT: andl $1, %eax
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512F-32-NEXT: vcomiss 12(%ebp), %xmm2
|
||||
; AVX512F-32-NEXT: seta %al
|
||||
|
@@ -166,12 +160,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512F-64: # %bb.0:
|
||||
; AVX512F-64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-64-NEXT: movw $-3, %ax
|
||||
; AVX512F-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2
|
||||
; AVX512F-64-NEXT: seta %al
|
||||
; AVX512F-64-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-64-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512F-64-NEXT: andl $1, %eax
|
||||
; AVX512F-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
||||
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2
|
||||
|
@@ -288,15 +280,13 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512-32-NEXT: movl %esp, %ebp
|
||||
; AVX512-32-NEXT: andl $-16, %esp
|
||||
; AVX512-32-NEXT: subl $16, %esp
|
||||
; AVX512-32-NEXT: movw $-3, %ax
|
||||
; AVX512-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512-32-NEXT: vucomiss 8(%ebp), %xmm2
|
||||
; AVX512-32-NEXT: setnp %al
|
||||
; AVX512-32-NEXT: sete %cl
|
||||
; AVX512-32-NEXT: testb %al, %cl
|
||||
; AVX512-32-NEXT: setne %al
|
||||
; AVX512-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512-32-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512-32-NEXT: andl $1, %eax
|
||||
; AVX512-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512-32-NEXT: vucomiss 12(%ebp), %xmm2
|
||||
; AVX512-32-NEXT: setnp %al
|
||||
|
@@ -319,10 +309,8 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512-64-NEXT: sete %cl
|
||||
; AVX512-64-NEXT: testb %al, %cl
|
||||
; AVX512-64-NEXT: setne %al
|
||||
; AVX512-64-NEXT: andl $1, %eax
|
||||
; AVX512-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512-64-NEXT: movw $-3, %ax
|
||||
; AVX512-64-NEXT: kmovw %eax, %k1
|
||||
; AVX512-64-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
||||
; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512-64-NEXT: vucomiss %xmm3, %xmm2
|
||||
|
@@ -345,15 +333,13 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512F-32-NEXT: subl $16, %esp
|
||||
; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-32-NEXT: movw $-3, %ax
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-32-NEXT: vucomiss 8(%ebp), %xmm2
|
||||
; AVX512F-32-NEXT: setnp %al
|
||||
; AVX512F-32-NEXT: sete %cl
|
||||
; AVX512F-32-NEXT: testb %al, %cl
|
||||
; AVX512F-32-NEXT: setne %al
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-32-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512F-32-NEXT: andl $1, %eax
|
||||
; AVX512F-32-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512F-32-NEXT: vucomiss 12(%ebp), %xmm2
|
||||
; AVX512F-32-NEXT: setnp %al
|
||||
|
@@ -380,10 +366,8 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
|
|||
; AVX512F-64-NEXT: sete %cl
|
||||
; AVX512F-64-NEXT: testb %al, %cl
|
||||
; AVX512F-64-NEXT: setne %al
|
||||
; AVX512F-64-NEXT: andl $1, %eax
|
||||
; AVX512F-64-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-64-NEXT: movw $-3, %ax
|
||||
; AVX512F-64-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-64-NEXT: kandw %k1, %k0, %k0
|
||||
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
||||
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2
|
||||
|
|
|
@@ -2926,10 +2926,8 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
|
|||
; AVX512F-NEXT: vmovq %rcx, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: seto %al
|
||||
; AVX512F-NEXT: movw $-3, %cx
|
||||
; AVX512F-NEXT: kmovw %ecx, %k0
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512F-NEXT: andl $1, %eax
|
||||
; AVX512F-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-NEXT: kmovw %edx, %k1
|
||||
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftrw $14, %k1, %k1
|
||||
|
@@ -2952,10 +2950,8 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
|
|||
; AVX512BW-NEXT: vmovq %rcx, %xmm1
|
||||
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512BW-NEXT: seto %al
|
||||
; AVX512BW-NEXT: movw $-3, %cx
|
||||
; AVX512BW-NEXT: kmovd %ecx, %k0
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: andl $1, %eax
|
||||
; AVX512BW-NEXT: kmovw %eax, %k0
|
||||
; AVX512BW-NEXT: kmovd %edx, %k1
|
||||
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512BW-NEXT: kshiftrw $14, %k1, %k1
|
||||
|
|
|
@@ -2604,10 +2604,8 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
|
|||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: seto %al
|
||||
; AVX512F-NEXT: movw $-3, %cx
|
||||
; AVX512F-NEXT: kmovw %ecx, %k0
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512F-NEXT: andl $1, %eax
|
||||
; AVX512F-NEXT: kmovw %eax, %k0
|
||||
; AVX512F-NEXT: kmovw %r8d, %k1
|
||||
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512F-NEXT: kshiftrw $14, %k1, %k1
|
||||
|
@@ -2631,10 +2629,8 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
|
|||
; AVX512BW-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX512BW-NEXT: seto %al
|
||||
; AVX512BW-NEXT: movw $-3, %cx
|
||||
; AVX512BW-NEXT: kmovd %ecx, %k0
|
||||
; AVX512BW-NEXT: kmovd %eax, %k1
|
||||
; AVX512BW-NEXT: kandw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: andl $1, %eax
|
||||
; AVX512BW-NEXT: kmovw %eax, %k0
|
||||
; AVX512BW-NEXT: kmovd %r8d, %k1
|
||||
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
|
||||
; AVX512BW-NEXT: kshiftrw $14, %k1, %k1
|
||||
|
|
Loading…
Reference in New Issue