[DAG] SimplifyDemandedBits - fold AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)

If all the demanded bits of the AND mask covering the inserted subvector 'X' are known to be one, then the mask doesn't affect the subvector at all.

In that case, if the base vector 'C' is undef/constant, the AND can be moved above the insertion, where it simply (constant) folds away.
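
To illustrate, here is a minimal scalar-lane model of the fold in plain C++ (a standalone sketch, not LLVM code; the lane values, mask and insert index below are made up):

// AND(INSERT_SUBVECTOR(C,X,I),M) == INSERT_SUBVECTOR(AND(C,M),X,I)
// whenever the mask M is all-ones across the lanes occupied by X.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint8_t, 8> C = {1, 2, 3, 4, 5, 6, 7, 8}; // constant base vector
  std::array<uint8_t, 2> X = {0xAA, 0xBB};             // inserted subvector
  const unsigned I = 4;                                // insert index
  // Mask M: all-ones over lanes I..I+1 (X's lanes), arbitrary elsewhere.
  std::array<uint8_t, 8> M = {0x0F, 0xF0, 0x00, 0x3C, 0xFF, 0xFF, 0x81, 0x18};

  // LHS: insert first, then AND - the runtime mask touches every lane.
  std::array<uint8_t, 8> LHS = C;
  LHS[I] = X[0];
  LHS[I + 1] = X[1];
  for (unsigned i = 0; i != 8; ++i)
    LHS[i] &= M[i];

  // RHS: AND the constant base first (which constant folds), then insert.
  std::array<uint8_t, 8> RHS;
  for (unsigned i = 0; i != 8; ++i)
    RHS[i] = C[i] & M[i];
  RHS[I] = X[0];
  RHS[I + 1] = X[1];

  assert(LHS == RHS); // equal because M is all-ones over X's lanes
  return 0;
}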

Addresses some of the regressions from D129150, particularly the cases where we're attempting to zero the upper elements of a widened vector.
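
In scalar form, the widening pattern looks roughly like the sketch below (plain C++; the 16-lane width, 2-lane predicate and bit values are illustrative, not taken from the tests). Before the fold, zeroing the upper lanes needs a runtime AND against a materialized mask constant; after it, the AND folds into the undef/constant base of the insertion:

#include <cassert>
#include <cstdint>

// Before: AND(INSERT_SUBVECTOR(undef, pred, 0), 0x0003) - a runtime AND
// clears lanes 2..15 of the widened predicate.
uint16_t widenBefore(uint16_t undefBase, uint16_t pred) {
  uint16_t ins = (undefBase & 0xFFFC) | pred; // insert 2-lane pred at index 0
  return ins & 0x0003;                        // runtime mask of the uppers
}

// After: AND(undef, 0x0003) constant folds (undef chosen as zero), leaving
// INSERT_SUBVECTOR(0, pred, 0) with no runtime AND at all.
uint16_t widenAfter(uint16_t pred) {
  return uint16_t(0) | pred; // base already has the upper lanes cleared
}

int main() {
  uint16_t pred = 0x2; // an in-range 2-lane predicate (pred < 4)
  assert(widenBefore(0xDEAD, pred) == widenAfter(pred));
  return 0;
}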

Differential Revision: https://reviews.llvm.org/D129290
Simon Pilgrim 2022-07-08 16:08:22 +01:00
parent fca79b78c4
commit b53046122f
9 changed files with 2233 additions and 2281 deletions


@@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits(
       }
     }
 
+    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
+    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
+    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        (Op0.getOperand(0).isUndef() ||
+         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
+        Op0->hasOneUse()) {
+      unsigned NumSubElts =
+          Op0.getOperand(1).getValueType().getVectorNumElements();
+      unsigned SubIdx = Op0.getConstantOperandVal(2);
+      APInt DemandedSub =
+          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
+      KnownBits KnownSubMask =
+          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
+      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
+        SDValue NewAnd =
+            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
+        SDValue NewInsert =
+            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
+                            Op0.getOperand(1), Op0.getOperand(2));
+        return TLO.CombineTo(Op, NewInsert);
+      }
+    }
+
     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                              Depth + 1))
       return true;
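
For readers unfamiliar with the KnownBits API, the guarding check DemandedBits.isSubsetOf(KnownSubMask.One) is what guarantees AND(X,M) == X for the demanded bits. A scalar sketch of the same test, with plain integers standing in for APInt and a made-up function name:

#include <cassert>
#include <cstdint>

// The fold may only fire when every demanded bit is known to be one in the
// mask lanes covering the inserted subvector, i.e. the AND preserves X there.
bool maskPreservesSubvector(uint64_t demandedBits, uint64_t knownOneOfMask) {
  return (demandedBits & ~knownOneOfMask) == 0; // demanded subset of known-one
}

int main() {
  // Mask known all-ones in the low byte: any demand on those bits is fine.
  assert(maskPreservesSubvector(0x00FF, 0x00FF));
  // Demanding bit 8, which the mask may clear, blocks the fold.
  assert(!maskPreservesSubvector(0x01FF, 0x00FF));
  return 0;
}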

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1130,11 +1130,10 @@ define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-ve
 ; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0
 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k0
 ; CHECK-NEXT: kshiftrd $1, %k0, %k1
-; CHECK-NEXT: movq $-3, %rax
-; CHECK-NEXT: kmovq %rax, %k2
-; CHECK-NEXT: kandq %k2, %k1, %k1
 ; CHECK-NEXT: kshiftlq $63, %k0, %k2
 ; CHECK-NEXT: kshiftrq $62, %k2, %k2
+; CHECK-NEXT: kshiftlq $63, %k1, %k1
+; CHECK-NEXT: kshiftrq $63, %k1, %k1
 ; CHECK-NEXT: korq %k2, %k1, %k1
 ; CHECK-NEXT: movq $-5, %rax
 ; CHECK-NEXT: kmovq %rax, %k2


@@ -709,15 +709,14 @@ define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $16, %esp
-; X86-NEXT: movb $-3, %al
-; X86-NEXT: kmovd %eax, %k0
 ; X86-NEXT: vucomish 8(%ebp), %xmm2
 ; X86-NEXT: setnp %al
 ; X86-NEXT: sete %cl
 ; X86-NEXT: testb %al, %cl
 ; X86-NEXT: setne %al
-; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: kandb %k0, %k1, %k0
+; X86-NEXT: kmovd %eax, %k0
+; X86-NEXT: kshiftlb $7, %k0, %k0
+; X86-NEXT: kshiftrb $7, %k0, %k0
 ; X86-NEXT: vpsrld $16, %xmm2, %xmm2
 ; X86-NEXT: vucomish 10(%ebp), %xmm2
 ; X86-NEXT: setnp %al
@@ -741,9 +740,8 @@ define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
 ; X64-NEXT: testb %al, %cl
 ; X64-NEXT: setne %al
 ; X64-NEXT: kmovd %eax, %k0
-; X64-NEXT: movb $-3, %al
-; X64-NEXT: kmovd %eax, %k1
-; X64-NEXT: kandb %k1, %k0, %k0
+; X64-NEXT: kshiftlb $7, %k0, %k0
+; X64-NEXT: kshiftrb $7, %k0, %k0
 ; X64-NEXT: vpsrld $16, %xmm3, %xmm3
 ; X64-NEXT: vpsrld $16, %xmm2, %xmm2
 ; X64-NEXT: vucomish %xmm3, %xmm2
@@ -771,12 +769,11 @@ define <2 x i16> @test_v2f16_ogt_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $16, %esp
-; X86-NEXT: movb $-3, %al
-; X86-NEXT: kmovd %eax, %k0
 ; X86-NEXT: vcomish 8(%ebp), %xmm2
 ; X86-NEXT: seta %al
-; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: kandb %k0, %k1, %k0
+; X86-NEXT: kmovd %eax, %k0
+; X86-NEXT: kshiftlb $7, %k0, %k0
+; X86-NEXT: kshiftrb $7, %k0, %k0
 ; X86-NEXT: vpsrld $16, %xmm2, %xmm2
 ; X86-NEXT: vcomish 10(%ebp), %xmm2
 ; X86-NEXT: seta %al
@@ -791,12 +788,11 @@ define <2 x i16> @test_v2f16_ogt_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <
 ;
 ; X64-LABEL: test_v2f16_ogt_q:
 ; X64: # %bb.0:
-; X64-NEXT: movb $-3, %al
-; X64-NEXT: kmovd %eax, %k0
 ; X64-NEXT: vcomish %xmm3, %xmm2
 ; X64-NEXT: seta %al
-; X64-NEXT: kmovd %eax, %k1
-; X64-NEXT: kandb %k0, %k1, %k0
+; X64-NEXT: kmovd %eax, %k0
+; X64-NEXT: kshiftlb $7, %k0, %k0
+; X64-NEXT: kshiftrb $7, %k0, %k0
 ; X64-NEXT: vpsrld $16, %xmm3, %xmm3
 ; X64-NEXT: vpsrld $16, %xmm2, %xmm2
 ; X64-NEXT: vcomish %xmm3, %xmm2
@@ -821,12 +817,11 @@ define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $16, %esp
-; X86-NEXT: movb $-3, %al
-; X86-NEXT: kmovd %eax, %k0
 ; X86-NEXT: vucomish 8(%ebp), %xmm2
 ; X86-NEXT: setae %al
-; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: kandb %k0, %k1, %k0
+; X86-NEXT: kmovd %eax, %k0
+; X86-NEXT: kshiftlb $7, %k0, %k0
+; X86-NEXT: kshiftrb $7, %k0, %k0
 ; X86-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X86-NEXT: vucomish 10(%ebp), %xmm3
 ; X86-NEXT: setae %al
@@ -861,12 +856,11 @@ define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
 ;
 ; X64-LABEL: test_v4f16_oge_q:
 ; X64: # %bb.0:
-; X64-NEXT: movb $-3, %al
-; X64-NEXT: kmovd %eax, %k0
 ; X64-NEXT: vucomish %xmm3, %xmm2
 ; X64-NEXT: setae %al
-; X64-NEXT: kmovd %eax, %k1
-; X64-NEXT: kandb %k0, %k1, %k0
+; X64-NEXT: kmovd %eax, %k0
+; X64-NEXT: kshiftlb $7, %k0, %k0
+; X64-NEXT: kshiftrb $7, %k0, %k0
 ; X64-NEXT: vpsrld $16, %xmm3, %xmm4
 ; X64-NEXT: vpsrld $16, %xmm2, %xmm5
 ; X64-NEXT: vucomish %xmm4, %xmm5
@@ -913,13 +907,12 @@ define <4 x i16> @test_v4f16_olt_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $16, %esp
-; X86-NEXT: movb $-3, %al
-; X86-NEXT: kmovd %eax, %k0
 ; X86-NEXT: vmovsh 8(%ebp), %xmm3
 ; X86-NEXT: vcomish %xmm2, %xmm3
 ; X86-NEXT: seta %al
-; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: kandb %k0, %k1, %k0
+; X86-NEXT: kmovd %eax, %k0
+; X86-NEXT: kshiftlb $7, %k0, %k0
+; X86-NEXT: kshiftrb $7, %k0, %k0
 ; X86-NEXT: vpsrld $16, %xmm2, %xmm3
 ; X86-NEXT: vmovsh 10(%ebp), %xmm4
 ; X86-NEXT: vcomish %xmm3, %xmm4
@@ -957,12 +950,11 @@ define <4 x i16> @test_v4f16_olt_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <
 ;
 ; X64-LABEL: test_v4f16_olt_q:
 ; X64: # %bb.0:
-; X64-NEXT: movb $-3, %al
-; X64-NEXT: kmovd %eax, %k0
 ; X64-NEXT: vcomish %xmm2, %xmm3
 ; X64-NEXT: seta %al
-; X64-NEXT: kmovd %eax, %k1
-; X64-NEXT: kandb %k0, %k1, %k0
+; X64-NEXT: kmovd %eax, %k0
+; X64-NEXT: kshiftlb $7, %k0, %k0
+; X64-NEXT: kshiftrb $7, %k0, %k0
 ; X64-NEXT: vpsrld $16, %xmm2, %xmm4
 ; X64-NEXT: vpsrld $16, %xmm3, %xmm5
 ; X64-NEXT: vcomish %xmm4, %xmm5


@@ -97,12 +97,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512-32-NEXT: movl %esp, %ebp
 ; AVX512-32-NEXT: andl $-16, %esp
 ; AVX512-32-NEXT: subl $16, %esp
-; AVX512-32-NEXT: movw $-3, %ax
-; AVX512-32-NEXT: kmovw %eax, %k0
 ; AVX512-32-NEXT: vcomiss 8(%ebp), %xmm2
 ; AVX512-32-NEXT: seta %al
-; AVX512-32-NEXT: kmovw %eax, %k1
-; AVX512-32-NEXT: kandw %k0, %k1, %k0
+; AVX512-32-NEXT: andl $1, %eax
+; AVX512-32-NEXT: kmovw %eax, %k0
 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512-32-NEXT: vcomiss 12(%ebp), %xmm2
 ; AVX512-32-NEXT: seta %al
@@ -117,12 +115,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ;
 ; AVX512-64-LABEL: test_v2f32_ogt_s:
 ; AVX512-64: # %bb.0:
-; AVX512-64-NEXT: movw $-3, %ax
-; AVX512-64-NEXT: kmovw %eax, %k0
 ; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
 ; AVX512-64-NEXT: seta %al
-; AVX512-64-NEXT: kmovw %eax, %k1
-; AVX512-64-NEXT: kandw %k0, %k1, %k0
+; AVX512-64-NEXT: andl $1, %eax
+; AVX512-64-NEXT: kmovw %eax, %k0
 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512-64-NEXT: vcomiss %xmm3, %xmm2
@@ -142,12 +138,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512F-32-NEXT: subl $16, %esp
 ; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT: movw $-3, %ax
-; AVX512F-32-NEXT: kmovw %eax, %k0
 ; AVX512F-32-NEXT: vcomiss 8(%ebp), %xmm2
 ; AVX512F-32-NEXT: seta %al
-; AVX512F-32-NEXT: kmovw %eax, %k1
-; AVX512F-32-NEXT: kandw %k0, %k1, %k0
+; AVX512F-32-NEXT: andl $1, %eax
+; AVX512F-32-NEXT: kmovw %eax, %k0
 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512F-32-NEXT: vcomiss 12(%ebp), %xmm2
 ; AVX512F-32-NEXT: seta %al
@@ -166,12 +160,10 @@ define <2 x i32> @test_v2f32_ogt_s(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512F-64: # %bb.0:
 ; AVX512F-64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT: movw $-3, %ax
-; AVX512F-64-NEXT: kmovw %eax, %k0
 ; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2
 ; AVX512F-64-NEXT: seta %al
-; AVX512F-64-NEXT: kmovw %eax, %k1
-; AVX512F-64-NEXT: kandw %k0, %k1, %k0
+; AVX512F-64-NEXT: andl $1, %eax
+; AVX512F-64-NEXT: kmovw %eax, %k0
 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512F-64-NEXT: vcomiss %xmm3, %xmm2
@@ -288,15 +280,13 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512-32-NEXT: movl %esp, %ebp
 ; AVX512-32-NEXT: andl $-16, %esp
 ; AVX512-32-NEXT: subl $16, %esp
-; AVX512-32-NEXT: movw $-3, %ax
-; AVX512-32-NEXT: kmovw %eax, %k0
 ; AVX512-32-NEXT: vucomiss 8(%ebp), %xmm2
 ; AVX512-32-NEXT: setnp %al
 ; AVX512-32-NEXT: sete %cl
 ; AVX512-32-NEXT: testb %al, %cl
 ; AVX512-32-NEXT: setne %al
-; AVX512-32-NEXT: kmovw %eax, %k1
-; AVX512-32-NEXT: kandw %k0, %k1, %k0
+; AVX512-32-NEXT: andl $1, %eax
+; AVX512-32-NEXT: kmovw %eax, %k0
 ; AVX512-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512-32-NEXT: vucomiss 12(%ebp), %xmm2
 ; AVX512-32-NEXT: setnp %al
@@ -319,10 +309,8 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512-64-NEXT: sete %cl
 ; AVX512-64-NEXT: testb %al, %cl
 ; AVX512-64-NEXT: setne %al
+; AVX512-64-NEXT: andl $1, %eax
 ; AVX512-64-NEXT: kmovw %eax, %k0
-; AVX512-64-NEXT: movw $-3, %ax
-; AVX512-64-NEXT: kmovw %eax, %k1
-; AVX512-64-NEXT: kandw %k1, %k0, %k0
 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
 ; AVX512-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512-64-NEXT: vucomiss %xmm3, %xmm2
@@ -345,15 +333,13 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512F-32-NEXT: subl $16, %esp
 ; AVX512F-32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-32-NEXT: movw $-3, %ax
-; AVX512F-32-NEXT: kmovw %eax, %k0
 ; AVX512F-32-NEXT: vucomiss 8(%ebp), %xmm2
 ; AVX512F-32-NEXT: setnp %al
 ; AVX512F-32-NEXT: sete %cl
 ; AVX512F-32-NEXT: testb %al, %cl
 ; AVX512F-32-NEXT: setne %al
-; AVX512F-32-NEXT: kmovw %eax, %k1
-; AVX512F-32-NEXT: kandw %k0, %k1, %k0
+; AVX512F-32-NEXT: andl $1, %eax
+; AVX512F-32-NEXT: kmovw %eax, %k0
 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512F-32-NEXT: vucomiss 12(%ebp), %xmm2
 ; AVX512F-32-NEXT: setnp %al
@@ -380,10 +366,8 @@ define <2 x i32> @test_v2f32_oeq_q(<2 x i32> %a, <2 x i32> %b, <2 x float> %f1,
 ; AVX512F-64-NEXT: sete %cl
 ; AVX512F-64-NEXT: testb %al, %cl
 ; AVX512F-64-NEXT: setne %al
+; AVX512F-64-NEXT: andl $1, %eax
 ; AVX512F-64-NEXT: kmovw %eax, %k0
-; AVX512F-64-NEXT: movw $-3, %ax
-; AVX512F-64-NEXT: kmovw %eax, %k1
-; AVX512F-64-NEXT: kandw %k1, %k0, %k0
 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512F-64-NEXT: vucomiss %xmm3, %xmm2


@@ -2926,10 +2926,8 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
 ; AVX512F-NEXT: vmovq %rcx, %xmm1
 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT: seto %al
-; AVX512F-NEXT: movw $-3, %cx
-; AVX512F-NEXT: kmovw %ecx, %k0
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: kandw %k0, %k1, %k0
+; AVX512F-NEXT: andl $1, %eax
+; AVX512F-NEXT: kmovw %eax, %k0
 ; AVX512F-NEXT: kmovw %edx, %k1
 ; AVX512F-NEXT: kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT: kshiftrw $14, %k1, %k1
@@ -2952,10 +2950,8 @@ define <2 x i32> @smulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
 ; AVX512BW-NEXT: vmovq %rcx, %xmm1
 ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; AVX512BW-NEXT: seto %al
-; AVX512BW-NEXT: movw $-3, %cx
-; AVX512BW-NEXT: kmovd %ecx, %k0
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kandw %k0, %k1, %k0
+; AVX512BW-NEXT: andl $1, %eax
+; AVX512BW-NEXT: kmovw %eax, %k0
 ; AVX512BW-NEXT: kmovd %edx, %k1
 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
 ; AVX512BW-NEXT: kshiftrw $14, %k1, %k1


@@ -2604,10 +2604,8 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
 ; AVX512F-NEXT: vmovq %rax, %xmm1
 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; AVX512F-NEXT: seto %al
-; AVX512F-NEXT: movw $-3, %cx
-; AVX512F-NEXT: kmovw %ecx, %k0
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: kandw %k0, %k1, %k0
+; AVX512F-NEXT: andl $1, %eax
+; AVX512F-NEXT: kmovw %eax, %k0
 ; AVX512F-NEXT: kmovw %r8d, %k1
 ; AVX512F-NEXT: kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT: kshiftrw $14, %k1, %k1
@@ -2631,10 +2629,8 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) nounwind {
 ; AVX512BW-NEXT: vmovq %rax, %xmm1
 ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; AVX512BW-NEXT: seto %al
-; AVX512BW-NEXT: movw $-3, %cx
-; AVX512BW-NEXT: kmovd %ecx, %k0
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kandw %k0, %k1, %k0
+; AVX512BW-NEXT: andl $1, %eax
+; AVX512BW-NEXT: kmovw %eax, %k0
 ; AVX512BW-NEXT: kmovd %r8d, %k1
 ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
 ; AVX512BW-NEXT: kshiftrw $14, %k1, %k1