[DAGCombine] Fold (x & ~y) | y patterns

Fold (x & ~y) | y and its four commuted variants to x | y. This pattern
can in particular appear when a vselect c, x, -1 is expanded to
(x & ~c) | (-1 & c) and combined to (x & ~c) | c.

This change has some overlap with D59066, which avoids creating a
vselect of this form in the first place during uaddsat expansion.

Differential Revision: https://reviews.llvm.org/D59174

llvm-svn: 356333
This commit is contained in:
Nikita Popov 2019-03-17 15:45:38 +00:00
parent 6a6e808b69
commit 9a4453592b
6 changed files with 56 additions and 64 deletions

View File

@ -5279,6 +5279,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
/// Folds on an OR node that are only written for one operand order; the
/// commuted order is covered by calling this again with N0 and N1 swapped.
///
/// \param DAG the DAG used to build the replacement node.
/// \param N0  the OR operand inspected for an AND of the form (X & ~Y).
/// \param N1  the other OR operand, matched against Y.
/// \param N   the OR node being combined; supplies the debug location.
/// \returns the simplified node, or an empty SDValue if nothing matched.
static SDValue visitORCommutative(
    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
  // Every fold below needs an AND on the N0 side.
  if (N0.getOpcode() != ISD::AND)
    return SDValue();

  EVT VT = N0.getValueType();
  SDValue AndLHS = N0.getOperand(0);
  SDValue AndRHS = N0.getOperand(1);

  // (or (and X, (xor Y, -1)), Y) --> (or X, Y)
  // The bits the AND masks off are exactly the bits Y sets again.
  if (isBitwiseNot(AndRHS) && AndRHS.getOperand(0) == N1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, AndLHS, N1);

  // (or (and (xor Y, -1), X), Y) --> (or X, Y)
  // Same fold with the operands of the AND swapped.
  if (isBitwiseNot(AndLHS) && AndLHS.getOperand(0) == N1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, AndRHS, N1);

  return SDValue();
}
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -5426,6 +5443,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
return Combined;
if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
return Combined;
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))

View File

@ -454,8 +454,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; CHECK-NEXT: movi v1.4s, #42
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, %a
@ -470,8 +469,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; CHECK-NEXT: mvni v2.4s, #42
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
@ -503,8 +501,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, %a
@ -521,8 +518,7 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; CHECK-NEXT: dup v2.2d, x9
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
@ -637,8 +633,7 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, %y
%c = icmp ugt <4 x i32> %x, %a
@ -652,8 +647,7 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%a = add <4 x i32> %x, %y
@ -682,8 +676,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, %y
%c = icmp ugt <2 x i64> %x, %a
@ -697,8 +690,7 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%noty = xor <2 x i64> %y, <i64 -1, i64 -1>
%a = add <2 x i64> %x, %y

View File

@ -359,8 +359,7 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: out_constant_varx_mone_invmask:
; CHECK: // %bb.0:
; CHECK-NEXT: bic w8, w0, w2
; CHECK-NEXT: orr w0, w8, w2
; CHECK-NEXT: orr w0, w0, w2
; CHECK-NEXT: ret
%notmask = xor i32 %mask, -1
%mx = and i32 %notmask, %x
@ -442,8 +441,7 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: out_constant_mone_vary:
; CHECK: // %bb.0:
; CHECK-NEXT: bic w8, w1, w2
; CHECK-NEXT: orr w0, w2, w8
; CHECK-NEXT: orr w0, w1, w2
; CHECK-NEXT: ret
%notmask = xor i32 %mask, -1
%mx = and i32 %mask, -1

View File

@ -34,7 +34,6 @@ define <4 x i32> @in_constant_varx_mone(<4 x i32> %x, <4 x i32> %y, <4 x i32> %m
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: out_constant_varx_mone_invmask:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
@ -119,8 +118,7 @@ define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i
define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: out_constant_mone_vary:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
%mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
@ -132,8 +130,7 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: in_constant_mone_vary:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
%n1 = and <4 x i32> %n0, %mask

View File

@ -587,15 +587,13 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_varx_mone_invmask:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: andl %edi, %eax
; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_varx_mone_invmask:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
; CHECK-BMI-NEXT: movl %edi, %eax
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
@ -722,15 +720,13 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_mone_vary:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: andl %esi, %eax
; CHECK-NOBMI-NEXT: movl %esi, %eax
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_mone_vary:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: movl %esi, %eax
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1

View File

@ -85,26 +85,21 @@ define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py,
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16
@ -311,26 +306,21 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i3
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16
@ -355,17 +345,14 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16