[SDAG] allow vector types for select->logic folds

This prepares codegen for a change that will remove the identical
folds from IR because they are not poison-safe. See
D93065 / D97360
for details.

We already generically support scalar types, and there are various
target-specific transforms that overlap the vector folds. For example,
x86 recognizes the 'and' patterns, but not 'or'. We can end up with 1
extra instruction there, but I think that is still preferred over the
blendv alternative that loads a constant vector.

If this is not optimal, then it should be fixed with a later transform
(this change is not expected to result in any regressions because
InstCombine currently does the same thing).

Removing custom code and supporting undefs in constant-pattern-matching
can be follow-up changes.

Differential Revision: https://reviews.llvm.org/D97730
This commit is contained in:
Sanjay Patel 2021-03-02 08:49:22 -05:00
parent 4096ae06f4
commit 7fce3322a2
3 changed files with 20 additions and 22 deletions

View File

@ -9302,31 +9302,32 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
} }
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SELECT && "Expected a select"); assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
"Expected a (v)select");
SDValue Cond = N->getOperand(0); SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2); SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0); EVT VT = N->getValueType(0);
if (VT != Cond.getValueType() || VT != MVT::i1) if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue(); return SDValue();
// select Cond, Cond, F --> or Cond, F // select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F // select Cond, 1, F --> or Cond, F
if (Cond == T || isOneConstant(T)) if (Cond == T || isOneOrOneSplat(T))
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F); return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
// select Cond, T, Cond --> and Cond, T // select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T // select Cond, T, 0 --> and Cond, T
if (Cond == F || isNullConstant(F)) if (Cond == F || isNullOrNullSplat(F))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T); return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
// select Cond, T, 1 --> or (not Cond), T // select Cond, T, 1 --> or (not Cond), T
if (isOneConstant(F)) { if (isOneOrOneSplat(F)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T); return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
} }
// select Cond, 0, F --> and (not Cond), F // select Cond, 0, F --> and (not Cond), F
if (isNullConstant(T)) { if (isNullOrNullSplat(T)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
} }
@ -9788,6 +9789,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2)) if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V; return V;
if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1); return DAG.getSelect(DL, VT, F, N2, N1);

View File

@ -66,7 +66,7 @@ define <4 x i1> @and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
@ -80,10 +80,8 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: movi v2.4h, #1
; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w
@ -96,9 +94,8 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w
@ -111,12 +108,8 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
; CHECK-NEXT: movi v2.4h, #1 ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v3.4h, v0.4s
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w

View File

@ -233,7 +233,7 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 ; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w
@ -244,9 +244,9 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
; CHECK-LABEL: and_not_vec: ; CHECK-LABEL: and_not_vec:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w
@ -258,9 +258,10 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK-LABEL: or_not_vec: ; CHECK-LABEL: or_not_vec:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 ; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y %a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w %b = icmp sgt <4 x i32> %z, %w