forked from OSchip/llvm-project
[AArch64][NEON] Match (or (and -a b) (and (a-1) c)) => bit select
With this patch vbslq_f32(vnegq_s32(a), b, c) lowers to a BIT instruction. Co-authored-by: Paul Walker <paul.walker@arm.com> Differential Revision: https://reviews.llvm.org/D100304
This commit is contained in:
parent
ecf93a716c
commit
22c017f0f9
|
@ -12582,6 +12582,44 @@ static SDValue tryCombineToBSL(SDNode *N,
|
|||
if (N1.getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
|
||||
// InstCombine does (not (neg a)) => (add a -1).
|
||||
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
|
||||
// Loop over all combinations of AND operands.
|
||||
for (int i = 1; i >= 0; --i) {
|
||||
for (int j = 1; j >= 0; --j) {
|
||||
SDValue O0 = N0->getOperand(i);
|
||||
SDValue O1 = N1->getOperand(j);
|
||||
SDValue Sub, Add, SubSibling, AddSibling;
|
||||
|
||||
// Find a SUB and an ADD operand, one from each AND.
|
||||
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
|
||||
Sub = O0;
|
||||
Add = O1;
|
||||
SubSibling = N0->getOperand(1 - i);
|
||||
AddSibling = N1->getOperand(1 - j);
|
||||
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
|
||||
Add = O0;
|
||||
Sub = O1;
|
||||
AddSibling = N0->getOperand(1 - i);
|
||||
SubSibling = N1->getOperand(1 - j);
|
||||
} else
|
||||
continue;
|
||||
|
||||
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
|
||||
continue;
|
||||
|
||||
// The all-ones constant is always the right-hand operand of the Add.
|
||||
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
|
||||
continue;
|
||||
|
||||
if (Sub.getOperand(1) != Add.getOperand(0))
|
||||
continue;
|
||||
|
||||
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
|
||||
}
|
||||
}
|
||||
|
||||
// (or (and a b) (and (not a) c)) => (bsl a b c)
|
||||
// We only have to look for constant vectors here since the general, variable
|
||||
// case can be handled in TableGen.
|
||||
unsigned Bits = VT.getScalarSizeInBits();
|
||||
|
|
|
@ -0,0 +1,238 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64"
|
||||
|
||||
; Check that an expanded vbsl(vneg(pre_cond), left, right) lowers to a BSL
|
||||
; during ISEL.
|
||||
;
|
||||
; Subtly different from a plain vector bit select: operand representing the
|
||||
; condition has been negated (-v, not to be confused with bitwise_not(v)).
|
||||
|
||||
; Each vbsl_neg_cond_xxxx tests one of the 16 permutations of the operands.
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(neg_cond, left); right = and(min_cond, right); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0000(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0000:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_0 = and <4 x i32> %neg_cond, %left
|
||||
%right_bits_0 = and <4 x i32> %min_cond, %right
|
||||
%bsl0000 = or <4 x i32> %right_bits_0, %left_bits_0
|
||||
ret <4 x i32> %bsl0000
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(left, neg_cond); right = and(min_cond, right); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0001(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0001:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_1 = and <4 x i32> %left, %neg_cond
|
||||
%right_bits_0 = and <4 x i32> %min_cond, %right
|
||||
%bsl0001 = or <4 x i32> %right_bits_0, %left_bits_1
|
||||
ret <4 x i32> %bsl0001
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(neg_cond, left); right = and(right, min_cond); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0010(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0010:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_0 = and <4 x i32> %neg_cond, %left
|
||||
%right_bits_1 = and <4 x i32> %right, %min_cond
|
||||
%bsl0010 = or <4 x i32> %right_bits_1, %left_bits_0
|
||||
ret <4 x i32> %bsl0010
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(left, neg_cond); right = and(right, min_cond); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0011(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0011:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_1 = and <4 x i32> %left, %neg_cond
|
||||
%right_bits_1 = and <4 x i32> %right, %min_cond
|
||||
%bsl0011 = or <4 x i32> %right_bits_1, %left_bits_1
|
||||
ret <4 x i32> %bsl0011
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(neg_cond, left); right = and(min_cond, right); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0100(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0100:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_0 = and <4 x i32> %neg_cond, %left
|
||||
%right_bits_0 = and <4 x i32> %min_cond, %right
|
||||
%bsl0100 = or <4 x i32> %left_bits_0, %right_bits_0
|
||||
ret <4 x i32> %bsl0100
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(neg_cond, left); right = and(right, min_cond); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0101(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0101:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_0 = and <4 x i32> %neg_cond, %left
|
||||
%right_bits_1 = and <4 x i32> %right, %min_cond
|
||||
%bsl0101 = or <4 x i32> %left_bits_0, %right_bits_1
|
||||
ret <4 x i32> %bsl0101
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(left, neg_cond); right = and(min_cond, right); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0110(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0110:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_1 = and <4 x i32> %left, %neg_cond
|
||||
%right_bits_0 = and <4 x i32> %min_cond, %right
|
||||
%bsl0110 = or <4 x i32> %left_bits_1, %right_bits_0
|
||||
ret <4 x i32> %bsl0110
|
||||
}
|
||||
|
||||
; Operand ordering under test: neg_cond masks %left, min_cond masks %right.
;   left = and(left, neg_cond); right = and(right, min_cond); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v1 before v2.
define <4 x i32> @vbsl_neg_cond_0111(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_0111:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%left_bits_1 = and <4 x i32> %left, %neg_cond
|
||||
%right_bits_1 = and <4 x i32> %right, %min_cond
|
||||
%bsl0111 = or <4 x i32> %left_bits_1, %right_bits_1
|
||||
ret <4 x i32> %bsl0111
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(min_cond, left); right = and(neg_cond, right); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1000(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1000:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_0 = and <4 x i32> %min_cond, %left
|
||||
%flip_cond_right_bits_0 = and <4 x i32> %neg_cond, %right
|
||||
%bsl1000 = or <4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_0
|
||||
ret <4 x i32> %bsl1000
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(left, min_cond); right = and(neg_cond, right); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1001(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1001:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_1 = and <4 x i32> %left, %min_cond
|
||||
%flip_cond_right_bits_0 = and <4 x i32> %neg_cond, %right
|
||||
%bsl1001 = or <4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_1
|
||||
ret <4 x i32> %bsl1001
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(min_cond, left); right = and(right, neg_cond); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1010(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1010:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_0 = and <4 x i32> %min_cond, %left
|
||||
%flip_cond_right_bits_1 = and <4 x i32> %right, %neg_cond
|
||||
%bsl1010 = or <4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_0
|
||||
ret <4 x i32> %bsl1010
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(left, min_cond); right = and(right, neg_cond); result = or(right, left)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1011(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1011:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_1 = and <4 x i32> %left, %min_cond
|
||||
%flip_cond_right_bits_1 = and <4 x i32> %right, %neg_cond
|
||||
%bsl1011 = or <4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_1
|
||||
ret <4 x i32> %bsl1011
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(min_cond, left); right = and(neg_cond, right); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1100(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1100:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_0 = and <4 x i32> %min_cond, %left
|
||||
%flip_cond_right_bits_0 = and <4 x i32> %neg_cond, %right
|
||||
%bsl1100 = or <4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_0
|
||||
ret <4 x i32> %bsl1100
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(min_cond, left); right = and(right, neg_cond); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1101(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1101:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_0 = and <4 x i32> %min_cond, %left
|
||||
%flip_cond_right_bits_1 = and <4 x i32> %right, %neg_cond
|
||||
%bsl1101 = or <4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_1
|
||||
ret <4 x i32> %bsl1101
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(left, min_cond); right = and(neg_cond, right); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1110(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1110:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_1 = and <4 x i32> %left, %min_cond
|
||||
%flip_cond_right_bits_0 = and <4 x i32> %neg_cond, %right
|
||||
%bsl1110 = or <4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_0
|
||||
ret <4 x i32> %bsl1110
|
||||
}
|
||||
|
||||
; Operand ordering under test (condition flipped): min_cond masks %left,
; neg_cond masks %right.
;   left = and(left, min_cond); right = and(right, neg_cond); result = or(left, right)
; Expected code: one neg followed by one bsl that lists v2 before v1.
define <4 x i32> @vbsl_neg_cond_1111(<4 x i32> %pre_cond, <4 x i32> %left, <4 x i32> %right) #0 {
|
||||
; CHECK-LABEL: vbsl_neg_cond_1111:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg v0.4s, v0.4s
|
||||
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%neg_cond = sub <4 x i32> zeroinitializer, %pre_cond
|
||||
%min_cond = add <4 x i32> %pre_cond, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%flip_cond_left_bits_1 = and <4 x i32> %left, %min_cond
|
||||
%flip_cond_right_bits_1 = and <4 x i32> %right, %neg_cond
|
||||
%bsl1111 = or <4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_1
|
||||
ret <4 x i32> %bsl1111
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+neon" }
|
Loading…
Reference in New Issue