[WebAssembly] Lower vselect to v128.bitselect

We were previously expanding vselect and matching on the expansion to
generate bitselects, but in some cases the expansion would be further
combined and a bitselect would not get generated. This patch improves
codegen in those cases by legalizing vselect and lowering it to
v128.bitselect. The old pattern that matches the expansion is still
useful for lowering IR that already uses the expansion rather than a
select operation.

Differential Revision: https://reviews.llvm.org/D83734
This commit is contained in:
Thomas Lively 2020-07-16 11:11:19 -07:00
parent 9adf7461f7
commit f0f9787646
3 changed files with 97 additions and 2 deletions

View File

@ -156,8 +156,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// There is no i8x16.mul instruction
setOperationAction(ISD::MUL, MVT::v16i8, Expand);
// There are no vector select instructions
for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
// There is no vector conditional select instruction
// TODO: Implement SELECT_V128
for (auto Op : {ISD::SELECT_CC, ISD::SELECT})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
setOperationAction(Op, T, Expand);

View File

@ -574,6 +574,16 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
(!cast<Instruction>("BITSELECT_"#vec_t)
V128:$v1, V128:$v2, V128:$c)>;
// Also implement vselect in terms of bitselect
// Each entry of `types` is a pair: types[0] is the type of the result and of
// the two selected values ($v1, $v2); types[1] is the type of the condition
// ($c). The floating-point entries (v4f32, v2f64) are paired with the integer
// vector type that has the same lane count (v4i32, v2i64).
// Note the operand reordering: vselect takes the condition first, while the
// selected BITSELECT_* instruction receives it last.
// NOTE(review): this presumably relies on every lane of $c being all-ones or
// all-zeros so that a bitwise select matches a lane-wise select -- confirm.
foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
[v4f32, v4i32], [v2f64, v2i64]] in
def : Pat<(types[0] (vselect
(types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
)),
(!cast<Instruction>("BITSELECT_"#types[0])
V128:$v1, V128:$v2, V128:$c
)>;
//===----------------------------------------------------------------------===//
// Integer unary arithmetic
//===----------------------------------------------------------------------===//

View File

@ -21,6 +21,18 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
ret <16 x i8> %res
}
; A vector select whose <16 x i1> condition is produced by `icmp slt` on
; <16 x i8> operands. The expected output is a single i8x16.lt_s whose result
; is used directly as the mask operand of v128.bitselect.
; CHECK-LABEL: vselect_cmp_v16i8:
; CHECK-NEXT: .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i8x16.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b,
<16 x i8> %x, <16 x i8> %y) {
%c = icmp slt <16 x i8> %a, %b
%res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %res
}
; CHECK-LABEL: select_v16i8:
; CHECK-NEXT: .functype select_v16i8 (i32, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -91,6 +103,18 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
ret <8 x i16> %res
}
; A vector select whose <8 x i1> condition is produced by `icmp slt` on
; <8 x i16> operands. The expected output is a single i16x8.lt_s whose result
; is used directly as the mask operand of v128.bitselect.
; CHECK-LABEL: vselect_cmp_v8i16:
; CHECK-NEXT: .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i16x8.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b,
<8 x i16> %x, <8 x i16> %y) {
%c = icmp slt <8 x i16> %a, %b
%res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %res
}
; CHECK-LABEL: select_v8i16:
; CHECK-NEXT: .functype select_v8i16 (i32, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -161,6 +185,17 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
ret <4 x i32> %res
}
; A vector select whose <4 x i1> condition is produced by `icmp slt` on
; <4 x i32> operands. The expected output is a single i32x4.lt_s whose result
; is used directly as the mask operand of v128.bitselect.
; CHECK-LABEL: vselect_cmp_v4i32:
; CHECK-NEXT: .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i32x4.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b,
<4 x i32> %x, <4 x i32> %y) {
%c = icmp slt <4 x i32> %a, %b
%res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %res
}
; CHECK-LABEL: select_v4i32:
; CHECK-NEXT: .functype select_v4i32 (i32, v128, v128) -> (v128){{$}}
@ -232,6 +267,31 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
ret <2 x i64> %res
}
; A vector select whose <2 x i1> condition is produced by `icmp slt` on
; <2 x i64> operands. Unlike the narrower integer cases, the expected output
; does the comparison one lane at a time: each lane is extracted, compared
; with the scalar i64.lt_s, and turned into -1 or 0 with i64.select. The two
; results are reassembled into a mask with i64x2.splat / i64x2.replace_lane,
; and that mask feeds v128.bitselect.
; CHECK-LABEL: vselect_cmp_v2i64:
; CHECK-NEXT: .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: i64x2.extract_lane $push[[L2:[0-9]+]]=, $0, 0{{$}}
; CHECK-NEXT: i64x2.extract_lane $push[[L3:[0-9]+]]=, $1, 0{{$}}
; CHECK-NEXT: i64.lt_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
; CHECK-NEXT: i64.select $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L4]]{{$}}
; CHECK-NEXT: i64x2.splat $push[[L6:[0-9]+]]=, $pop[[L5]]{{$}}
; CHECK-NEXT: i64.const $push[[L7:[0-9]+]]=, -1{{$}}
; CHECK-NEXT: i64.const $push[[L8:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: i64x2.extract_lane $push[[L9:[0-9]+]]=, $0, 1{{$}}
; CHECK-NEXT: i64x2.extract_lane $push[[L10:[0-9]+]]=, $1, 1{{$}}
; CHECK-NEXT: i64.lt_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
; CHECK-NEXT: i64.select $push[[L12:[0-9]+]]=, $pop[[L7]], $pop[[L8]], $pop[[L11]]{{$}}
; CHECK-NEXT: i64x2.replace_lane $push[[L13:[0-9]+]]=, $pop[[L6]], 1, $pop[[L12]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L13]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b,
<2 x i64> %x, <2 x i64> %y) {
%c = icmp slt <2 x i64> %a, %b
%res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %res
}
; CHECK-LABEL: select_v2i64:
; CHECK-NEXT: .functype select_v2i64 (i32, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -305,6 +365,18 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
ret <4 x float> %res
}
; A vector select whose <4 x i1> condition is produced by `fcmp olt` on
; <4 x float> operands. The expected output is a single f32x4.lt whose result
; is used directly as the mask operand of v128.bitselect.
; CHECK-LABEL: vselect_cmp_v4f32:
; CHECK-NEXT: .functype vselect_cmp_v4f32 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: f32x4.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b,
<4 x float> %x, <4 x float> %y) {
%c = fcmp olt <4 x float> %a, %b
%res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
ret <4 x float> %res
}
; CHECK-LABEL: select_v4f32:
; CHECK-NEXT: .functype select_v4f32 (i32, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -375,6 +447,18 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
ret <2 x double> %res
}
; A vector select whose <2 x i1> condition is produced by `fcmp olt` on
; <2 x double> operands. The expected output is a single f64x2.lt whose result
; is used directly as the mask operand of v128.bitselect.
; CHECK-LABEL: vselect_cmp_v2f64:
; CHECK-NEXT: .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: f64x2.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b,
<2 x double> %x, <2 x double> %y) {
%c = fcmp olt <2 x double> %a, %b
%res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
ret <2 x double> %res
}
; CHECK-LABEL: select_v2f64:
; CHECK-NEXT: .functype select_v2f64 (i32, v128, v128) -> (v128){{$}}
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}