forked from OSchip/llvm-project
[WebAssembly] Lower vselect to v128.bitselect
Previously, vselect was expanded and the expansion was pattern-matched to generate bitselects, but in some cases the expansion would be combined further and no bitselect would be generated. This patch improves codegen in those cases by making vselect legal and lowering it directly to v128.bitselect. The old pattern that matches the expansion is kept because it is still useful for lowering IR that already uses the expanded form rather than a select operation. Differential Revision: https://reviews.llvm.org/D83734
This commit is contained in:
parent
9adf7461f7
commit
f0f9787646
|
@ -156,8 +156,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
|
|||
// There is no i8x16.mul instruction
|
||||
setOperationAction(ISD::MUL, MVT::v16i8, Expand);
|
||||
|
||||
// There are no vector select instructions
|
||||
for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
|
||||
// There is no vector conditional select instruction
|
||||
// TODO: Implement SELECT_V128
|
||||
for (auto Op : {ISD::SELECT_CC, ISD::SELECT})
|
||||
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
|
||||
MVT::v2f64})
|
||||
setOperationAction(Op, T, Expand);
|
||||
|
|
|
@ -574,6 +574,16 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
|
|||
(!cast<Instruction>("BITSELECT_"#vec_t)
|
||||
V128:$v1, V128:$v2, V128:$c)>;
|
||||
|
||||
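// Also implement vselect in terms of bitselect. Each entry is a
// [value type, condition type] pair; float vectors use the integer vector
// with the same lane layout as the type of the condition $c.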
|
||||
foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
|
||||
[v4f32, v4i32], [v2f64, v2i64]] in
|
||||
def : Pat<(types[0] (vselect
|
||||
(types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
|
||||
)),
|
||||
(!cast<Instruction>("BITSELECT_"#types[0])
|
||||
V128:$v1, V128:$v2, V128:$c
|
||||
)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Integer unary arithmetic
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -21,6 +21,18 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
|
|||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v16i8:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i8x16.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b,
|
||||
<16 x i8> %x, <16 x i8> %y) {
|
||||
%c = icmp slt <16 x i8> %a, %b
|
||||
%res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v16i8:
|
||||
; CHECK-NEXT: .functype select_v16i8 (i32, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
|
@ -91,6 +103,18 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
|
|||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v8i16:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i16x8.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b,
|
||||
<8 x i16> %x, <8 x i16> %y) {
|
||||
%c = icmp slt <8 x i16> %a, %b
|
||||
%res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v8i16:
|
||||
; CHECK-NEXT: .functype select_v8i16 (i32, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
|
@ -161,6 +185,17 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
|
|||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v4i32:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i32x4.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b,
|
||||
<4 x i32> %x, <4 x i32> %y) {
|
||||
%c = icmp slt <4 x i32> %a, %b
|
||||
%res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v4i32:
|
||||
; CHECK-NEXT: .functype select_v4i32 (i32, v128, v128) -> (v128){{$}}
|
||||
|
@ -232,6 +267,31 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
|
|||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v2i64:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
|
||||
; CHECK-NEXT: i64x2.extract_lane $push[[L2:[0-9]+]]=, $0, 0{{$}}
|
||||
; CHECK-NEXT: i64x2.extract_lane $push[[L3:[0-9]+]]=, $1, 0{{$}}
|
||||
; CHECK-NEXT: i64.lt_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: i64.select $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L4]]{{$}}
|
||||
; CHECK-NEXT: i64x2.splat $push[[L6:[0-9]+]]=, $pop[[L5]]{{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L7:[0-9]+]]=, -1{{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L8:[0-9]+]]=, 0{{$}}
|
||||
; CHECK-NEXT: i64x2.extract_lane $push[[L9:[0-9]+]]=, $0, 1{{$}}
|
||||
; CHECK-NEXT: i64x2.extract_lane $push[[L10:[0-9]+]]=, $1, 1{{$}}
|
||||
; CHECK-NEXT: i64.lt_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
|
||||
; CHECK-NEXT: i64.select $push[[L12:[0-9]+]]=, $pop[[L7]], $pop[[L8]], $pop[[L11]]{{$}}
|
||||
; CHECK-NEXT: i64x2.replace_lane $push[[L13:[0-9]+]]=, $pop[[L6]], 1, $pop[[L12]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L13]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b,
|
||||
<2 x i64> %x, <2 x i64> %y) {
|
||||
%c = icmp slt <2 x i64> %a, %b
|
||||
%res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v2i64:
|
||||
; CHECK-NEXT: .functype select_v2i64 (i32, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
|
@ -305,6 +365,18 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
|
|||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v4f32:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v4f32 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: f32x4.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b,
|
||||
<4 x float> %x, <4 x float> %y) {
|
||||
%c = fcmp olt <4 x float> %a, %b
|
||||
%res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v4f32:
|
||||
; CHECK-NEXT: .functype select_v4f32 (i32, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
|
@ -375,6 +447,18 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
|
|||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: vselect_cmp_v2f64:
|
||||
; CHECK-NEXT: .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: f64x2.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b,
|
||||
<2 x double> %x, <2 x double> %y) {
|
||||
%c = fcmp olt <2 x double> %a, %b
|
||||
%res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_v2f64:
|
||||
; CHECK-NEXT: .functype select_v2f64 (i32, v128, v128) -> (v128){{$}}
|
||||
; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
|
||||
|
|
Loading…
Reference in New Issue