[WebAssembly] Lower vselect to v128.bitselect

We were previously expanding vselect and matching on the expansion to generate bitselects, but in some cases the expansion would be further combined and a bitselect would not get generated. This patch improves codegen in those cases by legalizing vselect and lowering it to v128.bitselect. The old pattern that matches the expansion is still useful for lowering IR that already uses the expansion rather than a select operation.

Differential Revision: https://reviews.llvm.org/D83734
2020-07-16 11:11:19 -07:00 · 2020-07-16 11:11:19 -07:00 · f0f9787646
parent 9adf7461f7
commit f0f9787646
3 changed files with 97 additions and 2 deletions
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@ -156,8 +156,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

-    // There are no vector select instructions
-    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
+    // There is no vector conditional select instruction
+    // TODO: Implement SELECT_V128
+    for (auto Op : {ISD::SELECT_CC, ISD::SELECT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Expand);
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@ -574,6 +574,16 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
            (!cast<Instruction>("BITSELECT_"#vec_t)
              V128:$v1, V128:$v2, V128:$c)>;

+// Also implement vselect in terms of bitselect.
+// Each list entry is a [value type, condition type] pair: types[0] is the type
+// of the two selected operands and of the result, and types[1] is the type of
+// the condition vector $c. The integer vector types use themselves as the
+// condition type, while v4f32 and v2f64 are paired with v4i32 and v2i64.
+// The matched operands are forwarded to BITSELECT_<types[0]> as
+// ($v1, $v2, $c), i.e. the condition moves from the first vselect operand to
+// the last instruction operand.
+foreach types = [[v16i8, v16i8], [v8i16, v8i16], [v4i32, v4i32], [v2i64, v2i64],
+                 [v4f32, v4i32], [v2f64, v2i64]] in
+  def : Pat<(types[0] (vselect
+              (types[1] V128:$c), (types[0] V128:$v1), (types[0] V128:$v2)
+            )),
+            (!cast<Instruction>("BITSELECT_"#types[0])
+              V128:$v1, V128:$v2, V128:$c
+            )>;
+
 //===----------------------------------------------------------------------===//
 // Integer unary arithmetic
 //===----------------------------------------------------------------------===//
--- a/llvm/test/CodeGen/WebAssembly/simd-select.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll
@ -21,6 +21,18 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
  ret <16 x i8> %res
 }

+; Select between %x and %y using the result of a signed less-than compare of
+; %a and %b. The checks expect the compare (i8x16.lt_s) to feed a single
+; v128.bitselect whose result is returned.
+; CHECK-LABEL: vselect_cmp_v16i8:
+; CHECK-NEXT: .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i8x16.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b,
+                                    <16 x i8> %x, <16 x i8> %y) {
+  %c = icmp slt <16 x i8> %a, %b
+  %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
+  ret <16 x i8> %res
+}
+
+
 ; CHECK-LABEL: select_v16i8:
 ; CHECK-NEXT: .functype select_v16i8 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -91,6 +103,18 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
  ret <8 x i16> %res
 }

+; Select between %x and %y using the result of a signed less-than compare of
+; %a and %b. The checks expect the compare (i16x8.lt_s) to feed a single
+; v128.bitselect whose result is returned.
+; CHECK-LABEL: vselect_cmp_v8i16:
+; CHECK-NEXT: .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i16x8.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b,
+                                           <8 x i16> %x, <8 x i16> %y) {
+  %c = icmp slt <8 x i16> %a, %b
+  %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %res
+}
+
+
 ; CHECK-LABEL: select_v8i16:
 ; CHECK-NEXT: .functype select_v8i16 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -161,6 +185,17 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
  ret <4 x i32> %res
 }

+; Select between %x and %y using the result of a signed less-than compare of
+; %a and %b. The checks expect the compare (i32x4.lt_s) to feed a single
+; v128.bitselect whose result is returned.
+; CHECK-LABEL: vselect_cmp_v4i32:
+; CHECK-NEXT: .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i32x4.lt_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b,
+                                    <4 x i32> %x, <4 x i32> %y) {
+  %c = icmp slt <4 x i32> %a, %b
+  %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %res
+}

 ; CHECK-LABEL: select_v4i32:
 ; CHECK-NEXT: .functype select_v4i32 (i32, v128, v128) -> (v128){{$}}
@ -232,6 +267,31 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
  ret <2 x i64> %res
 }

+; Select between %x and %y using the result of a signed less-than compare of
+; %a and %b. Unlike the narrower integer cases, the expected output expands
+; the compare lane by lane: each lane is extracted, compared with i64.lt_s and
+; turned into -1 or 0 with i64.select, and the two lane results are rebuilt
+; into a vector with i64x2.splat and i64x2.replace_lane. That rebuilt mask is
+; still expected to feed a single v128.bitselect.
+; CHECK-LABEL: vselect_cmp_v2i64:
+; CHECK-NEXT: .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
+; CHECK-NEXT: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L2:[0-9]+]]=, $0, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L3:[0-9]+]]=, $1, 0{{$}}
+; CHECK-NEXT: i64.lt_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i64.select $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L1]], $pop[[L4]]{{$}}
+; CHECK-NEXT: i64x2.splat $push[[L6:[0-9]+]]=, $pop[[L5]]{{$}}
+; CHECK-NEXT: i64.const $push[[L7:[0-9]+]]=, -1{{$}}
+; CHECK-NEXT: i64.const $push[[L8:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L9:[0-9]+]]=, $0, 1{{$}}
+; CHECK-NEXT: i64x2.extract_lane $push[[L10:[0-9]+]]=, $1, 1{{$}}
+; CHECK-NEXT: i64.lt_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
+; CHECK-NEXT: i64.select $push[[L12:[0-9]+]]=, $pop[[L7]], $pop[[L8]], $pop[[L11]]{{$}}
+; CHECK-NEXT: i64x2.replace_lane $push[[L13:[0-9]+]]=, $pop[[L6]], 1, $pop[[L12]]{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L13]]{{$}}
+; CHECK-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b,
+                                    <2 x i64> %x, <2 x i64> %y) {
+  %c = icmp slt <2 x i64> %a, %b
+  %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
+  ret <2 x i64> %res
+}
+
+
 ; CHECK-LABEL: select_v2i64:
 ; CHECK-NEXT: .functype select_v2i64 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -305,6 +365,18 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
  ret <4 x float> %res
 }

+; Select between the float vectors %x and %y using the result of an ordered
+; less-than compare of %a and %b. The checks expect the compare (f32x4.lt) to
+; feed a single v128.bitselect whose result is returned.
+; CHECK-LABEL: vselect_cmp_v4f32:
+; CHECK-NEXT: .functype vselect_cmp_v4f32 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b,
+                                      <4 x float> %x, <4 x float> %y) {
+  %c = fcmp olt <4 x float> %a, %b
+  %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %res
+}
+
+
 ; CHECK-LABEL: select_v4f32:
 ; CHECK-NEXT: .functype select_v4f32 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
@ -375,6 +447,18 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
 }

+; Select between the double vectors %x and %y using the result of an ordered
+; less-than compare of %a and %b. The checks expect the compare (f64x2.lt) to
+; feed a single v128.bitselect whose result is returned.
+; CHECK-LABEL: vselect_cmp_v2f64:
+; CHECK-NEXT: .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128){{$}}
+; CHECK-NEXT: f64x2.lt $push[[L0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $3, $pop[[L0]]{{$}}
+; CHECK-NEXT: return  $pop[[R]]{{$}}
+define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b,
+                                       <2 x double> %x, <2 x double> %y) {
+  %c = fcmp olt <2 x double> %a, %b
+  %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %res
+}
+
+
 ; CHECK-LABEL: select_v2f64:
 ; CHECK-NEXT: .functype select_v2f64 (i32, v128, v128) -> (v128){{$}}
 ; CHECK-NEXT: i64.const $push[[L0:[0-9]+]]=, -1{{$}}