[WebAssembly] Bitselect intrinsic and instruction

Summary: Depends on D52755.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D52805

llvm-svn: 343739
2018-10-03 23:02:23 +00:00 · 2018-10-03 23:02:23 +00:00 · 5d461c96bd
parent f7868ec25b
commit 5d461c96bd
7 changed files with 209 additions and 7 deletions
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -91,6 +91,10 @@ def int_wasm_atomic_notify:
 // SIMD intrinsics
 //===----------------------------------------------------------------------===//

+def int_wasm_bitselect :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_anytrue :
  Intrinsic<[llvm_i32_ty],
            [llvm_anyvector_ty],
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -24,5 +24,6 @@ HANDLE_NODETYPE(BR_TABLE)
 HANDLE_NODETYPE(SHUFFLE)
 HANDLE_NODETYPE(ANYTRUE)
 HANDLE_NODETYPE(ALLTRUE)
+HANDLE_NODETYPE(BITSELECT)

 // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -965,6 +965,11 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  switch (IntNo) {
  default:
    return {}; // Don't custom lower most intrinsics.
+
+  case Intrinsic::wasm_bitselect:
+    return DAG.getNode(WebAssemblyISD::BITSELECT, DL, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
  case Intrinsic::wasm_anytrue:
  case Intrinsic::wasm_alltrue: {
    unsigned OpCode = IntNo == Intrinsic::wasm_anytrue
@@ -972,6 +977,7 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                          : WebAssemblyISD::ALLTRUE;
    return DAG.getNode(OpCode, DL, Op.getValueType(), Op.getOperand(1));
  }
+
  case Intrinsic::wasm_lsda:
    // TODO For now, just return 0 not to crash
    return DAG.getConstant(0, DL, Op.getValueType());
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -20,8 +20,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;

 // Custom nodes for custom operations
 def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
+def wasm_bitselect_t : SDTypeProfile<1, 3,
+  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]
+>;
 def wasm_reduce_t : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>;
 def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
+def wasm_bitselect : SDNode<"WebAssemblyISD::BITSELECT", wasm_bitselect_t>;
 def wasm_anytrue : SDNode<"WebAssemblyISD::ANYTRUE", wasm_reduce_t>;
 def wasm_alltrue : SDNode<"WebAssemblyISD::ALLTRUE", wasm_reduce_t>;

@@ -193,6 +197,16 @@ multiclass SIMDNot<ValueType vec_t, PatFrag splat_pat, ValueType lane_t> {
                           )],
                           "v128.not\t$dst, $vec", "v128.not", 63>;
 }
+multiclass Bitselect<ValueType vec_t> {
+  defm BITSELECT_#vec_t :
+    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
+           [(set (vec_t V128:$dst),
+             (vec_t (wasm_bitselect
+               (vec_t V128:$c), (vec_t V128:$v1), (vec_t V128:$v2)
+             ))
+           )],
+           "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 64>;
+}
 multiclass SIMDReduceVec<ValueType vec_t, string vec, string name, SDNode op,
                      bits<32> simdop> {
  defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
@@ -380,6 +394,9 @@ defm "" : SIMDNot<v8i16, splat8, i32>;
 defm "" : SIMDNot<v4i32, splat4, i32>;
 defm "" : SIMDNot<v2i64, splat2, i64>;

+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+defm "" : Bitselect<vec_t>;
+
 defm ANYTRUE : SIMDReduce<"any_true", wasm_anytrue, 65>;
 defm ALLTRUE : SIMDReduce<"all_true", wasm_alltrue, 69>;

@@ -443,6 +460,13 @@ def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;

 }

+// Bitselect is equivalent to (c & v1) | (~c & v2)
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+  def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
+              (and (vnot V128:$c), (vec_t V128:$v2)))),
+            (!cast<Instruction>("BITSELECT_"#vec_t)
+              V128:$v1, V128:$v2, V128:$c)>;
+
 // Lower float comparisons that don't care about NaN to standard
 // WebAssembly float comparisons. These instructions are generated in
 // the target-independent expansion of unordered comparisons and
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM,SIMD128-VM-SLOW
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM,SIMD128-VM-FAST
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128,NO-SIMD128-SLOW
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128,NO-SIMD128-FAST

 ; Test that basic SIMD128 arithmetic operations assemble as expected.

@@ -165,6 +165,27 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) {
  ret <16 x i8> %a
 }

+; CHECK-LABEL: bitselect_v16i8:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.or
+; SIMD128-FAST-NEXT: return
+define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+  %masked_v1 = and <16 x i8> %c, %v1
+  %inv_mask = xor <16 x i8> %c,
+    <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+     i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %masked_v2 = and <16 x i8> %inv_mask, %v2
+  %a = or <16 x i8> %masked_v1, %masked_v2
+  ret <16 x i8> %a
+}
+
 ; ==============================================================================
 ; 8 x i16
 ; ==============================================================================
@@ -313,6 +334,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) {
  ret <8 x i16> %a
 }

+; CHECK-LABEL: bitselect_v8i16:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.or
+; SIMD128-FAST-NEXT: return
+define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+  %masked_v1 = and <8 x i16> %v1, %c
+  %inv_mask = xor <8 x i16>
+    <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
+    %c
+  %masked_v2 = and <8 x i16> %v2, %inv_mask
+  %a = or <8 x i16> %masked_v1, %masked_v2
+  ret <8 x i16> %a
+}
+
 ; ==============================================================================
 ; 4 x i32
 ; ==============================================================================
@@ -458,6 +500,25 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) {
  ret <4 x i32> %a
 }

+; CHECK-LABEL: bitselect_v4i32:
+; NO-SIMD128-NOT: v128
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.or
+; SIMD128-FAST-NEXT: return
+define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+  %masked_v1 = and <4 x i32> %c, %v1
+  %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
+  %masked_v2 = and <4 x i32> %inv_mask, %v2
+  %a = or <4 x i32> %masked_v2, %masked_v1
+  ret <4 x i32> %a
+}
+
 ; ==============================================================================
 ; 2 x i64
 ; ==============================================================================
@@ -653,6 +714,26 @@ define <2 x i64> @not_v2i64(<2 x i64> %x) {
  ret <2 x i64> %a
 }

+; CHECK-LABEL: bitselect_v2i64:
+; NO-SIMD128-NOT: v128
+; SIMD128-VM-NOT: v128
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
+; SIMD128-FAST-NEXT: v128.not
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.and
+; SIMD128-FAST-NEXT: v128.or
+; SIMD128-FAST-NEXT: return
+define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+  %masked_v1 = and <2 x i64> %v1, %c
+  %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c
+  %masked_v2 = and <2 x i64> %v2, %inv_mask
+  %a = or <2 x i64> %masked_v2, %masked_v1
+  ret <2 x i64> %a
+}
+
 ; ==============================================================================
 ; 4 x float
 ; ==============================================================================
@@ -761,7 +842,6 @@ define <2 x double> @abs_v2f64(<2 x double> %x) {
  ret <2 x double> %a
 }

-
 ; CHECK-LABEL: add_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-VM-NOT: f64x2
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -33,6 +33,19 @@ define i32 @all_v16i8(<16 x i8> %x) {
  ret i32 %a
 }

+; CHECK-LABEL: bitselect_v16i8:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <16 x i8> @llvm.wasm.bitselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
+  %a = call <16 x i8> @llvm.wasm.bitselect.v16i8(
+    <16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2
+  )
+  ret <16 x i8> %a
+}
+
 ; ==============================================================================
 ; 8 x i16
 ; ==============================================================================
@@ -58,6 +71,19 @@ define i32 @all_v8i16(<8 x i16> %x) {
  ret i32 %a
 }

+; CHECK-LABEL: bitselect_v8i16:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <8 x i16> @llvm.wasm.bitselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
+  %a = call <8 x i16> @llvm.wasm.bitselect.v8i16(
+    <8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2
+  )
+  ret <8 x i16> %a
+}
+
 ; ==============================================================================
 ; 4 x i32
 ; ==============================================================================
@@ -83,6 +109,19 @@ define i32 @all_v4i32(<4 x i32> %x) {
  ret i32 %a
 }

+; CHECK-LABEL: bitselect_v4i32:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
+  %a = call <4 x i32> @llvm.wasm.bitselect.v4i32(
+    <4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2
+  )
+  ret <4 x i32> %a
+}
+
 ; ==============================================================================
 ; 2 x i64
 ; ==============================================================================
@@ -107,3 +146,48 @@ define i32 @all_v2i64(<2 x i64> %x) {
  %a = call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> %x)
  ret i32 %a
 }
+
+; CHECK-LABEL: bitselect_v2i64:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x i64> @llvm.wasm.bitselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
+  %a = call <2 x i64> @llvm.wasm.bitselect.v2i64(
+    <2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2
+  )
+  ret <2 x i64> %a
+}
+
+; ==============================================================================
+; 4 x f32
+; ==============================================================================
+; CHECK-LABEL: bitselect_v4f32:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.bitselect.v4f32(<4 x float>, <4 x float>, <4 x float>)
+define <4 x float> @bitselect_v4f32(<4 x float> %c, <4 x float> %v1, <4 x float> %v2) {
+  %a = call <4 x float> @llvm.wasm.bitselect.v4f32(
+    <4 x float> %c, <4 x float> %v1, <4 x float> %v2
+  )
+  ret <4 x float> %a
+}
+
+; ==============================================================================
+; 2 x f64
+; ==============================================================================
+; CHECK-LABEL: bitselect_v2f64:
+; SIMD128-NEXT: .param v128, v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.bitselect.v2f64(<2 x double>, <2 x double>, <2 x double>)
+define <2 x double> @bitselect_v2f64(<2 x double> %c, <2 x double> %v1, <2 x double> %v2) {
+  %a = call <2 x double> @llvm.wasm.bitselect.v2f64(
+    <2 x double> %c, <2 x double> %v1, <2 x double> %v2
+  )
+  ret <2 x double> %a
+}
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -193,6 +193,9 @@
    # CHECK: v128.not # encoding: [0xfd,0x3f]
    v128.not

+    # CHECK: v128.bitselect # encoding: [0xfd,0x40]
+    v128.bitselect
+
    # CHECK: i8x16.any_true # encoding: [0xfd,0x41]
    i8x16.any_true