[WebAssembly] Add atomic.fence instruction

Summary: This adds the `atomic.fence` instruction (https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#fence-operator). We now emit the new `atomic.fence` instruction for multithread fences, rather than the previous `atomic.rmw` hack. Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, tlively, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66794 llvm-svn: 370272
2019-08-28 23:13:43 +00:00 · 2019-08-28 23:13:43 +00:00 · d85fd5a3f4
parent 5be949e3d0
commit d85fd5a3f4
5 changed files with 115 additions and 100 deletions
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@ -88,88 +88,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
    uint64_t SyncScopeID =
        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
    MachineSDNode *Fence = nullptr;
    switch (SyncScopeID) {
-    case SyncScope::SingleThread: {
+    case SyncScope::SingleThread:
      // We lower a single-thread fence to a pseudo compiler barrier instruction
      // preventing instruction reordering. This will not be emitted in final
      // binary.
-      MachineSDNode *Fence =
+      Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
-          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                     DL,                 // debug loc
-                                 DL,                 // debug loc
+                                     MVT::Other,         // outchain type
-                                 MVT::Other,         // outchain type
+                                     Node->getOperand(0) // inchain
-                                 Node->getOperand(0) // inchain
+      );
-          );
+      break;
-      ReplaceNode(Node, Fence);
+    case SyncScope::System:
-      CurDAG->RemoveDeadNode(Node);
+      // Currently wasm only supports sequentially consistent atomics, so we
-      return;
+      // always set the order to 0 (sequentially consistent).
-    }
+      Fence = CurDAG->getMachineNode(
-
+          WebAssembly::ATOMIC_FENCE,
-    case SyncScope::System: {
+          DL,                                         // debug loc
-      // For non-emscripten systems, we have not decided on what we should
+          MVT::Other,                                 // outchain type
-      // traslate fences to yet.
+          CurDAG->getTargetConstant(0, DL, MVT::i32), // order
-      if (!Subtarget->getTargetTriple().isOSEmscripten())
+          Node->getOperand(0)                         // inchain
-        report_fatal_error(
+      );
-            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+      break;
      // Wasm does not have a fence instruction, but because all atomic
      // instructions in wasm are sequentially consistent, we translate a
      // fence to an idempotent atomic RMW instruction to a linear memory
      // address. All atomic instructions in wasm are sequentially consistent,
      // but this is to ensure a fence also prevents reordering of non-atomic
      // instructions in the VM. Even though LLVM IR's fence instruction does
      // not say anything about its relationship with non-atomic instructions,
      // we think this is more user-friendly.
      //
      // While any address can work, here we use a value stored in
      // __stack_pointer wasm global because there's high chance that area is
      // in cache.
      //
      // So the selected instructions will be in the form of:
      //   %addr = get_global $__stack_pointer
      //   %0 = i32.const 0
      //   i32.atomic.rmw.or %addr, %0
      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
      MachineSDNode *GetGlobal =
          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
                                 DL,                          // debug loc
                                 MVT::i32,                    // result type
                                 StackPtrSym // __stack_pointer symbol
          );
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      auto *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getUnknownStack(MF),
          // FIXME Volatile isn't really correct, but currently all LLVM
          // atomic instructions are treated as volatiles in the backend, so
          // we should be consistent.
          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
              MachineMemOperand::MOStore,
          4, 4, AAMDNodes(), nullptr, SyncScope::System,
          AtomicOrdering::SequentiallyConsistent);
      MachineSDNode *Const0 =
          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
          DL,                             // debug loc
          MVT::i32,                       // result type
          MVT::Other,                     // outchain type
          {
              Zero,                  // alignment
              Zero,                  // offset
              SDValue(GetGlobal, 0), // __stack_pointer
              SDValue(Const0, 0),    // OR with 0 to make it idempotent
              Node->getOperand(0)    // inchain
          });
      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    default:
      llvm_unreachable("Unknown scope!");
    }
    ReplaceNode(Node, Fence);
    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::GlobalTLSAddress: {
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@ -126,6 +126,19 @@ def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
 def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
 } // Predicates = [HasAtomics]
 //===----------------------------------------------------------------------===//
 // Atomic fences
 //===----------------------------------------------------------------------===//
 // Fence instructions. Both definitions below are declared with
 // Defs = [ARGUMENTS] and hasSideEffects = 1, and neither has a selection
 // pattern (the pattern list is empty).
 //
 // A compiler fence instruction that prevents reordering of instructions.
 // It is a pseudo instruction (isPseudo = 1) with no operands.
 let Defs = [ARGUMENTS] in {
 let isPseudo = 1, hasSideEffects = 1 in
 defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
 // The real fence instruction, printed as "atomic.fence". It takes a single
 // i8 immediate operand, $flags. The trailing 0x03 is presumably the
 // instruction's opcode within the atomics encoding space -- confirm against
 // the ATOMIC_NRI multiclass definition.
 let hasSideEffects = 1 in
 defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
                               0x03>;
 } // Defs = [ARGUMENTS]
 //===----------------------------------------------------------------------===//
 // Atomic loads
 //===----------------------------------------------------------------------===//
@ -887,13 +900,3 @@ defm : TerRMWTruncExtPattern<
  ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
  ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
  ATOMIC_RMW32_U_CMPXCHG_I64>;
 //===----------------------------------------------------------------------===//
 // Atomic fences
 //===----------------------------------------------------------------------===//
 // A compiler fence instruction that prevents reordering of instructions.
 let Defs = [ARGUMENTS] in {
 let isPseudo = 1, hasSideEffects = 1 in
 defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
 } // Defs = [ARGUMENTS]
--- a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
+++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
@ -1,19 +1,12 @@
 ; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
-; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
+; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s
 ; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
 ; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
-; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
+; A multithread fence is lowered to an atomic.fence instruction.
 ; A multithread fence turns into 'global.get $__stack_pointer' followed by an
 ; idempotent atomicrmw instruction.
 ; CHECK-LABEL: multithread_fence:
-; CHECK:      global.get  $push[[SP:[0-9]+]]=, __stack_pointer
+; CHECK:  atomic.fence
 ; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
 ; CHECK-NEXT: i32.atomic.rmw.or  $drop=, 0($pop[[SP]]), $pop[[ZERO]]
 ; NOATOMIC-NOT: i32.atomic.rmw.or
 define void @multithread_fence() {
  fence seq_cst
@ -23,10 +16,9 @@ define void @multithread_fence() {
 ; Fences with weaker memory orderings than seq_cst should be treated the same
 ; because atomic memory access in wasm are sequentially consistent.
 ; CHECK-LABEL: multithread_weak_fence:
-; CHECK:  global.get  $push{{.+}}=, __stack_pointer
+; CHECK:       atomic.fence
-; CHECK:  i32.atomic.rmw.or
+; CHECK-NEXT:  atomic.fence
-; CHECK:  i32.atomic.rmw.or
+; CHECK-NEXT:  atomic.fence
 ; CHECK:  i32.atomic.rmw.or
 define void @multithread_weak_fence() {
  fence acquire
  fence release
@ -37,7 +29,8 @@ define void @multithread_weak_fence() {
 ; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
 ; that acts as a compiler barrier. The barrier should not be emitted to .s file.
 ; CHECK-LABEL: singlethread_fence:
-; CHECK-NOT:  compiler_fence
+; CHECK-NOT: compiler_fence
 ; CHECK-NOT: atomic_fence
 define void @singlethread_fence() {
  fence syncscope("singlethread") seq_cst
  fence syncscope("singlethread") acquire
--- a/llvm/test/CodeGen/WebAssembly/atomic-fence.mir
+++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.mir
@ -0,0 +1,68 @@
 # RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-reg-stackify -run-pass wasm-explicit-locals %s -o - | FileCheck %s
 # In the two tests below, without compiler_fence or atomic.fence in between,
 # atomic.notify and i32.add will be reordered by register stackify pass to meet
 # 'call @foo''s requirements. But because we have fences between atomic.notify
 # and i32.add, they cannot be reordered, and local.set and local.get are
 # inserted to save and load atomic.notify's return value.
 --- |
  target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
  target triple = "wasm32-unknown-unknown"
  declare void @foo(i32, i32)
  define void @compiler_fence_test(i32) {
    ret void
  }
  define void @atomic_fence_test(i32) {
    ret void
  }
 ...
 ---
 # CHECK-LABEL: name: compiler_fence_test
 name: compiler_fence_test
 liveins:
  - { reg: '$arguments' }
 tracksRegLiveness: true
 body: |
  bb.0:
    ; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
    ; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
    ; CHECK: COMPILER_FENCE
    ; CHECK: ADD_I32
    ; CHECK: LOCAL_GET_I32 [[LOCAL]]
    ; CHECK: CALL_VOID @foo
    liveins: $arguments
    %0:i32 = CONST_I32 0, implicit-def $arguments
    %1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
    COMPILER_FENCE implicit-def $arguments
    %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
    CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
    RETURN_VOID implicit-def $arguments
 ...
 ---
 # CHECK-LABEL: name: atomic_fence_test
 name: atomic_fence_test
 liveins:
  - { reg: '$arguments' }
 tracksRegLiveness: true
 body: |
  bb.0:
    ; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
    ; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
    ; CHECK: ATOMIC_FENCE
    ; CHECK: ADD_I32
    ; CHECK: LOCAL_GET_I32 [[LOCAL]]
    ; CHECK: CALL_VOID @foo
    liveins: $arguments
    %0:i32 = CONST_I32 0, implicit-def $arguments
    %1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
    ATOMIC_FENCE 0, implicit-def $arguments
    %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
    CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
    RETURN_VOID implicit-def $arguments
 ...
--- a/llvm/test/MC/WebAssembly/atomics-encodings.s
+++ b/llvm/test/MC/WebAssembly/atomics-encodings.s
@ -10,6 +10,9 @@ main:
  # CHECK:  i64.atomic.wait 0 # encoding: [0xfe,0x02,0x03,0x00]
  i64.atomic.wait 0
  # CHECK: atomic.fence # encoding: [0xfe,0x03,0x00]
  atomic.fence
  # CHECK: i32.atomic.load 0 # encoding: [0xfe,0x10,0x02,0x00]
  i32.atomic.load 0
  # CHECK: i64.atomic.load 4 # encoding: [0xfe,0x11,0x03,0x04]