forked from OSchip/llvm-project
[WebAssembly] Add atomic.fence instruction
Summary: This adds the `atomic.fence` instruction (see https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#fence-operator). We now emit the new `atomic.fence` instruction for multithread fences, rather than the previous `atomic.rmw` hack. Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, tlively, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66794 llvm-svn: 370272
This commit is contained in:
parent
5be949e3d0
commit
d85fd5a3f4
|
@ -88,88 +88,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
|
||||||
|
|
||||||
uint64_t SyncScopeID =
|
uint64_t SyncScopeID =
|
||||||
cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
|
cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
|
||||||
|
MachineSDNode *Fence = nullptr;
|
||||||
switch (SyncScopeID) {
|
switch (SyncScopeID) {
|
||||||
case SyncScope::SingleThread: {
|
case SyncScope::SingleThread:
|
||||||
// We lower a single-thread fence to a pseudo compiler barrier instruction
|
// We lower a single-thread fence to a pseudo compiler barrier instruction
|
||||||
// preventing instruction reordering. This will not be emitted in final
|
// preventing instruction reordering. This will not be emitted in final
|
||||||
// binary.
|
// binary.
|
||||||
MachineSDNode *Fence =
|
Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
|
||||||
CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
|
DL, // debug loc
|
||||||
DL, // debug loc
|
MVT::Other, // outchain type
|
||||||
MVT::Other, // outchain type
|
Node->getOperand(0) // inchain
|
||||||
Node->getOperand(0) // inchain
|
);
|
||||||
);
|
break;
|
||||||
ReplaceNode(Node, Fence);
|
case SyncScope::System:
|
||||||
CurDAG->RemoveDeadNode(Node);
|
// Currently wasm only supports sequentially consistent atomics, so we
|
||||||
return;
|
// always set the order to 0 (sequentially consistent).
|
||||||
}
|
Fence = CurDAG->getMachineNode(
|
||||||
|
WebAssembly::ATOMIC_FENCE,
|
||||||
case SyncScope::System: {
|
DL, // debug loc
|
||||||
// For non-emscripten systems, we have not decided on what we should
|
MVT::Other, // outchain type
|
||||||
// translate fences to yet.
|
CurDAG->getTargetConstant(0, DL, MVT::i32), // order
|
||||||
if (!Subtarget->getTargetTriple().isOSEmscripten())
|
Node->getOperand(0) // inchain
|
||||||
report_fatal_error(
|
);
|
||||||
"ATOMIC_FENCE is not yet supported in non-emscripten OSes");
|
break;
|
||||||
|
|
||||||
// Wasm does not have a fence instruction, but because all atomic
|
|
||||||
// instructions in wasm are sequentially consistent, we translate a
|
|
||||||
// fence to an idempotent atomic RMW instruction to a linear memory
|
|
||||||
// address. All atomic instructions in wasm are sequentially consistent,
|
|
||||||
// but this is to ensure a fence also prevents reordering of non-atomic
|
|
||||||
// instructions in the VM. Even though LLVM IR's fence instruction does
|
|
||||||
// not say anything about its relationship with non-atomic instructions,
|
|
||||||
// we think this is more user-friendly.
|
|
||||||
//
|
|
||||||
// While any address can work, here we use a value stored in
|
|
||||||
// __stack_pointer wasm global because there's high chance that area is
|
|
||||||
// in cache.
|
|
||||||
//
|
|
||||||
// So the selected instructions will be in the form of:
|
|
||||||
// %addr = get_global $__stack_pointer
|
|
||||||
// %0 = i32.const 0
|
|
||||||
// i32.atomic.rmw.or %addr, %0
|
|
||||||
SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
|
|
||||||
"__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
|
|
||||||
MachineSDNode *GetGlobal =
|
|
||||||
CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
|
|
||||||
DL, // debug loc
|
|
||||||
MVT::i32, // result type
|
|
||||||
StackPtrSym // __stack_pointer symbol
|
|
||||||
);
|
|
||||||
|
|
||||||
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
|
|
||||||
auto *MMO = MF.getMachineMemOperand(
|
|
||||||
MachinePointerInfo::getUnknownStack(MF),
|
|
||||||
// FIXME Volatile isn't really correct, but currently all LLVM
|
|
||||||
// atomic instructions are treated as volatiles in the backend, so
|
|
||||||
// we should be consistent.
|
|
||||||
MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
|
|
||||||
MachineMemOperand::MOStore,
|
|
||||||
4, 4, AAMDNodes(), nullptr, SyncScope::System,
|
|
||||||
AtomicOrdering::SequentiallyConsistent);
|
|
||||||
MachineSDNode *Const0 =
|
|
||||||
CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
|
|
||||||
MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
|
|
||||||
WebAssembly::ATOMIC_RMW_OR_I32, // opcode
|
|
||||||
DL, // debug loc
|
|
||||||
MVT::i32, // result type
|
|
||||||
MVT::Other, // outchain type
|
|
||||||
{
|
|
||||||
Zero, // alignment
|
|
||||||
Zero, // offset
|
|
||||||
SDValue(GetGlobal, 0), // __stack_pointer
|
|
||||||
SDValue(Const0, 0), // OR with 0 to make it idempotent
|
|
||||||
Node->getOperand(0) // inchain
|
|
||||||
});
|
|
||||||
|
|
||||||
CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
|
|
||||||
ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
|
|
||||||
CurDAG->RemoveDeadNode(Node);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Unknown scope!");
|
llvm_unreachable("Unknown scope!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ReplaceNode(Node, Fence);
|
||||||
|
CurDAG->RemoveDeadNode(Node);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ISD::GlobalTLSAddress: {
|
case ISD::GlobalTLSAddress: {
|
||||||
|
|
|
@ -126,6 +126,19 @@ def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
|
||||||
def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
|
def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
|
||||||
} // Predicates = [HasAtomics]
|
} // Predicates = [HasAtomics]
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Atomic fences
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// A compiler fence instruction that prevents reordering of instructions.
|
||||||
|
let Defs = [ARGUMENTS] in {
|
||||||
|
let isPseudo = 1, hasSideEffects = 1 in
|
||||||
|
defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
|
||||||
|
let hasSideEffects = 1 in
|
||||||
|
defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
|
||||||
|
0x03>;
|
||||||
|
} // Defs = [ARGUMENTS]
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Atomic loads
|
// Atomic loads
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -887,13 +900,3 @@ defm : TerRMWTruncExtPattern<
|
||||||
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
|
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
|
||||||
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
|
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
|
||||||
ATOMIC_RMW32_U_CMPXCHG_I64>;
|
ATOMIC_RMW32_U_CMPXCHG_I64>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Atomic fences
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// A compiler fence instruction that prevents reordering of instructions.
|
|
||||||
let Defs = [ARGUMENTS] in {
|
|
||||||
let isPseudo = 1, hasSideEffects = 1 in
|
|
||||||
defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
|
|
||||||
} // Defs = [ARGUMENTS]
|
|
||||||
|
|
|
@ -1,19 +1,12 @@
|
||||||
; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
|
; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
|
||||||
; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
|
; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s
|
||||||
; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
|
|
||||||
; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
|
|
||||||
|
|
||||||
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
||||||
target triple = "wasm32-unknown-unknown"
|
target triple = "wasm32-unknown-unknown"
|
||||||
|
|
||||||
; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
|
; A multithread fence is lowered to an atomic.fence instruction.
|
||||||
|
|
||||||
; A multithread fence turns into 'global.get $__stack_pointer' followed by an
|
|
||||||
; idempotent atomicrmw instruction.
|
|
||||||
; CHECK-LABEL: multithread_fence:
|
; CHECK-LABEL: multithread_fence:
|
||||||
; CHECK: global.get $push[[SP:[0-9]+]]=, __stack_pointer
|
; CHECK: atomic.fence
|
||||||
; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
|
|
||||||
; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop[[SP]]), $pop[[ZERO]]
|
|
||||||
; NOATOMIC-NOT: i32.atomic.rmw.or
|
; NOATOMIC-NOT: i32.atomic.rmw.or
|
||||||
define void @multithread_fence() {
|
define void @multithread_fence() {
|
||||||
fence seq_cst
|
fence seq_cst
|
||||||
|
@ -23,10 +16,9 @@ define void @multithread_fence() {
|
||||||
; Fences with weaker memory orderings than seq_cst should be treated the same
|
; Fences with weaker memory orderings than seq_cst should be treated the same
|
||||||
; because atomic memory access in wasm are sequentially consistent.
|
; because atomic memory access in wasm are sequentially consistent.
|
||||||
; CHECK-LABEL: multithread_weak_fence:
|
; CHECK-LABEL: multithread_weak_fence:
|
||||||
; CHECK: global.get $push{{.+}}=, __stack_pointer
|
; CHECK: atomic.fence
|
||||||
; CHECK: i32.atomic.rmw.or
|
; CHECK-NEXT: atomic.fence
|
||||||
; CHECK: i32.atomic.rmw.or
|
; CHECK-NEXT: atomic.fence
|
||||||
; CHECK: i32.atomic.rmw.or
|
|
||||||
define void @multithread_weak_fence() {
|
define void @multithread_weak_fence() {
|
||||||
fence acquire
|
fence acquire
|
||||||
fence release
|
fence release
|
||||||
|
@ -37,7 +29,8 @@ define void @multithread_weak_fence() {
|
||||||
; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
|
; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
|
||||||
; that acts as a compiler barrier. The barrier should not be emitted to .s file.
|
; that acts as a compiler barrier. The barrier should not be emitted to .s file.
|
||||||
; CHECK-LABEL: singlethread_fence:
|
; CHECK-LABEL: singlethread_fence:
|
||||||
; CHECK-NOT: compiler_fence
|
; CHECK-NOT: compiler_fence
|
||||||
|
; CHECK-NOT: atomic_fence
|
||||||
define void @singlethread_fence() {
|
define void @singlethread_fence() {
|
||||||
fence syncscope("singlethread") seq_cst
|
fence syncscope("singlethread") seq_cst
|
||||||
fence syncscope("singlethread") acquire
|
fence syncscope("singlethread") acquire
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-reg-stackify -run-pass wasm-explicit-locals %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
# In the two tests below, without compiler_fence or atomic.fence in between,
|
||||||
|
# atomic.notify and i32.add will be reordered by register stackify pass to meet
|
||||||
|
# 'call @foo''s requirements. But because we have fences between atomic.notify
|
||||||
|
# and i32.add, they cannot be reordered, and local.set and local.get are
|
||||||
|
# inserted to save and load atomic.notify's return value.
|
||||||
|
|
||||||
|
--- |
|
||||||
|
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
||||||
|
target triple = "wasm32-unknown-unknown"
|
||||||
|
|
||||||
|
declare void @foo(i32, i32)
|
||||||
|
define void @compiler_fence_test(i32) {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @atomic_fence_test(i32) {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
...
|
||||||
|
---
|
||||||
|
# CHECK-LABEL: name: compiler_fence_test
|
||||||
|
name: compiler_fence_test
|
||||||
|
liveins:
|
||||||
|
- { reg: '$arguments' }
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
|
||||||
|
; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
|
||||||
|
; CHECK: COMPILER_FENCE
|
||||||
|
; CHECK: ADD_I32
|
||||||
|
; CHECK: LOCAL_GET_I32 [[LOCAL]]
|
||||||
|
; CHECK: CALL_VOID @foo
|
||||||
|
|
||||||
|
liveins: $arguments
|
||||||
|
%0:i32 = CONST_I32 0, implicit-def $arguments
|
||||||
|
%1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
|
||||||
|
COMPILER_FENCE implicit-def $arguments
|
||||||
|
%2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
|
||||||
|
CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
|
||||||
|
RETURN_VOID implicit-def $arguments
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
# CHECK-LABEL: name: atomic_fence_test
|
||||||
|
name: atomic_fence_test
|
||||||
|
liveins:
|
||||||
|
- { reg: '$arguments' }
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
|
||||||
|
; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
|
||||||
|
; CHECK: ATOMIC_FENCE
|
||||||
|
; CHECK: ADD_I32
|
||||||
|
; CHECK: LOCAL_GET_I32 [[LOCAL]]
|
||||||
|
; CHECK: CALL_VOID @foo
|
||||||
|
|
||||||
|
liveins: $arguments
|
||||||
|
%0:i32 = CONST_I32 0, implicit-def $arguments
|
||||||
|
%1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
|
||||||
|
ATOMIC_FENCE 0, implicit-def $arguments
|
||||||
|
%2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
|
||||||
|
CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
|
||||||
|
RETURN_VOID implicit-def $arguments
|
||||||
|
...
|
||||||
|
|
|
@ -10,6 +10,9 @@ main:
|
||||||
# CHECK: i64.atomic.wait 0 # encoding: [0xfe,0x02,0x03,0x00]
|
# CHECK: i64.atomic.wait 0 # encoding: [0xfe,0x02,0x03,0x00]
|
||||||
i64.atomic.wait 0
|
i64.atomic.wait 0
|
||||||
|
|
||||||
|
# CHECK: atomic.fence # encoding: [0xfe,0x03,0x00]
|
||||||
|
atomic.fence
|
||||||
|
|
||||||
# CHECK: i32.atomic.load 0 # encoding: [0xfe,0x10,0x02,0x00]
|
# CHECK: i32.atomic.load 0 # encoding: [0xfe,0x10,0x02,0x00]
|
||||||
i32.atomic.load 0
|
i32.atomic.load 0
|
||||||
# CHECK: i64.atomic.load 4 # encoding: [0xfe,0x11,0x03,0x04]
|
# CHECK: i64.atomic.load 4 # encoding: [0xfe,0x11,0x03,0x04]
|
||||||
|
|
Loading…
Reference in New Issue