[WebAssembly] Add the rest of the atomic loads

Add extending loads and constant offset patterns
A bit more refactoring of the TableGen to make the patterns uniform between
the regular and atomic loads.
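For example, the same helper class now instantiates both the regular and the
atomic form of a load (illustrative; the actual instantiations are in the
diff below):

    def : LoadPatNoOffset<i32, load, LOAD_I32>;                   // WebAssemblyInstrMemory.td
    def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>;  // WebAssemblyInstrAtomics.td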

Differential Revision: https://reviews.llvm.org/D38523

llvm-svn: 315022
Derek Schuff 2017-10-05 21:18:42 +00:00
parent 7ac2db6a48
commit 885dc59297
9 changed files with 658 additions and 74 deletions


@@ -111,6 +111,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::LOAD8_U_I32:
case WebAssembly::LOAD8_S_I64:
case WebAssembly::LOAD8_U_I64:
case WebAssembly::ATOMIC_LOAD8_U_I32:
case WebAssembly::ATOMIC_LOAD8_U_I64:
case WebAssembly::STORE8_I32:
case WebAssembly::STORE8_I64:
return 0;
@@ -118,6 +120,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::LOAD16_U_I32:
case WebAssembly::LOAD16_S_I64:
case WebAssembly::LOAD16_U_I64:
case WebAssembly::ATOMIC_LOAD16_U_I32:
case WebAssembly::ATOMIC_LOAD16_U_I64:
case WebAssembly::STORE16_I32:
case WebAssembly::STORE16_I64:
return 1;
@@ -129,11 +133,13 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::LOAD32_U_I64:
case WebAssembly::STORE32_I64:
case WebAssembly::ATOMIC_LOAD_I32:
case WebAssembly::ATOMIC_LOAD32_U_I64:
return 2;
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_F64:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F64:
case WebAssembly::ATOMIC_LOAD_I64:
return 3;
default:
llvm_unreachable("Only loads and stores have p2align values");


@@ -17,19 +17,180 @@
//===----------------------------------------------------------------------===//
let Defs = [ARGUMENTS] in {
// TODO: add the rest of the atomic loads
def ATOMIC_LOAD_I32 : CLoadI32<"i32.atomic.load", 0xfe10>;
def ATOMIC_LOAD_I64 : CLoadI64<"i64.atomic.load", 0xfe11>;
def ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
def ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
} // Defs = [ARGUMENTS]
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
class ALoadPatNoOffset<ValueType ty, SDNode node, I inst> :
Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>;
def : ALoadPatNoOffset<i32, atomic_load, ATOMIC_LOAD_I32>;
def : ALoadPatNoOffset<i64, atomic_load, ATOMIC_LOAD_I64>;
def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>;
}
// Select loads with a constant offset.
// Pattern with address + immediate offset
def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>;
def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>;
def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>;
def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>;
def : LoadPatExternalSym<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatExternalSym<i64, atomic_load_64, ATOMIC_LOAD_I64>;
// Select loads with just a constant offset.
def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
def : LoadPatExternSymOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
} // Predicates = [HasAtomics]
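The LoadPat* helpers used above are defined in WebAssemblyInstrMemory.td and
shared with the non-atomic loads; regPlusImm matches an address computed with
an `add nuw`, and or_is_add matches an `or` that is provably equivalent to an
add. A minimal sketch of the two simplest helpers, assuming the
(p2align, offset, addr) operand order visible in the instruction definitions:

    class LoadPatNoOffset<ValueType ty, PatFrag kind, I inst> :
      Pat<(ty (kind I32:$addr)), (inst 0, 0, $addr)>;
    class LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, I inst> :
      Pat<(ty (kind (operand I32:$addr, imm:$off))), (inst 0, imm:$off, $addr)>;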
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
let Defs = [ARGUMENTS] in {
def ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
def ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
def ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
def ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
def ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
} // Defs = [ARGUMENTS]
// Fragments for extending loads. These are different from regular loads because
// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
// therefore don't have the extension type field. So instead of matching that,
// we match the patterns that the type legalizer expands them to.
// We directly match zext patterns and select the zext atomic loads.
// i32 (zext (i8 (atomic_load_8))) gets legalized to
// i32 (and (i32 (atomic_load_8)), 255)
// These can be selected to a single zero-extending atomic load instruction.
def zext_aload_8 : PatFrag<(ops node:$addr),
(and (i32 (atomic_load_8 node:$addr)), 255)>;
def zext_aload_16 : PatFrag<(ops node:$addr),
(and (i32 (atomic_load_16 node:$addr)), 65535)>;
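These fragments correspond to IR like the following (hypothetical function
name; compare the zext tests added below), after the type legalizer has
rewritten the zext into the and-with-mask form:

    define i32 @zext8_to_i32(i8* %p) {
      ; Legalizes to (and (i32 (atomic_load_8 %p)), 255), which zext_aload_8
      ; matches, so this selects to a single i32.atomic.load8_u.
      %v = load atomic i8, i8* %p seq_cst, align 1
      %e = zext i8 %v to i32
      ret i32 %e
    }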
// Unlike regular loads, extension to i64 is handled differently from i32.
// i64 (zext (i8 (atomic_load_8))) gets legalized to
// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255)
def zext_aload_8_64 :
PatFrag<(ops node:$addr),
(and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>;
def zext_aload_16_64 :
PatFrag<(ops node:$addr),
(and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>;
def zext_aload_32_64 :
PatFrag<(ops node:$addr),
(zext (i32 (atomic_load node:$addr)))>;
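The i64 fragments correspond to IR such as (again a hypothetical name;
compare zext_i8_i64 in the tests below):

    define i64 @zext8_to_i64(i8* %p) {
      ; Legalizes to (and (i64 (anyext (i32 (atomic_load_8 %p)))), 255), which
      ; zext_aload_8_64 matches, selecting a single i64.atomic.load8_u.
      %v = load atomic i8, i8* %p seq_cst, align 1
      %e = zext i8 %v to i64
      ret i64 %e
    }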
// We don't have single sext atomic load instructions. So for sext loads, we
// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit
// results) and select a zext load; the next instruction will be sext_inreg
// which is selected by itself.
def anyext_aload_8_64 :
PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>;
def anyext_aload_16_64 :
PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;
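A sign-extending use therefore becomes a two-instruction sequence, e.g.
(hypothetical name; compare sext_i8_i32 in the tests below):

    define i32 @sext8_to_i32(i8* %p) {
      ; The bare atomic_load_8 selects to i32.atomic.load8_u via the patterns
      ; below; the remaining sext_inreg selects to i32.extend8_s on its own.
      %v = load atomic i8, i8* %p seq_cst, align 1
      %e = sext i8 %v to i32
      ret i32 %e
    }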
let Predicates = [HasAtomics] in {
// Select zero-extending loads with no constant offset.
def : LoadPatNoOffset<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatNoOffset<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
// Select sign-extending loads with no constant offset
def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatNoOffset<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatNoOffset<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32
// Zero-extending loads with constant offset
def : LoadPatImmOff<i32, zext_aload_8, regPlusImm, ATOMIC_LOAD8_U_I32>;
def : LoadPatImmOff<i32, zext_aload_16, regPlusImm, ATOMIC_LOAD16_U_I32>;
def : LoadPatImmOff<i32, zext_aload_8, or_is_add, ATOMIC_LOAD8_U_I32>;
def : LoadPatImmOff<i32, zext_aload_16, or_is_add, ATOMIC_LOAD16_U_I32>;
def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>;
def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>;
// Sign-extending loads with constant offset
def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>;
def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>;
def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>;
def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>;
def : LoadPatImmOff<i64, anyext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
def : LoadPatImmOff<i64, anyext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
def : LoadPatImmOff<i64, anyext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
def : LoadPatImmOff<i64, anyext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i32
def : LoadPatGlobalAddr<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatGlobalAddr<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddr<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddr<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatGlobalAddr<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
def : LoadPatGlobalAddr<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddr<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddr<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatExternalSym<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatExternalSym<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatExternalSym<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatExternalSym<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatExternalSym<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
def : LoadPatExternalSym<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatExternalSym<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatExternalSym<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatExternalSym<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
// Extending loads with just a constant offset
def : LoadPatOffsetOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatOffsetOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatOffsetOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatOffsetOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatExternSymOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatExternSymOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatExternSymOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatExternSymOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
def : LoadPatExternSymOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
def : LoadPatExternSymOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatExternSymOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatExternSymOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatExternSymOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>;
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic stores


@@ -55,28 +55,19 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
let Defs = [ARGUMENTS] in {
// Classes to define both atomic and non-atomic integer loads
class CLoadI32<string Name, int Opcode> :
I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>;
class CLoadI64<string Name, int Opcode> :
I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>;
// Defines atomic and non-atomic loads, regular and extending.
class WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> :
I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>;
// Basic load.
// FIXME: When we can break syntax compatibility, reorder the fields in the
// asmstrings to match the binary encoding.
def LOAD_I32 : CLoadI32<"i32.load", 0x28>;
def LOAD_I64 : CLoadI64<"i64.load", 0x29>;
def LOAD_F32 : I<(outs F32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>;
def LOAD_F64 : I<(outs F64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>;
def LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>;
def LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>;
def LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>;
def LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>;
} // Defs = [ARGUMENTS]
@@ -153,36 +144,16 @@ def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
let Defs = [ARGUMENTS] in {
// Extending load.
def LOAD8_S_I32 : I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>;
def LOAD8_U_I32 : I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>;
def LOAD16_S_I32 : I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>;
def LOAD16_U_I32 : I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>;
def LOAD8_S_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>;
def LOAD8_U_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>;
def LOAD16_S_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>;
def LOAD16_U_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>;
def LOAD32_S_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>;
def LOAD32_U_I64 : I<(outs I64:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
[], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>;
def LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
def LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
def LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>;
def LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>;
def LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>;
def LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>;
def LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>;
def LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>;
def LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>;
def LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>;
} // Defs = [ARGUMENTS]
@@ -290,7 +261,6 @@ def : LoadPatNoOffset<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatNoOffset<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatNoOffset<i64, extloadi32, LOAD32_U_I64>;
// Select "don't care" extending loads with a constant offset.
def : LoadPatImmOff<i32, extloadi8, regPlusImm, LOAD8_U_I32>;
def : LoadPatImmOff<i32, extloadi16, regPlusImm, LOAD16_U_I32>;
@@ -313,7 +283,6 @@ def : LoadPatExternalSym<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatExternalSym<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatExternalSym<i64, extloadi32, LOAD32_U_I64>;
// Select "don't care" extending loads with just a constant offset.
def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>;
def : LoadPatOffsetOnly<i32, extloadi16, LOAD16_U_I32>;


@@ -97,6 +97,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::LOAD32_S_I64:
case WebAssembly::LOAD32_U_I64:
case WebAssembly::ATOMIC_LOAD_I32:
case WebAssembly::ATOMIC_LOAD8_U_I32:
case WebAssembly::ATOMIC_LOAD16_U_I32:
case WebAssembly::ATOMIC_LOAD_I64:
case WebAssembly::ATOMIC_LOAD8_U_I64:
case WebAssembly::ATOMIC_LOAD16_U_I64:
case WebAssembly::ATOMIC_LOAD32_U_I64:
RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
break;
case WebAssembly::STORE_I32:


@@ -1,16 +0,0 @@
; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics | FileCheck %s
; Test that atomic loads are assembled properly.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"
; CHECK-LABEL: load_i32_atomic:
; CHECK: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @load_i32_atomic(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 4
ret i32 %v
}


@@ -1,4 +1,4 @@
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
; Test loads and stores with custom alignment values.
@@ -210,3 +210,29 @@ define void @sti16_a4(i16 *%p, i16 %v) {
store i16 %v, i16* %p, align 4
ret void
}
; Atomics.
; Wasm atomics have the alignment field, but it must always equal the
; type's natural alignment.
; CHECK-LABEL: ldi32_atomic_a4:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @ldi32_atomic_a4(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 4
ret i32 %v
}
; 8 is greater than the default alignment so it is rounded down to 4
; CHECK-LABEL: ldi32_atomic_a8:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @ldi32_atomic_a8(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 8
ret i32 %v
}


@@ -1,4 +1,4 @@
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
; Test loads and stores with custom alignment values.
@@ -323,3 +323,26 @@ define void @sti32_a8(i32 *%p, i64 %w) {
store i32 %v, i32* %p, align 8
ret void
}
; Atomics.
; CHECK-LABEL: ldi64_atomic_a8:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i64{{$}}
; CHECK-NEXT: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @ldi64_atomic_a8(i64 *%p) {
%v = load atomic i64, i64* %p seq_cst, align 8
ret i64 %v
}
; 16 is greater than the default alignment so it is ignored.
; CHECK-LABEL: ldi64_atomic_a16:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i64{{$}}
; CHECK-NEXT: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @ldi64_atomic_a16(i64 *%p) {
%v = load atomic i64, i64* %p seq_cst, align 16
ret i64 %v
}


@@ -0,0 +1,102 @@
; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s
; Test that extending loads are assembled properly.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"
; CHECK-LABEL: sext_i8_i32:
; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i32 @sext_i8_i32(i8 *%p) {
%v = load atomic i8, i8* %p seq_cst, align 1
%e = sext i8 %v to i32
ret i32 %e
}
; CHECK-LABEL: zext_i8_i32:
; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i8_i32(i8 *%p) {
e1:
%v = load atomic i8, i8* %p seq_cst, align 1
%e = zext i8 %v to i32
ret i32 %e
}
; CHECK-LABEL: sext_i16_i32:
; CHECK: i32.atomic.load16_u $push0=, 0($0){{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i32 @sext_i16_i32(i16 *%p) {
%v = load atomic i16, i16* %p seq_cst, align 2
%e = sext i16 %v to i32
ret i32 %e
}
; CHECK-LABEL: zext_i16_i32:
; CHECK: i32.atomic.load16_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @zext_i16_i32(i16 *%p) {
%v = load atomic i16, i16* %p seq_cst, align 2
%e = zext i16 %v to i32
ret i32 %e
}
; CHECK-LABEL: sext_i8_i64:
; CHECK: i64.atomic.load8_u $push0=, 0($0){{$}}
; CHECK: i64.extend8_s $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i64 @sext_i8_i64(i8 *%p) {
%v = load atomic i8, i8* %p seq_cst, align 1
%e = sext i8 %v to i64
ret i64 %e
}
; CHECK-LABEL: zext_i8_i64:
; CHECK: i64.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i8_i64(i8 *%p) {
%v = load atomic i8, i8* %p seq_cst, align 1
%e = zext i8 %v to i64
ret i64 %e
}
; CHECK-LABEL: sext_i16_i64:
; CHECK: i64.atomic.load16_u $push0=, 0($0){{$}}
; CHECK: i64.extend16_s $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i64 @sext_i16_i64(i16 *%p) {
%v = load atomic i16, i16* %p seq_cst, align 2
%e = sext i16 %v to i64
ret i64 %e
}
; CHECK-LABEL: zext_i16_i64:
; CHECK: i64.atomic.load16_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @zext_i16_i64(i16 *%p) {
%v = load atomic i16, i16* %p seq_cst, align 2
%e = zext i16 %v to i64
ret i64 %e
}
; CHECK-LABEL: sext_i32_i64:
; CHECK: i32.atomic.load $push0=, 0($0){{$}}
; CHECK: i64.extend_s/i32 $push1=, $pop0{{$}}
; CHECK-NEXT: return $pop1{{$}}
define i64 @sext_i32_i64(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 4
%e = sext i32 %v to i64
ret i64 %e
}
; CHECK-LABEL: zext_i32_i64:
; CHECK: i64.atomic.load32_u $push0=, 0($0){{$}}
; CHECK: return $pop0{{$}}
define i64 @zext_i32_i64(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 4
%e = zext i32 %v to i64
ret i64 %e
}


@@ -0,0 +1,307 @@
; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics | FileCheck %s
; Test that atomic loads are assembled properly.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"
; CHECK-LABEL: load_i32_no_offset:
; CHECK: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @load_i32_no_offset(i32 *%p) {
%v = load atomic i32, i32* %p seq_cst, align 4
ret i32 %v
}
; With an nuw add, we can fold an offset.
; CHECK-LABEL: load_i32_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_offset(i32* %p) {
%q = ptrtoint i32* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i32*
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; With an inbounds gep, we can fold an offset.
; CHECK-LABEL: load_i32_with_folded_gep_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_gep_offset(i32* %p) {
%s = getelementptr inbounds i32, i32* %p, i32 6
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; We can't fold a negative offset though, even with an inbounds gep.
; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
%s = getelementptr inbounds i32, i32* %p, i32 -6
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; Without nuw, and even with nsw, we can't fold an offset.
; CHECK-LABEL: load_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_offset(i32* %p) {
%q = ptrtoint i32* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i32*
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; Without inbounds, we can't fold a gep offset.
; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
%s = getelementptr i32, i32* %p, i32 6
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; CHECK-LABEL: load_i64_no_offset:
; CHECK: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @load_i64_no_offset(i64 *%p) {
%v = load atomic i64, i64* %p seq_cst, align 8
ret i64 %v
}
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_folded_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_offset(i64* %p) {
%q = ptrtoint i64* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i64*
%t = load atomic i64, i64* %s seq_cst, align 8
ret i64 %t
}
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_folded_gep_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_gep_offset(i64* %p) {
%s = getelementptr inbounds i64, i64* %p, i32 3
%t = load atomic i64, i64* %s seq_cst, align 8
ret i64 %t
}
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
%s = getelementptr inbounds i64, i64* %p, i32 -3
%t = load atomic i64, i64* %s seq_cst, align 8
ret i64 %t
}
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_offset(i64* %p) {
%q = ptrtoint i64* %p to i32
%r = add nsw i32 %q, 24
%s = inttoptr i32 %r to i64*
%t = load atomic i64, i64* %s seq_cst, align 8
ret i64 %t
}
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
%s = getelementptr i64, i64* %p, i32 3
%t = load atomic i64, i64* %s seq_cst, align 8
ret i64 %t
}
; CHECK-LABEL: load_i32_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @load_i32_with_folded_or_offset(i32 %x) {
%and = and i32 %x, -4
%t0 = inttoptr i32 %and to i8*
%arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
%t1 = load atomic i8, i8* %arrayidx seq_cst, align 8
%conv = sext i8 %t1 to i32
ret i32 %conv
}
; When loading from a fixed address, materialize a zero.
; CHECK-LABEL: load_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, 42($pop0){{$}}
define i32 @load_i32_from_numeric_address() {
%s = inttoptr i32 42 to i32*
%t = load atomic i32, i32* %s seq_cst, align 4
ret i32 %t
}
; CHECK-LABEL: load_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, gv($pop0){{$}}
@gv = global i32 0
define i32 @load_i32_from_global_address() {
%t = load atomic i32, i32* @gv seq_cst, align 4
ret i32 %t
}
; Fold an offset into a sign-extending load.
; CHECK-LABEL: load_i8_s_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_s_with_folded_offset(i8* %p) {
%q = ptrtoint i8* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i8*
%t = load atomic i8, i8* %s seq_cst, align 1
%u = sext i8 %t to i32
ret i32 %u
}
; Fold a gep offset into a sign-extending load.
; CHECK-LABEL: load_i8_s_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_s_with_folded_gep_offset(i8* %p) {
%s = getelementptr inbounds i8, i8* %p, i32 24
%t = load atomic i8, i8* %s seq_cst, align 1
%u = sext i8 %t to i32
ret i32 %u
}
; CHECK-LABEL: load_i16_s_i64_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 6($0){{$}}
define i64 @load_i16_s_i64_with_folded_gep_offset(i16* %p) {
%s = getelementptr inbounds i16, i16* %p, i32 3
%t = load atomic i16, i16* %s seq_cst, align 2
%u = zext i16 %t to i64
ret i64 %u
}
; CHECK-LABEL: load_i64_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @load_i64_with_folded_or_offset(i32 %x) {
%and = and i32 %x, -4
%t0 = inttoptr i32 %and to i8*
%arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
%t1 = load atomic i8, i8* %arrayidx seq_cst, align 8
%conv = sext i8 %t1 to i64
ret i64 %conv
}
; Fold an offset into a zero-extending load.
; CHECK-LABEL: load_i16_u_with_folded_offset:
; CHECK: i32.atomic.load16_u $push0=, 24($0){{$}}
define i32 @load_i16_u_with_folded_offset(i8* %p) {
%q = ptrtoint i8* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i16*
%t = load atomic i16, i16* %s seq_cst, align 2
%u = zext i16 %t to i32
ret i32 %u
}
; Fold a gep offset into a zero-extending load.
; CHECK-LABEL: load_i8_u_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_u_with_folded_gep_offset(i8* %p) {
%s = getelementptr inbounds i8, i8* %p, i32 24
%t = load atomic i8, i8* %s seq_cst, align 1
%u = zext i8 %t to i32
ret i32 %u
}
; When loading from a fixed address, materialize a zero.
; As above but with extending load.
; CHECK-LABEL: load_zext_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
define i32 @load_zext_i32_from_numeric_address() {
%s = inttoptr i32 42 to i16*
%t = load atomic i16, i16* %s seq_cst, align 2
%u = zext i16 %t to i32
ret i32 %u
}
; CHECK-LABEL: load_sext_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
@gv8 = global i8 0
define i32 @load_sext_i32_from_global_address() {
%t = load atomic i8, i8* @gv8 seq_cst, align 1
%u = sext i8 %t to i32
ret i32 %u
}
; Fold an offset into a sign-extending load.
; As above but 32 extended to 64 bit.
; CHECK-LABEL: load_i32_i64_s_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
; CHECK-NEXT: i64.extend_s/i32 $push1=, $pop0{{$}}
define i64 @load_i32_i64_s_with_folded_offset(i32* %p) {
%q = ptrtoint i32* %p to i32
%r = add nuw i32 %q, 24
%s = inttoptr i32 %r to i32*
%t = load atomic i32, i32* %s seq_cst, align 4
%u = sext i32 %t to i64
ret i64 %u
}
; Fold a gep offset into a zero-extending load.
; As above but 32 extended to 64 bit.
; CHECK-LABEL: load_i32_i64_u_with_folded_gep_offset:
; CHECK: i64.atomic.load32_u $push0=, 96($0){{$}}
define i64 @load_i32_i64_u_with_folded_gep_offset(i32* %p) {
%s = getelementptr inbounds i32, i32* %p, i32 24
%t = load atomic i32, i32* %s seq_cst, align 4
%u = zext i32 %t to i64
ret i64 %u
}
; i8 return value should test anyext loads
; CHECK-LABEL: ldi8_a1:
; CHECK: i32.atomic.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i8 @ldi8_a1(i8 *%p) {
%v = load atomic i8, i8* %p seq_cst, align 1
ret i8 %v
}