[AMDGPU] Add patterns for i8/i16 local atomic load/store

Add patterns for i8/i16 local atomic load/store.

Added tests for new patterns.

Copied atomic_[store/load]_local.ll to GlobalISel directory.

Differential Revision: https://reviews.llvm.org/D111869
This commit is contained in:
Piotr Sobczak 2021-10-15 10:56:20 +02:00
parent 9635168083
commit d869921004
7 changed files with 407 additions and 0 deletions

View File

@ -422,6 +422,16 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
let MemoryVT = i16;
}
// Address-space-specialized PatFrag for an atomic i8 load; "#as" is the
// address-space suffix pasted in by the enclosing foreach over address spaces.
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i8;
}
// Address-space-specialized PatFrag for an atomic i16 load (mirrors the
// i8 fragment above; suffix comes from the enclosing foreach).
def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i16;
}
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i32;

View File

@ -714,6 +714,10 @@ foreach vt = Reg32Types.types in {
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
}
// Select DS read instructions for local (LDS) atomic loads. The narrow
// i8/i16 loads are instantiated with both i16 and i32 result types so
// extending users of the narrow atomic load are also covered.
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
@ -774,6 +778,10 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
// Select DS write instructions for local (LDS) atomic stores; the i8/i16
// stores also accept an i32 source value (truncating store).
// NOTE(review): the store fragments are named atomic_store_local_<size>
// while the load fragments are atomic_load_<size>_local — presumably this
// matches the PatFrag definitions elsewhere; verify the spelling.
defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;

View File

@ -333,6 +333,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
// Glued atomic i8 load fragment (wraps AMDGPUatomic_ld_glue so the load is
// glued to the preceding M0 initialization on targets that need it).
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i8;
}
// Glued atomic i16 load fragment; same structure as the i8 variant above.
def atomic_load_16_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i16;
}
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
@ -423,6 +435,14 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
} // End IsLoad = 1
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
// LDS-only variant of the glued atomic i8 load; IsAtomic and the local
// address-space restriction come from the enclosing 'let' block.
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_8_glue node:$ptr)> {
let MemoryVT = i8;
}
// LDS-only variant of the glued atomic i16 load (see i8 variant above).
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_16_glue node:$ptr)> {
let MemoryVT = i16;
}
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_32_glue node:$ptr)> {
let MemoryVT = i32;
@ -509,6 +529,18 @@ def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
let AddressSpaces = StoreAddress_local.AddrSpaces in {
// Glued atomic i8 store to LDS (address space pinned by the enclosing
// 'let AddressSpaces = StoreAddress_local.AddrSpaces' block).
def atomic_store_local_8_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i8;
}
// Glued atomic i16 store to LDS; mirrors the i8 store fragment above.
def atomic_store_local_16_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i16;
}
def atomic_store_local_32_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {

View File

@ -0,0 +1,154 @@
; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; Monotonic atomic i8 load from LDS should select ds_read_u8;
; only pre-GFX9 (CI) needs to initialize m0 first.
; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
%load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
ret i8 %load
}
; The gep (16 x i8) should fold into the DS immediate offset (offset:16).
; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
%load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
ret i8 %load
}
; Monotonic atomic i16 load from LDS should select ds_read_u16.
; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
%load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
ret i16 %load
}
; The gep (16 x i16 = 32 bytes) should fold into the DS offset (offset:32).
; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
%load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
ret i16 %load
}
; Monotonic atomic i32 load from LDS should select ds_read_b32.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
%load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
ret i32 %load
}
; The gep (16 x i32 = 64 bytes) should fold into the DS offset (offset:64).
; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
%load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
ret i32 %load
}
; Monotonic atomic i64 load from LDS should select ds_read_b64 (64-bit
; result lands in a VGPR pair).
; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
%load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
ret i64 %load
}
; The gep (16 x i64 = 128 bytes) should fold into the DS offset (offset:128).
; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
%load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
ret i64 %load
}
; float atomic loads should use the same ds_read_b32 selection as i32.
; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
%gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
%load = load atomic float, float addrspace(3)* %gep monotonic, align 4
ret float %load
}
; double atomic loads should use the same ds_read_b64 selection as i64.
; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
%gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
%load = load atomic double, double addrspace(3)* %gep monotonic, align 8
ret double %load
}
; Flat (addrspace 0) pointers are 64-bit, so this is a ds_read_b64.
; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
%load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
ret i8* %load
}
; Local (addrspace 3) pointers are 32-bit, so this is a ds_read_b32.
; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
%load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
ret i8 addrspace(3)* %load
}

View File

@ -0,0 +1,103 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; Monotonic atomic i8 store to LDS should select ds_write_b8;
; only pre-GFX9 (CI) needs to initialize m0 first.
; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
ret void
}
; The gep (16 x i8) should fold into the DS immediate offset (offset:16).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
ret void
}
; Monotonic atomic i16 store to LDS should select ds_write_b16.
; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
ret void
}
; The gep (16 x i16 = 32 bytes) should fold into the DS offset (offset:32).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
ret void
}
; Monotonic atomic i32 store to LDS should select ds_write_b32.
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b32 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
ret void
}
; The gep (16 x i32 = 64 bytes) should fold into the DS offset (offset:64).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
%gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
ret void
}
; Monotonic atomic i64 store to LDS should select ds_write_b64 (64-bit
; source in a VGPR pair).
; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
ret void
}
; The gep (16 x i64 = 128 bytes) should fold into the DS offset (offset:128).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
ret void
}

View File

@ -1,6 +1,56 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; SelectionDAG path: monotonic atomic i8 LDS load selects ds_read_u8.
; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
%load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
ret i8 %load
}
; The gep (16 x i8) should fold into the DS immediate offset (offset:16).
; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
%load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
ret i8 %load
}
; SelectionDAG path: monotonic atomic i16 LDS load selects ds_read_u16.
; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
%load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
ret i16 %load
}
; The gep (16 x i16 = 32 bytes) should fold into the DS offset (offset:32).
; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
%load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
ret i16 %load
}
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0

View File

@ -1,6 +1,56 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; SelectionDAG path: monotonic atomic i8 LDS store selects ds_write_b8.
; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
ret void
}
; The gep (16 x i8) should fold into the DS immediate offset (offset:16).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
ret void
}
; SelectionDAG path: monotonic atomic i16 LDS store selects ds_write_b16.
; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
ret void
}
; The gep (16 x i16 = 32 bytes) should fold into the DS offset (offset:32).
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
ret void
}
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0