forked from OSchip/llvm-project
[AMDGPU] Add patterns for i8/i16 local atomic load/store
Add patterns for i8/i16 local atomic load/store. Add tests for the new patterns. Copy atomic_[store/load]_local.ll to the GlobalISel directory. Differential Revision: https://reviews.llvm.org/D111869
This commit is contained in:
parent
9635168083
commit
d869921004
|
@ -422,6 +422,16 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
|
|||
let MemoryVT = i16;
|
||||
}
|
||||
|
||||
// PatFrag matching (atomic_load_8 $ptr), flagged atomic with an i8 memory type.
// The `#as` suffix is pasted in from an enclosing scope that is not visible in
// this hunk (presumably a per-address-space loop -- confirm).
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i8;
|
||||
}
|
||||
|
||||
// PatFrag matching (atomic_load_16 $ptr), flagged atomic with an i16 memory type.
// The `#as` suffix is pasted in from an enclosing scope that is not visible in
// this hunk (presumably a per-address-space loop -- confirm).
def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i16;
|
||||
}
|
||||
|
||||
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i32;
|
||||
|
|
|
@ -714,6 +714,10 @@ foreach vt = Reg32Types.types in {
|
|||
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
|
||||
}
|
||||
|
||||
// Selection patterns for the atomic_load_*_local fragments. The 8- and 16-bit
// fragments are instantiated for both i16 and i32 result types and select
// DS_READ_U8 / DS_READ_U16; the 32- and 64-bit fragments select
// DS_READ_B32 / DS_READ_B64.
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
|
||||
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
|
||||
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
|
||||
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
|
||||
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
|
||||
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
|
||||
|
||||
|
@ -774,6 +778,10 @@ foreach vt = Reg32Types.types in {
|
|||
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
|
||||
}
|
||||
|
||||
// Selection patterns for the atomic_store_local_* fragments. The 8- and 16-bit
// fragments are instantiated for both i16 and i32 value types and select
// DS_WRITE_B8 / DS_WRITE_B16; the 32- and 64-bit fragments select
// DS_WRITE_B32 / DS_WRITE_B64.
defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
|
||||
defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
|
||||
defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
|
||||
defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
|
||||
defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
|
||||
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
|
||||
|
||||
|
|
|
@ -333,6 +333,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
|
|||
let IsNonExtLoad = 1;
|
||||
}
|
||||
|
||||
// PatFrag over AMDGPUatomic_ld_glue on $ptr, flagged atomic and restricted to
// an i8 memory type.
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
|
||||
(AMDGPUatomic_ld_glue node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i8;
|
||||
}
|
||||
|
||||
// PatFrag over AMDGPUatomic_ld_glue on $ptr, flagged atomic and restricted to
// an i16 memory type.
def atomic_load_16_glue : PatFrag<(ops node:$ptr),
|
||||
(AMDGPUatomic_ld_glue node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i16;
|
||||
}
|
||||
|
||||
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
|
||||
(AMDGPUatomic_ld_glue node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
|
@ -423,6 +435,14 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
|
|||
} // End IsLoad = 1
|
||||
|
||||
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
|
||||
// 8-bit variant: wraps atomic_load_8_glue with MemoryVT = i8. IsAtomic and the
// AddressSpaces restriction are supplied by the enclosing `let ... in` block.
// NOTE(review): the _m0 suffix presumably marks the form used when M0 must be
// set up before the access -- confirm against the users of this fragment.
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
|
||||
(atomic_load_8_glue node:$ptr)> {
|
||||
let MemoryVT = i8;
|
||||
}
|
||||
// 16-bit variant: wraps atomic_load_16_glue with MemoryVT = i16. IsAtomic and
// the AddressSpaces restriction are supplied by the enclosing `let ... in` block.
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
|
||||
(atomic_load_16_glue node:$ptr)> {
|
||||
let MemoryVT = i16;
|
||||
}
|
||||
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
|
||||
(atomic_load_32_glue node:$ptr)> {
|
||||
let MemoryVT = i32;
|
||||
|
@ -509,6 +529,18 @@ def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
|
|||
|
||||
let AddressSpaces = StoreAddress_local.AddrSpaces in {
|
||||
|
||||
// PatFrag over AMDGPUatomic_st_glue ($value, $ptr), flagged atomic with an i8
// memory type. The AddressSpaces restriction is supplied by the enclosing
// `let AddressSpaces = StoreAddress_local.AddrSpaces in` block.
def atomic_store_local_8_m0 : PatFrag <
|
||||
(ops node:$value, node:$ptr),
|
||||
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i8;
|
||||
}
|
||||
// PatFrag over AMDGPUatomic_st_glue ($value, $ptr), flagged atomic with an i16
// memory type. The AddressSpaces restriction is supplied by the enclosing
// `let AddressSpaces = StoreAddress_local.AddrSpaces in` block.
def atomic_store_local_16_m0 : PatFrag <
|
||||
(ops node:$value, node:$ptr),
|
||||
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
|
||||
let IsAtomic = 1;
|
||||
let MemoryVT = i16;
|
||||
}
|
||||
def atomic_store_local_32_m0 : PatFrag <
|
||||
(ops node:$value, node:$ptr),
|
||||
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u8 v0, v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
|
||||
%load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
|
||||
ret i8 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 load through a GEP of 16 i8 elements (16 bytes); the CHECK
; lines for this function expect the GEP to appear as `offset:16`.
define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
|
||||
%load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
|
||||
ret i8 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
|
||||
%load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
|
||||
ret i16 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 load through a GEP of 16 i16 elements (16 * 2 = 32 bytes);
; the CHECK lines for this function expect `offset:32`.
define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
|
||||
%load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
|
||||
ret i16 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b32 v0, v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i32 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
|
||||
%load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i32 load through a GEP of 16 i32 elements (16 * 4 = 64 bytes);
; the CHECK lines for this function expect `offset:64`.
define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i64 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
|
||||
%load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
|
||||
ret i64 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i64 load through a GEP of 16 i64 elements (16 * 8 = 128 bytes);
; the CHECK lines for this function expect `offset:128`.
define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
|
||||
ret i64 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic float load through a GEP of 16 float elements (16 * 4 = 64
; bytes); the CHECK lines for this function expect ds_read_b32 with `offset:64`.
define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic float, float addrspace(3)* %gep monotonic, align 4
|
||||
ret float %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic double load through a GEP of 16 double elements (16 * 8 = 128
; bytes); the CHECK lines for this function expect ds_read_b64 with `offset:128`.
define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic double, double addrspace(3)* %gep monotonic, align 8
|
||||
ret double %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic load of an i8* (default address space) pointer value stored
; in addrspace(3) memory, through a GEP of 16 elements. The CHECK lines for this
; function expect a 64-bit ds_read_b64 with `offset:128`.
define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
|
||||
ret i8* %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic load of an addrspace(3) pointer value stored in addrspace(3)
; memory, through a GEP of 16 elements. The CHECK lines for this function expect
; a 32-bit ds_read_b32 with `offset:64`.
define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
|
||||
%load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
|
||||
ret i8 addrspace(3)* %load
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b8 v0, v1{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
|
||||
store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 store through a GEP of 16 i8 elements (16 bytes); the CHECK
; lines for this function expect `offset:16`.
define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
|
||||
store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
|
||||
store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 store through a GEP of 16 i16 elements (16 * 2 = 32
; bytes); the CHECK lines for this function expect `offset:32`.
define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
|
||||
store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b32 v0, v1{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i32 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
|
||||
store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i32 store through a GEP of 16 i32 elements (16 * 4 = 64
; bytes); the CHECK lines for this function expect `offset:64`.
define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
|
||||
store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i64 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
|
||||
store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i64 store through a GEP of 16 i64 elements (16 * 8 = 128
; bytes); the CHECK lines for this function expect `offset:128`.
define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
|
||||
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
|
||||
store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,6 +1,56 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u8 v0, v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
|
||||
%load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
|
||||
ret i8 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 load through a GEP of 16 i8 elements (16 bytes); the CHECK
; lines for this function expect the GEP to appear as `offset:16`.
define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
|
||||
%load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
|
||||
ret i8 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 load through an addrspace(3) pointer; the loaded value is
; returned unchanged.
define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
|
||||
%load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
|
||||
ret i16 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 load through a GEP of 16 i16 elements (16 * 2 = 32 bytes);
; the CHECK lines for this function expect `offset:32`.
define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
|
||||
%load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
|
||||
ret i16 %load
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
|
|
|
@ -1,6 +1,56 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b8 v0, v1{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
|
||||
store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i8 store through a GEP of 16 i8 elements (16 bytes); the CHECK
; lines for this function expect `offset:16`.
define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
|
||||
%gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
|
||||
store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 store of %val through an addrspace(3) pointer.
define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
|
||||
store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
; CI-NEXT: s_mov_b32 m0
|
||||
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
; Monotonic atomic i16 store through a GEP of 16 i16 elements (16 * 2 = 32
; bytes); the CHECK lines for this function expect `offset:32`.
define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
|
||||
store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NOT: s_mov_b32 m0
|
||||
|
|
Loading…
Reference in New Issue