forked from OSchip/llvm-project
AMDGPU: Select i8/i16 global and flat atomic load/store
As far as I know these should be atomic anyway, as long as the address is aligned. Unaligned atomics hit an ugly error in AtomicExpand.
This commit is contained in:
parent
7c71ce97e7
commit
df29ec2f54
|
@ -898,6 +898,10 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
|
|||
"buffer_load_dwordx4", v4i32
|
||||
>;
|
||||
|
||||
// Atomic 8-bit and 16-bit global loads: each is matched to BUFFER_LOAD_UBYTE /
// BUFFER_LOAD_USHORT, once with an i32 result type and once with an i16 one.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
|
||||
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
|
||||
|
@ -1794,6 +1798,10 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
|
|||
>;
|
||||
}
|
||||
// Atomic global store patterns, instantiated only under the isGFX6GFX7
// subtarget predicate. Each line pairs the ADDR64 and OFFSET forms of one
// buffer store with a value type and an atomic_store_global_<bits> node;
// the 8-bit and 16-bit stores are listed for both i32 and i16 value types.
let SubtargetPredicate = isGFX6GFX7 in {
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i32, atomic_store_global_8>;
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i16, atomic_store_global_8>;
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i32, atomic_store_global_16>;
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i16, atomic_store_global_16>;
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_global_32>;
|
||||
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_global_64>;
|
||||
} // End SubtargetPredicate = isGFX6GFX7
|
||||
|
|
|
@ -1089,6 +1089,10 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
|
|||
|
||||
let OtherPredicates = [HasFlatAddressSpace] in {
|
||||
|
||||
// Atomic 8-bit and 16-bit flat loads: matched to FLAT_LOAD_UBYTE /
// FLAT_LOAD_USHORT, each with an i32 and an i16 result type.
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
|
||||
|
@ -1126,6 +1130,11 @@ def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
|
|||
|
||||
// Atomic flat store patterns. The 32-bit and 64-bit entries use
// FLAT_STORE_DWORD / FLAT_STORE_DWORDX2; the 8-bit and 16-bit entries use
// FLAT_STORE_BYTE / FLAT_STORE_SHORT and are listed for both i32 and i16
// value types.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
|
||||
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
|
||||
def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_flat_8, i32>;
|
||||
def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_flat_8, i16>;
|
||||
def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_flat_16, i32>;
|
||||
def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_flat_16, i16>;
|
||||
|
||||
|
||||
foreach as = [ "flat", "global" ] in {
|
||||
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
|
||||
|
@ -1310,6 +1319,10 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
|
|||
|
||||
let OtherPredicates = [HasFlatGlobalInsts] in {
|
||||
|
||||
// Atomic 8-bit and 16-bit global loads: matched to GLOBAL_LOAD_UBYTE /
// GLOBAL_LOAD_USHORT, each with an i32 and an i16 result type.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
|
||||
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
|
||||
|
@ -1369,6 +1382,10 @@ defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>
|
|||
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
|
||||
}
|
||||
|
||||
// Atomic global store patterns. The 8-bit and 16-bit entries use
// GLOBAL_STORE_BYTE / GLOBAL_STORE_SHORT with both i32 and i16 value types;
// the 32-bit and 64-bit entries use GLOBAL_STORE_DWORD / GLOBAL_STORE_DWORDX2.
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_global_8, i32>;
|
||||
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_global_8, i16>;
|
||||
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_global_16, i32>;
|
||||
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_global_16, i16>;
|
||||
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
|
||||
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;
|
||||
|
||||
|
|
|
@ -1128,3 +1128,149 @@ entry:
|
|||
store atomic float %in, float* %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 load from %in plus a constant 16 elements (default address
; space pointer); the loaded value is then written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i8_offset:
|
||||
; CIVI: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i8_offset(i8* %in, i8* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i8, i8* %in, i64 16
|
||||
%val = load atomic i8, i8* %gep seq_cst, align 1
|
||||
store i8 %val, i8* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 load directly from %in (no offset, default address space
; pointer); the loaded value is then written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i8:
|
||||
; GCN: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
|
||||
; GCN: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i8(i8* %in, i8* %out) {
|
||||
entry:
|
||||
%val = load atomic i8, i8* %in seq_cst, align 1
|
||||
store i8 %val, i8* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 load from %in indexed by the runtime value %index and then
; by a constant 16 elements; the loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i8_addr64_offset:
|
||||
; CIVI: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i8_addr64_offset(i8* %in, i8* %out, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i8, i8* %in, i64 %index
|
||||
%gep = getelementptr i8, i8* %ptr, i64 16
|
||||
%val = load atomic i8, i8* %gep seq_cst, align 1
|
||||
store i8 %val, i8* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 store of %in to %out plus a constant 16 elements
; (default address space pointer).
; GCN-LABEL: {{^}}atomic_store_i8_offset:
|
||||
; CIVI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, i8* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i8, i8* %out, i64 16
|
||||
store atomic i8 %in, i8* %gep seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 store of %in directly to %out (no offset, default address
; space pointer).
; GCN-LABEL: {{^}}atomic_store_i8:
|
||||
; GCN: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i8(i8 %in, i8* %out) {
|
||||
entry:
|
||||
store atomic i8 %in, i8* %out seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 store of %in to %out indexed by the runtime value %index
; and then by a constant 16 elements.
; GCN-LABEL: {{^}}atomic_store_i8_addr64_offset:
|
||||
; CIVI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i8_addr64_offset(i8 %in, i8* %out, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i8, i8* %out, i64 %index
|
||||
%gep = getelementptr i8, i8* %ptr, i64 16
|
||||
store atomic i8 %in, i8* %gep seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 load from %in plus a constant 8 elements (the checks below
; expect this as offset 16); the loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i16_offset:
|
||||
; CIVI: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i16_offset(i16* %in, i16* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i16, i16* %in, i64 8
|
||||
%val = load atomic i16, i16* %gep seq_cst, align 2
|
||||
store i16 %val, i16* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 load directly from %in (no offset, default address space
; pointer); the loaded value is then written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i16:
|
||||
; GCN: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
|
||||
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i16(i16* %in, i16* %out) {
|
||||
entry:
|
||||
%val = load atomic i16, i16* %in seq_cst, align 2
|
||||
store i16 %val, i16* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 load from %in indexed by the runtime value %index and then
; by a constant 8 elements; the loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i16_addr64_offset:
|
||||
; CIVI: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
|
||||
; GFX9: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
|
||||
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define amdgpu_kernel void @atomic_load_i16_addr64_offset(i16* %in, i16* %out, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i16, i16* %in, i64 %index
|
||||
%gep = getelementptr i16, i16* %ptr, i64 8
|
||||
%val = load atomic i16, i16* %gep seq_cst, align 2
|
||||
store i16 %val, i16* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 store of %in to %out plus a constant 8 elements
; (default address space pointer).
; GCN-LABEL: {{^}}atomic_store_i16_offset:
|
||||
; CIVI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, i16* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i16, i16* %out, i64 8
|
||||
store atomic i16 %in, i16* %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 store of %in directly to %out (no offset, default address
; space pointer).
; GCN-LABEL: {{^}}atomic_store_i16:
|
||||
; GCN: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i16(i16 %in, i16* %out) {
|
||||
entry:
|
||||
store atomic i16 %in, i16* %out seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 store of %in to %out indexed by the runtime value %index
; and then by a constant 8 elements.
; GCN-LABEL: {{^}}atomic_store_i16_addr64_offset:
|
||||
; CIVI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i16_addr64_offset(i16 %in, i16* %out, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i16, i16* %out, i64 %index
|
||||
%gep = getelementptr i16, i16* %ptr, i64 8
|
||||
store atomic i16 %in, i16* %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic store of a half value to %out plus a constant 8 elements;
; the checks below expect the same short store as the i16 variant.
; GCN-LABEL: {{^}}atomic_store_f16_offset:
|
||||
; CIVI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
; GFX9: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_f16_offset(half %in, half* %out) {
|
||||
entry:
|
||||
%gep = getelementptr half, half* %out, i64 8
|
||||
store atomic half %in, half* %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic store of a half value directly to %out (no offset, default
; address space pointer).
; GCN-LABEL: {{^}}atomic_store_f16:
|
||||
; GCN: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
|
||||
define amdgpu_kernel void @atomic_store_f16(half %in, half* %out) {
|
||||
entry:
|
||||
store atomic half %in, half* %out seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -1316,3 +1316,124 @@ entry:
|
|||
store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 load from an addrspace(1) pointer, %in plus a constant
; 16 elements; the loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i8_offset:
|
||||
; SIVI: buffer_load_ubyte [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
|
||||
; SIVI: buffer_store_byte [[RET]]
|
||||
|
||||
; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_load_i8_offset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i8, i8 addrspace(1)* %in, i64 16
|
||||
%val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1
|
||||
store i8 %val, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 load from an addrspace(1) pointer at a negative constant
; index (-512 elements); the loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i8_negoffset:
|
||||
; SI: buffer_load_ubyte [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
|
||||
; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
|
||||
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
|
||||
; VI: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
|
||||
; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_load_i8_negoffset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i8, i8 addrspace(1)* %in, i64 -512
|
||||
%val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1
|
||||
store i8 %val, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 store of %in to an addrspace(1) pointer, %out plus a
; constant 16 elements.
; GCN-LABEL: {{^}}atomic_store_i8_offset:
|
||||
; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
|
||||
; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, i8 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i8, i8 addrspace(1)* %out, i64 16
|
||||
store atomic i8 %in, i8 addrspace(1)* %gep seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i8 store of %in directly to the addrspace(1) pointer %out
; (no offset).
; GCN-LABEL: {{^}}atomic_store_i8:
|
||||
; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
|
||||
define amdgpu_kernel void @atomic_store_i8(i8 %in, i8 addrspace(1)* %out) {
|
||||
entry:
|
||||
store atomic i8 %in, i8 addrspace(1)* %out seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 load from an addrspace(1) pointer, %in plus a constant
; 8 elements (the checks below expect this as offset 16); the loaded value is
; written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i16_offset:
|
||||
; SIVI: buffer_load_ushort [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
|
||||
; SIVI: buffer_store_short [[RET]]
|
||||
|
||||
; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_load_i16_offset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i16, i16 addrspace(1)* %in, i64 8
|
||||
%val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2
|
||||
store i16 %val, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 load from an addrspace(1) pointer at a negative constant
; index (-256 elements; the checks below expect this as offset -512); the
; loaded value is written to %out by a plain store.
; GCN-LABEL: {{^}}atomic_load_i16_negoffset:
|
||||
; SI: buffer_load_ushort [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
|
||||
; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
|
||||
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
|
||||
; VI: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
|
||||
|
||||
; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
|
||||
define amdgpu_kernel void @atomic_load_i16_negoffset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i16, i16 addrspace(1)* %in, i64 -256
|
||||
%val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2
|
||||
store i16 %val, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 store of %in to an addrspace(1) pointer, %out plus a
; constant 8 elements.
; GCN-LABEL: {{^}}atomic_store_i16_offset:
|
||||
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
|
||||
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, i16 addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr i16, i16 addrspace(1)* %out, i64 8
|
||||
store atomic i16 %in, i16 addrspace(1)* %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic i16 store of %in directly to the addrspace(1) pointer %out
; (no offset).
; GCN-LABEL: {{^}}atomic_store_i16:
|
||||
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
|
||||
define amdgpu_kernel void @atomic_store_i16(i16 %in, i16 addrspace(1)* %out) {
|
||||
entry:
|
||||
store atomic i16 %in, i16 addrspace(1)* %out seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic store of a half value to an addrspace(1) pointer, %out plus a
; constant 8 elements; the checks below expect the same short stores as the
; i16 variant.
; GCN-LABEL: {{^}}atomic_store_f16_offset:
|
||||
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
|
||||
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
define amdgpu_kernel void @atomic_store_f16_offset(half %in, half addrspace(1)* %out) {
|
||||
entry:
|
||||
%gep = getelementptr half, half addrspace(1)* %out, i64 8
|
||||
store atomic half %in, half addrspace(1)* %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; seq_cst atomic store of a half value directly to the addrspace(1) pointer
; %out (no offset).
; GCN-LABEL: {{^}}atomic_store_f16:
|
||||
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
|
||||
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
|
||||
define amdgpu_kernel void @atomic_store_f16(half %in, half addrspace(1)* %out) {
|
||||
entry:
|
||||
store atomic half %in, half addrspace(1)* %out seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue