; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Origin: taken from a repository forked from OSchip/llvm-project.
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Module-level arrays used by the first two kernels in this file. Each one is
; four i32 elements (16 bytes); they differ only in address space and in the
; requested alignment.
; NOTE(review): the names suggest addrspace(2) is GDS and addrspace(3) is LDS
; on this target -- confirm against the AMDGPU address-space table.
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
@lds1 = internal addrspace(3) global [4 x i32] undef, align 256

; These two objects should be allocated at the same constant offsets
; from the base.
;
; The kernel performs an acq_rel atomic add of 5 on element 3 of @gds0
; (addrspace 2) and on element 3 of @lds0 (addrspace 3). Element 3 of an i32
; array is byte offset 12, and the expected output uses offset:12 for both
; ds_add_u32 instructions; only the first one carries the "gds" modifier.
; m0 is expected to be set to 16, which equals the 16-byte size of @gds0.
define amdgpu_kernel void @alloc_lds_gds(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 5
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, 16
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_endpgm
  ; Atomic add on the last element of the addrspace(2) array.
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
  ; Same operation on the last element of the addrspace(3) array.
  %gep.lds = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds, i32 5 acq_rel
  ret void
}

; The LDS alignment shouldn't change offset of GDS.
;
; Same addrspace(2) access as in @alloc_lds_gds, followed by atomic adds on
; element 3 of both addrspace(3) arrays. @lds0 is declared align 128 and @lds1
; align 256. The expected addrspace(3) offsets are 140 (= 128 + 12) for @lds0
; and 12 (= 0 + 12) for @lds1, while the "gds" access is still expected at
; offset:12 with m0 set to 16.
define amdgpu_kernel void @alloc_lds_gds_align(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds_align:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 5
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, 16
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:140
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_endpgm
  ; addrspace(2) access: element 3 of @gds0.
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel

  ; addrspace(3) access: element 3 of the 128-byte-aligned array.
  %gep.lds0 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds0, i32 5 acq_rel

  ; addrspace(3) access: element 3 of the 256-byte-aligned array.
  %gep.lds1 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds1, i32 0, i32 3
  %val2 = atomicrmw add i32 addrspace(3)* %gep.lds1, i32 5 acq_rel
  ret void
}

; Two 16-byte addrspace(2) arrays with different alignment requests (8 and 32
; bytes); both are accessed by @gds_global_align and
; @gds_global_align_plus_attr.
@gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8
@gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32

; Atomic adds of 5 on element 3 of @gds_align8 and of @gds_align32, with no
; attribute group on the kernel. The expected offsets are 28 (= 16 + 12) for
; @gds_align8 and 12 (= 0 + 12) for @gds_align32, so the expected layout has
; the align-32 array first and the align-8 array directly after it. m0 is
; expected to be 32, the combined size of the two 16-byte arrays.
define amdgpu_kernel void @gds_global_align(i32 addrspace(1)* %out) {
; GCN-LABEL: gds_global_align:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 5
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 m0, 32
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:28 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_endpgm
  ; Element 3 of the align-8 array.
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  ; Element 3 of the align-32 array.
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; Same body as @gds_global_align, but the kernel carries attribute group #0
; ("amdgpu-gds-size"="1024"). Every expected value is shifted by 1024 relative
; to @gds_global_align: offsets 1052 (= 1024 + 28) and 1036 (= 1024 + 12), and
; m0 = 0x420 (= 1024 + 32).
define amdgpu_kernel void @gds_global_align_plus_attr(i32 addrspace(1)* %out) #0 {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 5
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_movk_i32 m0, 0x420
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:1052 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:1036 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_endpgm
  ; Element 3 of the align-8 array.
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  ; Element 3 of the align-32 array.
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; @small.gds is a single byte in addrspace(2); its address is consumed by the
; inline asm in @gds_extern_align.
@small.gds = internal addrspace(2) global i8 undef, align 1
; NOTE(review): despite its name, @gds.external is declared in addrspace(3),
; and no function in this file references it -- confirm this is intended.
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4

; Passes the address of @small.gds to an inline-asm SGPR operand ("s"
; constraint), then performs an acq_rel atomic add of 5 on element 3 of the
; addrspace(2) pointer argument %gds.arg. The kernel carries attribute group
; #0 ("amdgpu-gds-size"="1024"). The expected output materializes 0x400 (1024)
; as the value handed to the asm and sets m0 to 0x401 (= 1024 + 1, i.e. one
; extra byte for @small.gds). The pointer argument is expected to be loaded
; with s_load_dword at offset 0x8 and used with offset:12.
define amdgpu_kernel void @gds_extern_align(i32 addrspace(1)* %out, [4 x i32] addrspace(2)* %gds.arg) #0 {
; GCN-LABEL: gds_extern_align:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[0:1], 0x8
; GCN-NEXT: v_mov_b32_e32 v0, 5
; GCN-NEXT: s_movk_i32 m0, 0x401
; GCN-NEXT: s_movk_i32 s1, 0x400
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s1
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_wbinvl1
; GCN-NEXT: s_endpgm
  ; Side-effecting asm keeps the @small.gds address live in the output.
  call void asm sideeffect "; use $0","s"(i8 addrspace(2)* @small.gds)
  ; Atomic add on element 3 of the array pointed to by the kernel argument.
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* %gds.arg, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  ret void
}

; Attribute group #0 sets "amdgpu-gds-size" to 1024; it is attached to
; @gds_global_align_plus_attr and @gds_extern_align.
; NOTE(review): @alloc_lds_gds and @alloc_lds_gds_align reference attribute
; group #1, which is not defined anywhere in the visible file -- confirm that
; is intentional.
attributes #0 = { "amdgpu-gds-size"="1024" }