Commit 087dcbe9bc (parent 24182f14b6), in a fork of OSchip/llvm-project:
AMDGPU: Add baseline tests for global saddr matching
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; --------------------------------------------------------------------------------
; amdgcn atomic csub
; --------------------------------------------------------------------------------
||||
; csub with used return value; SGPR base plus zero-extended VGPR offset, no
; immediate offset. Baseline: address is currently materialized with a 64-bit
; VALU add instead of the saddr form.
define amdgpu_ps float @global_csub_saddr_i32_rtn(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_csub_saddr_i32_rtn:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_add_co_u32_e64 v2, s0, s2, v0
; GCN-NEXT:    ; implicit-def: $vcc_hi
; GCN-NEXT:    v_add_co_ci_u32_e64 v3, s0, s3, 0, s0
; GCN-NEXT:    global_atomic_csub v0, v[2:3], v1, off glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep0, i32 %data)
  %cast.rtn = bitcast i32 %rtn to float
  ret float %cast.rtn
}
|
||||
|
||||
; Same as above with an additional -128 byte immediate offset folded into the
; instruction's offset field.
define amdgpu_ps float @global_csub_saddr_i32_rtn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_csub_saddr_i32_rtn_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_add_co_u32_e64 v2, s0, s2, v0
; GCN-NEXT:    ; implicit-def: $vcc_hi
; GCN-NEXT:    v_add_co_ci_u32_e64 v3, s0, s3, 0, s0
; GCN-NEXT:    global_atomic_csub v0, v[2:3], v1, off offset:-128 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; return to shader part epilog
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep1, i32 %data)
  %cast.rtn = bitcast i32 %rtn to float
  ret float %cast.rtn
}
|
||||
|
||||
; csub with ignored return value (result register still defined by the glc
; form of the instruction).
define amdgpu_ps void @global_csub_saddr_i32_nortn(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_csub_saddr_i32_nortn:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_add_co_u32_e64 v2, s0, s2, v0
; GCN-NEXT:    v_add_co_ci_u32_e64 v3, s0, s3, 0, s0
; GCN-NEXT:    global_atomic_csub v0, v[2:3], v1, off glc
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
  %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep0, i32 %data)
  ret void
}
|
||||
|
||||
; No-return csub with a -128 byte immediate offset.
define amdgpu_ps void @global_csub_saddr_i32_nortn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_csub_saddr_i32_nortn_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_add_co_u32_e64 v2, s0, s2, v0
; GCN-NEXT:    v_add_co_ci_u32_e64 v3, s0, s3, 0, s0
; GCN-NEXT:    global_atomic_csub v0, v[2:3], v1, off offset:-128 glc
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
  %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep1, i32 %data)
  ret void
}
|
||||
|
||||
declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0

attributes #0 = { argmemonly nounwind willreturn }
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefixes=GCN,GFX908 %s

; Test using saddr addressing mode of global_* flat atomic instructions.

; --------------------------------------------------------------------------------
; amdgcn global atomic fadd
; --------------------------------------------------------------------------------
||||
; f32 global fadd, no return value, no immediate offset. Baseline: the address
; is built with VALU adds rather than using the saddr addressing mode.
define amdgpu_ps void @global_fadd_saddr_f32_nortn(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_fadd_saddr_f32_nortn:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GCN-NEXT:    global_atomic_add_f32 v[2:3], v1, off
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to float addrspace(1)*
  call void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep0, float %data)
  ret void
}
|
||||
|
||||
; f32 global fadd with a -128 byte immediate offset folded into the
; instruction's offset field.
define amdgpu_ps void @global_fadd_saddr_f32_nortn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_fadd_saddr_f32_nortn_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GCN-NEXT:    global_atomic_add_f32 v[2:3], v1, off offset:-128
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to float addrspace(1)*
  call void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* %cast.gep1, float %data)
  ret void
}
|
||||
|
||||
; Packed <2 x half> global fadd, no return value, no immediate offset.
define amdgpu_ps void @global_fadd_saddr_v2f16_nortn(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x half> %data) {
; GCN-LABEL: global_fadd_saddr_v2f16_nortn:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GCN-NEXT:    global_atomic_pk_add_f16 v[2:3], v1, off
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to <2 x half> addrspace(1)*
  call void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep0, <2 x half> %data)
  ret void
}
|
||||
|
||||
; Packed <2 x half> global fadd with a -128 byte immediate offset.
define amdgpu_ps void @global_fadd_saddr_v2f16_nortn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x half> %data) {
; GCN-LABEL: global_fadd_saddr_v2f16_nortn_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GCN-NEXT:    global_atomic_pk_add_f16 v[2:3], v1, off offset:-128
; GCN-NEXT:    s_endpgm
  %zext.offset = zext i32 %voffset to i64
  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
  %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to <2 x half> addrspace(1)*
  call void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* %cast.gep1, <2 x half> %data)
  ret void
}
|
||||
|
||||
declare void @llvm.amdgcn.global.atomic.fadd.f32.p1f32(float addrspace(1)* nocapture, float) #0
declare void @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0

attributes #0 = { argmemonly nounwind willreturn }
(The remaining files in this commit were not rendered by the diff viewer: "File diff suppressed because it is too large".)