forked from OSchip/llvm-project
234 lines
8.8 KiB
LLVM
234 lines
8.8 KiB
LLVM
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
|
|
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
|
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
|
|
|
|
declare amdgpu_gfx float @extern_func(float) #0
|
|
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
|
|
|
|
@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
|
|
|
|
define amdgpu_gfx float @no_stack(float %arg0) #0 {
|
|
%add = fadd float %arg0, 1.0
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%add = fadd float %arg0, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%add = fadd float %arg0, %val
|
|
%stack2 = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack2
|
|
%val2 = load volatile float, float addrspace(5)* %stack2
|
|
%add2 = fadd float %add, %val2
|
|
ret float %add2
|
|
}
|
|
|
|
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
|
|
bb0:
|
|
%cmp = fcmp ogt float %arg0, 0.0
|
|
br i1 %cmp, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%add = fadd float %arg0, %val
|
|
br label %bb2
|
|
|
|
bb2:
|
|
%res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
|
|
bb0:
|
|
br label %bb1
|
|
|
|
bb1:
|
|
%ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%add = fadd float %arg0, %val
|
|
%cmp = icmp sgt i32 %ctr, 0
|
|
%newctr = sub i32 %ctr, 1
|
|
br i1 %cmp, label %bb1, label %bb2
|
|
|
|
bb2:
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
|
|
%res = call amdgpu_gfx float @simple_stack(float %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%res = call amdgpu_gfx float @simple_stack(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
|
|
%res = call amdgpu_gfx float @extern_func(float %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%res = call amdgpu_gfx float @extern_func(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
|
|
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
|
|
%fptr = load void()*, void()* addrspace(4)* @funcptr
|
|
call amdgpu_gfx void %fptr()
|
|
ret float %arg0
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%fptr = load void()*, void()* addrspace(4)* @funcptr
|
|
call amdgpu_gfx void %fptr()
|
|
%add = fadd float %arg0, %val
|
|
ret float %add
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
|
|
%stack = alloca float, i32 4, align 4, addrspace(5)
|
|
store volatile float 2.0, float addrspace(5)* %stack
|
|
%val = load volatile float, float addrspace(5)* %stack
|
|
%res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
|
|
%add = fadd float %res, %val
|
|
ret float %add
|
|
}
|
|
|
|
@lds = internal addrspace(3) global [64 x float] undef
|
|
|
|
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
|
|
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
|
|
%val = load float, float addrspace(3)* %lds_ptr
|
|
ret float %val
|
|
}
|
|
|
|
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
|
|
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
|
|
%val = load float, float addrspace(3)* %lds_ptr
|
|
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
|
|
ret float %res
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|
|
|
|
; GCN: amdpal.pipelines:
|
|
; GCN-NEXT: - .registers:
|
|
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
|
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
|
; GCN-NEXT: .shader_functions:
|
|
; GCN-NEXT: dynamic_stack:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
|
|
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
|
|
; GCN-NEXT: dynamic_stack_loop:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
|
|
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
|
|
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
|
|
; GCN-NEXT: multiple_stack:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
|
; GCN-NEXT: no_stack:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
|
; GCN-NEXT: no_stack_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x24{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
|
; GCN-NEXT: no_stack_extern_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
|
|
; GCN-NEXT: no_stack_extern_call_many_args:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
|
|
; GCN-NEXT: no_stack_indirect_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
|
|
; GCN-NEXT: simple_lds:
|
|
; GCN-NEXT: .lds_size: 0x100{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
|
; GCN-NEXT: simple_lds_recurse:
|
|
; GCN-NEXT: .lds_size: 0x100{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x26{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x29{{$}}
|
|
; GCN-NEXT: simple_stack:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2{{$}}
|
|
; GCN-NEXT: simple_stack_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x24{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x4{{$}}
|
|
; GCN-NEXT: simple_stack_extern_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
|
|
; GCN-NEXT: simple_stack_indirect_call:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
|
|
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
|
|
; GCN-NEXT: simple_stack_recurse:
|
|
; GCN-NEXT: .lds_size: 0{{$}}
|
|
; GCN-NEXT: .sgpr_count: 0x26{{$}}
|
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
|
; GCN-NEXT: .vgpr_count: 0x2a{{$}}
|
|
; GCN-NEXT: ...
|