forked from OSchip/llvm-project
[AMDGPU] Report minimum scratch size in code object v5 and later by default
This change sets -amdgpu-assume-{external-call-stack-size | dynamic-stack-object-size} options to zero by default for code object v5 and later. The runtime is expected to adjust the scratch size if the amdhsa_uses_dynamic_stack bit in the kernel descriptor is set. Differential Revision: https://reviews.llvm.org/D128346
This commit is contained in:
parent
b55e76de8b
commit
3759398b4b
|
@ -3884,11 +3884,12 @@ The fields used by CP for code objects before V3 also match those specified in
|
|||
63:32 4 bytes PRIVATE_SEGMENT_FIXED_SIZE The amount of fixed
|
||||
private address space
|
||||
memory required for a
|
||||
work-item in bytes.
|
||||
Additional space may need to
|
||||
be added to this value if
|
||||
the call stack has
|
||||
non-inlined function calls.
|
||||
work-item in bytes. When
|
||||
this cannot be predicted,
|
||||
code object v4 and older
|
||||
sets this value to be
|
||||
higher than the minimum
|
||||
requirement.
|
||||
95:64 4 bytes KERNARG_SIZE The size of the kernarg
|
||||
memory pointed to by the
|
||||
AQL dispatch packet. The
|
||||
|
|
|
@ -43,9 +43,9 @@ using namespace llvm::AMDGPU;
|
|||
char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
|
||||
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
|
||||
|
||||
// We need to tell the runtime some amount ahead of time if we don't know the
|
||||
// true stack size. Assume a smaller number if this is only due to dynamic /
|
||||
// non-entry block allocas.
|
||||
// In code object v4 and older, we need to tell the runtime some amount ahead of
|
||||
// time if we don't know the true stack size. Assume a smaller number if this is
|
||||
// only due to dynamic / non-entry block allocas.
|
||||
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
|
||||
"amdgpu-assume-external-call-stack-size",
|
||||
cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
|
||||
|
@ -109,6 +109,15 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
|
|||
CallGraph CG = CallGraph(M);
|
||||
auto End = po_end(&CG);
|
||||
|
||||
// By default, for code object v5 and later, track only the minimum scratch
|
||||
// size.
|
||||
if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) {
|
||||
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
|
||||
AssumedStackSizeForDynamicSizeObjects = 0;
|
||||
if (!AssumedStackSizeForExternalCall.getNumOccurrences())
|
||||
AssumedStackSizeForExternalCall = 0;
|
||||
}
|
||||
|
||||
for (auto IT = po_begin(&CG); IT != End; ++IT) {
|
||||
Function *F = IT->getFunction();
|
||||
if (!F || F->isDeclaration())
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-V5 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
|
||||
|
||||
|
@ -182,6 +183,9 @@ declare void @external() #0
|
|||
; NumSgprs: 48
|
||||
; NumVgprs: 24
|
||||
; GCN: ScratchSize: 16384
|
||||
;
|
||||
; GCN-V5-LABEL: {{^}}usage_external:
|
||||
; GCN-V5: ScratchSize: 0
|
||||
define amdgpu_kernel void @usage_external() #0 {
|
||||
call void @external()
|
||||
ret void
|
||||
|
@ -194,6 +198,9 @@ declare void @external_recurse() #2
|
|||
; NumSgprs: 48
|
||||
; NumVgprs: 24
|
||||
; GCN: ScratchSize: 16384
|
||||
;
|
||||
; GCN-V5-LABEL: {{^}}usage_external_recurse:
|
||||
; GCN-V5: ScratchSize: 0
|
||||
define amdgpu_kernel void @usage_external_recurse() #0 {
|
||||
call void @external_recurse()
|
||||
ret void
|
||||
|
@ -201,6 +208,9 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
|
|||
|
||||
; GCN-LABEL: {{^}}direct_recursion_use_stack:
|
||||
; GCN: ScratchSize: 18448{{$}}
|
||||
;
|
||||
; GCN-V5-LABEL: {{^}}direct_recursion_use_stack:
|
||||
; GCN-V5: ScratchSize: 2064{{$}}
|
||||
define void @direct_recursion_use_stack(i32 %val) #2 {
|
||||
%alloca = alloca [512 x i32], align 4, addrspace(5)
|
||||
call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
|
||||
|
@ -220,6 +230,9 @@ ret:
|
|||
; GCN: is_ptr64 = 1
|
||||
; GCN: is_dynamic_callstack = 1
|
||||
; GCN: workitem_private_segment_byte_size = 18448{{$}}
|
||||
;
|
||||
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
|
||||
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
|
||||
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
|
||||
call void @direct_recursion_use_stack(i32 %n)
|
||||
ret void
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s
|
||||
|
||||
|
@ -110,6 +112,9 @@ bb.2:
|
|||
}
|
||||
; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
|
||||
; DEFAULTSIZE: ; ScratchSize: 4112
|
||||
; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 16
|
||||
; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
|
||||
; DEFAULTSIZE-V5: ; ScratchSize: 16
|
||||
|
||||
; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
|
||||
; ASSUME1024: ; ScratchSize: 1040
|
||||
|
@ -203,6 +208,9 @@ bb.1:
|
|||
|
||||
; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
|
||||
; DEFAULTSIZE: ; ScratchSize: 4160
|
||||
; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 64
|
||||
; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
|
||||
; DEFAULTSIZE-V5: ; ScratchSize: 64
|
||||
|
||||
; ASSUME1024: .amdhsa_private_segment_fixed_size 1088
|
||||
; ASSUME1024: ; ScratchSize: 1088
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs --amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=V5 %s
|
||||
|
||||
; CHECK-LABEL: {{^}}recursive:
|
||||
; CHECK: ScratchSize: 16
|
||||
|
@ -28,9 +29,13 @@ define void @tail_recursive_with_stack() {
|
|||
ret void
|
||||
}
|
||||
|
||||
; For an arbitrary recursive call, report a large number for unknown stack usage.
|
||||
; For an arbitrary recursive call, report a large number for unknown stack
|
||||
; usage for code object v4 and older.
|
||||
; CHECK-LABEL: {{^}}calls_recursive:
|
||||
; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
|
||||
;
|
||||
; V5-LABEL: {{^}}calls_recursive:
|
||||
; V5: .amdhsa_private_segment_fixed_size 0{{$}}
|
||||
define amdgpu_kernel void @calls_recursive() {
|
||||
call void @recursive()
|
||||
ret void
|
||||
|
@ -51,6 +56,9 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
|
|||
|
||||
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
|
||||
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
|
||||
;
|
||||
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
|
||||
; V5: .amdhsa_private_segment_fixed_size 0{{$}}
|
||||
define amdgpu_kernel void @kernel_calls_tail_recursive() {
|
||||
call void @tail_recursive()
|
||||
ret void
|
||||
|
@ -58,6 +66,9 @@ define amdgpu_kernel void @kernel_calls_tail_recursive() {
|
|||
|
||||
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
|
||||
; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
|
||||
;
|
||||
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
|
||||
; V5: .amdhsa_private_segment_fixed_size 8{{$}}
|
||||
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
|
||||
call void @tail_recursive_with_stack()
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue