[AMDGPU] Set optional PAL metadata

Set informational fields in the .shader_functions table.

Also correct the documentation: .scratch_memory_size and .lds_size are
integers.

Differential Revision: https://reviews.llvm.org/D105116
This commit is contained in:
Sebastian Neubauer 2021-06-29 11:22:25 +02:00
parent 0c29f45ac9
commit db646de3ee
5 changed files with 106 additions and 14 deletions

View File

@ -11251,10 +11251,8 @@ within a map that has been added by the same *vendor-name*.
".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value
2 integers is implementation defined, and can not be relied on between
different builds of the compiler.
".scratch_memory_size" sequence of Size in bytes of scratch memory used by the shader.
2 integers
".lds_size" sequence of Size in bytes of LDS memory.
2 integers
".scratch_memory_size" integer Size in bytes of scratch memory used by the shader.
".lds_size" integer Size in bytes of LDS memory.
".vgpr_count" integer Number of VGPRs used by the shader.
".sgpr_count" integer Number of SGPRs used by the shader.
".stack_frame_size_in_bytes" integer Amount of stack size used by the shader.

View File

@ -1395,10 +1395,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
// Writes the per-function PAL metadata records for MF: scratch size, the
// compute rsrc1/rsrc2 register values, and the optional LDS/VGPR/SGPR usage
// records. All values other than the stack size are read from
// CurrentProgramInfo.
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
// The function's scratch size is taken from the frame info's stack size.
MD->setFunctionScratchSize(MF, MFI.getStackSize());
// Set compute registers
// (both are recorded under the AMDGPU_CS calling convention).
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
// Set optional info
// (LDS size plus the VGPR/SGPR counts computed for waves-per-EU).
MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)

View File

@ -243,6 +243,27 @@ void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
}
// Record the LDS usage of a function, in bytes, in the metadata. The value is
// stored under the ".lds_size" key of the node that getShaderFunction returns
// for the function's name.
void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
                                           unsigned Val) {
  auto FuncName = MF.getFunction().getName();
  auto FuncEntry = getShaderFunction(FuncName);
  FuncEntry[".lds_size"] = MsgPackDoc.getNode(Val);
}
// Record the number of VGPRs used by a function in the metadata. The value is
// stored under the ".vgpr_count" key of the node that getShaderFunction
// returns for the function's name.
void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
                                                unsigned Val) {
  auto FuncName = MF.getFunction().getName();
  auto FuncEntry = getShaderFunction(FuncName);
  FuncEntry[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
// Set the number of used sgprs in the metadata. The value is stored under the
// ".sgpr_count" key of the node that getShaderFunction returns for the
// function's name.
void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
                                                unsigned Val) {
  auto Node = getShaderFunction(MF.getFunction().getName());
  Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {

View File

@ -80,6 +80,21 @@ public:
// Set the stack frame size of a function in the metadata.
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
// Set the amount of LDS used in bytes in the metadata. This is an optional
// advisory record for logging etc; wave dispatch actually uses the rsrc2
// register for the shader stage to determine the amount of LDS to allocate.
void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
// Set the number of used vgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of vgprs to allocate.
void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void setWave32(unsigned CC);

View File

@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
; Make sure this interacts well with -amdgpu-fixed-function-abi
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
declare float @extern_func(float) #0
declare float @extern_func_many_args(<64 x float>) #0
@ -147,40 +147,92 @@ attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: dynamic_stack_loop:
; GCN-NEXT: .lds_size: 0{{$}}
; SDAG-NEXT: .sgpr_count: 0x22{{$}}
; GISEL-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: multiple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x22{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack_recurse:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: ...