forked from OSchip/llvm-project
[AMDGPU] Set optional PAL metadata
Set informational fields in the .shader_functions table. Also correct the documentation: .scratch_memory_size and .lds_size are integers. Differential Revision: https://reviews.llvm.org/D105116
This commit is contained in:
parent
0c29f45ac9
commit
db646de3ee
|
@ -11251,10 +11251,8 @@ within a map that has been added by the same *vendor-name*.
|
|||
".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value
|
||||
2 integers is implementation defined, and can not be relied on between
|
||||
different builds of the compiler.
|
||||
".scratch_memory_size" sequence of Size in bytes of scratch memory used by the shader.
|
||||
2 integers
|
||||
".lds_size" sequence of Size in bytes of LDS memory.
|
||||
2 integers
|
||||
".scratch_memory_size" integer Size in bytes of scratch memory used by the shader.
|
||||
".lds_size" integer Size in bytes of LDS memory.
|
||||
".vgpr_count" integer Number of VGPRs used by the shader.
|
||||
".sgpr_count" integer Number of SGPRs used by the shader.
|
||||
".stack_frame_size_in_bytes" integer Amount of stack size used by the shader.
|
||||
|
|
|
@ -1395,10 +1395,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
|
|||
auto *MD = getTargetStreamer()->getPALMetadata();
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
MD->setFunctionScratchSize(MF, MFI.getStackSize());
|
||||
|
||||
// Set compute registers
|
||||
MD->setRsrc1(CallingConv::AMDGPU_CS,
|
||||
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
|
||||
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
|
||||
|
||||
// Set optional info
|
||||
MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
|
||||
MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
|
||||
MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
|
||||
}
|
||||
|
||||
// This is supposed to be log2(Size)
|
||||
|
|
|
@ -243,6 +243,27 @@ void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
|
|||
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
|
||||
}
|
||||
|
||||
// Set the amount of LDS used in bytes in the metadata (".lds_size" entry).
|
||||
void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
|
||||
unsigned Val) {
|
||||
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||
Node[".lds_size"] = MsgPackDoc.getNode(Val);
|
||||
}
|
||||
|
||||
// Set the number of used vgprs in the metadata (".vgpr_count" entry).
|
||||
void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
|
||||
unsigned Val) {
|
||||
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||
Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
|
||||
}
|
||||
|
||||
// Set the number of used sgprs in the metadata (".sgpr_count" entry).
|
||||
void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
|
||||
unsigned Val) {
|
||||
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||
Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
|
||||
}
|
||||
|
||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||
// shader of the given calling convention.
|
||||
void AMDGPUPALMetadata::setWave32(unsigned CC) {
|
||||
|
|
|
@ -80,6 +80,21 @@ public:
|
|||
// Set the stack frame size of a function in the metadata.
|
||||
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
|
||||
|
||||
// Set the amount of LDS used in bytes in the metadata. This is an optional
|
||||
// advisory record for logging etc; wave dispatch actually uses the rsrc2
|
||||
// register for the shader stage to determine the amount of LDS to allocate.
|
||||
void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
|
||||
|
||||
// Set the number of used vgprs in the metadata. This is an optional advisory
|
||||
// record for logging etc; wave dispatch actually uses the rsrc1 register for
|
||||
// the shader stage to determine the number of vgprs to allocate.
|
||||
void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
|
||||
|
||||
// Set the number of used sgprs in the metadata. This is an optional advisory
|
||||
// record for logging etc; wave dispatch actually uses the rsrc1 register for
|
||||
// the shader stage to determine the number of sgprs to allocate.
|
||||
void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
|
||||
|
||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||
// shader of the given calling convention.
|
||||
void setWave32(unsigned CC);
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
|
||||
|
||||
; Make sure this interacts well with -amdgpu-fixed-function-abi
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
||||
|
||||
declare float @extern_func(float) #0
|
||||
declare float @extern_func_many_args(<64 x float>) #0
|
||||
|
@ -147,40 +147,92 @@ attributes #0 = { nounwind }
|
|||
|
||||
; GCN: amdpal.pipelines:
|
||||
; GCN-NEXT: - .registers:
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x24{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GCN-NEXT: dynamic_stack_loop:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; SDAG-NEXT: .sgpr_count: 0x22{{$}}
|
||||
; GISEL-NEXT: .sgpr_count: 0x24{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
|
||||
; GCN-NEXT: multiple_stack:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GCN-NEXT: no_stack:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||
; GCN-NEXT: no_stack_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||
; GCN-NEXT: no_stack_extern_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: no_stack_extern_call_many_args:
|
||||
; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
||||
; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: no_stack_indirect_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: simple_lds:
|
||||
; GCN-NEXT: .lds_size: 0x100{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||
; GCN-NEXT: simple_lds_recurse:
|
||||
; GCN-NEXT: .lds_size: 0x100{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: simple_stack:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x2{{$}}
|
||||
; GCN-NEXT: simple_stack_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GCN-NEXT: .sgpr_count: 0x22{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GCN-NEXT: simple_stack_extern_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: simple_stack_indirect_call:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: simple_stack_recurse:
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||
; GCN-NEXT: ...
|
||||
|
|
Loading…
Reference in New Issue