[AMDGPU] Mark amdgpu_gfx functions as module entry functions

- Allows LDS allocations
- Writes resource usage into COMPUTE_PGM_RSRC1 registers in PAL metadata

Differential Revision: https://reviews.llvm.org/D92946
This commit is contained in:
Sebastian Neubauer 2020-12-09 17:25:53 +01:00
parent 1c6bc2c0b5
commit 5733167f54
10 changed files with 94 additions and 33 deletions

View File

@ -446,7 +446,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(ConfigSection);
}
if (MFI->isEntryFunction()) {
if (MFI->isModuleEntryFunction()) {
getSIProgramInfo(CurrentProgramInfo, MF);
} else {
auto I = CallGraphResourceInfo.insert(
@ -459,7 +459,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (STM.isAmdPalOS()) {
if (MFI->isEntryFunction())
EmitPALMetadata(MF, CurrentProgramInfo);
else
else if (MFI->isModuleEntryFunction())
emitPALFunctionMetadata(MF);
} else if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
@ -922,7 +922,22 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
const Function *Callee = getCalleeFunction(*CalleeOp);
if (!Callee || Callee->isDeclaration()) {
DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
CallGraphResourceInfo.end();
bool IsExternal = !Callee || Callee->isDeclaration();
if (!IsExternal)
I = CallGraphResourceInfo.find(Callee);
if (IsExternal || I == CallGraphResourceInfo.end()) {
// Avoid crashing on undefined behavior with an illegal call to a
// kernel. If a callsite's calling convention doesn't match the
// function's, it's undefined behavior. If the callsite calling
// convention does match, that would have errored earlier.
// FIXME: The verifier shouldn't allow this.
if (!IsExternal &&
AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
report_fatal_error("invalid call to entry function");
// If this is a call to an external function, we can't do much. Make
// conservative guesses.
@ -943,19 +958,6 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// We force CodeGen to run in SCC order, so the callee's register
// usage etc. should be the cumulative usage of all callees.
auto I = CallGraphResourceInfo.find(Callee);
if (I == CallGraphResourceInfo.end()) {
// Avoid crashing on undefined behavior with an illegal call to a
// kernel. If a callsite's calling convention doesn't match the
// function's, it's undefined behavior. If the callsite calling
// convention does match, that would have errored earlier.
// FIXME: The verifier shouldn't allow this.
if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
report_fatal_error("invalid call to entry function");
llvm_unreachable("callee should have been handled before caller");
}
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
@ -1266,7 +1268,11 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
MD->setStackFrameSize(MF, MFI.getStackSize());
MD->setFunctionScratchSize(MF, MFI.getStackSize());
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
}
// This is supposed to be log2(Size)

View File

@ -1301,7 +1301,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
if (!MFI->isModuleEntryFunction()) {
SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(

View File

@ -2260,7 +2260,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
if (!MFI->isModuleEntryFunction()) {
const Function &Fn = MF.getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),

View File

@ -13,11 +13,13 @@
using namespace llvm;
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
Mode(MF.getFunction()),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
: MachineFunctionInfo(), Mode(MF.getFunction()),
IsEntryFunction(
AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
IsModuleEntryFunction(
AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,

View File

@ -44,10 +44,13 @@ protected:
// State of MODE register, assumed FP mode.
AMDGPU::SIModeRegisterDefaults Mode;
// Kernels + shaders. i.e. functions called by the driver and not called
// Kernels + shaders. i.e. functions called by the hardware and not called
// by other functions.
bool IsEntryFunction = false;
// Module entry points: kernels and shaders, plus functions that may be called
// by other functions instead of directly by the hardware.
bool IsModuleEntryFunction = false;
bool NoSignedZerosFPMath = false;
// Function may be memory bound.
@ -77,6 +80,8 @@ public:
return IsEntryFunction;
}
// Reports the IsModuleEntryFunction flag stored for this function.
bool isModuleEntryFunction() const {
  return IsModuleEntryFunction;
}
bool hasNoSignedZerosFPMath() const {
return NoSignedZerosFPMath;
}

View File

@ -1068,6 +1068,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
}
}
// Returns true when CC is AMDGPU_Gfx, and otherwise whatever
// isEntryFunctionCC reports for CC.
bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  // amdgpu_gfx is accepted here without consulting isEntryFunctionCC.
  if (CC == CallingConv::AMDGPU_Gfx)
    return true;
  return isEntryFunctionCC(CC);
}
bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

View File

@ -576,6 +576,15 @@ bool isCompute(CallingConv::ID CC);
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);
// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {

View File

@ -238,12 +238,11 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
// Set the scratch size in the metadata.
void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF,
unsigned Val) {
auto Node = MsgPackDoc.getMapNode();
// Set the stack frame size of a function in the metadata.
void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
unsigned Val) {
auto Node = getShaderFunction(MF.getFunction().getName());
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
getShaderFunctions()[MF.getFunction().getName()] = Node;
}
// Set the hardware register bit in PAL metadata to enable wave32 on the
@ -747,6 +746,12 @@ msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
return ShaderFunctions.getMap();
}
// Get (create if necessary) the entry for the function called Name in the
// shader functions map, returned as a map node.
msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
  // Index the shader functions map by name and ask getMap to convert the
  // entry to a map.
  return getShaderFunctions()[Name].getMap(/*Convert=*/true);
}
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {

View File

@ -79,7 +79,7 @@ public:
void setScratchSize(unsigned CC, unsigned Val);
// Set the stack frame size of a function in the metadata.
void setStackFrameSize(const MachineFunction &MF, unsigned Val);
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
@ -130,6 +130,9 @@ private:
// Get (create if necessary) the shader functions map.
msgpack::MapDocNode getShaderFunctions();
// Get (create if necessary) a function in the shader functions map.
msgpack::MapDocNode getShaderFunction(StringRef Name);
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);

View File

@ -1,5 +1,4 @@
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
@ -126,10 +125,29 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
ret float %add
}
@lds = internal addrspace(3) global [64 x float] undef
; amdgpu_gfx function that loads one float from the first element of the
; addrspace(3) global @lds and returns it. %arg0 is not used.
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
%val = load float, float addrspace(3)* %lds_ptr
ret float %val
}
; amdgpu_gfx function that loads the first float of the addrspace(3) global
; @lds, passes it to a call of itself, and returns that call's result.
; %arg0 is not used.
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
%val = load float, float addrspace(3)* %lds_ptr
; Self-recursive call using the amdgpu_gfx calling convention.
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
ret float %res
}
attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers: {}
; GCN-NEXT: - .registers:
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01cf{{$}}
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
@ -148,6 +166,10 @@ attributes #0 = { nounwind }
; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: simple_stack_call: