forked from OSchip/llvm-project
[AMDGPU] Mark amdgpu_gfx functions as module entry function
- Allows LDS allocations
- Writes resource usage into COMPUTE_PGM_RSRC1 registers in PAL metadata

Differential Revision: https://reviews.llvm.org/D92946
This commit is contained in:
parent
1c6bc2c0b5
commit
5733167f54
|
@ -446,7 +446,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
OutStreamer->SwitchSection(ConfigSection);
|
||||
}
|
||||
|
||||
if (MFI->isEntryFunction()) {
|
||||
if (MFI->isModuleEntryFunction()) {
|
||||
getSIProgramInfo(CurrentProgramInfo, MF);
|
||||
} else {
|
||||
auto I = CallGraphResourceInfo.insert(
|
||||
|
@ -459,7 +459,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (STM.isAmdPalOS()) {
|
||||
if (MFI->isEntryFunction())
|
||||
EmitPALMetadata(MF, CurrentProgramInfo);
|
||||
else
|
||||
else if (MFI->isModuleEntryFunction())
|
||||
emitPALFunctionMetadata(MF);
|
||||
} else if (!STM.isAmdHsaOS()) {
|
||||
EmitProgramInfoSI(MF, CurrentProgramInfo);
|
||||
|
@ -922,7 +922,22 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
|||
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
|
||||
|
||||
const Function *Callee = getCalleeFunction(*CalleeOp);
|
||||
if (!Callee || Callee->isDeclaration()) {
|
||||
DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
|
||||
CallGraphResourceInfo.end();
|
||||
bool IsExternal = !Callee || Callee->isDeclaration();
|
||||
if (!IsExternal)
|
||||
I = CallGraphResourceInfo.find(Callee);
|
||||
|
||||
if (IsExternal || I == CallGraphResourceInfo.end()) {
|
||||
// Avoid crashing on undefined behavior with an illegal call to a
|
||||
// kernel. If a callsite's calling convention doesn't match the
|
||||
// function's, it's undefined behavior. If the callsite calling
|
||||
// convention does match, that would have errored earlier.
|
||||
// FIXME: The verifier shouldn't allow this.
|
||||
if (!IsExternal &&
|
||||
AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
|
||||
report_fatal_error("invalid call to entry function");
|
||||
|
||||
// If this is a call to an external function, we can't do much. Make
|
||||
// conservative guesses.
|
||||
|
||||
|
@ -943,19 +958,6 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
|||
// We force CodeGen to run in SCC order, so the callee's register
|
||||
// usage etc. should be the cumulative usage of all callees.
|
||||
|
||||
auto I = CallGraphResourceInfo.find(Callee);
|
||||
if (I == CallGraphResourceInfo.end()) {
|
||||
// Avoid crashing on undefined behavior with an illegal call to a
|
||||
// kernel. If a callsite's calling convention doesn't match the
|
||||
// function's, it's undefined behavior. If the callsite calling
|
||||
// convention does match, that would have errored earlier.
|
||||
// FIXME: The verifier shouldn't allow this.
|
||||
if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
|
||||
report_fatal_error("invalid call to entry function");
|
||||
|
||||
llvm_unreachable("callee should have been handled before caller");
|
||||
}
|
||||
|
||||
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
|
||||
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
|
||||
MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
|
||||
|
@ -1266,7 +1268,11 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
|
|||
// Records PAL metadata for the function MF: the stack size reported by the
// machine frame info, plus the RSRC1/RSRC2 values taken from
// CurrentProgramInfo and stored under the AMDGPU_CS calling convention.
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
|
||||
auto *MD = getTargetStreamer()->getPALMetadata();
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
// NOTE(review): this call and the setFunctionScratchSize call below pass the
// same arguments; they look like the before/after lines of a rename in this
// diff view -- confirm that only one of them is meant to remain.
MD->setStackFrameSize(MF, MFI.getStackSize());
|
||||
MD->setFunctionScratchSize(MF, MFI.getStackSize());
|
||||
// Set compute registers
|
||||
// RSRC1 is derived from CurrentProgramInfo for the AMDGPU_CS convention.
MD->setRsrc1(CallingConv::AMDGPU_CS,
|
||||
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
|
||||
// RSRC2 is taken directly from the ComputePGMRSrc2 field.
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
|
||||
}
|
||||
|
||||
// This is supposed to be log2(Size)
|
||||
|
|
|
@ -1301,7 +1301,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
|||
|
||||
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
|
||||
if (!MFI->isEntryFunction()) {
|
||||
if (!MFI->isModuleEntryFunction()) {
|
||||
SDLoc DL(Op);
|
||||
const Function &Fn = DAG.getMachineFunction().getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
|
|
|
@ -2260,7 +2260,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
|||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
|
||||
if (!MFI->isEntryFunction()) {
|
||||
if (!MFI->isModuleEntryFunction()) {
|
||||
const Function &Fn = MF.getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
|
||||
|
|
|
@ -13,11 +13,13 @@
|
|||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
|
||||
MachineFunctionInfo(),
|
||||
Mode(MF.getFunction()),
|
||||
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
|
||||
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
|
||||
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
|
||||
: MachineFunctionInfo(), Mode(MF.getFunction()),
|
||||
IsEntryFunction(
|
||||
AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
|
||||
IsModuleEntryFunction(
|
||||
AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
|
||||
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
|
||||
|
||||
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
|
||||
|
|
|
@ -44,10 +44,13 @@ protected:
|
|||
// State of MODE register, assumed FP mode.
|
||||
AMDGPU::SIModeRegisterDefaults Mode;
|
||||
|
||||
// Kernels + shaders. i.e. functions called by the driver and not called
|
||||
// Kernels + shaders. i.e. functions called by the hardware and not called
|
||||
// by other functions.
|
||||
bool IsEntryFunction = false;
|
||||
|
||||
// Entry functions, plus entry points into the module that are called by other
// functions instead of directly by the hardware (see isModuleEntryFunctionCC).
|
||||
bool IsModuleEntryFunction = false;
|
||||
|
||||
bool NoSignedZerosFPMath = false;
|
||||
|
||||
// Function may be memory bound.
|
||||
|
@ -77,6 +80,8 @@ public:
|
|||
return IsEntryFunction;
|
||||
}
|
||||
|
||||
// Returns the IsModuleEntryFunction flag, which the constructor derives from
// the function's calling convention via AMDGPU::isModuleEntryFunctionCC.
bool isModuleEntryFunction() const {
  return IsModuleEntryFunction;
}
|
||||
|
||||
// Returns the NoSignedZerosFPMath flag, which the constructor copies from the
// target options of the machine function.
bool hasNoSignedZerosFPMath() const {
|
||||
return NoSignedZerosFPMath;
|
||||
}
|
||||
|
|
|
@ -1068,6 +1068,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns true when CC marks a function as an entry point into the module:
// either the amdgpu_gfx calling convention, or any calling convention that
// isEntryFunctionCC accepts.
bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  // amdgpu_gfx is accepted here without consulting isEntryFunctionCC.
  if (CC == CallingConv::AMDGPU_Gfx)
    return true;

  // Every other convention is a module entry exactly when it is an entry
  // function convention.
  return isEntryFunctionCC(CC);
}
|
||||
|
||||
bool hasXNACK(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
|
||||
}
|
||||
|
|
|
@ -576,6 +576,15 @@ bool isCompute(CallingConv::ID CC);
|
|||
LLVM_READNONE
|
||||
bool isEntryFunctionCC(CallingConv::ID CC);
|
||||
|
||||
// These functions are considered entrypoints into the current module, i.e. they
|
||||
// are allowed to be called from outside the current module. This is different
|
||||
// from isEntryFunctionCC, which is only true for functions that are entered by
|
||||
// the hardware. Module entry points include all entry functions but also
|
||||
// include functions that can be called from other functions inside or outside
|
||||
// the current module. Module entry functions are allowed to allocate LDS.
|
||||
LLVM_READNONE
|
||||
bool isModuleEntryFunctionCC(CallingConv::ID CC);
|
||||
|
||||
// FIXME: Remove this when calling conventions cleaned up
|
||||
LLVM_READNONE
|
||||
inline bool isKernel(CallingConv::ID CC) {
|
||||
|
|
|
@ -238,12 +238,11 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
|
|||
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
|
||||
}
|
||||
|
||||
// Set the scratch size in the metadata.
|
||||
void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF,
|
||||
unsigned Val) {
|
||||
auto Node = MsgPackDoc.getMapNode();
|
||||
// Set the stack frame size of a function in the metadata.
|
||||
void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
|
||||
unsigned Val) {
|
||||
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
|
||||
getShaderFunctions()[MF.getFunction().getName()] = Node;
|
||||
}
|
||||
|
||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||
|
@ -747,6 +746,12 @@ msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
|
|||
return ShaderFunctions.getMap();
|
||||
}
|
||||
|
||||
// Get (create if necessary) a function in the shader functions map.
|
||||
msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
|
||||
auto Functions = getShaderFunctions();
|
||||
return Functions[Name].getMap(/*Convert=*/true);
|
||||
}
|
||||
|
||||
// Return the PAL metadata hardware shader stage name.
|
||||
static const char *getStageName(CallingConv::ID CC) {
|
||||
switch (CC) {
|
||||
|
|
|
@ -79,7 +79,7 @@ public:
|
|||
void setScratchSize(unsigned CC, unsigned Val);
|
||||
|
||||
// Set the stack frame size of a function in the metadata.
|
||||
void setStackFrameSize(const MachineFunction &MF, unsigned Val);
|
||||
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
|
||||
|
||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||
// shader of the given calling convention.
|
||||
|
@ -130,6 +130,9 @@ private:
|
|||
// Get (create if necessary) the shader functions map.
|
||||
msgpack::MapDocNode getShaderFunctions();
|
||||
|
||||
// Get (create if necessary) a function in the shader functions map.
|
||||
msgpack::MapDocNode getShaderFunction(StringRef Name);
|
||||
|
||||
// Get (create if necessary) the .hardware_stages entry for the given calling
|
||||
// convention.
|
||||
msgpack::MapDocNode getHwStage(unsigned CC);
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
|
||||
|
||||
|
@ -126,10 +125,29 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
|
|||
ret float %add
|
||||
}
|
||||
|
||||
@lds = internal addrspace(3) global [64 x float] undef
|
||||
|
||||
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
|
||||
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
|
||||
%val = load float, float addrspace(3)* %lds_ptr
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
|
||||
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
|
||||
%val = load float, float addrspace(3)* %lds_ptr
|
||||
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
; GCN: amdpal.pipelines:
|
||||
; GCN-NEXT: - .registers: {}
|
||||
; GCN-NEXT: - .registers:
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
||||
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01cf{{$}}
|
||||
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
|
@ -148,6 +166,10 @@ attributes #0 = { nounwind }
|
|||
; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
|
||||
; GCN-NEXT: no_stack_indirect_call:
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: simple_lds:
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||
; GCN-NEXT: simple_lds_recurse:
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; GCN-NEXT: simple_stack:
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
|
||||
; GCN-NEXT: simple_stack_call:
|
||||
|
|
Loading…
Reference in New Issue