forked from OSchip/llvm-project
[AMDGPU] Refactor HSAMetadataStream::emitKernel (NFC)
Move all metadata construction into AMDGPUHSAMetadataStreamer. Differential Revision: https://reviews.llvm.org/D48176 llvm-svn: 336707
This commit is contained in:
parent
dd4d2ac607
commit
2ad2c18b82
|
@ -207,9 +207,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
|||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
HSAMetadataStream.emitKernel(MF->getFunction(),
|
||||
getHSACodeProps(*MF, CurrentProgramInfo),
|
||||
getHSADebugProps(*MF, CurrentProgramInfo));
|
||||
HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
|
||||
|
@ -1197,57 +1195,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
|||
}
|
||||
}
|
||||
|
||||
/// Build the HSA code-properties metadata for the function in \p MF.
///
/// \param MF          Machine function being described; supplies the
///                    subtarget, the SI machine-function info and the IR
///                    function.
/// \param ProgramInfo Resource usage for \p MF; supplies the LDS size, scratch
///                    size, SGPR/VGPR counts and the dynamic-call-stack flag.
/// \returns the populated metadata, or a default-constructed object when the
///          function does not use the AMDGPU_KERNEL calling convention.
AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
  const Function &F = MF.getFunction();

  // Avoid asserting on erroneous cases.
  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
    return HSACodeProps;

  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  // The reported kernarg alignment is never smaller than 4.
  HSACodeProps.mKernargSegmentAlign =
      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
  // Take the register counts from the ProgramInfo argument, like every other
  // resource field in this function, instead of from the CurrentProgramInfo
  // member.
  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();

  return HSACodeProps;
}
|
||||
|
||||
/// Build the HSA debug-properties metadata for the function in \p MF.
///
/// The result stays default-constructed unless the subtarget reports debugger
/// support. With support, the debugger ABI version entries 1 and 0 are
/// appended, and the two debugger SGPR numbers are copied from \p ProgramInfo
/// when the subtarget also emits the debugger prologue.
AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  HSAMD::Kernel::DebugProps::Metadata Props;

  if (ST.debuggerSupported()) {
    Props.mDebuggerABIVersion.push_back(1);
    Props.mDebuggerABIVersion.push_back(0);

    // The SGPR numbers are only reported when the debugger prologue is
    // emitted.
    if (ST.debuggerEmitPrologue()) {
      Props.mPrivateSegmentBufferSGPR =
          ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
      Props.mWavefrontPrivateSegmentOffsetSGPR =
          ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
    }
  }

  return Props;
}
|
||||
|
||||
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
unsigned AsmVariant,
|
||||
const char *ExtraCode, raw_ostream &O) {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "AMDGPU.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
|
||||
#include "SIProgramInfo.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/Support/AMDHSAKernelDescriptor.h"
|
||||
|
@ -52,60 +53,6 @@ private:
|
|||
int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
|
||||
};
|
||||
|
||||
// Resource-usage record for a kernel / entry function. Every counter starts
// at zero, every flag starts cleared, and both debugger SGPR numbers start at
// std::numeric_limits<uint16_t>::max().
struct SIProgramInfo {
  // PGM_RSRC1 pm4 packet fields.
  uint32_t VGPRBlocks{0};
  uint32_t SGPRBlocks{0};
  uint32_t Priority{0};
  uint32_t FloatMode{0};
  uint32_t Priv{0};
  uint32_t DX10Clamp{0};
  uint32_t DebugMode{0};
  uint32_t IEEEMode{0};
  uint64_t ScratchSize{0};

  uint64_t ComputePGMRSrc1{0};

  // PGM_RSRC2 pm4 packet fields.
  uint32_t LDSBlocks{0};
  uint32_t ScratchBlocks{0};

  uint64_t ComputePGMRSrc2{0};

  uint32_t NumVGPR{0};
  uint32_t NumSGPR{0};
  uint32_t LDSSize{0};
  bool FlatUsed{false};

  // SGPR count that satisfies the requested number of waves per execution
  // unit.
  uint32_t NumSGPRsForWavesPerEU{0};

  // VGPR count that satisfies the requested number of waves per execution
  // unit.
  uint32_t NumVGPRsForWavesPerEU{0};

  // Fixed SGPR number holding the wave scratch offset for the whole kernel
  // execution; std::numeric_limits<uint16_t>::max() when the register is not
  // used or not known.
  uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR{
      std::numeric_limits<uint16_t>::max()};

  // Fixed SGPR number of the first of the 4 SGPRs holding the scratch V# for
  // the whole kernel execution; std::numeric_limits<uint16_t>::max() when the
  // register is not used or not known.
  uint16_t DebuggerPrivateSegmentBufferSGPR{
      std::numeric_limits<uint16_t>::max()};

  // Set when recursion, dynamic allocas, indirect calls or anything else may
  // make the stack usage statically unknown.
  bool DynamicCallStack{false};

  // Extra information kept for debugging.
  bool VCCUsed{false};

  SIProgramInfo() = default;
};
|
||||
|
||||
SIProgramInfo CurrentProgramInfo;
|
||||
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
|
||||
|
||||
|
@ -123,13 +70,6 @@ private:
|
|||
unsigned &NumSGPR,
|
||||
unsigned &NumVGPR) const;
|
||||
|
||||
AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const;
|
||||
AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const;
|
||||
|
||||
/// Emit register usage information so that the GPU driver
|
||||
/// can correctly setup the GPU state.
|
||||
void EmitProgramInfoSI(const MachineFunction &MF,
|
||||
|
|
|
@ -14,6 +14,10 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUHSAMetadataStreamer.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIProgramInfo.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
|
@ -196,6 +200,57 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
|
|||
return Dims;
|
||||
}
|
||||
|
||||
/// Collect the code-properties metadata describing the function in \p MF.
///
/// Group/private segment sizes, register counts and the dynamic-call-stack
/// flag are read from \p ProgramInfo; the remaining fields come from the
/// subtarget and the SI machine-function info attached to \p MF. A function
/// that does not use the AMDGPU_KERNEL calling convention yields
/// default-constructed metadata.
Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &FuncInfo =
      *MF.getInfo<SIMachineFunctionInfo>();
  HSAMD::Kernel::CodeProps::Metadata Props;
  const Function &Fn = MF.getFunction();

  // Hand back empty properties for non-kernels rather than asserting on an
  // erroneous case.
  const bool IsKernel = Fn.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  if (!IsKernel)
    return Props;

  // Fields taken from the program info.
  Props.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  Props.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  Props.mNumSGPRs = ProgramInfo.NumSGPR;
  Props.mNumVGPRs = ProgramInfo.NumVGPR;
  Props.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;

  // Fields queried from the subtarget.
  Props.mKernargSegmentSize = ST.getKernArgSegmentSize(Fn);
  Props.mWavefrontSize = ST.getWavefrontSize();
  Props.mIsXNACKEnabled = ST.isXNACKEnabled();

  // Fields queried from the machine-function info; the kernarg alignment is
  // reported as at least 4.
  Props.mKernargSegmentAlign =
      std::max(uint32_t(4), FuncInfo.getMaxKernArgAlign());
  Props.mMaxFlatWorkGroupSize = FuncInfo.getMaxFlatWorkGroupSize();
  Props.mNumSpilledSGPRs = FuncInfo.getNumSpilledSGPRs();
  Props.mNumSpilledVGPRs = FuncInfo.getNumSpilledVGPRs();

  return Props;
}
|
||||
|
||||
/// Collect the debug-properties metadata describing the function in \p MF.
///
/// Nothing is filled in unless the subtarget reports debugger support. With
/// support, the debugger ABI version entries 1 and 0 are appended, and the two
/// debugger SGPR numbers are copied from \p ProgramInfo when the subtarget
/// also emits the debugger prologue.
Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  HSAMD::Kernel::DebugProps::Metadata Props;

  if (ST.debuggerSupported()) {
    Props.mDebuggerABIVersion.push_back(1);
    Props.mDebuggerABIVersion.push_back(0);

    // The SGPR numbers are only reported when the debugger prologue is
    // emitted.
    if (ST.debuggerEmitPrologue()) {
      Props.mPrivateSegmentBufferSGPR =
          ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
      Props.mWavefrontPrivateSegmentOffsetSGPR =
          ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
    }
  }

  return Props;
}
|
||||
|
||||
void MetadataStreamer::emitVersion() {
|
||||
auto &Version = HSAMetadata.mVersion;
|
||||
|
||||
|
@ -408,10 +463,11 @@ void MetadataStreamer::end() {
|
|||
verify(HSAMetadataString);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernel(
|
||||
const Function &Func,
|
||||
const Kernel::CodeProps::Metadata &CodeProps,
|
||||
const Kernel::DebugProps::Metadata &DebugProps) {
|
||||
void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
|
||||
auto &Func = MF.getFunction();
|
||||
auto CodeProps = getHSACodeProps(MF, ProgramInfo);
|
||||
auto DebugProps = getHSADebugProps(MF, ProgramInfo);
|
||||
|
||||
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
|
||||
return;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ class DataLayout;
|
|||
class Function;
class MDNode;
class Module;
// SIProgramInfo is defined with the `struct` key in SIProgramInfo.h; declare
// it with the same key so the tags do not mismatch (-Wmismatched-tags,
// MSVC C4099).
struct SIProgramInfo;
class Type;
|
||||
|
||||
namespace AMDGPU {
|
||||
|
@ -55,6 +56,13 @@ private:
|
|||
|
||||
std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
|
||||
|
||||
Kernel::CodeProps::Metadata getHSACodeProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const;
|
||||
Kernel::DebugProps::Metadata getHSADebugProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const;
|
||||
|
||||
void emitVersion();
|
||||
|
||||
void emitPrintf(const Module &Mod);
|
||||
|
@ -87,9 +95,7 @@ public:
|
|||
|
||||
void end();
|
||||
|
||||
void emitKernel(const Function &Func,
|
||||
const Kernel::CodeProps::Metadata &CodeProps,
|
||||
const Kernel::DebugProps::Metadata &DebugProps);
|
||||
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
|
||||
};
|
||||
|
||||
} // end namespace HSAMD
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
//===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// Defines struct to track resource usage for kernels and entry functions.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H

// Keep this header self-contained: the struct below uses the fixed-width
// integer types and std::numeric_limits.
#include <cstdint>
#include <limits>

namespace llvm {

/// Track resource usage for kernels / entry functions.
///
/// A default-constructed object has every counter at zero, every flag cleared
/// and both debugger SGPR numbers set to std::numeric_limits<uint16_t>::max().
struct SIProgramInfo {
  // Fields set in PGM_RSRC1 pm4 packet.
  uint32_t VGPRBlocks = 0;
  uint32_t SGPRBlocks = 0;
  uint32_t Priority = 0;
  uint32_t FloatMode = 0;
  uint32_t Priv = 0;
  uint32_t DX10Clamp = 0;
  uint32_t DebugMode = 0;
  uint32_t IEEEMode = 0;
  uint64_t ScratchSize = 0;

  uint64_t ComputePGMRSrc1 = 0;

  // Fields set in PGM_RSRC2 pm4 packet.
  uint32_t LDSBlocks = 0;
  uint32_t ScratchBlocks = 0;

  uint64_t ComputePGMRSrc2 = 0;

  uint32_t NumVGPR = 0;
  uint32_t NumSGPR = 0;
  uint32_t LDSSize = 0;
  bool FlatUsed = false;

  // Number of SGPRs that meets number of waves per execution unit request.
  uint32_t NumSGPRsForWavesPerEU = 0;

  // Number of VGPRs that meets number of waves per execution unit request.
  uint32_t NumVGPRsForWavesPerEU = 0;

  // Fixed SGPR number used to hold wave scratch offset for entire kernel
  // execution, or std::numeric_limits<uint16_t>::max() if the register is not
  // used or not known.
  uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
      std::numeric_limits<uint16_t>::max();

  // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
  // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
  // is not used or not known.
  uint16_t DebuggerPrivateSegmentBufferSGPR =
      std::numeric_limits<uint16_t>::max();

  // Whether there is recursion, dynamic allocas, indirect calls or some other
  // reason there may be statically unknown stack usage.
  bool DynamicCallStack = false;

  // Bonus information for debugging.
  bool VCCUsed = false;

  SIProgramInfo() = default;
};

} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
|
Loading…
Reference in New Issue