forked from OSchip/llvm-project
[AMDGPU] Emit kernel code properties as code object metadata
- These are not required for the low-level runtime.
Differential Revision: https://reviews.llvm.org/D29949
llvm-svn: 298556
This commit is contained in:
parent
fd8510cfec
commit
ca0e7f6472
|
@ -21,7 +21,6 @@
|
|||
#include "InstPrinter/AMDGPUInstPrinter.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
@ -145,14 +144,19 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
|||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
amd_kernel_code_t KernelCode;
|
||||
if (STM.isAmdCodeObjectV2(*MF)) {
|
||||
getSIProgramInfo(KernelInfo, *MF);
|
||||
EmitAmdKernelCodeT(*MF, KernelInfo);
|
||||
getAmdKernelCode(KernelCode, KernelInfo, *MF);
|
||||
|
||||
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
|
||||
getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction());
|
||||
getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(),
|
||||
KernelCode);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
||||
|
@ -732,94 +736,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
|
|||
}
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) const {
|
||||
void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
||||
const SIProgramInfo &KernelInfo,
|
||||
const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
|
||||
amd_kernel_code_t header;
|
||||
|
||||
AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
|
||||
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
|
||||
|
||||
header.compute_pgm_resource_registers =
|
||||
Out.compute_pgm_resource_registers =
|
||||
KernelInfo.ComputePGMRSrc1 |
|
||||
(KernelInfo.ComputePGMRSrc2 << 32);
|
||||
header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
|
||||
Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
|
||||
|
||||
|
||||
AMD_HSA_BITS_SET(header.code_properties,
|
||||
AMD_HSA_BITS_SET(Out.code_properties,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
|
||||
getElementByteSizeValue(STM.getMaxPrivateElementSize()));
|
||||
|
||||
if (MFI->hasPrivateSegmentBuffer()) {
|
||||
header.code_properties |=
|
||||
Out.code_properties |=
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
|
||||
}
|
||||
|
||||
if (MFI->hasDispatchPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
|
||||
if (MFI->hasQueuePtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
|
||||
|
||||
if (MFI->hasKernargSegmentPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
|
||||
|
||||
if (MFI->hasDispatchID())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
|
||||
|
||||
if (MFI->hasFlatScratchInit())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
|
||||
|
||||
// TODO: Private segment size
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
|
||||
|
||||
if (MFI->hasGridWorkgroupCountX()) {
|
||||
header.code_properties |=
|
||||
Out.code_properties |=
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
|
||||
}
|
||||
|
||||
if (MFI->hasGridWorkgroupCountY()) {
|
||||
header.code_properties |=
|
||||
Out.code_properties |=
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
|
||||
}
|
||||
|
||||
if (MFI->hasGridWorkgroupCountZ()) {
|
||||
header.code_properties |=
|
||||
Out.code_properties |=
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
|
||||
}
|
||||
|
||||
if (MFI->hasDispatchPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
|
||||
if (STM.debuggerSupported())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
|
||||
|
||||
if (STM.isXNACKEnabled())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
|
||||
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
|
||||
|
||||
// FIXME: Should use getKernArgSize
|
||||
header.kernarg_segment_byte_size =
|
||||
Out.kernarg_segment_byte_size =
|
||||
STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
|
||||
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
|
||||
header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
|
||||
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
|
||||
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
|
||||
Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
Out.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
|
||||
Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
|
||||
Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
|
||||
Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
|
||||
|
||||
// These alignment values are specified in powers of two, so alignment =
|
||||
// 2^n. The minimum alignment is 2^4 = 16.
|
||||
header.kernarg_segment_alignment = std::max((size_t)4,
|
||||
Out.kernarg_segment_alignment = std::max((size_t)4,
|
||||
countTrailingZeros(MFI->getMaxKernArgAlign()));
|
||||
|
||||
if (STM.debuggerEmitPrologue()) {
|
||||
header.debug_wavefront_private_segment_offset_sgpr =
|
||||
Out.debug_wavefront_private_segment_offset_sgpr =
|
||||
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
|
||||
header.debug_private_segment_buffer_sgpr =
|
||||
Out.debug_private_segment_buffer_sgpr =
|
||||
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
|
||||
}
|
||||
|
||||
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
|
||||
getTargetStreamer().EmitAMDKernelCodeT(header);
|
||||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
|
||||
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include <cstddef>
|
||||
|
@ -89,6 +90,8 @@ private:
|
|||
};
|
||||
|
||||
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
|
||||
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
|
||||
const MachineFunction &MF) const;
|
||||
void findNumUsedRegistersSI(const MachineFunction &MF,
|
||||
unsigned &NumSGPR,
|
||||
unsigned &NumVGPR) const;
|
||||
|
@ -97,8 +100,6 @@ private:
|
|||
/// can correctly setup the GPU state.
|
||||
void EmitProgramInfoR600(const MachineFunction &MF);
|
||||
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
|
||||
void EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) const;
|
||||
|
||||
public:
|
||||
explicit AMDGPUAsmPrinter(TargetMachine &TM,
|
||||
|
|
|
@ -274,6 +274,81 @@ struct Metadata final {
|
|||
|
||||
} // end namespace Arg
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Kernel Code Properties Metadata.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace CodeProps {

namespace Key {
/// \brief YAML key under which Metadata::mKernargSegmentSize is stored.
constexpr char KernargSegmentSize[] = "KernargSegmentSize";
/// \brief YAML key under which Metadata::mWorkgroupGroupSegmentSize is stored.
constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
/// \brief YAML key under which Metadata::mWorkitemPrivateSegmentSize is
/// stored.
constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
/// \brief YAML key under which Metadata::mWavefrontNumSGPRs is stored.
constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
/// \brief YAML key under which Metadata::mWorkitemNumVGPRs is stored.
constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
/// \brief YAML key under which Metadata::mKernargSegmentAlign is stored.
constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
/// \brief YAML key under which Metadata::mGroupSegmentAlign is stored.
constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
/// \brief YAML key under which Metadata::mPrivateSegmentAlign is stored.
constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
/// \brief YAML key under which Metadata::mWavefrontSize is stored.
constexpr char WavefrontSize[] = "WavefrontSize";
} // end namespace Key

/// \brief Kernel code properties metadata as held in memory. Every field
/// starts out as zero.
struct Metadata final {
  /// \brief Kernarg segment size, in bytes. The kernarg segment holds the
  /// values of the kernel's arguments. Optional.
  uint64_t mKernargSegmentSize = 0;
  /// \brief Group segment bytes a workgroup requires. Group segment memory
  /// that is allocated dynamically at dispatch time is not counted. Optional.
  uint32_t mWorkgroupGroupSegmentSize = 0;
  /// \brief Private segment bytes a workitem requires; covers the arg, spill
  /// and private segments. Optional.
  uint32_t mWorkitemPrivateSegmentSize = 0;
  /// \brief Number of SGPRs a wavefront uses in total. Optional.
  uint16_t mWavefrontNumSGPRs = 0;
  /// \brief Number of VGPRs a workitem uses in total. Optional.
  uint16_t mWorkitemNumVGPRs = 0;
  /// \brief Largest byte alignment among the kernel's variables in the kernarg
  /// memory segment. Expressed as a power of two. Optional.
  uint8_t mKernargSegmentAlign = 0;
  /// \brief Largest byte alignment among the kernel's variables in the group
  /// memory segment. Expressed as a power of two. Optional.
  uint8_t mGroupSegmentAlign = 0;
  /// \brief Largest byte alignment among the kernel's variables in the private
  /// memory segment. Expressed as a power of two. Optional.
  uint8_t mPrivateSegmentAlign = 0;
  /// \brief Size of a wavefront. Expressed as a power of two. Optional.
  uint8_t mWavefrontSize = 0;

  /// \brief Default constructor.
  Metadata() = default;

  /// \returns True if all code property fields are zero, false as soon as any
  /// one of them is non-zero.
  bool empty() const {
    return mKernargSegmentSize == 0 && mWorkgroupGroupSegmentSize == 0 &&
           mWorkitemPrivateSegmentSize == 0 && mWavefrontNumSGPRs == 0 &&
           mWorkitemNumVGPRs == 0 && mKernargSegmentAlign == 0 &&
           mGroupSegmentAlign == 0 && mPrivateSegmentAlign == 0 &&
           mWavefrontSize == 0;
  }

  /// \returns True if at least one code property field is non-zero, false if
  /// all of them are zero.
  bool notEmpty() const { return !empty(); }
};

} // end namespace CodeProps
|
||||
|
||||
namespace Key {
|
||||
/// \brief Key for Kernel::Metadata::mName.
|
||||
constexpr char Name[] = "Name";
|
||||
|
@ -285,6 +360,8 @@ constexpr char LanguageVersion[] = "LanguageVersion";
|
|||
constexpr char Attrs[] = "Attrs";
|
||||
/// \brief Key for Kernel::Metadata::mArgs.
|
||||
constexpr char Args[] = "Args";
|
||||
/// \brief Key for Kernel::Metadata::mCodeProps.
|
||||
constexpr char CodeProps[] = "CodeProps";
|
||||
} // end namespace Key
|
||||
|
||||
/// \brief In-memory representation of kernel metadata.
|
||||
|
@ -299,6 +376,8 @@ struct Metadata final {
|
|||
Attrs::Metadata mAttrs = Attrs::Metadata();
|
||||
/// \brief Arguments metadata. Optional.
|
||||
std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
|
||||
/// \brief Code properties metadata. Optional.
|
||||
CodeProps::Metadata mCodeProps = CodeProps::Metadata();
|
||||
|
||||
/// \brief Default constructor.
|
||||
Metadata() = default;
|
||||
|
|
|
@ -153,6 +153,30 @@ struct MappingTraits<Kernel::Arg::Metadata> {
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Kernel::CodeProps::Metadata> {
|
||||
static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
|
||||
MD.mKernargSegmentSize, uint64_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
|
||||
MD.mWorkgroupGroupSegmentSize, uint32_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
|
||||
MD.mWorkitemPrivateSegmentSize, uint32_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
|
||||
MD.mWavefrontNumSGPRs, uint16_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
|
||||
MD.mWorkitemNumVGPRs, uint16_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
|
||||
MD.mKernargSegmentAlign, uint8_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
|
||||
MD.mGroupSegmentAlign, uint8_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
|
||||
MD.mPrivateSegmentAlign, uint8_t(0));
|
||||
YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
|
||||
MD.mWavefrontSize, uint8_t(0));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Kernel::Metadata> {
|
||||
static void mapping(IO &YIO, Kernel::Metadata &MD) {
|
||||
|
@ -164,6 +188,8 @@ struct MappingTraits<Kernel::Metadata> {
|
|||
YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
|
||||
if (!MD.mArgs.empty() || !YIO.outputting())
|
||||
YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
|
||||
if (!MD.mCodeProps.empty() || !YIO.outputting())
|
||||
YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -531,13 +557,31 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
|
|||
Arg.mTypeName = TypeName;
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelCodeProps(
|
||||
const amd_kernel_code_t &KernelCode) {
|
||||
auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps;
|
||||
|
||||
CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size;
|
||||
CodeProps.mWorkgroupGroupSegmentSize =
|
||||
KernelCode.workgroup_group_segment_byte_size;
|
||||
CodeProps.mWorkitemPrivateSegmentSize =
|
||||
KernelCode.workitem_private_segment_byte_size;
|
||||
CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
|
||||
CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count;
|
||||
CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment;
|
||||
CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment;
|
||||
CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment;
|
||||
CodeProps.mWavefrontSize = KernelCode.wavefront_size;
|
||||
}
|
||||
|
||||
/// \brief Begins metadata emission by calling, in order, emitVersion(),
/// emitIsa() with \p Features, and emitPrintf() with \p Mod.
void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) {
|
||||
emitVersion();
|
||||
emitIsa(Features);
|
||||
emitPrintf(Mod);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernel(const Function &Func) {
|
||||
void MetadataStreamer::emitKernel(const Function &Func,
|
||||
const amd_kernel_code_t &KernelCode) {
|
||||
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
|
||||
return;
|
||||
|
||||
|
@ -548,6 +592,7 @@ void MetadataStreamer::emitKernel(const Function &Func) {
|
|||
emitKernelLanguage(Func);
|
||||
emitKernelAttrs(Func);
|
||||
emitKernelArgs(Func);
|
||||
emitKernelCodeProps(KernelCode);
|
||||
}
|
||||
|
||||
ErrorOr<std::string> MetadataStreamer::toYamlString() {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
|
||||
|
||||
#include "AMDGPUCodeObjectMetadata.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
|
||||
|
@ -72,6 +73,9 @@ private:
|
|||
StringRef TypeQual = "", StringRef BaseTypeName = "",
|
||||
StringRef AccQual = "", StringRef Name = "",
|
||||
StringRef TypeName = "");
|
||||
|
||||
void emitKernelCodeProps(const amd_kernel_code_t &KernelCode);
|
||||
|
||||
public:
|
||||
MetadataStreamer() = default;
|
||||
~MetadataStreamer() = default;
|
||||
|
@ -80,7 +84,7 @@ public:
|
|||
|
||||
/// \brief No-op: the body is empty.
void end() {}
|
||||
|
||||
void emitKernel(const Function &Func);
|
||||
void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
|
||||
|
||||
ErrorOr<std::string> toYamlString();
|
||||
|
||||
|
|
|
@ -47,8 +47,9 @@ void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(
|
|||
CodeObjectMetadataStreamer.begin(Features, Mod);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) {
|
||||
CodeObjectMetadataStreamer.emitKernel(Func);
|
||||
void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(
|
||||
const Function &Func, const amd_kernel_code_t &KernelCode) {
|
||||
CodeObjectMetadataStreamer.emitKernel(Func, KernelCode);
|
||||
}
|
||||
|
||||
void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata(
|
||||
|
|
|
@ -52,7 +52,8 @@ public:
|
|||
virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features,
|
||||
const Module &Mod);
|
||||
|
||||
virtual void EmitKernelCodeObjectMetadata(const Function &Func);
|
||||
virtual void EmitKernelCodeObjectMetadata(
|
||||
const Function &Func, const amd_kernel_code_t &KernelCode);
|
||||
|
||||
virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features);
|
||||
|
||||
|
|
|
@ -1274,8 +1274,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
|
|||
; NOTES-NEXT: Owner Data size Description
|
||||
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
|
||||
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
|
||||
; GFX700: AMD 0x000078b2 Unknown note type: (0x0000000a)
|
||||
; GFX800: AMD 0x000078b2 Unknown note type: (0x0000000a)
|
||||
; GFX900: AMD 0x000078b3 Unknown note type: (0x0000000a)
|
||||
; GFX700: AMD 0x0000928a Unknown note type: (0x0000000a)
|
||||
; GFX800: AMD 0x000092a9 Unknown note type: (0x0000000a)
|
||||
; GFX900: AMD 0x0000928b Unknown note type: (0x0000000a)
|
||||
|
||||
; PARSER: AMDGPU Code Object Metadata Parser Test: PASS
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: Version: [ 1, 0 ]
|
||||
|
||||
; CHECK: Kernels:
|
||||
; CHECK: - Name: test
|
||||
; CHECK: CodeProps:
|
||||
; CHECK: KernargSegmentSize: 24
|
||||
; GFX700: WavefrontNumSGPRs: 6
|
||||
; GFX800: WavefrontNumSGPRs: 96
|
||||
; GFX900: WavefrontNumSGPRs: 6
|
||||
; GFX700: WorkitemNumVGPRs: 4
|
||||
; GFX800: WorkitemNumVGPRs: 6
|
||||
; GFX900: WorkitemNumVGPRs: 6
|
||||
; CHECK: KernargSegmentAlign: 4
|
||||
; CHECK: GroupSegmentAlign: 4
|
||||
; CHECK: PrivateSegmentAlign: 4
|
||||
; CHECK: WavefrontSize: 6
|
||||
define amdgpu_kernel void @test(
|
||||
half addrspace(1)* %r,
|
||||
half addrspace(1)* %a,
|
||||
half addrspace(1)* %b) {
|
||||
entry:
|
||||
%a.val = load half, half addrspace(1)* %a
|
||||
%b.val = load half, half addrspace(1)* %b
|
||||
%r.val = fadd half %a.val, %b.val
|
||||
store half %r.val, half addrspace(1)* %r
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_code_object_metadata
|
||||
// CHECK: Version: [ 1, 0 ]
|
||||
// CHECK: Kernels:
|
||||
// CHECK: - Name: test_kernel
|
||||
// CHECK: CodeProps:
|
||||
// CHECK: KernargSegmentSize: 24
|
||||
// CHECK: WorkitemPrivateSegmentSize: 16
|
||||
// CHECK: WavefrontNumSGPRs: 6
|
||||
// CHECK: WorkitemNumVGPRs: 12
|
||||
.amdgpu_code_object_metadata
|
||||
Version: [ 1, 0 ]
|
||||
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
|
||||
Kernels:
|
||||
- Name: test_kernel
|
||||
CodeProps:
|
||||
KernargSegmentSize: 24
|
||||
WorkitemPrivateSegmentSize: 16
|
||||
WavefrontNumSGPRs: 6
|
||||
WorkitemNumVGPRs: 12
|
||||
.end_amdgpu_code_object_metadata
|
Loading…
Reference in New Issue