[AMDGPU] Emit kernel code properties as code object metadata

- These are not required by the low-level runtime

Differential Revision: https://reviews.llvm.org/D29949

llvm-svn: 298556
This commit is contained in:
Konstantin Zhuravlyov 2017-03-22 22:54:39 +00:00
parent fd8510cfec
commit ca0e7f6472
10 changed files with 233 additions and 48 deletions

View File

@ -21,7 +21,6 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
@ -145,14 +144,19 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
amd_kernel_code_t KernelCode;
if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
EmitAmdKernelCodeT(*MF, KernelInfo);
getAmdKernelCode(KernelCode, KernelInfo, *MF);
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
}
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction());
getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(),
KernelCode);
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
@ -732,94 +736,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
amd_kernel_code_t header;
AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
header.compute_pgm_resource_registers =
Out.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
AMD_HSA_BITS_SET(header.code_properties,
AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize()));
if (MFI->hasPrivateSegmentBuffer()) {
header.code_properties |=
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (MFI->hasQueuePtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (MFI->hasKernargSegmentPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (MFI->hasDispatchID())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (MFI->hasFlatScratchInit())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
// TODO: Private segment size
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (MFI->hasGridWorkgroupCountX()) {
header.code_properties |=
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
}
if (MFI->hasGridWorkgroupCountY()) {
header.code_properties |=
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
}
if (MFI->hasGridWorkgroupCountZ()) {
header.code_properties |=
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
}
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.debuggerSupported())
header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
// FIXME: Should use getKernArgSize
header.kernarg_segment_byte_size =
Out.kernarg_segment_byte_size =
STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
Out.workitem_vgpr_count = KernelInfo.NumVGPR;
Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
header.kernarg_segment_alignment = std::max((size_t)4,
Out.kernarg_segment_alignment = std::max((size_t)4,
countTrailingZeros(MFI->getMaxKernArgAlign()));
if (STM.debuggerEmitPrologue()) {
header.debug_wavefront_private_segment_offset_sgpr =
Out.debug_wavefront_private_segment_offset_sgpr =
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
header.debug_private_segment_buffer_sgpr =
Out.debug_private_segment_buffer_sgpr =
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
}
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
getTargetStreamer().EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,

View File

@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
@ -89,6 +90,8 @@ private:
};
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
void findNumUsedRegistersSI(const MachineFunction &MF,
unsigned &NumSGPR,
unsigned &NumVGPR) const;
@ -97,8 +100,6 @@ private:
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
void EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const;
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,

View File

@ -274,6 +274,81 @@ struct Metadata final {
} // end namespace Arg
//===----------------------------------------------------------------------===//
// Kernel Code Properties Metadata.
//===----------------------------------------------------------------------===//
namespace CodeProps {

namespace Key {
/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
constexpr char KernargSegmentSize[] = "KernargSegmentSize";
/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
constexpr char WavefrontSize[] = "WavefrontSize";
} // end namespace Key

/// \brief In-memory representation of kernel code properties metadata.
///
/// Every field is optional and defaults to zero; a record whose fields are all
/// zero is considered empty (see empty()).
struct Metadata final {
  /// \brief Byte size of the kernarg segment memory, i.e. the memory that
  /// holds the values of the kernel's arguments. Optional.
  uint64_t mKernargSegmentSize = 0;

  /// \brief Byte size of the group segment memory a workgroup requires. Group
  /// segment memory that may be added dynamically when the kernel is
  /// dispatched is not counted. Optional.
  uint32_t mWorkgroupGroupSegmentSize = 0;

  /// \brief Byte size of the private segment memory a workitem requires. This
  /// covers the arg, spill and private segments. Optional.
  uint32_t mWorkitemPrivateSegmentSize = 0;

  /// \brief Total number of SGPRs a wavefront uses. Optional.
  uint16_t mWavefrontNumSGPRs = 0;

  /// \brief Total number of VGPRs a workitem uses. Optional.
  uint16_t mWorkitemNumVGPRs = 0;

  /// \brief Maximum byte alignment of the variables the kernel uses in the
  /// kernarg memory segment. Expressed as a power of two. Optional.
  uint8_t mKernargSegmentAlign = 0;

  /// \brief Maximum byte alignment of the variables the kernel uses in the
  /// group memory segment. Expressed as a power of two. Optional.
  uint8_t mGroupSegmentAlign = 0;

  /// \brief Maximum byte alignment of the variables the kernel uses in the
  /// private memory segment. Expressed as a power of two. Optional.
  uint8_t mPrivateSegmentAlign = 0;

  /// \brief Wavefront size. Expressed as a power of two. Optional.
  uint8_t mWavefrontSize = 0;

  /// \brief Default constructor.
  Metadata() = default;

  /// \returns True if kernel code properties metadata is empty (every field
  /// is zero), false otherwise.
  bool empty() const {
    return mKernargSegmentSize == 0 && mWorkgroupGroupSegmentSize == 0 &&
           mWorkitemPrivateSegmentSize == 0 && mWavefrontNumSGPRs == 0 &&
           mWorkitemNumVGPRs == 0 && mKernargSegmentAlign == 0 &&
           mGroupSegmentAlign == 0 && mPrivateSegmentAlign == 0 &&
           mWavefrontSize == 0;
  }

  /// \returns True if kernel code properties metadata is not empty (at least
  /// one field is non-zero), false otherwise.
  bool notEmpty() const {
    return !empty();
  }
};

} // end namespace CodeProps
namespace Key {
/// \brief Key for Kernel::Metadata::mName.
constexpr char Name[] = "Name";
@ -285,6 +360,8 @@ constexpr char LanguageVersion[] = "LanguageVersion";
constexpr char Attrs[] = "Attrs";
/// \brief Key for Kernel::Metadata::mArgs.
constexpr char Args[] = "Args";
/// \brief Key for Kernel::Metadata::mCodeProps.
constexpr char CodeProps[] = "CodeProps";
} // end namespace Key
/// \brief In-memory representation of kernel metadata.
@ -299,6 +376,8 @@ struct Metadata final {
Attrs::Metadata mAttrs = Attrs::Metadata();
/// \brief Arguments metadata. Optional.
std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
/// \brief Code properties metadata. Optional.
CodeProps::Metadata mCodeProps = CodeProps::Metadata();
/// \brief Default constructor.
Metadata() = default;

View File

@ -153,6 +153,30 @@ struct MappingTraits<Kernel::Arg::Metadata> {
}
};
template <>
struct MappingTraits<Kernel::CodeProps::Metadata> {
static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
MD.mKernargSegmentSize, uint64_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
MD.mWorkgroupGroupSegmentSize, uint32_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
MD.mWorkitemPrivateSegmentSize, uint32_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
MD.mWavefrontNumSGPRs, uint16_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
MD.mWorkitemNumVGPRs, uint16_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
MD.mKernargSegmentAlign, uint8_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
MD.mGroupSegmentAlign, uint8_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
MD.mPrivateSegmentAlign, uint8_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
MD.mWavefrontSize, uint8_t(0));
}
};
template <>
struct MappingTraits<Kernel::Metadata> {
static void mapping(IO &YIO, Kernel::Metadata &MD) {
@ -164,6 +188,8 @@ struct MappingTraits<Kernel::Metadata> {
YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
if (!MD.mArgs.empty() || !YIO.outputting())
YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
if (!MD.mCodeProps.empty() || !YIO.outputting())
YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
}
};
@ -531,13 +557,31 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
Arg.mTypeName = TypeName;
}
/// \brief Copies the code properties held in \p KernelCode into the CodeProps
/// metadata of the last entry of CodeObjectMetadata.mKernels.
///
/// NOTE(review): back() is called unconditionally, so this assumes a kernel
/// entry has already been appended to mKernels - confirm against the caller.
void MetadataStreamer::emitKernelCodeProps(
    const amd_kernel_code_t &KernelCode) {
  auto &Props = CodeObjectMetadata.mKernels.back().mCodeProps;

  // Segment sizes.
  Props.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size;
  Props.mWorkgroupGroupSegmentSize =
      KernelCode.workgroup_group_segment_byte_size;
  Props.mWorkitemPrivateSegmentSize =
      KernelCode.workitem_private_segment_byte_size;

  // Register counts.
  Props.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
  Props.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count;

  // Segment alignments and wavefront size.
  Props.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment;
  Props.mGroupSegmentAlign = KernelCode.group_segment_alignment;
  Props.mPrivateSegmentAlign = KernelCode.private_segment_alignment;
  Props.mWavefrontSize = KernelCode.wavefront_size;
}
// Starts code object metadata emission by calling, in order, emitVersion(),
// emitIsa(Features) and emitPrintf(Mod).
void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) {
emitVersion();
emitIsa(Features);
emitPrintf(Mod);
}
void MetadataStreamer::emitKernel(const Function &Func) {
void MetadataStreamer::emitKernel(const Function &Func,
const amd_kernel_code_t &KernelCode) {
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
@ -548,6 +592,7 @@ void MetadataStreamer::emitKernel(const Function &Func) {
emitKernelLanguage(Func);
emitKernelAttrs(Func);
emitKernelArgs(Func);
emitKernelCodeProps(KernelCode);
}
ErrorOr<std::string> MetadataStreamer::toYamlString() {

View File

@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#include "AMDGPUCodeObjectMetadata.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorOr.h"
@ -72,6 +73,9 @@ private:
StringRef TypeQual = "", StringRef BaseTypeName = "",
StringRef AccQual = "", StringRef Name = "",
StringRef TypeName = "");
void emitKernelCodeProps(const amd_kernel_code_t &KernelCode);
public:
MetadataStreamer() = default;
~MetadataStreamer() = default;
@ -80,7 +84,7 @@ public:
void end() {}
void emitKernel(const Function &Func);
void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
ErrorOr<std::string> toYamlString();

View File

@ -47,8 +47,9 @@ void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(
CodeObjectMetadataStreamer.begin(Features, Mod);
}
void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) {
CodeObjectMetadataStreamer.emitKernel(Func);
void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(
const Function &Func, const amd_kernel_code_t &KernelCode) {
CodeObjectMetadataStreamer.emitKernel(Func, KernelCode);
}
void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata(

View File

@ -52,7 +52,8 @@ public:
virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features,
const Module &Mod);
virtual void EmitKernelCodeObjectMetadata(const Function &Func);
virtual void EmitKernelCodeObjectMetadata(
const Function &Func, const amd_kernel_code_t &KernelCode);
virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features);

View File

@ -1274,8 +1274,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
; NOTES-NEXT: Owner Data size Description
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
; GFX700: AMD 0x000078b2 Unknown note type: (0x0000000a)
; GFX800: AMD 0x000078b2 Unknown note type: (0x0000000a)
; GFX900: AMD 0x000078b3 Unknown note type: (0x0000000a)
; GFX700: AMD 0x0000928a Unknown note type: (0x0000000a)
; GFX800: AMD 0x000092a9 Unknown note type: (0x0000000a)
; GFX900: AMD 0x0000928b Unknown note type: (0x0000000a)
; PARSER: AMDGPU Code Object Metadata Parser Test: PASS

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test
; CHECK: CodeProps:
; CHECK: KernargSegmentSize: 24
; GFX700: WavefrontNumSGPRs: 6
; GFX800: WavefrontNumSGPRs: 96
; GFX900: WavefrontNumSGPRs: 6
; GFX700: WorkitemNumVGPRs: 4
; GFX800: WorkitemNumVGPRs: 6
; GFX900: WorkitemNumVGPRs: 6
; CHECK: KernargSegmentAlign: 4
; CHECK: GroupSegmentAlign: 4
; CHECK: PrivateSegmentAlign: 4
; CHECK: WavefrontSize: 6
define amdgpu_kernel void @test(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b) {
entry:
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
%r.val = fadd half %a.val, %b.val
store half %r.val, half addrspace(1)* %r
ret void
}

View File

@ -0,0 +1,24 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
// CHECK: .amdgpu_code_object_metadata
// CHECK: Version: [ 1, 0 ]
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: CodeProps:
// CHECK: KernargSegmentSize: 24
// CHECK: WorkitemPrivateSegmentSize: 16
// CHECK: WavefrontNumSGPRs: 6
// CHECK: WorkitemNumVGPRs: 12
.amdgpu_code_object_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
CodeProps:
KernargSegmentSize: 24
WorkitemPrivateSegmentSize: 16
WavefrontNumSGPRs: 6
WorkitemNumVGPRs: 12
.end_amdgpu_code_object_metadata