AMDGPU: [AMDGPU] Assembler: add .hsa_code_object_metadata directive for functime metadata V2.0

Summary:
Added pair of directives .hsa_code_object_metadata/.end_hsa_code_object_metadata.
Between them user can put YAML string that would be directly put to the generated note. E.g.:
'''
.hsa_code_object_metadata
    {
        amd.MDVersion: [ 2, 0 ]
    }
.end_hsa_code_object_metadata
'''
Based on D25046

Reviewers: vpykhtin, nhaustov, yaxunl, tstellarAMD

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, mgorny, tony-tye

Differential Revision: https://reviews.llvm.org/D27619

llvm-svn: 290097
This commit is contained in:
Sam Kolton 2016-12-19 11:43:15 +00:00
parent 519807f7be
commit 69c8aa26d8
6 changed files with 200 additions and 73 deletions

View File

@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
"AMD", "AMDGPU");
// Emit runtime metadata.
TS->emitRuntimeMetadata(M);
TS->EmitRuntimeMetadata(M);
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(

View File

@ -23,6 +23,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@ -45,6 +46,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@ -678,6 +681,7 @@ private:
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
bool ParseDirectiveRuntimeMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText();
@ -1747,6 +1751,46 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
std::string Metadata;
raw_string_ostream MS(Metadata);
getLexer().setSkipSpace(false);
bool FoundEnd = false;
while (!getLexer().is(AsmToken::Eof)) {
while (getLexer().is(AsmToken::Space)) {
MS << ' ';
Lex();
}
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
if (ID == ".end_amdgpu_runtime_metadata") {
Lex();
FoundEnd = true;
break;
}
}
MS << Parser.parseStringToEndOfStatement()
<< getContext().getAsmInfo()->getSeparatorString();
Parser.eatToEndOfStatement();
}
getLexer().setSkipSpace(true);
if (getLexer().is(AsmToken::Eof) && !FoundEnd)
return TokError("expected directive .end_amdgpu_runtime_metadata not found");
MS.flush();
getTargetStreamer().EmitRuntimeMetadata(Metadata);
return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) {
SmallString<40> ErrStr;
@ -1853,6 +1897,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
if (IDVal == ".amdgpu_runtime_metadata")
return ParseDirectiveRuntimeMetadata();
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();

View File

@ -93,6 +93,18 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
}
void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
OS << "\t.amdgpu_runtime_metadata\n";
OS << getRuntimeMDYAMLString(M);
OS << "\n\t.end_amdgpu_runtime_metadata\n";
}
void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
OS << "\t.amdgpu_runtime_metadata";
OS << Metadata;
OS << "\t.end_amdgpu_runtime_metadata\n";
}
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@ -105,25 +117,39 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
}
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) {
MCStreamer &OS = getStreamer();
MCSectionELF *Note =
OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE,
ELF::SHF_ALLOC);
AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
PT_NOTE::NoteType Type,
std::function<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
auto NameSZ = sizeof(PT_NOTE::NoteName);
OS.PushSection();
OS.SwitchSection(Note);
OS.EmitIntValue(NameSZ, 4); // namesz
OS.EmitIntValue(8, 4); // descz
OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
OS.EmitValueToAlignment(4);
OS.EmitIntValue(Major, 4); // desc
OS.EmitIntValue(Minor, 4);
OS.EmitValueToAlignment(4);
OS.PopSection();
S.PushSection();
S.SwitchSection(Context.getELFSection(
PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(Type, 4); // type
S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
S.PopSection();
}
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) {
EmitAMDGPUNote(
MCConstantExpr::create(8, getContext()),
PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
[&](MCELFStreamer &OS){
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
}
);
}
void
@ -132,36 +158,28 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
MCStreamer &OS = getStreamer();
MCSectionELF *Note =
OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE,
ELF::SHF_ALLOC);
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
OS.PushSection();
OS.SwitchSection(Note);
auto NameSZ = sizeof(PT_NOTE::NoteName);
OS.EmitIntValue(NameSZ, 4); // namesz
OS.EmitIntValue(DescSZ, 4); // descsz
OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_ISA, 4); // type
OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
OS.EmitValueToAlignment(4);
OS.EmitIntValue(VendorNameSize, 2); // desc
OS.EmitIntValue(ArchNameSize, 2);
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
OS.EmitIntValue(Stepping, 4);
OS.EmitBytes(VendorName);
OS.EmitIntValue(0, 1); // NULL terminate VendorName
OS.EmitBytes(ArchName);
OS.EmitIntValue(0, 1); // NULL terminte ArchName
OS.EmitValueToAlignment(4);
OS.PopSection();
EmitAMDGPUNote(
MCConstantExpr::create(DescSZ, getContext()),
PT_NOTE::NT_AMDGPU_HSA_ISA,
[&](MCELFStreamer &OS) {
OS.EmitIntValue(VendorNameSize, 2);
OS.EmitIntValue(ArchNameSize, 2);
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
OS.EmitIntValue(Stepping, 4);
OS.EmitBytes(VendorName);
OS.EmitIntValue(0, 1); // NULL terminate VendorName
OS.EmitBytes(ArchName);
OS.EmitIntValue(0, 1); // NULL terminte ArchName
}
);
}
void
@ -198,35 +216,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
Symbol->setBinding(ELF::STB_GLOBAL);
}
void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) {
auto &S = getStreamer();
auto &Context = S.getContext();
auto NameSZ = sizeof(PT_NOTE::NoteName); // Size of note name including trailing null.
S.PushSection();
S.SwitchSection(Context.getELFSection(
PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
auto *DescBegin = Context.createTempSymbol();
auto *DescEnd = Context.createTempSymbol();
auto *DescSZ = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
// Emit the note element for runtime metadata.
// Name and desc should be padded to 4 byte boundary but size of name and
// desc should not include padding 0's.
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type
S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
S.EmitLabel(DescBegin);
S.EmitBytes(getRuntimeMDYAMLString(M)); // desc
S.EmitLabel(DescEnd);
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
S.PopSection();
EmitAMDGPUNote(
DescSZ,
PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
[&](MCELFStreamer &OS) {
OS.EmitLabel(DescBegin);
OS.EmitBytes(Metadata);
OS.EmitLabel(DescEnd);
}
);
}
void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
}

View File

@ -14,6 +14,7 @@
#include "llvm/MC/MCStreamer.h"
namespace llvm {
#include "AMDGPUPTNote.h"
class DataLayout;
class Function;
@ -24,6 +25,9 @@ class Module;
class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
MCContext &getContext() const { return Streamer.getContext(); }
public:
AMDGPUTargetStreamer(MCStreamer &S);
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@ -42,7 +46,9 @@ public:
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
virtual void emitRuntimeMetadata(Module &M) = 0;
virtual void EmitRuntimeMetadata(Module &M) = 0;
virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@ -64,12 +70,18 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
void emitRuntimeMetadata(Module &M) override {}
void EmitRuntimeMetadata(Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
};
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
void EmitAMDGPUNote(const MCExpr* DescSize,
AMDGPU::PT_NOTE::NoteType Type,
std::function<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S);
@ -90,7 +102,9 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
void emitRuntimeMetadata(Module &M) override;
void EmitRuntimeMetadata(Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
};
}

View File

@ -14,6 +14,8 @@
// ELF: 0020: 03000000 414D4400 04000700 07000000
// ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
// We can't check binary representation of metadata note: it is different on
// Windows and Linux because of carriage return on Windows
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_minimal
@ -35,10 +37,29 @@
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amdgpu_runtime_metadata
{
amd.MDVersion: [ 2, 0 ]
amd.Kernels: [
{ amd.KernelName: amd_kernel_code_t_test_all },
{ amd.KernelName: amd_kernel_code_t_minimal }
]
}
.end_amdgpu_runtime_metadata
// ASM: .amdgpu_runtime_metadata
// ASM: {
// ASM: amd.MDVersion: [ 2, 0 ]
// ASM: amd.Kernels: [
// ASM: { amd.KernelName: amd_kernel_code_t_test_all },
// ASM: { amd.KernelName: amd_kernel_code_t_minimal }
// ASM: ]
// ASM: }
// ASM: .end_amdgpu_runtime_metadata
.amdgpu_hsa_kernel amd_kernel_code_t_test_all
.amdgpu_hsa_kernel amd_kernel_code_t_minimal
amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t

View File

@ -0,0 +1,35 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
.amdgpu_runtime_metadata
{ amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
- { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
- { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
- { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
- { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
}
.end_amdgpu_runtime_metadata
// ASM: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
// ASM: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
// ASM: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
// ASM: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
// ASM: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
// ASM: }