AMDGPU: [AMDGPU] Assembler: add .amdgpu_runtime_metadata directive for runtime metadata V2.0

Summary:
Added a pair of directives, .amdgpu_runtime_metadata/.end_amdgpu_runtime_metadata.
Between them the user can put a YAML string that is copied directly into the generated note. E.g.:
'''
.amdgpu_runtime_metadata
    {
        amd.MDVersion: [ 2, 0 ]
    }
.end_amdgpu_runtime_metadata
'''
Based on D25046

Reviewers: vpykhtin, nhaustov, yaxunl, tstellarAMD

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, mgorny, tony-tye

Differential Revision: https://reviews.llvm.org/D27619

llvm-svn: 290097
This commit is contained in:
Sam Kolton 2016-12-19 11:43:15 +00:00
parent 519807f7be
commit 69c8aa26d8
6 changed files with 200 additions and 73 deletions

View File

@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
"AMD", "AMDGPU"); "AMD", "AMDGPU");
// Emit runtime metadata. // Emit runtime metadata.
TS->emitRuntimeMetadata(M); TS->EmitRuntimeMetadata(M);
} }
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(

View File

@ -23,6 +23,7 @@
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h" #include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h" #include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInst.h"
@ -45,6 +46,8 @@
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h" #include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cstdint> #include <cstdint>
@ -678,6 +681,7 @@ private:
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA(); bool ParseDirectiveHSACodeObjectISA();
bool ParseDirectiveRuntimeMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT(); bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText(); bool ParseSectionDirectiveHSAText();
@ -1747,6 +1751,46 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false; return false;
} }
bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
std::string Metadata;
raw_string_ostream MS(Metadata);
getLexer().setSkipSpace(false);
bool FoundEnd = false;
while (!getLexer().is(AsmToken::Eof)) {
while (getLexer().is(AsmToken::Space)) {
MS << ' ';
Lex();
}
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
if (ID == ".end_amdgpu_runtime_metadata") {
Lex();
FoundEnd = true;
break;
}
}
MS << Parser.parseStringToEndOfStatement()
<< getContext().getAsmInfo()->getSeparatorString();
Parser.eatToEndOfStatement();
}
getLexer().setSkipSpace(true);
if (getLexer().is(AsmToken::Eof) && !FoundEnd)
return TokError("expected directive .end_amdgpu_runtime_metadata not found");
MS.flush();
getTargetStreamer().EmitRuntimeMetadata(Metadata);
return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) { amd_kernel_code_t &Header) {
SmallString<40> ErrStr; SmallString<40> ErrStr;
@ -1853,6 +1897,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa") if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA(); return ParseDirectiveHSACodeObjectISA();
if (IDVal == ".amdgpu_runtime_metadata")
return ParseDirectiveRuntimeMetadata();
if (IDVal == ".amd_kernel_code_t") if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT(); return ParseDirectiveAMDKernelCodeT();

View File

@ -93,6 +93,18 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
} }
/// Print the runtime metadata YAML string obtained for module \p M, placed
/// between the .amdgpu_runtime_metadata and .end_amdgpu_runtime_metadata
/// directives, each on a line of its own.
void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
  OS << "\t.amdgpu_runtime_metadata\n"
     << getRuntimeMDYAMLString(M)
     << "\n\t.end_amdgpu_runtime_metadata\n";
}
/// Print the already-formed metadata text \p Metadata between the
/// .amdgpu_runtime_metadata and .end_amdgpu_runtime_metadata directives.
///
/// NOTE(review): unlike the Module overload, no line break is written between
/// the opening directive and the text, nor between the text and the closing
/// directive; presumably \p Metadata is expected to supply its own leading and
/// trailing line breaks -- confirm against the parser that produces it.
void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
  OS << "\t.amdgpu_runtime_metadata"
     << Metadata
     << "\t.end_amdgpu_runtime_metadata\n";
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer // AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -105,25 +117,39 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
} }
void void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
uint32_t Minor) { PT_NOTE::NoteType Type,
MCStreamer &OS = getStreamer(); std::function<void(MCELFStreamer &)> EmitDesc) {
MCSectionELF *Note = auto &S = getStreamer();
OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE, auto &Context = S.getContext();
ELF::SHF_ALLOC);
auto NameSZ = sizeof(PT_NOTE::NoteName); auto NameSZ = sizeof(PT_NOTE::NoteName);
OS.PushSection();
OS.SwitchSection(Note); S.PushSection();
OS.EmitIntValue(NameSZ, 4); // namesz S.SwitchSection(Context.getELFSection(
OS.EmitIntValue(8, 4); // descz PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type S.EmitIntValue(NameSZ, 4); // namesz
OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name S.EmitValue(DescSZ, 4); // descz
OS.EmitValueToAlignment(4); S.EmitIntValue(Type, 4); // type
OS.EmitIntValue(Major, 4); // desc S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
OS.EmitIntValue(Minor, 4); S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
OS.EmitValueToAlignment(4); EmitDesc(S); // desc
OS.PopSection(); S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
S.PopSection();
}
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) {
EmitAMDGPUNote(
MCConstantExpr::create(8, getContext()),
PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
[&](MCELFStreamer &OS){
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
}
);
} }
void void
@ -132,36 +158,28 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
uint32_t Stepping, uint32_t Stepping,
StringRef VendorName, StringRef VendorName,
StringRef ArchName) { StringRef ArchName) {
MCStreamer &OS = getStreamer();
MCSectionELF *Note =
OS.getContext().getELFSection(PT_NOTE::SectionName, ELF::SHT_NOTE,
ELF::SHF_ALLOC);
uint16_t VendorNameSize = VendorName.size() + 1; uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1; uint16_t ArchNameSize = ArchName.size() + 1;
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize; VendorNameSize + ArchNameSize;
OS.PushSection(); EmitAMDGPUNote(
OS.SwitchSection(Note); MCConstantExpr::create(DescSZ, getContext()),
auto NameSZ = sizeof(PT_NOTE::NoteName); PT_NOTE::NT_AMDGPU_HSA_ISA,
OS.EmitIntValue(NameSZ, 4); // namesz [&](MCELFStreamer &OS) {
OS.EmitIntValue(DescSZ, 4); // descsz OS.EmitIntValue(VendorNameSize, 2);
OS.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_ISA, 4); // type OS.EmitIntValue(ArchNameSize, 2);
OS.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name OS.EmitIntValue(Major, 4);
OS.EmitValueToAlignment(4); OS.EmitIntValue(Minor, 4);
OS.EmitIntValue(VendorNameSize, 2); // desc OS.EmitIntValue(Stepping, 4);
OS.EmitIntValue(ArchNameSize, 2); OS.EmitBytes(VendorName);
OS.EmitIntValue(Major, 4); OS.EmitIntValue(0, 1); // NULL terminate VendorName
OS.EmitIntValue(Minor, 4); OS.EmitBytes(ArchName);
OS.EmitIntValue(Stepping, 4); OS.EmitIntValue(0, 1); // NULL terminte ArchName
OS.EmitBytes(VendorName); }
OS.EmitIntValue(0, 1); // NULL terminate VendorName );
OS.EmitBytes(ArchName);
OS.EmitIntValue(0, 1); // NULL terminte ArchName
OS.EmitValueToAlignment(4);
OS.PopSection();
} }
void void
@ -198,35 +216,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
Symbol->setBinding(ELF::STB_GLOBAL); Symbol->setBinding(ELF::STB_GLOBAL);
} }
void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) { void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
auto &S = getStreamer();
auto &Context = S.getContext();
auto NameSZ = sizeof(PT_NOTE::NoteName); // Size of note name including trailing null.
S.PushSection();
S.SwitchSection(Context.getELFSection(
PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
// Create two labels to mark the beginning and end of the desc field // Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field. // and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
auto *DescBegin = Context.createTempSymbol(); auto *DescBegin = Context.createTempSymbol();
auto *DescEnd = Context.createTempSymbol(); auto *DescEnd = Context.createTempSymbol();
auto *DescSZ = MCBinaryExpr::createSub( auto *DescSZ = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(DescEnd, Context), MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context); MCSymbolRefExpr::create(DescBegin, Context), Context);
// Emit the note element for runtime metadata. EmitAMDGPUNote(
// Name and desc should be padded to 4 byte boundary but size of name and DescSZ,
// desc should not include padding 0's. PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
S.EmitIntValue(NameSZ, 4); // namesz [&](MCELFStreamer &OS) {
S.EmitValue(DescSZ, 4); // descz OS.EmitLabel(DescBegin);
S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type OS.EmitBytes(Metadata);
S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name OS.EmitLabel(DescEnd);
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 }
S.EmitLabel(DescBegin); );
S.EmitBytes(getRuntimeMDYAMLString(M)); // desc }
S.EmitLabel(DescEnd);
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
S.PopSection(); EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
} }

View File

@ -14,6 +14,7 @@
#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCStreamer.h"
namespace llvm { namespace llvm {
#include "AMDGPUPTNote.h"
class DataLayout; class DataLayout;
class Function; class Function;
@ -24,6 +25,9 @@ class Module;
class Type; class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer { class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
MCContext &getContext() const { return Streamer.getContext(); }
public: public:
AMDGPUTargetStreamer(MCStreamer &S); AMDGPUTargetStreamer(MCStreamer &S);
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@ -42,7 +46,9 @@ public:
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
virtual void emitRuntimeMetadata(Module &M) = 0; virtual void EmitRuntimeMetadata(Module &M) = 0;
virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
}; };
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@ -64,12 +70,18 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
void emitRuntimeMetadata(Module &M) override {} void EmitRuntimeMetadata(Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
}; };
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
MCStreamer &Streamer; MCStreamer &Streamer;
void EmitAMDGPUNote(const MCExpr* DescSize,
AMDGPU::PT_NOTE::NoteType Type,
std::function<void(MCELFStreamer &)> EmitDesc);
public: public:
AMDGPUTargetELFStreamer(MCStreamer &S); AMDGPUTargetELFStreamer(MCStreamer &S);
@ -90,7 +102,9 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
void emitRuntimeMetadata(Module &M) override; void EmitRuntimeMetadata(Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
}; };
} }

View File

@ -14,6 +14,8 @@
// ELF: 0020: 03000000 414D4400 04000700 07000000 // ELF: 0020: 03000000 414D4400 04000700 07000000
// ELF: 0030: 00000000 00000000 414D4400 414D4447 // ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000 // ELF: 0040: 50550000
// We can't check the binary representation of the metadata note: it differs
// between Windows and Linux because of carriage returns on Windows.
// ELF: Symbol { // ELF: Symbol {
// ELF: Name: amd_kernel_code_t_minimal // ELF: Name: amd_kernel_code_t_minimal
@ -35,10 +37,29 @@
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amdgpu_runtime_metadata
{
amd.MDVersion: [ 2, 0 ]
amd.Kernels: [
{ amd.KernelName: amd_kernel_code_t_test_all },
{ amd.KernelName: amd_kernel_code_t_minimal }
]
}
.end_amdgpu_runtime_metadata
// ASM: .amdgpu_runtime_metadata
// ASM: {
// ASM: amd.MDVersion: [ 2, 0 ]
// ASM: amd.Kernels: [
// ASM: { amd.KernelName: amd_kernel_code_t_test_all },
// ASM: { amd.KernelName: amd_kernel_code_t_minimal }
// ASM: ]
// ASM: }
// ASM: .end_amdgpu_runtime_metadata
.amdgpu_hsa_kernel amd_kernel_code_t_test_all .amdgpu_hsa_kernel amd_kernel_code_t_test_all
.amdgpu_hsa_kernel amd_kernel_code_t_minimal .amdgpu_hsa_kernel amd_kernel_code_t_minimal
amd_kernel_code_t_test_all: amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values. ; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t .amd_kernel_code_t

View File

@ -0,0 +1,35 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
.amdgpu_runtime_metadata
{ amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
- { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
- { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
- { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
- { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
- { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
}
.end_amdgpu_runtime_metadata
// ASM: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
// ASM: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
// ASM: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
// ASM: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
// ASM: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
// ASM: }