[OpenMP] Use new offloading binary when embedding offloading images

The previous patch introduced the offloading binary format so we can
store some metadata along with the binary image. This patch introduces
using this inside the linker wrapper and Clang instead of the previous
method that embedded the metadata in the section name.

Differential Revision: https://reviews.llvm.org/D122683
This commit is contained in:
Joseph Huber 2022-03-25 11:03:02 -04:00
parent cac81161ed
commit 984a0dc386
10 changed files with 122 additions and 104 deletions

View File

@ -276,9 +276,12 @@ public:
/// CUDA runtime back-end for incorporating them into host-side object file. /// CUDA runtime back-end for incorporating them into host-side object file.
std::string CudaGpuBinaryFileName; std::string CudaGpuBinaryFileName;
/// List of filenames and section name pairs passed in using the /// List of filenames and metadata passed in using the -fembed-offload-object
/// -fembed-offload-object option to embed device-side offloading objects into /// option to embed device-side offloading objects into the host as a named
/// the host as a named section. Input passed in as '<filename>,<section>' /// section. Input passed in as 'filename,kind,triple,arch'.
///
/// NOTE: This will need to be expanded whenever we want to pass in more
/// metadata, at some point this should be its own clang tool.
std::vector<std::string> OffloadObjects; std::vector<std::string> OffloadObjects;
/// The name of the file to which the backend should save YAML optimization /// The name of the file to which the backend should save YAML optimization

View File

@ -39,6 +39,7 @@
#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h" #include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Passes/StandardInstrumentations.h"
@ -1206,22 +1207,33 @@ void clang::EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
return; return;
for (StringRef OffloadObject : CGOpts.OffloadObjects) { for (StringRef OffloadObject : CGOpts.OffloadObjects) {
if (OffloadObject.count(',') != 1) SmallVector<StringRef, 4> ObjectFields;
Diags.Report(Diags.getCustomDiagID( OffloadObject.split(ObjectFields, ',');
DiagnosticsEngine::Error, "Invalid string pair for embedding '%0'"))
<< OffloadObject; if (ObjectFields.size() != 4) {
auto FilenameAndSection = OffloadObject.split(','); auto DiagID = Diags.getCustomDiagID(
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr = DiagnosticsEngine::Error, "Expected at least four arguments '%0'");
llvm::MemoryBuffer::getFileOrSTDIN(FilenameAndSection.first); Diags.Report(DiagID) << OffloadObject;
if (std::error_code EC = ObjectOrErr.getError()) {
auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"could not open '%0' for embedding");
Diags.Report(DiagID) << FilenameAndSection.first;
return; return;
} }
SmallString<128> SectionName( llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr =
{".llvm.offloading.", FilenameAndSection.second}); llvm::MemoryBuffer::getFileOrSTDIN(ObjectFields[0]);
llvm::embedBufferInModule(*M, **ObjectOrErr, SectionName); if (std::error_code EC = ObjectOrErr.getError()) {
auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"could not open '%0' for embedding");
Diags.Report(DiagID) << ObjectFields[0];
return;
}
OffloadBinary::OffloadingImage Image{};
Image.TheImageKind = getImageKind(ObjectFields[0].rsplit(".").second);
Image.TheOffloadKind = getOffloadKind(ObjectFields[1]);
Image.StringData = {{"triple", ObjectFields[2]}, {"arch", ObjectFields[3]}};
Image.Image = **ObjectOrErr;
std::unique_ptr<MemoryBuffer> OffloadBuffer = OffloadBinary::write(Image);
llvm::embedBufferInModule(*M, *OffloadBuffer, ".llvm.offloading",
Align(OffloadBinary::getAlignment()));
} }
} }

View File

@ -6940,13 +6940,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
StringRef File = StringRef File =
C.getArgs().MakeArgString(TC->getInputFilename(*InputFile)); C.getArgs().MakeArgString(TC->getInputFilename(*InputFile));
StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
CmdArgs.push_back(Args.MakeArgString( CmdArgs.push_back(
"-fembed-offload-object=" + File + "," + Args.MakeArgString("-fembed-offload-object=" + File + "," +
Action::GetOffloadKindName(Action::OFK_OpenMP) + "." + Action::GetOffloadKindName(Action::OFK_OpenMP) +
TC->getTripleString() + "." + "," + TC->getTripleString() + "," +
TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName)); TCArgs.getLastArgValue(options::OPT_march_EQ)));
} }
} }

View File

@ -344,4 +344,4 @@
// RUN: -fopenmp-new-driver -no-canonical-prefixes -nogpulib %s -o openmp-offload-gpu 2>&1 \ // RUN: -fopenmp-new-driver -no-canonical-prefixes -nogpulib %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s // RUN: | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s
// NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp.nvptx64-nvidia-cuda.sm_70 // NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70

View File

@ -1,6 +1,7 @@
// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h,section // RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm -fembed-offload-object=%S/Inputs/empty.h,,, -o - %s | FileCheck %s
// CHECK: @[[OBJECT:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
// CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @[[OBJECT]]], section "llvm.metadata"
// CHECK: @[[OBJECT:.+]] = private constant [0 x i8] zeroinitializer, section ".llvm.offloading.section"
// CHECK: @llvm.compiler.used = appending global [3 x i8*] [i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT1]]], section "llvm.metadata"
void foo(void) {} void foo(void) {}

View File

@ -1,14 +1,14 @@
; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ ; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
; RUN: -fembed-offload-object=%S/Inputs/empty.h,section1 \ ; RUN: -fembed-offload-object=%S/Inputs/empty.h,,, \
; RUN: -fembed-offload-object=%S/Inputs/empty.h,section2 -x ir %s -o - \ ; RUN: -fembed-offload-object=%S/Inputs/empty.h,,, -x ir %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK ; RUN: | FileCheck %s -check-prefix=CHECK
; CHECK: @[[OBJECT1:.+]] = hidden constant [0 x i8] zeroinitializer, section ".llvm.offloading.section1" ; CHECK: @[[OBJECT_1:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}\00", section ".llvm.offloading", align 8
; CHECK: @[[OBJECT2:.+]] = hidden constant [0 x i8] zeroinitializer, section ".llvm.offloading.section2" ; CHECK: @[[OBJECT_2:.+]] = private constant [120 x i8] c"\10\FF\10\AD{{.*}}\00", section ".llvm.offloading", align 8
; CHECK: @llvm.compiler.used = appending global [3 x i8*] [i8* @x, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT1]], i32 0, i32 0), i8* getelementptr inbounds ([0 x i8], [0 x i8]* @[[OBJECT2]], i32 0, i32 0)], section "llvm.metadata" ; CHECK: @llvm.compiler.used = appending global [3 x ptr] [ptr @x, ptr @[[OBJECT_1]], ptr @[[OBJECT_2]]], section "llvm.metadata"
@x = private constant i8 1 @x = private constant i8 1
@llvm.compiler.used = appending global [1 x i8*] [i8* @x], section "llvm.metadata" @llvm.compiler.used = appending global [1 x ptr] [ptr @x], section "llvm.metadata"
define i32 @foo() { define i32 @foo() {
ret i32 0 ret i32 0

View File

@ -29,6 +29,7 @@
#include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h" #include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h" #include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h" #include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileOutputBuffer.h"
@ -146,8 +147,8 @@ static SmallVector<std::string, 16> TempFiles;
static codegen::RegisterCodeGenFlags CodeGenFlags; static codegen::RegisterCodeGenFlags CodeGenFlags;
/// Magic section string that marks the existence of offloading data. The /// Magic section string that marks the existence of offloading data. The
/// section string will be formatted as `.llvm.offloading.<triple>.<arch>`. /// section will contain one or more offloading binaries stored contiguously.
#define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading." #define OFFLOAD_SECTION_MAGIC_STR ".llvm.offloading"
/// Information for a device offloading file extracted from the host. /// Information for a device offloading file extracted from the host.
struct DeviceFile { struct DeviceFile {
@ -201,16 +202,6 @@ void printCommands(ArrayRef<StringRef> CmdArgs) {
llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n"); llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n");
} }
/// Choose the file extension used for an extracted device file.
///
/// \param DeviceTriple target triple string of the device image.
/// \param IsBitcode    true when the image is already known to be bitcode.
/// \returns "bc" for bitcode or any AMDGPU triple, "cubin" for NVPTX triples,
/// and "o" for everything else.
static StringRef getDeviceFileExtension(StringRef DeviceTriple,
                                        bool IsBitcode = false) {
  const Triple ParsedTriple(DeviceTriple);
  // Bitcode and AMDGPU images share the same extension.
  if (IsBitcode || ParsedTriple.isAMDGPU())
    return "bc";
  return ParsedTriple.isNVPTX() ? "cubin" : "o";
}
std::string getMainExecutable(const char *Name) { std::string getMainExecutable(const char *Name) {
void *Ptr = (void *)(intptr_t)&getMainExecutable; void *Ptr = (void *)(intptr_t)&getMainExecutable;
auto COWPath = sys::fs::getMainExecutable(Name, Ptr); auto COWPath = sys::fs::getMainExecutable(Name, Ptr);
@ -289,6 +280,55 @@ void removeFromCompilerUsed(Module &M, GlobalValue &Value) {
GV->setSection("llvm.metadata"); GV->setSection("llvm.metadata");
} }
/// Attempts to extract all the embedded device images contained inside the
/// buffer \p Contents. The buffer is expected to contain one or more valid
/// offloading binaries stored contiguously.
///
/// For every binary found, the device image is copied into an output file
/// requested from createOutputFile() using a name built from \p Prefix, the
/// offload kind, the triple and the architecture, and a matching entry is
/// appended to \p DeviceFiles.
///
/// \param Contents    raw bytes of the offloading section or global.
/// \param Prefix      leading component of every generated file name.
/// \param DeviceFiles receives one entry per extracted image.
/// \returns an error if a binary cannot be parsed, has a version other than 1,
/// reports a size of zero, or if its output file cannot be created or
/// committed; success otherwise.
Error extractOffloadFiles(StringRef Contents, StringRef Prefix,
                          SmallVectorImpl<DeviceFile> &DeviceFiles) {
  uint64_t Offset = 0;
  // There could be multiple offloading binaries stored at this section.
  while (Offset < Contents.size()) {
    // Wrap the remaining bytes without copying; the data is raw binary, so no
    // null terminator is required.
    std::unique_ptr<MemoryBuffer> Buffer =
        MemoryBuffer::getMemBuffer(Contents.drop_front(Offset), "",
                                   /*RequiresNullTerminator*/ false);
    auto BinaryOrErr = OffloadBinary::create(*Buffer);
    if (!BinaryOrErr)
      return BinaryOrErr.takeError();

    OffloadBinary &Binary = **BinaryOrErr;

    if (Binary.getVersion() != 1)
      return createStringError(inconvertibleErrorCode(),
                               "Incompatible device image version");

    // The loop advances by the size recorded in the binary's header. A size
    // of zero would never move past this binary, so reject it up front rather
    // than looping forever and creating temporary files without bound.
    // NOTE(review): assumes OffloadBinary::create() does not already reject a
    // zero size -- confirm against its implementation.
    const uint64_t BinarySize = Binary.getSize();
    if (BinarySize == 0)
      return createStringError(inconvertibleErrorCode(),
                               "Invalid device image size");

    StringRef Kind = getOffloadKindName(Binary.getOffloadKind());
    StringRef Suffix = getImageKindName(Binary.getImageKind());

    SmallString<128> TempFile;
    if (Error Err =
            createOutputFile(Prefix + "-" + Kind + "-" + Binary.getTriple() +
                                 "-" + Binary.getArch(),
                             Suffix, TempFile))
      return Err;

    Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
        FileOutputBuffer::create(TempFile, Binary.getImage().size());
    if (!OutputOrErr)
      return OutputOrErr.takeError();
    std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
    // Only the device image itself is written out, not the surrounding
    // offloading-binary wrapper.
    std::copy(Binary.getImage().bytes_begin(), Binary.getImage().bytes_end(),
              Output->getBufferStart());
    if (Error E = Output->commit())
      return E;

    DeviceFiles.emplace_back(Kind, Binary.getTriple(), Binary.getArch(),
                             TempFile);

    // Step over this binary to the next one stored in the same buffer.
    Offset += BinarySize;
  }

  return Error::success();
}
Expected<Optional<std::string>> Expected<Optional<std::string>>
extractFromBinary(const ObjectFile &Obj, extractFromBinary(const ObjectFile &Obj,
SmallVectorImpl<DeviceFile> &DeviceFiles) { SmallVectorImpl<DeviceFile> &DeviceFiles) {
@ -296,40 +336,21 @@ extractFromBinary(const ObjectFile &Obj,
StringRef Prefix = sys::path::stem(Obj.getFileName()); StringRef Prefix = sys::path::stem(Obj.getFileName());
SmallVector<StringRef, 4> ToBeStripped; SmallVector<StringRef, 4> ToBeStripped;
// Extract data from sections of the form `.llvm.offloading.<triple>.<arch>`. // Extract offloading binaries from sections with the name `.llvm.offloading`.
for (const SectionRef &Sec : Obj.sections()) { for (const SectionRef &Sec : Obj.sections()) {
Expected<StringRef> Name = Sec.getName(); Expected<StringRef> Name = Sec.getName();
if (!Name || !Name->startswith(OFFLOAD_SECTION_MAGIC_STR)) if (!Name || !Name->equals(OFFLOAD_SECTION_MAGIC_STR))
continue; continue;
SmallVector<StringRef, 4> SectionFields; Expected<StringRef> Contents = Sec.getContents();
Name->split(SectionFields, '.'); if (!Contents)
StringRef Kind = SectionFields[3]; return Contents.takeError();
StringRef DeviceTriple = SectionFields[4];
StringRef Arch = SectionFields[5];
if (Expected<StringRef> Contents = Sec.getContents()) { if (Error Err = extractOffloadFiles(*Contents, Prefix, DeviceFiles))
SmallString<128> TempFile;
StringRef DeviceExtension = getDeviceFileExtension(
DeviceTriple, identify_magic(*Contents) == file_magic::bitcode);
if (Error Err = createOutputFile(Prefix + "-" + Kind + "-" +
DeviceTriple + "-" + Arch,
DeviceExtension, TempFile))
return std::move(Err); return std::move(Err);
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(TempFile, Sec.getSize());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
std::copy(Contents->begin(), Contents->end(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);
DeviceFiles.emplace_back(Kind, DeviceTriple, Arch, TempFile);
ToBeStripped.push_back(*Name); ToBeStripped.push_back(*Name);
} }
}
if (ToBeStripped.empty() || !StripSections) if (ToBeStripped.empty() || !StripSections)
return None; return None;
@ -405,42 +426,21 @@ extractFromBitcode(std::unique_ptr<MemoryBuffer> Buffer,
SmallVector<GlobalVariable *, 4> ToBeDeleted; SmallVector<GlobalVariable *, 4> ToBeDeleted;
// Extract data from the global string containing a section of the form // Extract offloading data from globals with the `.llvm.offloading` section
// `.llvm.offloading.<triple>.<arch>`. // name.
for (GlobalVariable &GV : M->globals()) { for (GlobalVariable &GV : M->globals()) {
if (!GV.hasSection() || if (!GV.hasSection() || !GV.getSection().equals(OFFLOAD_SECTION_MAGIC_STR))
!GV.getSection().startswith(OFFLOAD_SECTION_MAGIC_STR))
continue; continue;
auto *CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer()); auto *CDS = dyn_cast<ConstantDataSequential>(GV.getInitializer());
if (!CDS) if (!CDS)
continue; continue;
SmallVector<StringRef, 4> SectionFields;
GV.getSection().split(SectionFields, '.');
StringRef Kind = SectionFields[3];
StringRef DeviceTriple = SectionFields[4];
StringRef Arch = SectionFields[5];
StringRef Contents = CDS->getAsString(); StringRef Contents = CDS->getAsString();
SmallString<128> TempFile;
StringRef DeviceExtension = getDeviceFileExtension( if (Error Err = extractOffloadFiles(Contents, Prefix, DeviceFiles))
DeviceTriple, identify_magic(Contents) == file_magic::bitcode);
if (Error Err = createOutputFile(Prefix + "-" + Kind + "-" + DeviceTriple +
"-" + Arch,
DeviceExtension, TempFile))
return std::move(Err); return std::move(Err);
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(TempFile, Contents.size());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);
DeviceFiles.emplace_back(Kind, DeviceTriple, Arch, TempFile);
ToBeDeleted.push_back(&GV); ToBeDeleted.push_back(&GV);
} }

View File

@ -73,6 +73,7 @@ public:
ImageKind getImageKind() const { return TheEntry->TheImageKind; } ImageKind getImageKind() const { return TheEntry->TheImageKind; }
OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; } OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; }
uint32_t getVersion() const { return TheHeader->Version; }
uint32_t getFlags() const { return TheEntry->Flags; } uint32_t getFlags() const { return TheEntry->Flags; }
uint64_t getSize() const { return TheHeader->Size; } uint64_t getSize() const { return TheHeader->Size; }

View File

@ -14,6 +14,7 @@
#define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/MemoryBufferRef.h"
#include <utility> // for std::pair #include <utility> // for std::pair
@ -109,7 +110,8 @@ std::string getUniqueModuleId(Module *M);
/// Embed the memory buffer \p Buf into the module \p M as a global using the /// Embed the memory buffer \p Buf into the module \p M as a global using the
/// specified section name. /// specified section name.
void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName); void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName,
Align Alignment = Align(1));
class CallInst; class CallInst;
namespace VFABI { namespace VFABI {

View File

@ -265,15 +265,15 @@ void VFABI::setVectorVariantNames(CallInst *CI,
} }
void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
StringRef SectionName) { StringRef SectionName, Align Alignment) {
// Embed the buffer into the module. // Embed the memory buffer into the module.
Constant *ModuleConstant = ConstantDataArray::get( Constant *ModuleConstant = ConstantDataArray::get(
M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
GlobalVariable *GV = new GlobalVariable( GlobalVariable *GV = new GlobalVariable(
M, ModuleConstant->getType(), true, GlobalValue::ExternalLinkage, M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
ModuleConstant, SectionName.drop_front()); ModuleConstant, "llvm.embedded.object");
GV->setSection(SectionName); GV->setSection(SectionName);
GV->setVisibility(GlobalValue::HiddenVisibility); GV->setAlignment(Alignment);
appendToCompilerUsed(M, GV); appendToCompilerUsed(M, GV);
} }