LTO: introduce object file-based on-disk module format.

This format is simply a regular object file with the bitcode stored in a
section named ".llvmbc", plus any number of other (non-allocated) sections.

One immediate use case for this is to accommodate compilation processes
which expect the object file to contain metadata in non-allocated sections,
such as the ".go_export" section used by some Go compilers [1], although I
imagine that in the future we could consider compiling parts of the module
(such as large non-inlinable functions) directly into the object file to
improve LTO efficiency.

[1] http://golang.org/doc/install/gccgo#Imports

Differential Revision: http://reviews.llvm.org/D4371

llvm-svn: 218078
This commit is contained in:
Peter Collingbourne 2014-09-18 21:28:49 +00:00
parent 6bf091c656
commit 10039c02ea
15 changed files with 178 additions and 25 deletions

View File

@ -28,8 +28,9 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it
provides a mechanism for the file to self-describe "abbreviations", which are
effectively size optimizations for the content.
LLVM IR files may be optionally embedded into a `wrapper`_ structure that makes
it easy to embed extra data along with LLVM IR files.
LLVM IR files may be optionally embedded into a `wrapper`_ structure, or in a
`native object file`_. Both of these mechanisms make it easy to embed extra
data along with LLVM IR files.
This document first describes the LLVM bitstream format, describes the wrapper
format, then describes the record structure used by LLVM IR files.
@ -460,6 +461,19 @@ to the start of the bitcode stream in the file, and the Size field is the size
in bytes of the stream. CPUType is a target-specific value that can be used to
encode the CPU of the target.
.. _native object file:
Native Object File Wrapper Format
=================================
Bitcode files for LLVM IR may also be wrapped in a native object file
(i.e. ELF, COFF, Mach-O). The bitcode must be stored in a section of the
object file named ``.llvmbc``. This wrapper format is useful for accommodating
LTO in compilation pipelines where intermediate objects must be native object
files which contain metadata in other sections.
Not all tools support this format.
.. _encoding of LLVM IR:
LLVM IR Encoding

View File

@ -43,6 +43,9 @@ Non-comprehensive list of changes in this release
* Support for AuroraUX has been removed.
* Added support for a `native object file-based bitcode wrapper format
<BitCodeFormat.html#native-object-file>`_.
* ... next change ...
.. NOTE

View File

@ -26,7 +26,8 @@ enum class object_error {
arch_not_found,
invalid_file_type,
parse_failed,
unexpected_eof
unexpected_eof,
bitcode_section_not_found,
};
inline std::error_code make_error_code(object_error e) {

View File

@ -22,6 +22,8 @@ class Module;
class GlobalValue;
namespace object {
class ObjectFile;
class IRObjectFile : public SymbolicFile {
std::unique_ptr<Module> M;
std::unique_ptr<Mangler> Mang;
@ -49,6 +51,16 @@ public:
return v->isIR();
}
/// \brief Finds and returns bitcode embedded in the given object file, or an
/// error code if not found.
static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj);
/// \brief Finds and returns bitcode in the given memory buffer (which may
/// be either a bitcode file or a native object file with embedded bitcode),
/// or an error code if not found.
static ErrorOr<MemoryBufferRef>
findBitcodeInMemBuffer(MemoryBufferRef Object);
static ErrorOr<std::unique_ptr<IRObjectFile>>
createIRObjectFile(MemoryBufferRef Object, LLVMContext &Context);
};

View File

@ -29,6 +29,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@ -44,6 +46,7 @@
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include <system_error>
using namespace llvm;
using namespace llvm::object;
LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
llvm::TargetMachine *TM)
@ -51,23 +54,31 @@ LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
sys::fs::file_magic::bitcode;
bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
return bool(BCData);
}
bool LTOModule::isBitcodeFile(const char *path) {
sys::fs::file_magic type;
if (sys::fs::identify_magic(path, type))
bool LTOModule::isBitcodeFile(const char *Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(Path);
if (!BufferOrErr)
return false;
return type == sys::fs::file_magic::bitcode;
ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
BufferOrErr.get()->getMemBufferRef());
return bool(BCData);
}
bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
StringRef triplePrefix) {
std::string Triple =
getBitcodeTargetTriple(buffer->getMemBufferRef(), getGlobalContext());
return StringRef(Triple).startswith(triplePrefix);
bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
StringRef TriplePrefix) {
ErrorOr<MemoryBufferRef> BCOrErr =
IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
if (!BCOrErr)
return false;
std::string Triple = getBitcodeTargetTriple(*BCOrErr, getGlobalContext());
return StringRef(Triple).startswith(TriplePrefix);
}
LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
@ -113,7 +124,13 @@ LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
TargetOptions options,
std::string &errMsg) {
ErrorOr<Module *> MOrErr = parseBitcodeFile(Buffer, getGlobalContext());
ErrorOr<MemoryBufferRef> MBOrErr =
IRObjectFile::findBitcodeInMemBuffer(Buffer);
if (std::error_code EC = MBOrErr.getError()) {
errMsg = EC.message();
return nullptr;
}
ErrorOr<Module *> MOrErr = parseBitcodeFile(*MBOrErr, getGlobalContext());
if (std::error_code EC = MOrErr.getError()) {
errMsg = EC.message();
return nullptr;

View File

@ -41,6 +41,8 @@ std::string _object_error_category::message(int EV) const {
return "Invalid data was encountered while parsing the file";
case object_error::unexpected_eof:
return "The end of the file was unexpectedly encountered";
case object_error::bitcode_section_not_found:
return "Bitcode section not found in object file";
}
llvm_unreachable("An enumerator of object_error does not have a message "
"defined.");

View File

@ -25,6 +25,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
@ -264,11 +265,50 @@ basic_symbol_iterator IRObjectFile::symbol_end_impl() const {
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
/// Walks the sections of \p Obj looking for one named ".llvmbc" and returns
/// its contents as a buffer reference labelled with the object's file name.
///
/// Sections are visited in iteration order; an error while reading a section
/// name, or while reading the contents of the matching section, is returned
/// to the caller as-is. If no section carries that name the result is
/// object_error::bitcode_section_not_found.
ErrorOr<MemoryBufferRef>
IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
  for (const SectionRef &Section : Obj.sections()) {
    StringRef Name;
    std::error_code NameEC = Section.getName(Name);
    if (NameEC)
      return NameEC;

    // Only the embedded-bitcode section is of interest; skip everything else.
    if (Name != ".llvmbc")
      continue;

    StringRef Bitcode;
    std::error_code ContentsEC = Section.getContents(Bitcode);
    if (ContentsEC)
      return ContentsEC;
    return MemoryBufferRef(Bitcode, Obj.getFileName());
  }

  return object_error::bitcode_section_not_found;
}
/// Locates bitcode inside \p Object, which may be a raw bitcode file or a
/// relocatable ELF / Mach-O / COFF object wrapping bitcode.
///
/// A buffer identified as bitcode is returned unchanged. A relocatable native
/// object is parsed and handed to findBitcodeInObject; a parse failure is
/// returned as the error. Every other file type yields
/// object_error::invalid_file_type.
ErrorOr<MemoryBufferRef>
IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
  sys::fs::file_magic Magic = sys::fs::identify_magic(Object.getBuffer());

  // Plain bitcode needs no unwrapping.
  if (Magic == sys::fs::file_magic::bitcode)
    return Object;

  // Only relocatable native objects are searched for an embedded section.
  bool IsRelocatableObject = Magic == sys::fs::file_magic::elf_relocatable ||
                             Magic == sys::fs::file_magic::macho_object ||
                             Magic == sys::fs::file_magic::coff_object;
  if (!IsRelocatableObject)
    return object_error::invalid_file_type;

  ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
      ObjectFile::createObjectFile(Object, Magic);
  if (!ParsedObj)
    return ParsedObj.getError();
  return findBitcodeInObject(**ParsedObj);
}
ErrorOr<std::unique_ptr<IRObjectFile>>
llvm::object::IRObjectFile::createIRObjectFile(MemoryBufferRef Object,
LLVMContext &Context) {
ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
if (!BCOrErr)
return BCOrErr.getError();
std::unique_ptr<MemoryBuffer> Buff(MemoryBuffer::getMemBuffer(Object, false));
std::unique_ptr<MemoryBuffer> Buff(
MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
ErrorOr<Module *> MOrErr = getLazyBitcodeModule(std::move(Buff), Context);
if (std::error_code EC = MOrErr.getError())

View File

@ -40,11 +40,9 @@ ErrorOr<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
return object_error::invalid_file_type;
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
case sys::fs::file_magic::elf_core:
case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::macho_executable:
case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
case sys::fs::file_magic::macho_core:
@ -54,10 +52,26 @@ ErrorOr<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return ObjectFile::createObjectFile(Object, Type);
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::coff_object: {
ErrorOr<std::unique_ptr<ObjectFile>> Obj =
ObjectFile::createObjectFile(Object, Type);
if (!Obj || !Context)
return std::move(Obj);
ErrorOr<MemoryBufferRef> BCData =
IRObjectFile::findBitcodeInObject(*Obj->get());
if (!BCData)
return std::move(Obj);
return IRObjectFile::createIRObjectFile(
MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()),
*Context);
}
}
llvm_unreachable("Unexpected Binary File Type");
}

View File

@ -0,0 +1,2 @@
.section .llvmbc,.llvmbc
.incbin "bcsection.bc"

View File

@ -0,0 +1,2 @@
.section .llvmbc
.incbin "bcsection.bc"

View File

@ -0,0 +1,21 @@
; Exercises bitcode embedded in a native object file's .llvmbc section.
; The module below is assembled to bitcode, then llvm-mc builds an object for
; each of three triples (x86_64 win32, linux-gnu, apple-darwin11) from an
; assembly input that pulls the bitcode in with .incbin. For every object,
; llvm-nm must list "main" both on the wrapped object itself and on the
; output that llvm-lto produces from it.
; RUN: llvm-as -o %T/bcsection.bc %s
; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-pc-win32 -o %T/bcsection.coff.bco %p/Inputs/bcsection.s
; RUN: llvm-nm %T/bcsection.coff.bco | FileCheck %s
; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.coff.o %T/bcsection.coff.bco
; RUN: llvm-nm %T/bcsection.coff.o | FileCheck %s
; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-linux-gnu -o %T/bcsection.elf.bco %p/Inputs/bcsection.s
; RUN: llvm-nm %T/bcsection.elf.bco | FileCheck %s
; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.elf.o %T/bcsection.elf.bco
; RUN: llvm-nm %T/bcsection.elf.o | FileCheck %s
; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-apple-darwin11 -o %T/bcsection.macho.bco %p/Inputs/bcsection.macho.s
; RUN: llvm-nm %T/bcsection.macho.bco | FileCheck %s
; RUN: llvm-lto -exported-symbol=main -o %T/bcsection.macho.o %T/bcsection.macho.bco
; RUN: llvm-nm %T/bcsection.macho.o | FileCheck %s
; CHECK: main
; Minimal module: a single function whose name the checks above look for.
define i32 @main() {
ret i32 0
}

View File

@ -0,0 +1,2 @@
.section .llvmbc
.incbin "bcsection.bc"

View File

@ -0,0 +1,11 @@
; Exercises the gold plugin on an object whose bitcode lives in a .llvmbc
; section. The module below is assembled to bitcode and wrapped in an object
; by llvm-mc. With the bitcode reader disabled (-no-llvm-bc) llvm-nm is
; expected to print nothing for the wrapped object, while the same command on
; the output of "ld -r" run with the LLVMgold.so plugin must list "main".
; RUN: llvm-as -o %T/bcsection.bc %s
; RUN: llvm-mc -I=%T -filetype=obj -o %T/bcsection.bco %p/Inputs/bcsection.s
; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0
; RUN: ld -r -o %T/bcsection.o -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco
; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s
; CHECK: main
; Minimal module: a single function whose name the check above looks for.
define i32 @main() {
ret i32 0
}

View File

@ -300,7 +300,9 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
ErrorOr<std::unique_ptr<object::IRObjectFile>> ObjOrErr =
object::IRObjectFile::createIRObjectFile(BufferRef, Context);
std::error_code EC = ObjOrErr.getError();
if (EC == BitcodeError::InvalidBitcodeSignature)
if (EC == BitcodeError::InvalidBitcodeSignature ||
EC == object::object_error::invalid_file_type ||
EC == object::object_error::bitcode_section_not_found)
return LDPS_OK;
*claimed = 1;
@ -548,8 +550,15 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
if (get_view(F.handle, &View) != LDPS_OK)
message(LDPL_FATAL, "Failed to get a view of file");
std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
StringRef((char *)View, File.filesize), "", false);
llvm::ErrorOr<MemoryBufferRef> MBOrErr =
object::IRObjectFile::findBitcodeInMemBuffer(
MemoryBufferRef(StringRef((const char *)View, File.filesize), ""));
if (std::error_code EC = MBOrErr.getError())
message(LDPL_FATAL, "Could not read bitcode from file : %s",
EC.message().c_str());
std::unique_ptr<MemoryBuffer> Buffer =
MemoryBuffer::getMemBuffer(MBOrErr->getBuffer(), "", false);
if (release_input_file(F.handle) != LDPS_OK)
message(LDPL_FATAL, "Failed to release file information");

View File

@ -149,6 +149,9 @@ cl::list<std::string> SegSect("s", cl::Positional, cl::ZeroOrMore,
cl::opt<bool> FormatMachOasHex("x", cl::desc("Print symbol entry in hex, "
"Mach-O only"));
// Command-line flag "-no-llvm-bc". When set, dumpSymbolNamesFromFile passes a
// null LLVMContext to createBinary instead of the global context.
cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
cl::desc("Disable LLVM bitcode reader"));
bool PrintAddress = true;
bool MultipleFiles = false;
@ -1009,8 +1012,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
return;
LLVMContext &Context = getGlobalContext();
ErrorOr<std::unique_ptr<Binary>> BinaryOrErr =
createBinary(BufferOrErr.get()->getMemBufferRef(), &Context);
ErrorOr<std::unique_ptr<Binary>> BinaryOrErr = createBinary(
BufferOrErr.get()->getMemBufferRef(), NoLLVMBitcode ? nullptr : &Context);
if (error(BinaryOrErr.getError(), Filename))
return;
Binary &Bin = *BinaryOrErr.get();