llvm-project/llvm/lib/Object/IRSymtab.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

414 lines
14 KiB
C++
Raw Normal View History

//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Object/IRSymtab.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
using namespace irsymtab;
static const char *LibcallRoutineNames[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
};
namespace {
const char *getExpectedProducerName() {
static char DefaultName[] = LLVM_VERSION_STRING
#ifdef LLVM_REVISION
" " LLVM_REVISION
#endif
;
// Allows for testing of the irsymtab writer and upgrade mechanism. This
// environment variable should not be set by users.
if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
return OverrideName;
return DefaultName;
}
const char *kExpectedProducerName = getExpectedProducerName();
/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
SmallVector<char, 0> &Symtab;
StringTableBuilder &StrtabBuilder;
StringSaver Saver;
// This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
// The StringTableBuilder does not create a copy of any strings added to it,
// so this provides somewhere to store any strings that we create.
Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
BumpPtrAllocator &Alloc)
: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
DenseMap<const Comdat *, int> ComdatMap;
Mangler Mang;
Triple TT;
std::vector<storage::Comdat> Comdats;
std::vector<storage::Module> Mods;
std::vector<storage::Symbol> Syms;
std::vector<storage::Uncommon> Uncommons;
std::string COFFLinkerOpts;
raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
[ELF] Implement Dependent Libraries Feature This patch implements a limited form of autolinking primarily designed to allow either the --dependent-library compiler option, or "comment lib" pragmas ( https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=vs-2017) in C/C++ e.g. #pragma comment(lib, "foo"), to cause an ELF linker to automatically add the specified library to the link when processing the input file generated by the compiler. Currently this extension is unique to LLVM and LLD. However, care has been taken to design this feature so that it could be supported by other ELF linkers. The design goals were to provide: - A simple linking model for developers to reason about. - The ability to to override autolinking from the linker command line. - Source code compatibility, where possible, with "comment lib" pragmas in other environments (MSVC in particular). Dependent library support is implemented differently for ELF platforms than on the other platforms. Primarily this difference is that on ELF we pass the dependent library specifiers directly to the linker without manipulating them. This is in contrast to other platforms where they are mapped to a specific linker option by the compiler. This difference is a result of the greater variety of ELF linkers and the fact that ELF linkers tend to handle libraries in a more complicated fashion than on other platforms. This forces us to defer handling the specifiers to the linker. In order to achieve a level of source code compatibility with other platforms we have restricted this feature to work with libraries that meet the following "reasonable" requirements: 1. There are no competing defined symbols in a given set of libraries, or if they exist, the program owner doesn't care which is linked to their program. 2. There may be circular dependencies between libraries. The binary representation is a mergeable string section (SHF_MERGE, SHF_STRINGS), called .deplibs, with custom type SHT_LLVM_DEPENDENT_LIBRARIES (0x6fff4c04). The compiler forms this section by concatenating the arguments of the "comment lib" pragmas and --dependent-library options in the order they are encountered. Partial (-r, -Ur) links are handled by concatenating .deplibs sections with the normal mergeable string section rules. As an example, #pragma comment(lib, "foo") would result in: .section ".deplibs","MS",@llvm_dependent_libraries,1 .asciz "foo" For LTO, equivalent information to the contents of a the .deplibs section can be retrieved by the LLD for bitcode input files. LLD processes the dependent library specifiers in the following way: 1. Dependent libraries which are found from the specifiers in .deplibs sections of relocatable object files are added when the linker decides to include that file (which could itself be in a library) in the link. Dependent libraries behave as if they were appended to the command line after all other options. As a consequence the set of dependent libraries are searched last to resolve symbols. 2. It is an error if a file cannot be found for a given specifier. 3. Any command line options in effect at the end of the command line parsing apply to the dependent libraries, e.g. --whole-archive. 4. The linker tries to add a library or relocatable object file from each of the strings in a .deplibs section by; first, handling the string as if it was specified on the command line; second, by looking for the string in each of the library search paths in turn; third, by looking for a lib<string>.a or lib<string>.so (depending on the current mode of the linker) in each of the library search paths. 5. A new command line option --no-dependent-libraries tells LLD to ignore the dependent libraries. Rationale for the above points: 1. Adding the dependent libraries last makes the process simple to understand from a developers perspective. All linkers are able to implement this scheme. 2. Error-ing for libraries that are not found seems like better behavior than failing the link during symbol resolution. 3. It seems useful for the user to be able to apply command line options which will affect all of the dependent libraries. There is a potential problem of surprise for developers, who might not realize that these options would apply to these "invisible" input files; however, despite the potential for surprise, this is easy for developers to reason about and gives developers the control that they may require. 4. This algorithm takes into account all of the different ways that ELF linkers find input files. The different search methods are tried by the linker in most obvious to least obvious order. 5. I considered adding finer grained control over which dependent libraries were ignored (e.g. MSVC has /nodefaultlib:<library>); however, I concluded that this is not necessary: if finer control is required developers can fall back to using the command line directly. RFC thread: http://lists.llvm.org/pipermail/llvm-dev/2019-March/131004.html. Differential Revision: https://reviews.llvm.org/D60274 llvm-svn: 360984
2019-05-17 11:44:15 +08:00
std::vector<storage::Str> DependentLibraries;
void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
S.Size = Value.size();
}
template <typename T>
void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
R.Offset = Symtab.size();
R.Size = Objs.size();
Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
reinterpret_cast<const char *>(Objs.data() + Objs.size()));
}
Expected<int> getComdatIndex(const Comdat *C, const Module *M);
Error addModule(Module *M);
Error addSymbol(const ModuleSymbolTable &Msymtab,
const SmallPtrSet<GlobalValue *, 8> &Used,
ModuleSymbolTable::Symbol Sym);
Error build(ArrayRef<Module *> Mods);
};
Error Builder::addModule(Module *M) {
if (M->getDataLayoutStr().empty())
return make_error<StringError>("input module has no datalayout",
inconvertibleErrorCode());
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
ModuleSymbolTable Msymtab;
Msymtab.addModule(M);
storage::Module Mod;
Mod.Begin = Syms.size();
Mod.End = Syms.size() + Msymtab.symbols().size();
Mod.UncBegin = Uncommons.size();
Mods.push_back(Mod);
if (TT.isOSBinFormatCOFF()) {
if (auto E = M->materializeMetadata())
return E;
if (NamedMDNode *LinkerOptions =
M->getNamedMetadata("llvm.linker.options")) {
for (MDNode *MDOptions : LinkerOptions->operands())
for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
}
}
[ELF] Implement Dependent Libraries Feature This patch implements a limited form of autolinking primarily designed to allow either the --dependent-library compiler option, or "comment lib" pragmas ( https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=vs-2017) in C/C++ e.g. #pragma comment(lib, "foo"), to cause an ELF linker to automatically add the specified library to the link when processing the input file generated by the compiler. Currently this extension is unique to LLVM and LLD. However, care has been taken to design this feature so that it could be supported by other ELF linkers. The design goals were to provide: - A simple linking model for developers to reason about. - The ability to to override autolinking from the linker command line. - Source code compatibility, where possible, with "comment lib" pragmas in other environments (MSVC in particular). Dependent library support is implemented differently for ELF platforms than on the other platforms. Primarily this difference is that on ELF we pass the dependent library specifiers directly to the linker without manipulating them. This is in contrast to other platforms where they are mapped to a specific linker option by the compiler. This difference is a result of the greater variety of ELF linkers and the fact that ELF linkers tend to handle libraries in a more complicated fashion than on other platforms. This forces us to defer handling the specifiers to the linker. In order to achieve a level of source code compatibility with other platforms we have restricted this feature to work with libraries that meet the following "reasonable" requirements: 1. There are no competing defined symbols in a given set of libraries, or if they exist, the program owner doesn't care which is linked to their program. 2. There may be circular dependencies between libraries. The binary representation is a mergeable string section (SHF_MERGE, SHF_STRINGS), called .deplibs, with custom type SHT_LLVM_DEPENDENT_LIBRARIES (0x6fff4c04). The compiler forms this section by concatenating the arguments of the "comment lib" pragmas and --dependent-library options in the order they are encountered. Partial (-r, -Ur) links are handled by concatenating .deplibs sections with the normal mergeable string section rules. As an example, #pragma comment(lib, "foo") would result in: .section ".deplibs","MS",@llvm_dependent_libraries,1 .asciz "foo" For LTO, equivalent information to the contents of a the .deplibs section can be retrieved by the LLD for bitcode input files. LLD processes the dependent library specifiers in the following way: 1. Dependent libraries which are found from the specifiers in .deplibs sections of relocatable object files are added when the linker decides to include that file (which could itself be in a library) in the link. Dependent libraries behave as if they were appended to the command line after all other options. As a consequence the set of dependent libraries are searched last to resolve symbols. 2. It is an error if a file cannot be found for a given specifier. 3. Any command line options in effect at the end of the command line parsing apply to the dependent libraries, e.g. --whole-archive. 4. The linker tries to add a library or relocatable object file from each of the strings in a .deplibs section by; first, handling the string as if it was specified on the command line; second, by looking for the string in each of the library search paths in turn; third, by looking for a lib<string>.a or lib<string>.so (depending on the current mode of the linker) in each of the library search paths. 5. A new command line option --no-dependent-libraries tells LLD to ignore the dependent libraries. Rationale for the above points: 1. Adding the dependent libraries last makes the process simple to understand from a developers perspective. All linkers are able to implement this scheme. 2. Error-ing for libraries that are not found seems like better behavior than failing the link during symbol resolution. 3. It seems useful for the user to be able to apply command line options which will affect all of the dependent libraries. There is a potential problem of surprise for developers, who might not realize that these options would apply to these "invisible" input files; however, despite the potential for surprise, this is easy for developers to reason about and gives developers the control that they may require. 4. This algorithm takes into account all of the different ways that ELF linkers find input files. The different search methods are tried by the linker in most obvious to least obvious order. 5. I considered adding finer grained control over which dependent libraries were ignored (e.g. MSVC has /nodefaultlib:<library>); however, I concluded that this is not necessary: if finer control is required developers can fall back to using the command line directly. RFC thread: http://lists.llvm.org/pipermail/llvm-dev/2019-March/131004.html. Differential Revision: https://reviews.llvm.org/D60274 llvm-svn: 360984
2019-05-17 11:44:15 +08:00
if (TT.isOSBinFormatELF()) {
if (auto E = M->materializeMetadata())
return E;
if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) {
for (MDNode *MDOptions : N->operands()) {
const auto OperandStr =
cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString();
storage::Str Specifier;
setStr(Specifier, OperandStr);
DependentLibraries.emplace_back(Specifier);
}
}
}
for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
if (Error Err = addSymbol(Msymtab, Used, Msym))
return Err;
return Error::success();
}
Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
if (P.second) {
std::string Name;
if (TT.isOSBinFormatCOFF()) {
const GlobalValue *GV = M->getNamedValue(C->getName());
if (!GV)
return make_error<StringError>("Could not find leader",
inconvertibleErrorCode());
// Internal leaders do not affect symbol resolution, therefore they do not
// appear in the symbol table.
if (GV->hasLocalLinkage()) {
P.first->second = -1;
return -1;
}
llvm::raw_string_ostream OS(Name);
Mang.getNameWithPrefix(OS, GV, false);
} else {
Name = std::string(C->getName());
}
storage::Comdat Comdat;
setStr(Comdat.Name, Saver.save(Name));
Comdats.push_back(Comdat);
}
return P.first->second;
}
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
const SmallPtrSet<GlobalValue *, 8> &Used,
ModuleSymbolTable::Symbol Msym) {
Syms.emplace_back();
storage::Symbol &Sym = Syms.back();
Sym = {};
storage::Uncommon *Unc = nullptr;
auto Uncommon = [&]() -> storage::Uncommon & {
if (Unc)
return *Unc;
Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon;
Uncommons.emplace_back();
Unc = &Uncommons.back();
*Unc = {};
setStr(Unc->COFFWeakExternFallbackName, "");
setStr(Unc->SectionName, "");
return *Unc;
};
SmallString<64> Name;
{
raw_svector_ostream OS(Name);
Msymtab.printSymbolName(OS, Msym);
}
setStr(Sym.Name, Saver.save(StringRef(Name)));
auto Flags = Msymtab.getSymbolFlags(Msym);
if (Flags & object::BasicSymbolRef::SF_Undefined)
Sym.Flags |= 1 << storage::Symbol::FB_undefined;
if (Flags & object::BasicSymbolRef::SF_Weak)
Sym.Flags |= 1 << storage::Symbol::FB_weak;
if (Flags & object::BasicSymbolRef::SF_Common)
Sym.Flags |= 1 << storage::Symbol::FB_common;
if (Flags & object::BasicSymbolRef::SF_Indirect)
Sym.Flags |= 1 << storage::Symbol::FB_indirect;
if (Flags & object::BasicSymbolRef::SF_Global)
Sym.Flags |= 1 << storage::Symbol::FB_global;
if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
Sym.Flags |= 1 << storage::Symbol::FB_format_specific;
if (Flags & object::BasicSymbolRef::SF_Executable)
Sym.Flags |= 1 << storage::Symbol::FB_executable;
Sym.ComdatIndex = -1;
auto *GV = Msym.dyn_cast<GlobalValue *>();
if (!GV) {
// Undefined module asm symbols act as GC roots and are implicitly used.
if (Flags & object::BasicSymbolRef::SF_Undefined)
Sym.Flags |= 1 << storage::Symbol::FB_used;
setStr(Sym.IRName, "");
return Error::success();
}
setStr(Sym.IRName, GV->getName());
bool IsBuiltinFunc = false;
for (const char *LibcallName : LibcallRoutineNames)
if (GV->getName() == LibcallName)
IsBuiltinFunc = true;
if (Used.count(GV) || IsBuiltinFunc)
Sym.Flags |= 1 << storage::Symbol::FB_used;
if (GV->isThreadLocal())
Sym.Flags |= 1 << storage::Symbol::FB_tls;
if (GV->hasGlobalUnnamedAddr())
Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
if (GV->canBeOmittedFromSymbolTable())
Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;
if (Flags & object::BasicSymbolRef::SF_Common) {
auto *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar)
return make_error<StringError>("Only variables can have common linkage!",
inconvertibleErrorCode());
Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
GV->getType()->getElementType());
Uncommon().CommonAlign = GVar->getAlignment();
}
const GlobalObject *Base = GV->getBaseObject();
if (!Base)
return make_error<StringError>("Unable to determine comdat of alias!",
inconvertibleErrorCode());
if (const Comdat *C = Base->getComdat()) {
Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent());
if (!ComdatIndexOrErr)
return ComdatIndexOrErr.takeError();
Sym.ComdatIndex = *ComdatIndexOrErr;
}
if (TT.isOSBinFormatCOFF()) {
emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);
if ((Flags & object::BasicSymbolRef::SF_Weak) &&
(Flags & object::BasicSymbolRef::SF_Indirect)) {
auto *Fallback = dyn_cast<GlobalValue>(
cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts());
if (!Fallback)
return make_error<StringError>("Invalid weak external",
inconvertibleErrorCode());
std::string FallbackName;
raw_string_ostream OS(FallbackName);
Msymtab.printSymbolName(OS, Fallback);
OS.flush();
setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
}
}
if (!Base->getSection().empty())
setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
return Error::success();
}
Error Builder::build(ArrayRef<Module *> IRMods) {
storage::Header Hdr;
assert(!IRMods.empty());
Hdr.Version = storage::Header::kCurrentVersion;
setStr(Hdr.Producer, kExpectedProducerName);
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
TT = Triple(IRMods[0]->getTargetTriple());
for (auto *M : IRMods)
if (Error Err = addModule(M))
return Err;
COFFLinkerOptsOS.flush();
setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));
// We are about to fill in the header's range fields, so reserve space for it
// and copy it in afterwards.
Symtab.resize(sizeof(storage::Header));
writeRange(Hdr.Modules, Mods);
writeRange(Hdr.Comdats, Comdats);
writeRange(Hdr.Symbols, Syms);
writeRange(Hdr.Uncommons, Uncommons);
[ELF] Implement Dependent Libraries Feature This patch implements a limited form of autolinking primarily designed to allow either the --dependent-library compiler option, or "comment lib" pragmas ( https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=vs-2017) in C/C++ e.g. #pragma comment(lib, "foo"), to cause an ELF linker to automatically add the specified library to the link when processing the input file generated by the compiler. Currently this extension is unique to LLVM and LLD. However, care has been taken to design this feature so that it could be supported by other ELF linkers. The design goals were to provide: - A simple linking model for developers to reason about. - The ability to to override autolinking from the linker command line. - Source code compatibility, where possible, with "comment lib" pragmas in other environments (MSVC in particular). Dependent library support is implemented differently for ELF platforms than on the other platforms. Primarily this difference is that on ELF we pass the dependent library specifiers directly to the linker without manipulating them. This is in contrast to other platforms where they are mapped to a specific linker option by the compiler. This difference is a result of the greater variety of ELF linkers and the fact that ELF linkers tend to handle libraries in a more complicated fashion than on other platforms. This forces us to defer handling the specifiers to the linker. In order to achieve a level of source code compatibility with other platforms we have restricted this feature to work with libraries that meet the following "reasonable" requirements: 1. There are no competing defined symbols in a given set of libraries, or if they exist, the program owner doesn't care which is linked to their program. 2. There may be circular dependencies between libraries. The binary representation is a mergeable string section (SHF_MERGE, SHF_STRINGS), called .deplibs, with custom type SHT_LLVM_DEPENDENT_LIBRARIES (0x6fff4c04). The compiler forms this section by concatenating the arguments of the "comment lib" pragmas and --dependent-library options in the order they are encountered. Partial (-r, -Ur) links are handled by concatenating .deplibs sections with the normal mergeable string section rules. As an example, #pragma comment(lib, "foo") would result in: .section ".deplibs","MS",@llvm_dependent_libraries,1 .asciz "foo" For LTO, equivalent information to the contents of a the .deplibs section can be retrieved by the LLD for bitcode input files. LLD processes the dependent library specifiers in the following way: 1. Dependent libraries which are found from the specifiers in .deplibs sections of relocatable object files are added when the linker decides to include that file (which could itself be in a library) in the link. Dependent libraries behave as if they were appended to the command line after all other options. As a consequence the set of dependent libraries are searched last to resolve symbols. 2. It is an error if a file cannot be found for a given specifier. 3. Any command line options in effect at the end of the command line parsing apply to the dependent libraries, e.g. --whole-archive. 4. The linker tries to add a library or relocatable object file from each of the strings in a .deplibs section by; first, handling the string as if it was specified on the command line; second, by looking for the string in each of the library search paths in turn; third, by looking for a lib<string>.a or lib<string>.so (depending on the current mode of the linker) in each of the library search paths. 5. A new command line option --no-dependent-libraries tells LLD to ignore the dependent libraries. Rationale for the above points: 1. Adding the dependent libraries last makes the process simple to understand from a developers perspective. All linkers are able to implement this scheme. 2. Error-ing for libraries that are not found seems like better behavior than failing the link during symbol resolution. 3. It seems useful for the user to be able to apply command line options which will affect all of the dependent libraries. There is a potential problem of surprise for developers, who might not realize that these options would apply to these "invisible" input files; however, despite the potential for surprise, this is easy for developers to reason about and gives developers the control that they may require. 4. This algorithm takes into account all of the different ways that ELF linkers find input files. The different search methods are tried by the linker in most obvious to least obvious order. 5. I considered adding finer grained control over which dependent libraries were ignored (e.g. MSVC has /nodefaultlib:<library>); however, I concluded that this is not necessary: if finer control is required developers can fall back to using the command line directly. RFC thread: http://lists.llvm.org/pipermail/llvm-dev/2019-March/131004.html. Differential Revision: https://reviews.llvm.org/D60274 llvm-svn: 360984
2019-05-17 11:44:15 +08:00
writeRange(Hdr.DependentLibraries, DependentLibraries);
*reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
return Error::success();
}
} // end anonymous namespace
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
StringTableBuilder &StrtabBuilder,
BumpPtrAllocator &Alloc) {
return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
}
// Upgrade a vector of bitcode modules created by an old version of LLVM by
// creating an irsymtab for them in the current format.
static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {
FileContents FC;
LLVMContext Ctx;
std::vector<Module *> Mods;
std::vector<std::unique_ptr<Module>> OwnedMods;
for (auto BM : BMs) {
Expected<std::unique_ptr<Module>> MOrErr =
BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
/*IsImporting*/ false);
if (!MOrErr)
return MOrErr.takeError();
Mods.push_back(MOrErr->get());
OwnedMods.push_back(std::move(*MOrErr));
}
StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
BumpPtrAllocator Alloc;
if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc))
return std::move(E);
StrtabBuilder.finalizeInOrder();
FC.Strtab.resize(StrtabBuilder.getSize());
StrtabBuilder.write((uint8_t *)FC.Strtab.data());
FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},
{FC.Strtab.data(), FC.Strtab.size()}};
return std::move(FC);
}
Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
if (BFC.Mods.empty())
return make_error<StringError>("Bitcode file does not contain any modules",
inconvertibleErrorCode());
if (BFC.StrtabForSymtab.empty() ||
BFC.Symtab.size() < sizeof(storage::Header))
return upgrade(BFC.Mods);
// We cannot use the regular reader to read the version and producer, because
// it will expect the header to be in the current format. The only thing we
// can rely on is that the version and producer will be present as the first
// struct elements.
auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());
unsigned Version = Hdr->Version;
StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab);
if (Version != storage::Header::kCurrentVersion ||
Producer != kExpectedProducerName)
return upgrade(BFC.Mods);
FileContents FC;
FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},
{BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};
// Finally, make sure that the number of modules in the symbol table matches
// the number of modules in the bitcode file. If they differ, it may mean that
// the bitcode file was created by binary concatenation, so we need to create
// a new symbol table from scratch.
if (FC.TheReader.getNumModules() != BFC.Mods.size())
return upgrade(std::move(BFC.Mods));
return std::move(FC);
}