llvm-project/llvm/lib/LTO/LTOModule.cpp

665 lines
21 KiB
C++

//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Link Time Optimization library. This library is
// intended to be used by the linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include <system_error>
using namespace llvm;
using namespace llvm::object;
/// Build an LTOModule around an already-parsed module.
///
/// \param M     The module; it is moved into the Mod member.
/// \param MBRef Reference to the buffer the module came from; stored as-is.
/// \param TM    Target machine pointer; stored in _target.
///
/// After the members are initialized the module is registered with SymTab.
LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
                     llvm::TargetMachine *TM)
    : Mod(std::move(M)), MBRef(MBRef), _target(TM) {
  // M has been moved from by now, so go through the member.
  Module *Registered = Mod.get();
  SymTab.addModule(Registered);
}
/// Out-of-line destructor; there is no custom teardown logic.
LTOModule::~LTOModule() = default;
/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
if (!BCData) {
consumeError(BCData.takeError());
return false;
}
return true;
}
/// Returns 'true' if the file at \p Path can be read and
/// IRObjectFile::findBitcodeInMemBuffer succeeds on its contents. Any
/// failure (unreadable file or no bitcode found) yields 'false'.
bool LTOModule::isBitcodeFile(StringRef Path) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFile(Path);
  if (!FileOrErr)
    return false;

  const MemoryBuffer &File = **FileOrErr;
  Expected<MemoryBufferRef> Found =
      IRObjectFile::findBitcodeInMemBuffer(File.getMemBufferRef());
  if (Found)
    return true;
  // The failure reason is of no interest here; discard it explicitly.
  consumeError(Found.takeError());
  return false;
}
bool LTOModule::isThinLTO() {
Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef);
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "");
return false;
}
return Result->IsThinLTO;
}
/// Returns 'true' if \p Buffer contains bitcode whose target triple starts
/// with \p TriplePrefix. Returns 'false' when no bitcode is found or the
/// triple cannot be read.
bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
                                   StringRef TriplePrefix) {
  Expected<MemoryBufferRef> Bitcode =
      IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
  if (!Bitcode) {
    consumeError(Bitcode.takeError());
    return false;
  }

  // A throwaway context is used only to receive diagnostics from the query.
  LLVMContext Context;
  ErrorOr<std::string> TripleOrErr =
      expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*Bitcode));
  if (TripleOrErr) {
    const StringRef ModuleTriple(*TripleOrErr);
    return ModuleTriple.startswith(TriplePrefix);
  }
  return false;
}
/// Returns the producer string read from the bitcode in \p Buffer, or an
/// empty string when no bitcode is found or the string cannot be read.
std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
  Expected<MemoryBufferRef> Bitcode =
      IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
  if (!Bitcode) {
    consumeError(Bitcode.takeError());
    return std::string();
  }

  // A throwaway context is used only to receive diagnostics from the query.
  LLVMContext Context;
  ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors(
      Context, getBitcodeProducerString(*Bitcode));
  if (ProducerOrErr)
    return *ProducerOrErr;
  return std::string();
}
/// Read the file at \p path and build a fully parsed (non-lazy) LTOModule
/// from it in \p Context.
///
/// If the file cannot be read, the error message is emitted on \p Context and
/// the error code is returned.
///
/// NOTE(review): the file buffer is released when this function returns,
/// while the LTOModule constructor keeps the MemoryBufferRef it was given and
/// isThinLTO() reads it later -- confirm this cannot dangle.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::createFromFile(LLVMContext &Context, StringRef path,
                          const TargetOptions &options) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFile(path);
  if (!FileOrErr) {
    std::error_code EC = FileOrErr.getError();
    Context.emitError(EC.message());
    return EC;
  }

  std::unique_ptr<MemoryBuffer> File = std::move(*FileOrErr);
  return makeLTOModule(File->getMemBufferRef(), options, Context,
                       /* ShouldBeLazy */ false);
}
/// Build an LTOModule from an already-open file descriptor by delegating to
/// createFromOpenFileSlice with an offset of zero and \p size as the map size.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path,
                              size_t size, const TargetOptions &options) {
  const off_t StartOfFile = 0;
  return createFromOpenFileSlice(Context, fd, path, size, StartOfFile,
                                 options);
}
/// Build a fully parsed (non-lazy) LTOModule from \p map_size bytes at
/// \p offset of the open file descriptor \p fd.
///
/// If the slice cannot be read, the error message is emitted on \p Context
/// and the error code is returned.
///
/// NOTE(review): the slice buffer is released when this function returns,
/// while the LTOModule constructor keeps the MemoryBufferRef it was given and
/// isThinLTO() reads it later -- confirm this cannot dangle.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
                                   size_t map_size, off_t offset,
                                   const TargetOptions &options) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> SliceOrErr =
      MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
  if (!SliceOrErr) {
    std::error_code EC = SliceOrErr.getError();
    Context.emitError(EC.message());
    return EC;
  }

  std::unique_ptr<MemoryBuffer> Slice = std::move(*SliceOrErr);
  return makeLTOModule(Slice->getMemBufferRef(), options, Context,
                       /* ShouldBeLazy */ false);
}
/// Build a fully parsed (non-lazy) LTOModule from the \p length bytes at
/// \p mem, using \p path as the buffer identifier. The memory is referenced,
/// not copied, by this function.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::createFromBuffer(LLVMContext &Context, const void *mem,
                            size_t length, const TargetOptions &options,
                            StringRef path) {
  const StringRef Bytes(static_cast<const char *>(mem), length);
  return makeLTOModule(MemoryBufferRef(Bytes, path), options, Context,
                       /* ShouldBeLazy */ false);
}
/// Build a lazily parsed LTOModule from the \p length bytes at \p mem inside
/// the caller-supplied \p Context, and hand ownership of that context to the
/// resulting module.
///
/// \returns the module on success (with OwnedContext set), or the error
/// produced by makeLTOModule. If no module was produced, \p Context is
/// destroyed when this function returns.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::createInLocalContext(std::unique_ptr<LLVMContext> Context,
                                const void *mem, size_t length,
                                const TargetOptions &options, StringRef path) {
  StringRef Data(static_cast<const char *>(mem), length);
  MemoryBufferRef Buffer(Data, path);
  // If we own a context, we know this is being used only for symbol extraction,
  // not linking. Be lazy in that case.
  ErrorOr<std::unique_ptr<LTOModule>> Ret =
      makeLTOModule(Buffer, options, *Context, /* ShouldBeLazy */ true);
  // Guard against a success value that holds a null module pointer before
  // dereferencing it.
  if (Ret && *Ret)
    (*Ret)->OwnedContext = std::move(Context);
  return Ret;
}
/// Locate the bitcode inside \p Buffer and parse it into a Module.
///
/// \param ShouldBeLazy when true the module is obtained through
///        getLazyBitcodeModule (with lazy metadata loading requested);
///        otherwise parseBitcodeFile is used.
/// \returns the module, or an error code. Errors are emitted on \p Context.
static ErrorOr<std::unique_ptr<Module>>
parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context,
                     bool ShouldBeLazy) {
  // Find the buffer.
  Expected<MemoryBufferRef> Bitcode =
      IRObjectFile::findBitcodeInMemBuffer(Buffer);
  if (!Bitcode) {
    std::error_code EC = errorToErrorCode(Bitcode.takeError());
    Context.emitError(EC.message());
    return EC;
  }

  if (ShouldBeLazy) {
    // Parse lazily.
    return expectedToErrorOrAndEmitErrors(
        Context, getLazyBitcodeModule(*Bitcode, Context,
                                      true /*ShouldLazyLoadMetadata*/));
  }

  // Parse the full file.
  return expectedToErrorOrAndEmitErrors(Context,
                                        parseBitcodeFile(*Bitcode, Context));
}
/// Parse \p Buffer into a Module, create a TargetMachine for its triple and
/// wrap both in a new LTOModule whose symbols and metadata have been parsed.
///
/// \param Buffer       Reference to the memory holding the bitcode.
/// \param options      Target options forwarded to createTargetMachine.
/// \param Context      Context the module is parsed into.
/// \param ShouldBeLazy Forwarded to parseBitcodeFileImpl.
/// \returns the new module, the parse error, or
///          object_error::arch_not_found when no registered target (or no
///          target machine) is available for the module's triple. A
///          successful result never holds a null pointer.
ErrorOr<std::unique_ptr<LTOModule>>
LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
                         LLVMContext &Context, bool ShouldBeLazy) {
  ErrorOr<std::unique_ptr<Module>> MOrErr =
      parseBitcodeFileImpl(Buffer, Context, ShouldBeLazy);
  if (std::error_code EC = MOrErr.getError())
    return EC;
  std::unique_ptr<Module> &M = *MOrErr;

  // Fall back to the host's default triple when the module does not name one.
  std::string TripleStr = M->getTargetTriple();
  if (TripleStr.empty())
    TripleStr = sys::getDefaultTargetTriple();
  llvm::Triple Triple(TripleStr);

  // find machine architecture for this module
  std::string errMsg;
  const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
  if (!march) {
    // Report a real error instead of a "successful" null module, which
    // callers would dereference.
    return make_error_code(object::object_error::arch_not_found);
  }

  // construct LTOModule, hand over ownership of module and target
  SubtargetFeatures Features;
  Features.getDefaultSubtargetFeatures(Triple);
  std::string FeatureStr = Features.getString();

  // Set a default CPU for Darwin triples.
  std::string CPU;
  if (Triple.isOSDarwin()) {
    if (Triple.getArch() == llvm::Triple::x86_64)
      CPU = "core2";
    else if (Triple.getArch() == llvm::Triple::x86)
      CPU = "yonah";
    else if (Triple.getArch() == llvm::Triple::aarch64)
      CPU = "cyclone";
  }

  TargetMachine *target =
      march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None);
  if (!target) {
    // parseMetadata() dereferences the target machine unconditionally, so a
    // module without one cannot be built.
    return make_error_code(object::object_error::arch_not_found);
  }

  std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target));
  Ret->parseSymbols();
  Ret->parseMetadata();
  // The explicit move is kept for the converting return into ErrorOr.
  return std::move(Ret);
}
/// Create a MemoryBuffer from a memory range with an optional name.
std::unique_ptr<MemoryBuffer>
LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
const char *startPtr = (const char*)mem;
return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
}
/// objcClassNameFromExpression - Get string that the data pointer points to.
bool
LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
Constant *op = ce->getOperand(0);
if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
Constant *cn = gvn->getInitializer();
if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
if (ca->isCString()) {
name = (".objc_class_name_" + ca->getAsCString()).str();
return true;
}
}
}
}
return false;
}
/// addObjCClass - Parse i386/ppc ObjC class data structure.
///
/// Records the superclass name (slot 1) as an undefined symbol and the class
/// name (slot 2) as a defined data symbol. Initializers that are not a
/// constant struct, or that have fewer than three slots, are ignored.
void LTOModule::addObjCClass(const GlobalVariable *clgv) {
  const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
  // Slots 1 and 2 are read below; skip structs too short to contain them
  // rather than indexing out of range.
  if (!c || c->getNumOperands() < 3)
    return;

  // second slot in __OBJC,__class is pointer to superclass name
  std::string superclassName;
  if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
    auto IterBool =
        _undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
    // Only fill in the entry the first time this name is seen.
    if (IterBool.second) {
      NameAndAttributes &info = IterBool.first->second;
      info.name = IterBool.first->first();
      info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
      info.isFunction = false;
      info.symbol = clgv;
    }
  }

  // third slot in __OBJC,__class is pointer to class name
  std::string className;
  if (objcClassNameFromExpression(c->getOperand(2), className)) {
    auto Iter = _defines.insert(className).first;
    NameAndAttributes info;
    info.name = Iter->first();
    info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
                      LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
    info.isFunction = false;
    info.symbol = clgv;
    _symbols.push_back(info);
  }
}
/// addObjCCategory - Parse i386/ppc ObjC category data structure.
///
/// Records the target class name (slot 1) as an undefined symbol. Initializers
/// that are not a constant struct, or that have fewer than two slots, are
/// ignored.
void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
  const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
  // Slot 1 is read below; skip structs too short to contain it rather than
  // indexing out of range.
  if (!c || c->getNumOperands() < 2)
    return;

  // second slot in __OBJC,__category is pointer to target class name
  std::string targetclassName;
  if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
    return;

  auto IterBool =
      _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
  // Only fill in the entry the first time this name is seen.
  if (!IterBool.second)
    return;

  NameAndAttributes &info = IterBool.first->second;
  info.name = IterBool.first->first();
  info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
  info.isFunction = false;
  info.symbol = clgv;
}
/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
std::string targetclassName;
if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
return;
auto IterBool =
_undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
if (!IterBool.second)
return;
NameAndAttributes &info = IterBool.first->second;
info.name = IterBool.first->first();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
}
/// Add the defined data symbol \p Sym: print its name through the symbol
/// table and forward the name together with the underlying GlobalValue to the
/// (name, value) overload.
void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) {
  SmallString<64> SymName;
  {
    raw_svector_ostream NameStream(SymName);
    SymTab.printSymbolName(NameStream, Sym);
    // Presumably forces a NUL terminator behind the name -- result unused.
    SymName.c_str();
  }
  addDefinedDataSymbol(SymName, Sym.get<GlobalValue *>());
}
/// Add \p v under \p Name to the defined symbols as a data symbol and, when
/// it is a global variable placed in one of the legacy "__OBJC" sections,
/// synthesize the implicit .objc_* symbols that go with it.
void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
  // Add to list of defined symbols.
  addDefinedSymbol(Name, v, false);

  // Everything below only concerns symbols with an explicit section.
  if (!v->hasSection() /* || !isTargetDarwin */)
    return;

  // Special case i386/ppc ObjC data structures in magic sections:
  // The issue is that the old ObjC object format did some strange
  // contortions to avoid real linker symbols. For instance, the
  // ObjC class data structure is allocated statically in the executable
  // that defines that class. That data structure contains a pointer to
  // its superclass. But instead of just initializing that part of the
  // struct to the address of its superclass, and letting the static and
  // dynamic linkers do the rest, the runtime works by having that field
  // instead point to a C-string that is the name of the superclass.
  // At runtime the objc initialization updates that pointer and sets
  // it to point to the actual super class. As far as the linker
  // knows it is just a pointer to a string. But then someone wanted the
  // linker to issue errors at build time if the superclass was not found.
  // So they figured out a way in mach-o object format to use absolute
  // symbols (.objc_class_name_Foo = 0) and a floating reference
  // (.reference .objc_class_name_Bar) to cause the linker to error out when
  // a class was missing.
  // The following synthesizes the implicit .objc_* symbols for the linker
  // from the ObjC data structures generated by the front end.
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(v);
  if (!GV)
    return;

  const StringRef Section = GV->getSection();
  if (Section.startswith("__OBJC,__class,")) {
    // this data blob is an ObjC class definition
    addObjCClass(GV);
  } else if (Section.startswith("__OBJC,__category,")) {
    // this data blob is an ObjC category definition
    addObjCCategory(GV);
  } else if (Section.startswith("__OBJC,__cls_refs,")) {
    // this data blob is the list of referenced classes
    addObjCClassRef(GV);
  }
}
/// Add the defined function symbol \p Sym: print its name through the symbol
/// table and forward the name together with the underlying Function to the
/// (name, function) overload.
void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) {
  SmallString<64> SymName;
  {
    raw_svector_ostream NameStream(SymName);
    SymTab.printSymbolName(NameStream, Sym);
    // Presumably forces a NUL terminator behind the name -- result unused.
    SymName.c_str();
  }
  addDefinedFunctionSymbol(SymName, cast<Function>(Sym.get<GlobalValue *>()));
}
/// Add \p F under \p Name to the list of defined symbols, flagged as a
/// function.
void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) {
  const bool IsFunction = true;
  addDefinedSymbol(Name, F, IsFunction);
}
/// Compute the LTO attribute word for the definition \p def and append a
/// symbol record named \p Name to the symbol table.
///
/// The attribute word combines, in this order: the alignment (as a count of
/// trailing zero bits), the permissions, the definition kind, the scope, and
/// the COMDAT / alias flags.
///
/// \param isFunction true when \p def is to be recorded as code.
void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def,
                                 bool isFunction) {
  // Alignment part: count trailing zeros instead of using log2(), which can
  // have rounding errors. An alignment of zero contributes nothing.
  uint32_t attr = 0;
  if (uint32_t align = def->getAlignment())
    attr = countTrailingZeros(align);

  // Permissions part.
  if (!isFunction) {
    const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
    attr |= (gv && gv->isConstant()) ? LTO_SYMBOL_PERMISSIONS_RODATA
                                     : LTO_SYMBOL_PERMISSIONS_DATA;
  } else {
    attr |= LTO_SYMBOL_PERMISSIONS_CODE;
  }

  // Definition part.
  lto_symbol_attributes Definition = LTO_SYMBOL_DEFINITION_REGULAR;
  if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
    Definition = LTO_SYMBOL_DEFINITION_WEAK;
  else if (def->hasCommonLinkage())
    Definition = LTO_SYMBOL_DEFINITION_TENTATIVE;
  attr |= Definition;

  // Scope part. Visibility is ignored if linkage is local.
  lto_symbol_attributes Scope = LTO_SYMBOL_SCOPE_DEFAULT;
  if (def->hasLocalLinkage())
    Scope = LTO_SYMBOL_SCOPE_INTERNAL;
  else if (def->hasHiddenVisibility())
    Scope = LTO_SYMBOL_SCOPE_HIDDEN;
  else if (def->hasProtectedVisibility())
    Scope = LTO_SYMBOL_SCOPE_PROTECTED;
  else if (canBeOmittedFromSymbolTable(def))
    Scope = LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
  attr |= Scope;

  // Extra flags.
  if (def->hasComdat())
    attr |= LTO_SYMBOL_COMDAT;
  if (isa<GlobalAlias>(def))
    attr |= LTO_SYMBOL_ALIAS;

  // The record's name refers to the key stored in the set of defines, not to
  // the caller's (possibly temporary) string.
  auto Slot = _defines.insert(Name).first;
  StringRef StoredName = Slot->first();
  assert(StoredName.data()[StoredName.size()] == '\0');

  // Fill the information structure and add it to the table of symbols.
  NameAndAttributes Record;
  Record.name = StoredName;
  Record.attributes = attr;
  Record.isFunction = isFunction;
  Record.symbol = def;
  _symbols.push_back(Record);
}
/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// defined list.
///
/// Nothing happens if \p name is already defined. Otherwise the entry for
/// \p name in the undefined map is looked up (created if absent): when it
/// already refers to an IR value, that value is added as a regular defined
/// symbol and the scope of the last symbol record is replaced by \p scope;
/// when it does not, a plain data symbol with \p scope is recorded.
void LTOModule::addAsmGlobalSymbol(StringRef name,
                                   lto_symbol_attributes scope) {
  auto Inserted = _defines.insert(name);
  // only add new define if not already defined
  if (!Inserted.second)
    return;

  NameAndAttributes &info = _undefines[Inserted.first->first()];
  if (info.symbol) {
    // The name was previously seen as a reference to an IR value.
    if (info.isFunction)
      addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
    else
      addDefinedDataSymbol(info.name, info.symbol);

    // Override the scope bits of the most recently added record.
    NameAndAttributes &Last = _symbols.back();
    Last.attributes &= ~LTO_SYMBOL_SCOPE_MASK;
    Last.attributes |= scope;
    return;
  }

  // FIXME: This is trying to take care of module ASM like this:
  //
  //   module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
  //
  // but is gross and its mother dresses it funny. Have the ASM parser give us
  // more details for this type of situation so that we're not guessing so
  // much.

  // fill information structure
  info.name = Inserted.first->first();
  info.attributes =
      LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
  info.isFunction = false;
  info.symbol = nullptr;

  // add to table of symbols
  _symbols.push_back(info);
}
/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// undefined list.
///
/// The name is always appended to the list of asm undefines; the entry in the
/// undefined map is only initialized when the name was not present before.
void LTOModule::addAsmGlobalSymbolUndef(StringRef name) {
  auto Inserted = _undefines.insert(std::make_pair(name, NameAndAttributes()));
  _asm_undefines.push_back(Inserted.first->first());

  // A false flag means we already have the symbol; leave it untouched.
  if (Inserted.second) {
    NameAndAttributes &Entry = Inserted.first->second;
    Entry.name = Inserted.first->first();
    Entry.attributes =
        LTO_SYMBOL_DEFINITION_UNDEFINED | LTO_SYMBOL_SCOPE_DEFAULT;
    Entry.isFunction = false;
    Entry.symbol = nullptr;
  }
}
/// Add a symbol which isn't defined just yet to a list to be resolved later.
///
/// \param Sym    The symbol; it must wrap a GlobalValue (asserted).
/// \param isFunc Whether the symbol is to be recorded as a function.
///
/// If the name is already in the undefined map the existing entry is kept.
/// Otherwise the entry is marked weak-undefined for external-weak linkage and
/// plain undefined for anything else.
void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
                                            bool isFunc) {
  SmallString<64> name;
  {
    raw_svector_ostream OS(name);
    SymTab.printSymbolName(OS, Sym);
    // Presumably forces a NUL terminator behind the name -- result unused.
    name.c_str();
  }

  auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));

  // we already have the symbol
  if (!IterBool.second)
    return;

  NameAndAttributes &info = IterBool.first->second;
  info.name = IterBool.first->first();

  const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>();
  // The pointer is dereferenced right below, so state the precondition
  // explicitly instead of relying on a null-pointer crash.
  assert(decl && "undefined symbol must be backed by a GlobalValue");

  if (decl->hasExternalWeakLinkage())
    info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
  else
    info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;

  info.isFunction = isFunc;
  info.symbol = decl;
}
void LTOModule::parseSymbols() {
for (auto Sym : SymTab.symbols()) {
auto *GV = Sym.dyn_cast<GlobalValue *>();
uint32_t Flags = SymTab.getSymbolFlags(Sym);
if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
continue;
bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
if (!GV) {
SmallString<64> Buffer;
{
raw_svector_ostream OS(Buffer);
SymTab.printSymbolName(OS, Sym);
Buffer.c_str();
}
StringRef Name(Buffer);
if (IsUndefined)
addAsmGlobalSymbolUndef(Name);
else if (Flags & object::BasicSymbolRef::SF_Global)
addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
else
addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
continue;
}
auto *F = dyn_cast<Function>(GV);
if (IsUndefined) {
addPotentialUndefinedSymbol(Sym, F != nullptr);
continue;
}
if (F) {
addDefinedFunctionSymbol(Sym);
continue;
}
if (isa<GlobalVariable>(GV)) {
addDefinedDataSymbol(Sym);
continue;
}
assert(isa<GlobalAlias>(GV));
addDefinedDataSymbol(Sym);
}
// make symbols for all undefines
for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
e = _undefines.end(); u != e; ++u) {
// If this symbol also has a definition, then don't make an undefine because
// it is a tentative definition.
if (_defines.count(u->getKey())) continue;
NameAndAttributes info = u->getValue();
_symbols.push_back(info);
}
}
/// parseMetadata - Parse metadata from the module
void LTOModule::parseMetadata() {
raw_string_ostream OS(LinkerOpts);
// Linker Options
if (NamedMDNode *LinkerOptions =
getModule().getNamedMetadata("llvm.linker.options")) {
for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
MDNode *MDOptions = LinkerOptions->getOperand(i);
for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
OS << " " << MDOption->getString();
}
}
}
// Globals - we only need to do this for COFF.
const Triple TT(_target->getTargetTriple());
if (!TT.isOSBinFormatCOFF())
return;
Mangler M;
for (const NameAndAttributes &Sym : _symbols) {
if (!Sym.symbol)
continue;
emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M);
}
// Add other interesting metadata here.
}