Object: Extract a ModuleSymbolTable class from IRObjectFile.

This class represents a symbol table built from in-memory IR. It provides
access to GlobalValues and should only be used if such access is required
(e.g. in the LTO implementation). We will eventually change IRObjectFile
to read from a bitcode symbol table rather than using ModuleSymbolTable,
so it would not be able to expose the module.

Differential Revision: https://reviews.llvm.org/D27073

llvm-svn: 288319
This commit is contained in:
Peter Collingbourne 2016-12-01 06:51:47 +00:00
parent 57f9b8c5b5
commit 863cbfbeba
8 changed files with 262 additions and 168 deletions

View File

@ -15,6 +15,7 @@
#define LLVM_OBJECT_IROBJECTFILE_H
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
namespace llvm {
@ -28,15 +29,7 @@ class ObjectFile;
class IRObjectFile : public SymbolicFile {
std::unique_ptr<Module> M;
std::unique_ptr<Mangler> Mang;
typedef std::pair<std::string, uint32_t> AsmSymbol;
SpecificBumpPtrAllocator<AsmSymbol> AsmSymbols;
typedef PointerUnion<GlobalValue *, AsmSymbol *> Sym;
std::vector<Sym> SymTab;
static Sym getSym(DataRefImpl &Symb) {
return *reinterpret_cast<Sym *>(Symb.p);
}
ModuleSymbolTable SymTab;
public:
IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> M);
@ -70,15 +63,6 @@ public:
/// error code if not found.
static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj);
/// Parse inline ASM and collect the symbols that are not defined in
/// the current module.
///
/// For each found symbol, call \p AsmUndefinedRefs with the name of the
/// symbol found and the associated flags.
static void CollectAsmUndefinedRefs(
const Triple &TheTriple, StringRef InlineAsm,
function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmUndefinedRefs);
/// \brief Finds and returns bitcode in the given memory buffer (which may
/// be either a bitcode file or a native object file with embedded bitcode),
/// or an error code if not found.

View File

@ -0,0 +1,61 @@
//===- ModuleSymbolTable.h - symbol table for in-memory IR ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class represents a symbol table built from in-memory IR. It provides
// access to GlobalValues and should only be used if such access is required
// (e.g. in the LTO implementation).
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OBJECT_MODULESYMBOLTABLE_H
#define LLVM_OBJECT_MODULESYMBOLTABLE_H
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Object/SymbolicFile.h"
#include <string>
#include <utility>
namespace llvm {
class GlobalValue;
class ModuleSymbolTable {
public:
typedef std::pair<std::string, uint32_t> AsmSymbol;
typedef PointerUnion<GlobalValue *, AsmSymbol *> Symbol;
private:
Module *FirstMod = nullptr;
SpecificBumpPtrAllocator<AsmSymbol> AsmSymbols;
std::vector<Symbol> SymTab;
Mangler Mang;
public:
ArrayRef<Symbol> symbols() const { return SymTab; }
void addModule(Module *M);
void printSymbolName(raw_ostream &OS, Symbol S) const;
uint32_t getSymbolFlags(Symbol S) const;
/// Parse inline ASM and collect the symbols that are defined or referenced in
/// the current module.
///
/// For each found symbol, call \p AsmSymbol with the name of the symbol found
/// and the associated flags.
static void CollectAsmSymbols(
const Triple &TheTriple, StringRef InlineAsm,
function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol);
};
}
#endif

View File

@ -269,9 +269,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Also, any values used but not defined within module level asm should
// be listed on the llvm.used or llvm.compiler.used global and marked as
// referenced from there.
// FIXME: Rename CollectAsmUndefinedRefs to something more general, as we
// are also using it to find the file-scope locals defined in module asm.
object::IRObjectFile::CollectAsmUndefinedRefs(
ModuleSymbolTable::CollectAsmSymbols(
Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
[&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.

View File

@ -280,7 +280,7 @@ static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
// Collect the list of undefined symbols used in asm and update
// llvm.compiler.used to prevent optimization to drop these from the output.
StringSet<> AsmUndefinedRefs;
object::IRObjectFile::CollectAsmUndefinedRefs(
ModuleSymbolTable::CollectAsmSymbols(
Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(),
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)

View File

@ -10,6 +10,7 @@ add_llvm_library(LLVMObject
MachOObjectFile.cpp
MachOUniversal.cpp
ModuleSummaryIndexObjectFile.cpp
ModuleSymbolTable.cpp
Object.cpp
ObjectFile.cpp
RecordStreamer.cpp

View File

@ -37,162 +37,27 @@ using namespace object;
IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
: SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
Mang.reset(new Mangler());
for (Function &F : *M)
SymTab.push_back(&F);
for (GlobalVariable &GV : M->globals())
SymTab.push_back(&GV);
for (GlobalAlias &GA : M->aliases())
SymTab.push_back(&GA);
CollectAsmUndefinedRefs(Triple(M->getTargetTriple()), M->getModuleInlineAsm(),
[this](StringRef Name, BasicSymbolRef::Flags Flags) {
SymTab.push_back(new (AsmSymbols.Allocate())
AsmSymbol(Name, Flags));
});
}
// Parse inline ASM and collect the list of symbols that are not defined in
// the current module. This is inspired from IRObjectFile.
void IRObjectFile::CollectAsmUndefinedRefs(
const Triple &TT, StringRef InlineAsm,
function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmUndefinedRefs) {
if (InlineAsm.empty())
return;
std::string Err;
const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
assert(T && T->hasMCAsmParser());
std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
if (!MRI)
return;
std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
if (!MAI)
return;
std::unique_ptr<MCSubtargetInfo> STI(
T->createMCSubtargetInfo(TT.str(), "", ""));
if (!STI)
return;
std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
if (!MCII)
return;
MCObjectFileInfo MOFI;
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx);
RecordStreamer Streamer(MCCtx);
T->createNullTargetStreamer(Streamer);
std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
SourceMgr SrcMgr;
SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI));
MCTargetOptions MCOptions;
std::unique_ptr<MCTargetAsmParser> TAP(
T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
if (!TAP)
return;
Parser->setTargetParser(*TAP);
if (Parser->Run(false))
return;
for (auto &KV : Streamer) {
StringRef Key = KV.first();
RecordStreamer::State Value = KV.second;
uint32_t Res = BasicSymbolRef::SF_None;
switch (Value) {
case RecordStreamer::NeverSeen:
llvm_unreachable("NeverSeen should have been replaced earlier");
case RecordStreamer::DefinedGlobal:
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::Defined:
break;
case RecordStreamer::Global:
case RecordStreamer::Used:
Res |= BasicSymbolRef::SF_Undefined;
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::DefinedWeak:
Res |= BasicSymbolRef::SF_Weak;
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::UndefinedWeak:
Res |= BasicSymbolRef::SF_Weak;
Res |= BasicSymbolRef::SF_Undefined;
}
AsmUndefinedRefs(Key, BasicSymbolRef::Flags(Res));
}
SymTab.addModule(M.get());
}
IRObjectFile::~IRObjectFile() {}
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) {
return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p);
}
void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
Symb.p += sizeof(Sym);
Symb.p += sizeof(ModuleSymbolTable::Symbol);
}
std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const {
Sym S = getSym(Symb);
if (S.is<AsmSymbol *>()) {
OS << S.get<AsmSymbol *>()->first;
return std::error_code();
}
auto *GV = S.get<GlobalValue *>();
if (GV->hasDLLImportStorageClass())
OS << "__imp_";
if (Mang)
Mang->getNameWithPrefix(OS, GV, false);
else
OS << GV->getName();
SymTab.printSymbolName(OS, getSym(Symb));
return std::error_code();
}
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
Sym S = getSym(Symb);
if (S.is<AsmSymbol *>())
return S.get<AsmSymbol *>()->second;
auto *GV = S.get<GlobalValue *>();
uint32_t Res = BasicSymbolRef::SF_None;
if (GV->isDeclarationForLinker())
Res |= BasicSymbolRef::SF_Undefined;
else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
Res |= BasicSymbolRef::SF_Hidden;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
if (GVar->isConstant())
Res |= BasicSymbolRef::SF_Const;
}
if (GV->hasPrivateLinkage())
Res |= BasicSymbolRef::SF_FormatSpecific;
if (!GV->hasLocalLinkage())
Res |= BasicSymbolRef::SF_Global;
if (GV->hasCommonLinkage())
Res |= BasicSymbolRef::SF_Common;
if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
GV->hasExternalWeakLinkage())
Res |= BasicSymbolRef::SF_Weak;
if (GV->getName().startswith("llvm."))
Res |= BasicSymbolRef::SF_FormatSpecific;
else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
if (Var->getSection() == "llvm.metadata")
Res |= BasicSymbolRef::SF_FormatSpecific;
}
return Res;
return SymTab.getSymbolFlags(getSym(Symb));
}
GlobalValue *IRObjectFile::getSymbolGV(DataRefImpl Symb) {
@ -203,13 +68,14 @@ std::unique_ptr<Module> IRObjectFile::takeModule() { return std::move(M); }
basic_symbol_iterator IRObjectFile::symbol_begin() const {
DataRefImpl Ret;
Ret.p = reinterpret_cast<uintptr_t>(SymTab.data());
Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data());
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
basic_symbol_iterator IRObjectFile::symbol_end() const {
DataRefImpl Ret;
Ret.p = reinterpret_cast<uintptr_t>(SymTab.data() + SymTab.size());
Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() +
SymTab.symbols().size());
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}

View File

@ -0,0 +1,184 @@
//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class represents a symbol table built from in-memory IR. It provides
// access to GlobalValues and should only be used if such access is required
// (e.g. in the LTO implementation).
//
//===----------------------------------------------------------------------===//
#include "llvm/Object/IRObjectFile.h"
#include "RecordStreamer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
void ModuleSymbolTable::addModule(Module *M) {
if (FirstMod)
assert(FirstMod->getTargetTriple() == M->getTargetTriple());
else
FirstMod = M;
for (Function &F : *M)
SymTab.push_back(&F);
for (GlobalVariable &GV : M->globals())
SymTab.push_back(&GV);
for (GlobalAlias &GA : M->aliases())
SymTab.push_back(&GA);
CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(),
[this](StringRef Name, BasicSymbolRef::Flags Flags) {
SymTab.push_back(new (AsmSymbols.Allocate())
AsmSymbol(Name, Flags));
});
}
void ModuleSymbolTable::CollectAsmSymbols(
const Triple &TT, StringRef InlineAsm,
function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
if (InlineAsm.empty())
return;
std::string Err;
const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
assert(T && T->hasMCAsmParser());
std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
if (!MRI)
return;
std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
if (!MAI)
return;
std::unique_ptr<MCSubtargetInfo> STI(
T->createMCSubtargetInfo(TT.str(), "", ""));
if (!STI)
return;
std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
if (!MCII)
return;
MCObjectFileInfo MOFI;
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx);
RecordStreamer Streamer(MCCtx);
T->createNullTargetStreamer(Streamer);
std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
SourceMgr SrcMgr;
SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI));
MCTargetOptions MCOptions;
std::unique_ptr<MCTargetAsmParser> TAP(
T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
if (!TAP)
return;
Parser->setTargetParser(*TAP);
if (Parser->Run(false))
return;
for (auto &KV : Streamer) {
StringRef Key = KV.first();
RecordStreamer::State Value = KV.second;
uint32_t Res = BasicSymbolRef::SF_None;
switch (Value) {
case RecordStreamer::NeverSeen:
llvm_unreachable("NeverSeen should have been replaced earlier");
case RecordStreamer::DefinedGlobal:
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::Defined:
break;
case RecordStreamer::Global:
case RecordStreamer::Used:
Res |= BasicSymbolRef::SF_Undefined;
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::DefinedWeak:
Res |= BasicSymbolRef::SF_Weak;
Res |= BasicSymbolRef::SF_Global;
break;
case RecordStreamer::UndefinedWeak:
Res |= BasicSymbolRef::SF_Weak;
Res |= BasicSymbolRef::SF_Undefined;
}
AsmSymbol(Key, BasicSymbolRef::Flags(Res));
}
}
void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const {
if (S.is<AsmSymbol *>()) {
OS << S.get<AsmSymbol *>()->first;
return;
}
auto *GV = S.get<GlobalValue *>();
if (GV->hasDLLImportStorageClass())
OS << "__imp_";
Mang.getNameWithPrefix(OS, GV, false);
}
uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
if (S.is<AsmSymbol *>())
return S.get<AsmSymbol *>()->second;
auto *GV = S.get<GlobalValue *>();
uint32_t Res = BasicSymbolRef::SF_None;
if (GV->isDeclarationForLinker())
Res |= BasicSymbolRef::SF_Undefined;
else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
Res |= BasicSymbolRef::SF_Hidden;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
if (GVar->isConstant())
Res |= BasicSymbolRef::SF_Const;
}
if (GV->hasPrivateLinkage())
Res |= BasicSymbolRef::SF_FormatSpecific;
if (!GV->hasLocalLinkage())
Res |= BasicSymbolRef::SF_Global;
if (GV->hasCommonLinkage())
Res |= BasicSymbolRef::SF_Common;
if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
GV->hasExternalWeakLinkage())
Res |= BasicSymbolRef::SF_Weak;
if (GV->getName().startswith("llvm."))
Res |= BasicSymbolRef::SF_FormatSpecific;
else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
if (Var->getSection() == "llvm.metadata")
Res |= BasicSymbolRef::SF_FormatSpecific;
}
return Res;
}

View File

@ -555,7 +555,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// Parse inline ASM and collect the list of symbols that are not defined in
// the current module.
StringSet<> AsmUndefinedRefs;
object::IRObjectFile::CollectAsmUndefinedRefs(
ModuleSymbolTable::CollectAsmSymbols(
Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)