forked from OSchip/llvm-project
531 lines
18 KiB
C++
531 lines
18 KiB
C++
//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BinaryContext.h"
|
|
#include "BinaryFunction.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
|
#include "llvm/MC/MCContext.h"
|
|
#include "llvm/MC/MCStreamer.h"
|
|
#include "llvm/MC/MCSymbol.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
namespace opts {
|
|
|
|
extern cl::OptionCategory BoltCategory;
|
|
|
|
static cl::opt<bool>
|
|
PrintDebugInfo("print-debug-info",
|
|
cl::desc("print debug info when printing functions"),
|
|
cl::Hidden,
|
|
cl::cat(BoltCategory));
|
|
|
|
static cl::opt<bool>
|
|
PrintRelocations("print-relocations",
|
|
cl::desc("print relocations when printing functions"),
|
|
cl::Hidden,
|
|
cl::cat(BoltCategory));
|
|
|
|
static cl::opt<bool>
|
|
PrintMemData("print-mem-data",
|
|
cl::desc("print memory data annotations when printing functions"),
|
|
cl::Hidden,
|
|
cl::cat(BoltCategory));
|
|
|
|
} // namespace opts
|
|
|
|
BinaryContext::~BinaryContext() { }
|
|
|
|
MCObjectWriter *BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
|
|
if (!MAB) {
|
|
MAB = std::unique_ptr<MCAsmBackend>(
|
|
TheTarget->createMCAsmBackend(*MRI, TripleName, ""));
|
|
}
|
|
|
|
return MAB->createObjectWriter(OS);
|
|
}
|
|
|
|
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
|
|
Twine Prefix) {
|
|
MCSymbol *Symbol{nullptr};
|
|
std::string Name;
|
|
auto NI = GlobalAddresses.find(Address);
|
|
if (NI != GlobalAddresses.end()) {
|
|
// Even though there could be multiple names registered at the address,
|
|
// we only use the first one.
|
|
Name = NI->second;
|
|
} else {
|
|
Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
|
|
assert(GlobalSymbols.find(Name) == GlobalSymbols.end() &&
|
|
"created name is not unique");
|
|
GlobalAddresses.emplace(std::make_pair(Address, Name));
|
|
}
|
|
|
|
Symbol = Ctx->lookupSymbol(Name);
|
|
if (Symbol)
|
|
return Symbol;
|
|
|
|
Symbol = Ctx->getOrCreateSymbol(Name);
|
|
GlobalSymbols[Name] = Address;
|
|
|
|
return Symbol;
|
|
}
|
|
|
|
MCSymbol *BinaryContext::getGlobalSymbolAtAddress(uint64_t Address) const {
|
|
auto NI = GlobalAddresses.find(Address);
|
|
if (NI == GlobalAddresses.end())
|
|
return nullptr;
|
|
|
|
auto *Symbol = Ctx->lookupSymbol(NI->second);
|
|
assert(Symbol && "symbol cannot be NULL at this point");
|
|
|
|
return Symbol;
|
|
}
|
|
|
|
MCSymbol *BinaryContext::getGlobalSymbolByName(const std::string &Name) const {
|
|
auto Itr = GlobalSymbols.find(Name);
|
|
return Itr == GlobalSymbols.end()
|
|
? nullptr : getGlobalSymbolAtAddress(Itr->second);
|
|
}
|
|
|
|
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
|
|
BinaryFunction &ParentBF,
|
|
std::map<uint64_t, BinaryFunction> &BFs) {
|
|
// Copy name list.
|
|
ParentBF.addNewNames(ChildBF.getNames());
|
|
|
|
// Update internal bookkeeping info.
|
|
for (auto &Name : ChildBF.getNames()) {
|
|
// Calls to functions are handled via symbols, and we keep the lookup table
|
|
// that we need to update.
|
|
auto *Symbol = Ctx->lookupSymbol(Name);
|
|
assert(Symbol && "symbol cannot be NULL at this point");
|
|
SymbolToFunctionMap[Symbol] = &ParentBF;
|
|
|
|
// NB: there's no need to update GlobalAddresses and GlobalSymbols.
|
|
}
|
|
|
|
// Merge execution counts of ChildBF into those of ParentBF.
|
|
ChildBF.mergeProfileDataInto(ParentBF);
|
|
|
|
if (HasRelocations) {
|
|
// Remove ChildBF from the global set of functions in relocs mode.
|
|
auto FI = BFs.find(ChildBF.getAddress());
|
|
assert(FI != BFs.end() && "function not found");
|
|
assert(&ChildBF == &FI->second && "function mismatch");
|
|
FI = BFs.erase(FI);
|
|
} else {
|
|
// In non-relocation mode we keep the function, but rename it.
|
|
std::string NewName = "__ICF_" + ChildBF.Names.back();
|
|
ChildBF.Names.clear();
|
|
ChildBF.Names.push_back(NewName);
|
|
ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName);
|
|
ChildBF.setFolded();
|
|
}
|
|
}
|
|
|
|
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
|
|
for (auto &Entry : GlobalSymbols) {
|
|
OS << "(" << Entry.first << " -> 0x"
|
|
<< Twine::utohexstr(Entry.second) << ")\n";
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
|
|
/// Recursively finds DWARF DW_TAG_subprogram DIEs and match them with
|
|
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
|
|
/// are never called and removed from the binary) in Unknown.
|
|
void findSubprograms(DWARFCompileUnit *Unit,
|
|
const DWARFDebugInfoEntryMinimal *DIE,
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
if (DIE->isSubprogramDIE()) {
|
|
// TODO: handle DW_AT_ranges.
|
|
uint64_t LowPC, HighPC;
|
|
if (DIE->getLowAndHighPC(Unit, LowPC, HighPC)) {
|
|
auto It = BinaryFunctions.find(LowPC);
|
|
if (It != BinaryFunctions.end()) {
|
|
It->second.addSubprogramDIE(Unit, DIE);
|
|
} else {
|
|
// The function must have been optimized away by GC.
|
|
}
|
|
} else {
|
|
const auto RangesVector = DIE->getAddressRanges(Unit);
|
|
if (!RangesVector.empty()) {
|
|
errs() << "BOLT-ERROR: split function detected in .debug_info. "
|
|
"Split functions are not supported.\n";
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (auto ChildDIE = DIE->getFirstChild();
|
|
ChildDIE != nullptr && !ChildDIE->isNULL();
|
|
ChildDIE = ChildDIE->getSibling()) {
|
|
findSubprograms(Unit, ChildDIE, BinaryFunctions);
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
|
|
const uint32_t SrcCUID,
|
|
unsigned FileIndex) {
|
|
auto SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
|
|
auto LineTable = DwCtx->getLineTableForUnit(SrcUnit);
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
// means empty dir.
|
|
assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
|
|
"FileIndex out of range for the compilation unit.");
|
|
const char *Dir = FileNames[FileIndex - 1].DirIdx ?
|
|
LineTable->Prologue.IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1] :
|
|
"";
|
|
return Ctx->getDwarfFile(Dir, FileNames[FileIndex - 1].Name, 0, DestCUID);
|
|
}
|
|
|
|
std::vector<BinaryFunction *> BinaryContext::getSortedFunctions(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
|
|
std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
|
|
SortedFunctions.begin(),
|
|
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
|
|
return &BFI.second;
|
|
});
|
|
|
|
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
|
|
[](const BinaryFunction *A, const BinaryFunction *B) {
|
|
if (A->hasValidIndex() && B->hasValidIndex()) {
|
|
return A->getIndex() < B->getIndex();
|
|
} else {
|
|
return A->hasValidIndex();
|
|
}
|
|
});
|
|
return SortedFunctions;
|
|
}
|
|
|
|
void BinaryContext::preprocessDebugInfo(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
// Populate MCContext with DWARF files.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto CUID = CU->getOffset();
|
|
auto LineTable = DwCtx->getLineTableForUnit(CU.get());
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
// means empty dir.
|
|
const char *Dir = FileNames[I].DirIdx ?
|
|
LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1] :
|
|
"";
|
|
Ctx->getDwarfFile(Dir, FileNames[I].Name, 0, CUID);
|
|
}
|
|
}
|
|
|
|
// For each CU, iterate over its children DIEs and match subprogram DIEs to
|
|
// BinaryFunctions.
|
|
for (auto &CU : DwCtx->compile_units()) {
|
|
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions);
|
|
}
|
|
|
|
// Some functions may not have a corresponding subprogram DIE
|
|
// yet they will be included in some CU and will have line number information.
|
|
// Hence we need to associate them with the CU and include in CU ranges.
|
|
for (auto &AddrFunctionPair : BinaryFunctions) {
|
|
auto FunctionAddress = AddrFunctionPair.first;
|
|
auto &Function = AddrFunctionPair.second;
|
|
if (!Function.getSubprogramDIEs().empty())
|
|
continue;
|
|
if (auto DebugAranges = DwCtx->getDebugAranges()) {
|
|
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
|
|
if (CUOffset != -1U) {
|
|
Function.addSubprogramDIE(DwCtx->getCompileUnitForOffset(CUOffset),
|
|
nullptr);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
#ifdef DWARF_LOOKUP_ALL_RANGES
|
|
// Last resort - iterate over all compile units. This should not happen
|
|
// very often. If it does, we need to create a separate lookup table
|
|
// similar to .debug_aranges internally. This slows down processing
|
|
// considerably.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto *CUDie = CU->getUnitDIE();
|
|
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
|
|
if (FunctionAddress >= Range.first &&
|
|
FunctionAddress < Range.second) {
|
|
Function.addSubprogramDIE(CU.get(), nullptr);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
|
|
uint32_t Operation = Inst.getOperation();
|
|
switch (Operation) {
|
|
case MCCFIInstruction::OpSameValue:
|
|
OS << "OpSameValue Reg" << Inst.getRegister();
|
|
break;
|
|
case MCCFIInstruction::OpRememberState:
|
|
OS << "OpRememberState";
|
|
break;
|
|
case MCCFIInstruction::OpRestoreState:
|
|
OS << "OpRestoreState";
|
|
break;
|
|
case MCCFIInstruction::OpOffset:
|
|
OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
|
|
break;
|
|
case MCCFIInstruction::OpDefCfaRegister:
|
|
OS << "OpDefCfaRegister Reg" << Inst.getRegister();
|
|
break;
|
|
case MCCFIInstruction::OpDefCfaOffset:
|
|
OS << "OpDefCfaOffset " << Inst.getOffset();
|
|
break;
|
|
case MCCFIInstruction::OpDefCfa:
|
|
OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
|
|
break;
|
|
case MCCFIInstruction::OpRelOffset:
|
|
OS << "OpRelOffset";
|
|
break;
|
|
case MCCFIInstruction::OpAdjustCfaOffset:
|
|
OS << "OfAdjustCfaOffset";
|
|
break;
|
|
case MCCFIInstruction::OpEscape:
|
|
OS << "OpEscape";
|
|
break;
|
|
case MCCFIInstruction::OpRestore:
|
|
OS << "OpRestore";
|
|
break;
|
|
case MCCFIInstruction::OpUndefined:
|
|
OS << "OpUndefined";
|
|
break;
|
|
case MCCFIInstruction::OpRegister:
|
|
OS << "OpRegister";
|
|
break;
|
|
case MCCFIInstruction::OpWindowSave:
|
|
OS << "OpWindowSave";
|
|
break;
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
|
OS << "OpGnuArgsSize";
|
|
break;
|
|
default:
|
|
OS << "Op#" << Operation;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void BinaryContext::printInstruction(raw_ostream &OS,
|
|
const MCInst &Instruction,
|
|
uint64_t Offset,
|
|
const BinaryFunction* Function,
|
|
bool PrintMCInst,
|
|
bool PrintMemData,
|
|
bool PrintRelocations) const {
|
|
if (MIA->isEHLabel(Instruction)) {
|
|
OS << " EH_LABEL: " << *MIA->getTargetSymbol(Instruction) << '\n';
|
|
return;
|
|
}
|
|
OS << format(" %08" PRIx64 ": ", Offset);
|
|
if (MIA->isCFI(Instruction)) {
|
|
uint32_t Offset = Instruction.getOperand(0).getImm();
|
|
OS << "\t!CFI\t$" << Offset << "\t; ";
|
|
if (Function)
|
|
printCFI(OS, *Function->getCFIFor(Instruction));
|
|
OS << "\n";
|
|
return;
|
|
}
|
|
InstPrinter->printInst(&Instruction, OS, "", *STI);
|
|
if (MIA->isCall(Instruction)) {
|
|
if (MIA->isTailCall(Instruction))
|
|
OS << " # TAILCALL ";
|
|
if (MIA->isInvoke(Instruction)) {
|
|
const MCSymbol *LP;
|
|
uint64_t Action;
|
|
std::tie(LP, Action) = MIA->getEHInfo(Instruction);
|
|
OS << " # handler: ";
|
|
if (LP)
|
|
OS << *LP;
|
|
else
|
|
OS << '0';
|
|
OS << "; action: " << Action;
|
|
auto GnuArgsSize = MIA->getGnuArgsSize(Instruction);
|
|
if (GnuArgsSize >= 0)
|
|
OS << "; GNU_args_size = " << GnuArgsSize;
|
|
}
|
|
}
|
|
if (MIA->isIndirectBranch(Instruction)) {
|
|
if (auto JTAddress = MIA->getJumpTable(Instruction)) {
|
|
OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
|
|
}
|
|
}
|
|
|
|
MIA->forEachAnnotation(
|
|
Instruction,
|
|
[&OS](const MCAnnotation *Annotation) {
|
|
OS << " # " << Annotation->getName() << ": ";
|
|
Annotation->print(OS);
|
|
}
|
|
);
|
|
|
|
const DWARFDebugLine::LineTable *LineTable =
|
|
Function && opts::PrintDebugInfo ? Function->getDWARFUnitLineTable().second
|
|
: nullptr;
|
|
|
|
if (LineTable) {
|
|
auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
|
|
|
|
if (RowRef != DebugLineTableRowRef::NULL_ROW) {
|
|
const auto &Row = LineTable->Rows[RowRef.RowIndex - 1];
|
|
OS << " # debug line "
|
|
<< LineTable->Prologue.FileNames[Row.File - 1].Name
|
|
<< ":" << Row.Line;
|
|
|
|
if (Row.Column) {
|
|
OS << ":" << Row.Column;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((opts::PrintMemData || PrintMemData) && Function) {
|
|
const auto *MD = Function->getMemData();
|
|
const auto MemDataOffset =
|
|
MIA->tryGetAnnotationAs<uint64_t>(Instruction, "MemDataOffset");
|
|
if (MD && MemDataOffset) {
|
|
bool DidPrint = false;
|
|
for (auto &MI : MD->getMemInfoRange(MemDataOffset.get())) {
|
|
OS << (DidPrint ? ", " : " # Loads: ");
|
|
OS << MI.Addr << "/" << MI.Count;
|
|
DidPrint = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((opts::PrintRelocations || PrintRelocations) && Function) {
|
|
const auto Size = computeCodeSize(&Instruction, &Instruction + 1);
|
|
Function->printRelocations(OS, Offset, Size);
|
|
}
|
|
|
|
OS << "\n";
|
|
|
|
if (PrintMCInst) {
|
|
Instruction.dump_pretty(OS, InstPrinter.get());
|
|
OS << "\n";
|
|
}
|
|
}
|
|
|
|
ErrorOr<ArrayRef<uint8_t>>
|
|
BinaryContext::getFunctionData(const BinaryFunction &Function) const {
|
|
auto &Section = Function.getSection();
|
|
assert(Section.containsRange(Function.getAddress(), Function.getSize()) &&
|
|
"wrong section for function");
|
|
|
|
if (!Section.isText() || Section.isVirtual() || !Section.getSize()) {
|
|
return std::make_error_code(std::errc::bad_address);
|
|
}
|
|
|
|
StringRef SectionContents = Section.getContents();
|
|
|
|
assert(SectionContents.size() == Section.getSize() &&
|
|
"section size mismatch");
|
|
|
|
// Function offset from the section start.
|
|
auto FunctionOffset = Function.getAddress() - Section.getAddress();
|
|
auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data());
|
|
return ArrayRef<uint8_t>(Bytes + FunctionOffset, Function.getSize());
|
|
}
|
|
|
|
ErrorOr<BinarySection&> BinaryContext::getSectionForAddress(uint64_t Address) {
|
|
auto SI = AddressToSection.upper_bound(Address);
|
|
if (SI != AddressToSection.begin()) {
|
|
--SI;
|
|
if (SI->first + SI->second->getSize() > Address)
|
|
return *SI->second;
|
|
}
|
|
return std::make_error_code(std::errc::bad_address);
|
|
}
|
|
|
|
ErrorOr<const BinarySection &>
|
|
BinaryContext::getSectionForAddress(uint64_t Address) const {
|
|
auto SI = AddressToSection.upper_bound(Address);
|
|
if (SI != AddressToSection.begin()) {
|
|
--SI;
|
|
if (SI->first + SI->second->getSize() > Address)
|
|
return *SI->second;
|
|
}
|
|
return std::make_error_code(std::errc::bad_address);
|
|
}
|
|
|
|
BinarySection &BinaryContext::registerSection(SectionRef Section) {
|
|
StringRef Name;
|
|
Section.getName(Name);
|
|
auto Res = Sections.insert(BinarySection(Section));
|
|
assert(Res.second && "can't register the same section twice.");
|
|
// Cast away const here because std::set always stores values by
|
|
// const. It's ok to do this because we can never change the
|
|
// BinarySection properties that affect set ordering.
|
|
auto *BS = const_cast<BinarySection *>(&*Res.first);
|
|
// Only register sections with addresses in the AddressToSection map.
|
|
if (Section.getAddress())
|
|
AddressToSection.insert(std::make_pair(Section.getAddress(), BS));
|
|
NameToSection.insert(std::make_pair(Name, BS));
|
|
return *BS;
|
|
}
|
|
|
|
ErrorOr<uint64_t>
|
|
BinaryContext::extractPointerAtAddress(uint64_t Address) const {
|
|
auto Section = getSectionForAddress(Address);
|
|
if (!Section)
|
|
return std::make_error_code(std::errc::bad_address);
|
|
|
|
StringRef SectionContents = Section->getContents();
|
|
DataExtractor DE(SectionContents,
|
|
AsmInfo->isLittleEndian(),
|
|
AsmInfo->getPointerSize());
|
|
uint32_t SectionOffset = Address - Section->getAddress();
|
|
return DE.getAddress(&SectionOffset);
|
|
}
|
|
|
|
void BinaryContext::addSectionRelocation(BinarySection &Section,
|
|
uint64_t Offset,
|
|
MCSymbol *Symbol,
|
|
uint64_t Type,
|
|
uint64_t Addend) {
|
|
Section.addRelocation(Offset, Symbol, Type, Addend);
|
|
}
|
|
|
|
void BinaryContext::addRelocation(uint64_t Address,
|
|
MCSymbol *Symbol,
|
|
uint64_t Type,
|
|
uint64_t Addend) {
|
|
auto Section = getSectionForAddress(Address);
|
|
assert(Section && "cannot find section for address");
|
|
Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend);
|
|
}
|
|
|
|
void BinaryContext::removeRelocationAt(uint64_t Address) {
|
|
auto Section = getSectionForAddress(Address);
|
|
assert(Section && "cannot find section for address");
|
|
Section->removeRelocationAt(Address - Section->getAddress());
|
|
}
|
|
|
|
const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
|
|
auto Section = getSectionForAddress(Address);
|
|
assert(Section && "cannot find section for address");
|
|
return Section->getRelocationAt(Address - Section->getAddress());
|
|
}
|