forked from OSchip/llvm-project
351 lines
12 KiB
C++
351 lines
12 KiB
C++
//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BinaryContext.h"
|
|
#include "BinaryFunction.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
|
#include "llvm/MC/MCContext.h"
|
|
#include "llvm/MC/MCSymbol.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
namespace opts {
|
|
|
|
static cl::opt<bool>
|
|
PrintDebugInfo("print-debug-info",
|
|
cl::desc("print debug info when printing functions"),
|
|
cl::Hidden);
|
|
|
|
} // namespace opts
|
|
|
|
// Out-of-line destructor definition; it performs no custom teardown.
BinaryContext::~BinaryContext() = default;
|
|
|
|
/// Return the symbol associated with \p Address, creating it when needed.
///
/// If a name is already registered for \p Address in GlobalAddresses, that
/// name is reused. Otherwise a new name of the form <Prefix>0x<hex address>
/// is generated and registered. A symbol that had to be created in the
/// MCContext is also recorded in GlobalSymbols.
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
                                                 Twine Prefix) {
  std::string Name;
  const auto Registered = GlobalAddresses.find(Address);
  if (Registered == GlobalAddresses.end()) {
    // Nothing is known at this address yet: synthesize a name and remember it.
    Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
    assert(GlobalSymbols.find(Name) == GlobalSymbols.end() &&
           "created name is not unique");
    GlobalAddresses.emplace(std::make_pair(Address, Name));
  } else {
    // Several names may be registered at the same address; only the first
    // one found is used.
    Name = Registered->second;
  }

  // Reuse the symbol if the context already knows this name.
  if (MCSymbol *Existing = Ctx->lookupSymbol(Name))
    return Existing;

  MCSymbol *Created = Ctx->getOrCreateSymbol(Name);
  GlobalSymbols[Name] = Address;
  return Created;
}
|
|
|
|
/// Write every GlobalSymbols entry to \p OS, one "(name -> address)" pair per
/// line.
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
  for (auto SI = GlobalSymbols.begin(), SE = GlobalSymbols.end(); SI != SE;
       ++SI)
    OS << "(" << SI->first << " -> " << SI->second << ")\n";
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|
|
|
|
namespace {
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
/// Returns the binary function that contains a given address in the input
|
|
/// binary, or nullptr if none does.
|
|
BinaryFunction *getBinaryFunctionContainingAddress(
|
|
uint64_t Address,
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
auto It = BinaryFunctions.upper_bound(Address);
|
|
if (It != BinaryFunctions.begin()) {
|
|
--It;
|
|
if (It->first + It->second.getSize() > Address) {
|
|
return &It->second;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Traverses the DIE tree in a recursive depth-first search and finds lexical
|
|
// blocks and instances of inlined subroutines, saving them in
|
|
// AddressRangesObjects.
|
|
void findAddressRangesObjects(
|
|
const DWARFCompileUnit *Unit,
|
|
const DWARFDebugInfoEntryMinimal *DIE,
|
|
std::map<uint64_t, BinaryFunction> &Functions,
|
|
std::vector<llvm::bolt::AddressRangesDWARFObject> &AddressRangesObjects) {
|
|
auto Tag = DIE->getTag();
|
|
if (Tag == dwarf::DW_TAG_lexical_block ||
|
|
Tag == dwarf::DW_TAG_inlined_subroutine ||
|
|
Tag == dwarf::DW_TAG_try_block ||
|
|
Tag == dwarf::DW_TAG_catch_block) {
|
|
auto const &Ranges = DIE->getAddressRanges(Unit);
|
|
if (!Ranges.empty()) {
|
|
// We have to process all ranges, even for functions that we are not
|
|
// updating. The primary reason is that abbrev entries are shared
|
|
// and if we convert one DIE, it may affect the rest. Thus
|
|
// the conservative approach that does not involve expanding
|
|
// .debug_abbrev, is to switch all DIEs to use .debug_ranges, even if
|
|
// they use a single [a,b) range. The secondary reason is that it allows
|
|
// us to get rid of the original portion of .debug_ranges to save
|
|
// space in the binary.
|
|
auto Function = getBinaryFunctionContainingAddress(Ranges.front().first,
|
|
Functions);
|
|
AddressRangesObjects.emplace_back(Unit, DIE);
|
|
auto &Object = AddressRangesObjects.back();
|
|
for (const auto &Range : Ranges) {
|
|
if (Function && Function->isSimple()) {
|
|
Object.addAddressRange(*Function, Range.first, Range.second);
|
|
} else {
|
|
Object.addAbsoluteRange(Range.first, Range.second);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Recursively visit each child.
|
|
for (auto Child = DIE->getFirstChild(); Child; Child = Child->getSibling()) {
|
|
findAddressRangesObjects(Unit, Child, Functions, AddressRangesObjects);
|
|
}
|
|
}
|
|
|
|
/// Recursively finds DWARF DW_TAG_subprogram DIEs and match them with
|
|
/// BinaryFunctions. Record DIEs for unknown subprograms (mostly functions that
|
|
/// are never called and removed from the binary) in Unknown.
|
|
void findSubprograms(DWARFCompileUnit *Unit,
|
|
const DWARFDebugInfoEntryMinimal *DIE,
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions,
|
|
BinaryContext::DIECompileUnitVector &Unknown) {
|
|
if (DIE->isSubprogramDIE()) {
|
|
// TODO: handle DW_AT_ranges.
|
|
uint64_t LowPC, HighPC;
|
|
if (DIE->getLowAndHighPC(Unit, LowPC, HighPC)) {
|
|
auto It = BinaryFunctions.find(LowPC);
|
|
if (It != BinaryFunctions.end()) {
|
|
It->second.addSubprogramDIE(Unit, DIE);
|
|
} else {
|
|
Unknown.emplace_back(DIE, Unit);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (auto ChildDIE = DIE->getFirstChild();
|
|
ChildDIE != nullptr && !ChildDIE->isNULL();
|
|
ChildDIE = ChildDIE->getSibling()) {
|
|
findSubprograms(Unit, ChildDIE, BinaryFunctions, Unknown);
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
void BinaryContext::preprocessDebugInfo(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
// Populate MCContext with DWARF files.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto CUID = CU->getOffset();
|
|
auto LineTable = DwCtx->getLineTableForUnit(CU.get());
|
|
const auto &FileNames = LineTable->Prologue.FileNames;
|
|
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
// means empty dir.
|
|
const char *Dir = FileNames[I].DirIdx ?
|
|
LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1] :
|
|
"";
|
|
Ctx->getDwarfFile(Dir, FileNames[I].Name, I + 1, CUID);
|
|
}
|
|
}
|
|
|
|
// For each CU, iterate over its children DIEs and match subprogram DIEs to
|
|
// BinaryFunctions.
|
|
for (auto &CU : DwCtx->compile_units()) {
|
|
findSubprograms(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
|
|
UnknownFunctions);
|
|
}
|
|
|
|
// Some functions may not have a corresponding subprogram DIE
|
|
// yet they will be included in some CU and will have line number information.
|
|
// Hence we need to associate them with the CU and include in CU ranges.
|
|
for (auto &AddrFunctionPair : BinaryFunctions) {
|
|
auto FunctionAddress = AddrFunctionPair.first;
|
|
auto &Function = AddrFunctionPair.second;
|
|
if (!Function.getSubprogramDIEs().empty())
|
|
continue;
|
|
if (auto DebugAranges = DwCtx->getDebugAranges()) {
|
|
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
|
|
if (CUOffset != -1U) {
|
|
Function.addSubprogramDIE(DwCtx->getCompileUnitForOffset(CUOffset),
|
|
nullptr);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
#ifdef DWARF_LOOKUP_ALL_RANGES
|
|
// Last resort - iterate over all compile units. This should not happen
|
|
// very often. If it does, we need to create a separate lookup table
|
|
// similar to .debug_aranges internally. This slows down processing
|
|
// considerably.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
const auto *CUDie = CU->getUnitDIE();
|
|
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
|
|
if (FunctionAddress >= Range.first &&
|
|
FunctionAddress < Range.second) {
|
|
Function.addSubprogramDIE(CU.get(), nullptr);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void BinaryContext::preprocessFunctionDebugInfo(
|
|
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
|
|
// Iterate over DIE trees finding objects that contain address ranges.
|
|
for (const auto &CU : DwCtx->compile_units()) {
|
|
findAddressRangesObjects(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
|
|
AddressRangesObjects);
|
|
}
|
|
|
|
// Iterate over location lists and save them in LocationLists.
|
|
auto DebugLoc = DwCtx->getDebugLoc();
|
|
for (const auto &DebugLocEntry : DebugLoc->getLocationLists()) {
|
|
if (DebugLocEntry.Entries.empty())
|
|
continue;
|
|
auto StartAddress = DebugLocEntry.Entries.front().Begin;
|
|
auto *Function = getBinaryFunctionContainingAddress(StartAddress,
|
|
BinaryFunctions);
|
|
if (!Function || !Function->isSimple())
|
|
continue;
|
|
LocationLists.emplace_back(DebugLocEntry.Offset);
|
|
auto &LocationList = LocationLists.back();
|
|
for (const auto &Location : DebugLocEntry.Entries) {
|
|
LocationList.addLocation(&Location.Loc, *Function, Location.Begin,
|
|
Location.End);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Print the name of the CFI operation \p Operation to \p OS.
///
/// \p Operation is compared against the MCCFIInstruction operation
/// enumerators and the matching enumerator name is written. Any other value
/// is printed as "Op#" followed by the numeric code.
void BinaryContext::printCFI(raw_ostream &OS, uint32_t Operation) {
  switch(Operation) {
  case MCCFIInstruction::OpSameValue:        OS << "OpSameValue";       break;
  case MCCFIInstruction::OpRememberState:    OS << "OpRememberState";   break;
  case MCCFIInstruction::OpRestoreState:     OS << "OpRestoreState";    break;
  case MCCFIInstruction::OpOffset:           OS << "OpOffset";          break;
  case MCCFIInstruction::OpDefCfaRegister:   OS << "OpDefCfaRegister";  break;
  case MCCFIInstruction::OpDefCfaOffset:     OS << "OpDefCfaOffset";    break;
  case MCCFIInstruction::OpDefCfa:           OS << "OpDefCfa";          break;
  case MCCFIInstruction::OpRelOffset:        OS << "OpRelOffset";       break;
  // The label previously read "OfAdjustCfaOffset"; it now matches the
  // enumerator name like every other case.
  case MCCFIInstruction::OpAdjustCfaOffset:  OS << "OpAdjustCfaOffset"; break;
  case MCCFIInstruction::OpEscape:           OS << "OpEscape";          break;
  case MCCFIInstruction::OpRestore:          OS << "OpRestore";         break;
  case MCCFIInstruction::OpUndefined:        OS << "OpUndefined";       break;
  case MCCFIInstruction::OpRegister:         OS << "OpRegister";        break;
  case MCCFIInstruction::OpWindowSave:       OS << "OpWindowSave";      break;
  case MCCFIInstruction::OpGnuArgsSize:      OS << "OpGnuArgsSize";     break;
  default:                                   OS << "Op#" << Operation;  break;
  }
}
|
|
|
|
/// Print one instruction to \p OS.
///
/// \param OS           destination stream.
/// \param Instruction  instruction to print.
/// \param Offset       value printed in hexadecimal in front of the
///                     instruction.
/// \param Function     optional owning function; needed to resolve CFI
///                     pseudo instructions and debug line information.
/// \param printMCInst  when true, also dump the MCInst representation on a
///                     following line.
///
/// EH labels and CFI pseudo instructions (the latter only when \p Function is
/// given) get a dedicated one-line form. Every other instruction goes through
/// the instruction printer and is followed by optional call annotations and,
/// if opts::PrintDebugInfo is set, its debug line.
void BinaryContext::printInstruction(raw_ostream &OS,
                                     const MCInst &Instruction,
                                     uint64_t Offset,
                                     const BinaryFunction* Function,
                                     bool printMCInst) const {
  // An EH label is printed as a label line and nothing else.
  if (MIA->isEHLabel(Instruction)) {
    const auto *LabelExpr =
        cast<MCSymbolRefExpr>(Instruction.getOperand(0).getExpr());
    OS << " EH_LABEL: " << LabelExpr->getSymbol() << '\n';
    return;
  }

  OS << format(" %08" PRIx64 ": ", Offset);

  // A CFI pseudo instruction prints its immediate operand and the name of the
  // CFI operation looked up in the owning function.
  if (Function && MIA->isCFI(Instruction)) {
    const uint32_t CFIOperand = Instruction.getOperand(0).getImm();
    OS << "\t!CFI\t$" << CFIOperand << "\t; ";
    printCFI(OS, Function->getCFIFor(Instruction)->getOperation());
    OS << "\n";
    return;
  }

  InstPrinter->printInst(&Instruction, OS, "", *STI);

  // Call annotations: tail-call marker and exception handling info.
  const bool IsCall = MIA->isCall(Instruction);
  if (IsCall && MIA->isTailCall(Instruction))
    OS << " # TAILCALL ";
  if (IsCall && MIA->isInvoke(Instruction)) {
    const MCSymbol *LP;
    uint64_t Action;
    std::tie(LP, Action) = MIA->getEHInfo(Instruction);
    OS << " # handler: ";
    if (LP)
      OS << *LP;
    else
      OS << '0';
    OS << "; action: " << Action;
    // A negative value is not printed.
    auto GnuArgsSize = MIA->getGnuArgsSize(Instruction);
    if (GnuArgsSize >= 0)
      OS << "; GNU_args_size = " << GnuArgsSize;
  }

  // The line table is consulted only when debug info printing was requested
  // and the owning function is known.
  const DWARFDebugLine::LineTable *LineTable = nullptr;
  if (Function && opts::PrintDebugInfo)
    LineTable = Function->getDWARFUnitLineTable().second;

  if (LineTable) {
    auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
    if (RowRef != DebugLineTableRowRef::NULL_ROW) {
      // Both the row index and the file number are offset by one when used
      // as array indices.
      const auto &Row = LineTable->Rows[RowRef.RowIndex - 1];
      const auto &File = LineTable->Prologue.FileNames[Row.File - 1];
      OS << " # debug line " << File.Name << ":" << Row.Line;
      if (Row.Column)
        OS << ":" << Row.Column;
    }
  }

  OS << "\n";

  if (printMCInst) {
    Instruction.dump_pretty(OS, InstPrinter.get());
    OS << "\n";
  }
}
|
|
|
|
/// Find the section in AllocatableSections whose [start, start + size)
/// interval covers \p Address.
///
/// Returns the section on success, or std::errc::bad_address as an error
/// code when no section contains the address.
ErrorOr<SectionRef> BinaryContext::getSectionForAddress(uint64_t Address) const{
  // The only section that can contain Address is the one that starts at or
  // before it, i.e. the entry preceding upper_bound().
  auto Candidate = AllocatableSections.upper_bound(Address);
  if (Candidate == AllocatableSections.begin())
    return std::make_error_code(std::errc::bad_address);
  --Candidate;

  if (Candidate->first + Candidate->second.getSize() > Address)
    return Candidate->second;

  return std::make_error_code(std::errc::bad_address);
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|