From 77a6b72842cd0ebfc3c7101ed446be2cdc60169f Mon Sep 17 00:00:00 2001 From: Gabriel Poesia Date: Thu, 25 Feb 2016 16:57:07 -0800 Subject: [PATCH] BOLT: Read and tie .debug_line info to IR. Summary: Reads information in the DWARF .debug_line section using LLVM and ties every MCInst to one line of a line table from the input binary. Subsequent diffs will update this information to match the final binary layout and output updated line tables. (cherry picked from FBD2989813) --- bolt/BinaryContext.cpp | 7 +++ bolt/BinaryContext.h | 24 ++++++- bolt/BinaryFunction.cpp | 115 ++++++++++++++++++++++++++++++++-- bolt/BinaryFunction.h | 19 +++++- bolt/CMakeLists.txt | 1 + bolt/DebugLineTableRowRef.cpp | 21 +++++++ bolt/DebugLineTableRowRef.h | 63 +++++++++++++++++++ bolt/RewriteInstance.cpp | 38 ++++++----- bolt/RewriteInstance.h | 1 - 9 files changed, 264 insertions(+), 25 deletions(-) create mode 100644 bolt/DebugLineTableRowRef.cpp create mode 100644 bolt/DebugLineTableRowRef.h diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp index 88e784ff7963..46ef7b046109 100644 --- a/bolt/BinaryContext.cpp +++ b/bolt/BinaryContext.cpp @@ -43,5 +43,12 @@ MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, return Symbol; } + +void BinaryContext::buildOffsetToDWARFCompileUnitMap() { + for (const auto &CU : DwCtx->compile_units()) { + OffsetToDwarfCU[CU->getOffset()] = CU.get(); + } +} + } // namespace bolt } // namespace llvm diff --git a/bolt/BinaryContext.h b/bolt/BinaryContext.h index f7d817aa1fe0..444492bf0d49 100644 --- a/bolt/BinaryContext.h +++ b/bolt/BinaryContext.h @@ -15,6 +15,8 @@ #define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H #include "llvm/ADT/Triple.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" @@ -55,8 +57,14 @@ public: // Set of addresses we cannot relocate because we have a direct branch to 
it. std::set InterproceduralBranchTargets; + // Map from offset in the .debug_info section of the binary to the + // DWARF Compilation Unit that starts at that offset. + std::map OffsetToDwarfCU; + std::unique_ptr Ctx; + std::unique_ptr DwCtx; + std::unique_ptr TheTriple; const Target *TheTarget; @@ -86,6 +94,7 @@ public: const DataReader &DR; BinaryContext(std::unique_ptr Ctx, + std::unique_ptr DwCtx, std::unique_ptr TheTriple, const Target *TheTarget, std::string TripleName, @@ -98,8 +107,10 @@ public: std::unique_ptr MIA, std::unique_ptr MRI, std::unique_ptr DisAsm, - const DataReader &DR) : + const DataReader &DR, + bool LoadDebugContext) : Ctx(std::move(Ctx)), + DwCtx(std::move(DwCtx)), TheTriple(std::move(TheTriple)), TheTarget(TheTarget), TripleName(TripleName), @@ -112,7 +123,11 @@ public: MIA(std::move(MIA)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), - DR(DR) {} + DR(DR) { + if (LoadDebugContext) { + buildOffsetToDWARFCompileUnitMap(); + } + } ~BinaryContext() {} @@ -121,6 +136,11 @@ public: /// If there are multiple symbols registered at the \p Address, then /// return the first one. 
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix); + +private: + // Iterates over all DWARF compilation units and maps their offset in the + // binary to themselves in OffsetToDwarfCU. + void buildOffsetToDWARFCompileUnitMap(); }; } // namespace bolt diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index b1aa3101b529..aa1febbab114 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -13,7 +13,9 @@ #include "BinaryBasicBlock.h" #include "BinaryFunction.h" #include "DataReader.h" +#include "DebugLineTableRowRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -38,8 +40,35 @@ namespace opts { static cl::opt PrintClusters("print-clusters", cl::desc("print clusters"), cl::Optional); +static cl::opt +PrintDebugInfo("print-debug-info", + cl::desc("print debug info when printing functions"), + cl::Hidden); + } // namespace opts +namespace { + +// Finds which DWARF compile unit owns an address in the executable by +// querying .debug_aranges. +DWARFCompileUnit *FindCompileUnitForAddress(uint64_t Address, + const BinaryContext &BC) { + auto DebugAranges = BC.DwCtx->getDebugAranges(); + if (!DebugAranges) + return nullptr; + + uint32_t CompileUnitIndex = DebugAranges->findAddress(Address); + + auto It = BC.OffsetToDwarfCU.find(CompileUnitIndex); + if (It == BC.OffsetToDwarfCU.end()) { + return nullptr; + } else { + return It->second; + } +} + +} // namespace + uint64_t BinaryFunction::Count = 0; BinaryBasicBlock * @@ -135,6 +164,15 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, } }; + // Used in printInstruction below to print debug line information. + DWARFCompileUnit *Unit = nullptr; + const DWARFDebugLine::LineTable *LineTable = nullptr; + + if (opts::PrintDebugInfo) { + Unit = FindCompileUnitForAddress(getAddress(), BC); + LineTable = Unit ? 
BC.DwCtx->getLineTableForUnit(Unit) : nullptr; + } + auto printInstruction = [&](const MCInst &Instruction) { if (BC.MIA->isEHLabel(Instruction)) { OS << " EH_LABEL: " @@ -168,6 +206,21 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, OS << "; action: " << Action; } } + if (opts::PrintDebugInfo && LineTable) { + auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); + + if (RowRef != DebugLineTableRowRef::NULL_ROW) { + const auto &Row = LineTable->Rows[RowRef.RowIndex]; + OS << " # debug line " + << LineTable->Prologue.FileNames[Row.File - 1].Name + << ":" << Row.Line; + + if (Row.Column) { + OS << ":" << Row.Column; + } + } + } + OS << "\n"; // In case we need MCInst printer: // Instr.dump_pretty(OS, InstructionPrinter.get()); @@ -294,12 +347,18 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation, OS << "End of Function \"" << getName() << "\"\n\n"; } -bool BinaryFunction::disassemble(ArrayRef FunctionData) { +bool BinaryFunction::disassemble(ArrayRef FunctionData, + bool ExtractDebugLineData) { assert(FunctionData.size() == getSize() && "function size does not match raw data size"); auto &Ctx = BC.Ctx; auto &MIA = BC.MIA; + DWARFCompileUnit *CompileUnit = nullptr; + + if (ExtractDebugLineData) { + CompileUnit = FindCompileUnitForAddress(getAddress(), BC); + } // Insert a label at the beginning of the function. This will be our first // basic block. @@ -335,16 +394,18 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { for (uint64_t Offset = 0; IsSimple && (Offset < getSize()); ) { MCInst Instruction; uint64_t Size; + uint64_t AbsoluteInstrAddr = getAddress() + Offset; + if (!BC.DisAsm->getInstruction(Instruction, Size, FunctionData.slice(Offset), - getAddress() + Offset, + AbsoluteInstrAddr, nulls(), nulls())) { // Ignore this function. Skip to the next one. 
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x" << Twine::utohexstr(Offset) << " (address 0x" - << Twine::utohexstr(getAddress() + Offset) << ") in function " + << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << getName() << '\n'; IsSimple = false; break; @@ -353,13 +414,12 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { if (MIA->isUnsupported(Instruction)) { errs() << "BOLT-WARNING: unsupported instruction seen at offset 0x" << Twine::utohexstr(Offset) << " (address 0x" - << Twine::utohexstr(getAddress() + Offset) << ") in function " + << Twine::utohexstr(AbsoluteInstrAddr) << ") in function " << getName() << '\n'; IsSimple = false; break; } - uint64_t AbsoluteInstrAddr = getAddress() + Offset; if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) { uint64_t InstructionTarget = 0; if (MIA->evaluateBranch(Instruction, @@ -476,6 +536,12 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { } } + if (CompileUnit) { + Instruction.setLoc( + findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, + CompileUnit)); + } + addInstruction(Offset, std::move(Instruction)); Offset += Size; @@ -491,6 +557,45 @@ bool BinaryFunction::disassemble(ArrayRef FunctionData) { return true; } +SMLoc +BinaryFunction::findDebugLineInformationForInstructionAt( + uint64_t Address, + DWARFCompileUnit *Unit) { + // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, + // which occupies 64 bits. Thus, we can only proceed if the struct fits into + // the pointer itself. 
+ assert( + sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) && + "Cannot fit instruction debug line information into SMLoc's pointer"); + + const DWARFDebugLine::LineTable *LineTable = + BC.DwCtx->getLineTableForUnit(Unit); + + SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); + + if (!LineTable) { + return NullResult; + } + + uint32_t RowIndex = LineTable->lookupAddress(Address); + + if (RowIndex == LineTable->UnknownRowIndex) { + return NullResult; + } + + assert(RowIndex < LineTable->Rows.size() && + "Line Table lookup returned invalid index."); + + decltype(SMLoc().getPointer()) Ptr; + DebugLineTableRowRef *InstructionLocation = + reinterpret_cast(&Ptr); + + InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); + InstructionLocation->RowIndex = RowIndex; + + return SMLoc::getFromPointer(Ptr); +} + bool BinaryFunction::buildCFG() { auto &MIA = BC.MIA; diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index ffb295ed55d4..1096a30ba0a3 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -58,7 +58,7 @@ public: enum LayoutType : char { /// LT_NONE - do not change layout of basic blocks LT_NONE = 0, /// no reordering - /// LT_REVERSE - reverse the order of basic blocks, meant for testing + /// LT_REVERSE - reverse the order of basic blocks, meant for testing /// purposes. The first basic block is left intact and the rest are /// put in the reverse order. LT_REVERSE, @@ -186,6 +186,14 @@ private: return *this; } + /// Gets debug line information for the instruction located at the given + /// address in the original binary. The SMLoc's pointer is used + /// to point to this information, which is represented by a + /// DebugLineTableRowRef. The returned pointer is null if no debug line + /// information for this instruction was found. 
+ SMLoc findDebugLineInformationForInstructionAt(uint64_t Address, + DWARFCompileUnit *Unit); + const BinaryBasicBlock * getOriginalLayoutSuccessor(const BinaryBasicBlock *BB) const; @@ -434,7 +442,7 @@ public: /// function and append it to the end of list of blocks. /// If \p DeriveAlignment is true, set the alignment of the block based /// on the alignment of the existing offset. - /// + /// /// Returns NULL if basic block already exists at the \p Offset. BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label, bool DeriveAlignment = false) { @@ -648,6 +656,10 @@ public: /// /// \p FunctionData is the set bytes representing the function body. /// + /// \p ExtractDebugLineData is a flag indicating whether DWARF .debug_line + /// information should be looked up and tied to each disassembled + /// instruction. + /// /// The Function should be properly initialized before this function /// is called. I.e. function address and size should be set. /// @@ -655,7 +667,8 @@ public: /// state to State:Disassembled. /// /// Returns false if disassembly failed. - bool disassemble(ArrayRef FunctionData); + bool disassemble(ArrayRef FunctionData, + bool ExtractDebugLineData = false); /// Builds a list of basic blocks with successor and predecessor info. /// diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index 2ee858fe5b85..7b25d1e27984 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_tool(llvm-bolt BinaryContext.cpp BinaryFunction.cpp DataReader.cpp + DebugLineTableRowRef.cpp Exceptions.cpp RewriteInstance.cpp ) diff --git a/bolt/DebugLineTableRowRef.cpp b/bolt/DebugLineTableRowRef.cpp new file mode 100644 index 000000000000..83ed5158e6cc --- /dev/null +++ b/bolt/DebugLineTableRowRef.cpp @@ -0,0 +1,21 @@ +//===--- DebugLineTableRowRef.cpp - Identifies a row in a .debug_line table ==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "DebugLineTableRowRef.h" + + +namespace llvm { +namespace bolt { + +const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{-1U, -1U}; + +} // namespace bolt +} // namespace llvm diff --git a/bolt/DebugLineTableRowRef.h b/bolt/DebugLineTableRowRef.h new file mode 100644 index 000000000000..66c1be5c43ff --- /dev/null +++ b/bolt/DebugLineTableRowRef.h @@ -0,0 +1,63 @@ +//===--- DebugLineTableRowRef.h - Identifies a row in a .debug_line table -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Class that references a row in a DWARFDebugLine::LineTable by the offset in +// .debug_info of the DWARF Compile Unit that owns the Line Table and the row +// index. This is tied to our IR during disassembly so that we can later update +// .debug_line information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_DEBUGLINETABLEROWREF_H +#define LLVM_TOOLS_LLVM_BOLT_DEBUGLINETABLEROWREF_H + +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +namespace bolt { + +struct DebugLineTableRowRef { + uint32_t DwCompileUnitIndex; + uint32_t RowIndex; + + const static DebugLineTableRowRef NULL_ROW; + + bool operator==(const DebugLineTableRowRef &Rhs) const { + return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && + RowIndex == Rhs.RowIndex; + } + + bool operator!=(const DebugLineTableRowRef &Rhs) const { + return !(*this == Rhs); + } + + static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { + union { + decltype(Loc.getPointer()) Ptr; + DebugLineTableRowRef Ref; + } U; + U.Ptr = Loc.getPointer(); + return U.Ref; + } + + SMLoc toSMLoc() const { + union { + decltype(SMLoc().getPointer()) Ptr; + DebugLineTableRowRef Ref; + } U; + U.Ref = *this; + return SMLoc::getFromPointer(U.Ptr); + } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp index 3eda5951b936..546f69ca6855 100644 --- a/bolt/RewriteInstance.cpp +++ b/bolt/RewriteInstance.cpp @@ -94,6 +94,11 @@ SplitFunctions("split-functions", cl::desc("split functions into hot and cold distinct regions"), cl::Optional); +static cl::opt +UpdateDebugSections("update-debug-sections", + cl::desc("update DWARF debug sections of the executable"), + cl::Optional); + static cl::opt ReorderBlocks( "reorder-blocks", @@ -258,7 +263,9 @@ bool ExecutableFileMemoryManager::finalizeMemory(std::string *ErrMsg) { /// triple \p TripleName. 
static std::unique_ptr CreateBinaryContext( std::string ArchName, - std::string TripleName, const DataReader &DR) { + std::string TripleName, + const DataReader &DR, + std::unique_ptr DwCtx) { std::string Error; @@ -343,6 +350,7 @@ static std::unique_ptr CreateBinaryContext( auto BC = llvm::make_unique(std::move(Ctx), + std::move(DwCtx), std::move(TheTriple), TheTarget, TripleName, @@ -355,15 +363,18 @@ static std::unique_ptr CreateBinaryContext( std::move(MIA), std::move(MRI), std::move(DisAsm), - DR); + DR, + opts::UpdateDebugSections); return BC; } RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const DataReader &DR) - : File(File), BC(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR)), - DwCtx(new DWARFContextInMemory(*File)) {} + : File(File), + BC(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR, + std::unique_ptr(new DWARFContextInMemory(*File)))) +{ } RewriteInstance::~RewriteInstance() {} @@ -371,8 +382,8 @@ void RewriteInstance::reset() { BinaryFunctions.clear(); FileSymRefs.clear(); auto &DR = BC->DR; - BC = CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR); - DwCtx.reset(new DWARFContextInMemory(*File)); + BC = CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR, + std::unique_ptr(new DWARFContextInMemory(*File))); CFIRdWrt.reset(nullptr); SectionMM.reset(nullptr); Out.reset(nullptr); @@ -615,7 +626,7 @@ void RewriteInstance::readSpecialSections() { FrameHdrCopy = std::vector(FrameHdrContents.begin(), FrameHdrContents.end()); // Process debug sections. 
- EHFrame = DwCtx->getEHFrame(); + EHFrame = BC->DwCtx->getEHFrame(); if (opts::DumpEHFrame) { EHFrame->dump(outs()); } @@ -684,11 +695,11 @@ void RewriteInstance::disassembleFunctions() { (SectionContents.data()) + FunctionOffset, Function.getSize()); - if (!Function.disassemble(FunctionData)) + if (!Function.disassemble(FunctionData, opts::UpdateDebugSections)) continue; if (opts::PrintAll || opts::PrintDisasm) - Function.print(errs(), "after disassembly"); + Function.print(errs(), "after disassembly", true); if (!Function.isSimple()) continue; @@ -711,7 +722,7 @@ void RewriteInstance::disassembleFunctions() { continue; if (opts::PrintAll || opts::PrintCFG) - Function.print(errs(), "after building cfg"); + Function.print(errs(), "after building cfg", true); TotalScore += Function.getFunctionScore(); @@ -822,13 +833,13 @@ void RewriteInstance::runOptimizationPasses() { } if (opts::PrintAll || opts::PrintUCE) - Function.print(errs(), "after unreachable code elimination"); + Function.print(errs(), "after unreachable code elimination", true); } if (opts::ReorderBlocks != BinaryFunction::LT_NONE) { BFI.second.modifyLayout(opts::ReorderBlocks, opts::SplitFunctions); if (opts::PrintAll || opts::PrintReordered) - Function.print(errs(), "after reordering blocks"); + Function.print(errs(), "after reordering blocks", true); } // Post-processing passes. @@ -844,8 +855,7 @@ void RewriteInstance::runOptimizationPasses() { // Update exception handling information. 
Function.updateEHRanges(); if (opts::PrintAll || opts::PrintEHRanges) - Function.print(errs(), "after updating EH ranges"); - + Function.print(errs(), "after updating EH ranges", true); } } diff --git a/bolt/RewriteInstance.h b/bolt/RewriteInstance.h index 2ad4efed2cb2..8c63ed7345e2 100644 --- a/bolt/RewriteInstance.h +++ b/bolt/RewriteInstance.h @@ -161,7 +161,6 @@ private: llvm::object::ELFObjectFileBase *File; std::unique_ptr BC; - std::unique_ptr DwCtx; std::unique_ptr CFIRdWrt; /// Our in-memory intermediary object file where we hold final code for /// rewritten functions.