From 4b4db401748f74bed71cc5b32eb67ed30afa5812 Mon Sep 17 00:00:00 2001 From: Gabriel Poesia Date: Fri, 1 Apr 2016 11:37:28 -0700 Subject: [PATCH] Update DWARF location lists after optimization. Summary: Update DWARF location lists in .debug_loc and pointers to them in .debug_info so that gdb can print variables which change location during their lifetime. The following changes were made: - Refactored BasicBlockOffsetRanges to allow ranges to be tied to binary information (so that we can reuse it for location lists) - Implemented range compression optimization in BasicBlockOffsetRanges (needed because otherwise too much data was being generated). - Added representation for location lists (LocationList.h, BinaryContext.h) - Implemented .debug_loc serializer that keeps the updated offsets (DebugLocWriter.{h,cpp}) - After disassembly, traverse entries in .debug_loc and save them in context (BinaryContext.cpp) - After optimizations, serialize .debug_loc and update pointers in .debug_info (RewriteInstance.cpp) (cherry picked from FBD3130682) --- bolt/BasicBlockOffsetRanges.cpp | 38 +++++++++++--- bolt/BasicBlockOffsetRanges.h | 36 ++++++++----- bolt/BinaryContext.cpp | 15 ++++++ bolt/BinaryContext.h | 5 ++ bolt/CMakeLists.txt | 1 + bolt/DebugLocWriter.cpp | 45 +++++++++++++++++ bolt/DebugLocWriter.h | 53 ++++++++++++++++++++ bolt/LexicalBlock.h | 11 +++- bolt/LocationList.h | 61 ++++++++++++++++++++++ bolt/RewriteInstance.cpp | 89 +++++++++++++++++++++++++++++++-- bolt/RewriteInstance.h | 13 +++++ 11 files changed, 344 insertions(+), 23 deletions(-) create mode 100644 bolt/DebugLocWriter.cpp create mode 100644 bolt/DebugLocWriter.h create mode 100644 bolt/LocationList.h diff --git a/bolt/BasicBlockOffsetRanges.cpp b/bolt/BasicBlockOffsetRanges.cpp index 9cb507facd9e..445da2bc4ef1 100644 --- a/bolt/BasicBlockOffsetRanges.cpp +++ b/bolt/BasicBlockOffsetRanges.cpp @@ -12,13 +12,15 @@ #include "BasicBlockOffsetRanges.h" #include "BinaryBasicBlock.h" #include 
"BinaryFunction.h" +#include namespace llvm { namespace bolt { void BasicBlockOffsetRanges::addAddressRange(BinaryFunction &Function, uint64_t BeginAddress, - uint64_t EndAddress) { + uint64_t EndAddress, + const BinaryData *Data) { auto FirstBB = Function.getBasicBlockContainingOffset( BeginAddress - Function.getAddress()); assert(FirstBB && "No basic blocks in the function intersect given range."); @@ -40,13 +42,14 @@ void BasicBlockOffsetRanges::addAddressRange(BinaryFunction &Function, BBAddressRange{ BB, static_cast(InternalAddressRangeBegin - BBAddress), - static_cast(InternalAddressRangeEnd - BBAddress)}); + static_cast(InternalAddressRangeEnd - BBAddress), + Data}); } } -std::vector> +std::vector BasicBlockOffsetRanges::getAbsoluteAddressRanges() const { - std::vector> AbsoluteRanges; + std::vector AbsoluteRanges; for (const auto &BBAddressRange : AddressRanges) { auto BBOutputAddressRange = BBAddressRange.BasicBlock->getOutputAddressRange(); @@ -61,9 +64,32 @@ BasicBlockOffsetRanges::getAbsoluteAddressRanges() const { BBFunction->getBasicBlockOriginalSize(BBAddressRange.BasicBlock)) ? BBOutputAddressRange.second : (BBOutputAddressRange.first + BBAddressRange.RangeEndOffset); - AbsoluteRanges.emplace_back(NewRangeBegin, NewRangeEnd); + AbsoluteRanges.emplace_back(AbsoluteRange{NewRangeBegin, NewRangeEnd, + BBAddressRange.Data}); } - return AbsoluteRanges; + if (AbsoluteRanges.empty()) { + return AbsoluteRanges; + } + // Merge adjacent ranges that have the same data. + std::sort(AbsoluteRanges.begin(), AbsoluteRanges.end(), + [](const AbsoluteRange &A, const AbsoluteRange &B) { + return A.Begin < B.Begin; + }); + decltype(AbsoluteRanges) MergedRanges; + + MergedRanges.emplace_back(AbsoluteRanges[0]); + for (unsigned I = 1, S = AbsoluteRanges.size(); I != S; ++I) { + // If this range complements the last one and they point to the same + // (possibly null) data, merge them instead of creating another one. 
+ if (AbsoluteRanges[I].Begin == MergedRanges.back().End && + AbsoluteRanges[I].Data == MergedRanges.back().Data) { + MergedRanges.back().End = AbsoluteRanges[I].End; + } else { + MergedRanges.emplace_back(AbsoluteRanges[I]); + } + } + + return MergedRanges; } } // namespace bolt diff --git a/bolt/BasicBlockOffsetRanges.h b/bolt/BasicBlockOffsetRanges.h index f9221ff617e8..51dac4dc9e50 100644 --- a/bolt/BasicBlockOffsetRanges.h +++ b/bolt/BasicBlockOffsetRanges.h @@ -16,7 +16,9 @@ #ifndef LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H #define LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H +#include "llvm/ADT/SmallVector.h" #include +#include #include #include @@ -27,6 +29,26 @@ class BinaryFunction; class BinaryBasicBlock; class BasicBlockOffsetRanges { +public: + typedef SmallVectorImpl BinaryData; + struct AbsoluteRange { + uint64_t Begin; + uint64_t End; + const BinaryData *Data; + }; + + /// Add range [BeginAddress, EndAddress) to the address ranges list. + /// \p Function is the function that contains the given address range. + void addAddressRange(BinaryFunction &Function, + uint64_t BeginAddress, + uint64_t EndAddress, + const BinaryData *Data = nullptr); + + /// Returns the list of absolute addresses calculated using the output address + /// of the basic blocks, i.e. the input ranges updated after basic block + /// addresses might have changed, together with the data associated to them. + std::vector getAbsoluteAddressRanges() const; + private: /// An address range inside one basic block. struct BBAddressRange { @@ -35,21 +57,11 @@ private: uint16_t RangeBeginOffset; /// (Exclusive) end of the range counting from BB's start address. uint16_t RangeEndOffset; + /// Binary data associated with this range. + const BinaryData *Data; }; std::vector AddressRanges; - -public: - /// Add range [BeginAddress, EndAddress) to the address ranges list. - /// \p Function is the function that contains the given address range. 
- void addAddressRange(BinaryFunction &Function, - uint64_t BeginAddress, - uint64_t EndAddress); - - /// Returns the list of absolute addresses calculated using the output address - /// of the basic blocks, i.e. the input ranges updated after basic block - /// addresses might have changed. - std::vector> getAbsoluteAddressRanges() const; }; } // namespace bolt diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp index 00e4ffce1ea4..1cbca4e193b1 100644 --- a/bolt/BinaryContext.cpp +++ b/bolt/BinaryContext.cpp @@ -157,6 +157,21 @@ void BinaryContext::preprocessFunctionDebugInfo( findLexicalBlocks(CU.get(), CU->getUnitDIE(false), BinaryFunctions, LexicalBlocks); } + + // Iterate over location lists and save them in LocationLists. + auto DebugLoc = DwCtx->getDebugLoc(); + for (const auto &DebugLocEntry : DebugLoc->getLocationLists()) { + LocationLists.emplace_back(DebugLocEntry.Offset); + auto &LocationList = LocationLists.back(); + for (const auto &Location : DebugLocEntry.Entries) { + auto *Function = getBinaryFunctionContainingAddress(Location.Begin, + BinaryFunctions); + if (Function && Function->isSimple()) { + LocationList.addLocation(&Location.Loc, *Function, Location.Begin, + Location.End); + } + } + } } } // namespace bolt diff --git a/bolt/BinaryContext.h b/bolt/BinaryContext.h index 8c838b3c2235..d93ecff598d1 100644 --- a/bolt/BinaryContext.h +++ b/bolt/BinaryContext.h @@ -15,6 +15,7 @@ #define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H #include "LexicalBlock.h" +#include "LocationList.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -36,6 +37,7 @@ #include #include #include +#include namespace llvm { namespace bolt { @@ -73,6 +75,9 @@ public: /// List of DWARF lexical blocks in .debug_info. std::vector LexicalBlocks; + /// List of DWARF location lists in .debug_loc. 
+ std::vector LocationLists; + std::unique_ptr Ctx; std::unique_ptr DwCtx; diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index c66cb442fbfc..7f7343a41c06 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_tool(llvm-bolt BinaryPatcher.cpp DataReader.cpp DebugLineTableRowRef.cpp + DebugLocWriter.cpp DebugRangesSectionsWriter.cpp Exceptions.cpp RewriteInstance.cpp diff --git a/bolt/DebugLocWriter.cpp b/bolt/DebugLocWriter.cpp new file mode 100644 index 000000000000..e2c0e84dbbf0 --- /dev/null +++ b/bolt/DebugLocWriter.cpp @@ -0,0 +1,45 @@ +//===-- DebugLocWriter.cpp - Writes the DWARF .debug_loc section. ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "DebugLocWriter.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCObjectWriter.h" +#include + +namespace llvm { +namespace bolt { + +void DebugLocWriter::write(const LocationList &LocList, + MCObjectWriter *Writer) { + // Reference: DWARF 4 specification section 7.7.3. + UpdatedOffsets[LocList.getOriginalOffset()] = SectionOffset; + auto AbsoluteRanges = LocList.getAbsoluteAddressRanges(); + + for (const auto &Entry : LocList.getAbsoluteAddressRanges()) { + Writer->writeLE64(Entry.Begin); + Writer->writeLE64(Entry.End); + assert(Entry.Data && "Entry with null location expression."); + Writer->writeLE16(Entry.Data->size()); + + // Need to convert binary data from unsigned char to char. 
+ Writer->writeBytes( + StringRef(reinterpret_cast(Entry.Data->data()), + Entry.Data->size())); + + SectionOffset += 2 * 8 + 2 + Entry.Data->size(); + } + Writer->writeLE64(0); + Writer->writeLE64(0); + SectionOffset += 2 * 8; +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/DebugLocWriter.h b/bolt/DebugLocWriter.h new file mode 100644 index 000000000000..c0c60fd8c9ee --- /dev/null +++ b/bolt/DebugLocWriter.h @@ -0,0 +1,53 @@ +//===-- DebugLocWriter.h - Writes the DWARF .debug_loc section -------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Class that serializes the .debug_loc section given LocationLists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_DEBUG_LOC_WRITER_H +#define LLVM_TOOLS_LLVM_BOLT_DEBUG_LOC_WRITER_H + +#include "LocationList.h" +#include +#include + +namespace llvm { + +class MCObjectWriter; + +namespace bolt { + +class DebugLocWriter { +public: + /// Writes the given location list to the writer. + void write(const LocationList &LocList, MCObjectWriter *Writer); + + using UpdatedOffsetMapType = std::map; + + /// Returns mapping from offsets in the input .debug_loc to offsets in the + /// output .debug_loc section with the corresponding updated location list + /// entry. + const UpdatedOffsetMapType &getUpdatedLocationListOffsets() const { + return UpdatedOffsets; + } + +private: + /// Current offset in the section (updated as new entries are written). + uint32_t SectionOffset{0}; + + /// Map from input offsets to output offsets for location lists that were + /// updated, generated after write(). 
+ UpdatedOffsetMapType UpdatedOffsets; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/LexicalBlock.h b/bolt/LexicalBlock.h index a7740f13563e..fd085e62b277 100644 --- a/bolt/LexicalBlock.h +++ b/bolt/LexicalBlock.h @@ -35,7 +35,7 @@ public: const DWARFDebugInfoEntryMinimal *DIE) : CU(CU), DIE(DIE) { } - // Add range [BeginAddress, EndAddress) to lexical block. + /// Add range [BeginAddress, EndAddress) to lexical block. void addAddressRange(BinaryFunction &Function, uint64_t BeginAddress, uint64_t EndAddress) { @@ -43,7 +43,14 @@ public: } std::vector> getAbsoluteAddressRanges() const { - return BBOffsetRanges.getAbsoluteAddressRanges(); + auto AddressRangesWithData = BBOffsetRanges.getAbsoluteAddressRanges(); + std::vector> AddressRanges( + AddressRangesWithData.size()); + for (unsigned I = 0, S = AddressRanges.size(); I != S; ++I) { + AddressRanges[I] = std::make_pair(AddressRangesWithData[I].Begin, + AddressRangesWithData[I].End); + } + return AddressRanges; } void setAddressRangesOffset(uint32_t Offset) { AddressRangesOffset = Offset; } diff --git a/bolt/LocationList.h b/bolt/LocationList.h new file mode 100644 index 000000000000..7cf4fab14507 --- /dev/null +++ b/bolt/LocationList.h @@ -0,0 +1,61 @@ +//===--- LocationList.h - DWARF location lists ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Represents DWARF location lists, maintaining their list of location +// expressions and the address ranges in which they are valid to be updated in +// the output debugging information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_LOCATION_LIST_H +#define LLVM_TOOLS_LLVM_BOLT_LOCATION_LIST_H + +#include "BasicBlockOffsetRanges.h" + +namespace llvm { + +class DWARFCompileUnit; +class DWARFDebugInfoEntryMinimal; + +namespace bolt { + +class BinaryBasicBlock; + +class LocationList { +public: + LocationList(uint32_t Offset) : DebugLocOffset(Offset) { } + + /// Add a location expression that is valid in [BeginAddress, EndAddress) + /// within Function to location list. + void addLocation(const BasicBlockOffsetRanges::BinaryData *Expression, + BinaryFunction &Function, + uint64_t BeginAddress, + uint64_t EndAddress) { + BBOffsetRanges.addAddressRange(Function, BeginAddress, EndAddress, + Expression); + } + + std::vector + getAbsoluteAddressRanges() const { + return BBOffsetRanges.getAbsoluteAddressRanges(); + } + + uint32_t getOriginalOffset() const { return DebugLocOffset; } + +private: + BasicBlockOffsetRanges BBOffsetRanges; + + // Offset of this location list in the input .debug_loc section. + uint32_t DebugLocOffset; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp index d42518402e98..77592c515443 100644 --- a/bolt/RewriteInstance.cpp +++ b/bolt/RewriteInstance.cpp @@ -732,6 +732,8 @@ void RewriteInstance::readSpecialSections() { DebugLineSize = Section.getSize(); } else if (SectionName == ".debug_ranges") { DebugRangesSize = Section.getSize(); + } else if (SectionName == ".debug_loc") { + DebugLocSize = Section.getSize(); } } @@ -1502,6 +1504,85 @@ void RewriteInstance::generateDebugRanges() { } } +void RewriteInstance::updateLocationLists() { + // Write new contents to .debug_loc. 
+ SmallVector DebugLocBuffer; + raw_svector_ostream OS(DebugLocBuffer); + + auto MAB = BC->TheTarget->createMCAsmBackend(*BC->MRI, BC->TripleName, ""); + auto Writer = MAB->createObjectWriter(OS); + + DebugLocWriter LocationListsWriter; + + for (const auto &Loc : BC->LocationLists) { + LocationListsWriter.write(Loc, Writer); + } + + const auto &DebugLocContents = OS.str(); + + // Free'd by SectionMM. + uint8_t *SectionData = new uint8_t[DebugLocContents.size()]; + memcpy(SectionData, DebugLocContents.data(), DebugLocContents.size()); + + SectionMM->NoteSectionInfo[".debug_loc"] = SectionInfo( + reinterpret_cast(SectionData), + DebugLocContents.size(), + /*Alignment=*/0, + /*IsCode=*/false, + /*IsReadOnly=*/true); + + // For each CU, update pointers into .debug_loc. + for (const auto &CU : BC->DwCtx->compile_units()) { + updateLocationListPointers( + CU.get(), + CU->getUnitDIE(false), + LocationListsWriter.getUpdatedLocationListOffsets()); + } +} + +void RewriteInstance::updateLocationListPointers( + const DWARFUnit *Unit, + const DWARFDebugInfoEntryMinimal *DIE, + const std::map &UpdatedOffsets) { + // Stop if we're in a non-simple function, which will not be rewritten. + auto Tag = DIE->getTag(); + if (Tag == dwarf::DW_TAG_subprogram) { + uint64_t LowPC = -1ULL, HighPC = -1ULL; + DIE->getLowAndHighPC(Unit, LowPC, HighPC); + if (LowPC != -1ULL) { + auto It = BinaryFunctions.find(LowPC); + if (It != BinaryFunctions.end() && !It->second.isSimple()) + return; + } + } + // If the DIE has a DW_AT_location attribute with a section offset, update it. 
+ DWARFFormValue Value; + uint32_t AttrOffset; + if (DIE->getAttributeValue(Unit, dwarf::DW_AT_location, Value, &AttrOffset) && + (Value.isFormClass(DWARFFormValue::FC_Constant) || + Value.isFormClass(DWARFFormValue::FC_SectionOffset))) { + uint64_t DebugLocOffset = -1ULL; + if (Value.isFormClass(DWARFFormValue::FC_SectionOffset)) { + DebugLocOffset = Value.getAsSectionOffset().getValue(); + } else if (Value.isFormClass(DWARFFormValue::FC_Constant)) { // DWARF 3 + DebugLocOffset = Value.getAsUnsignedConstant().getValue(); + } + + auto It = UpdatedOffsets.find(DebugLocOffset); + if (It != UpdatedOffsets.end()) { + auto DebugInfoPatcher = + static_cast( + SectionPatchers[".debug_info"].get()); + DebugInfoPatcher->addLE32Patch(AttrOffset, It->second + DebugLocSize); + } + } + + // Recursively visit children. + for (auto Child = DIE->getFirstChild(); Child; Child = Child->getSibling()) { + updateLocationListPointers(Unit, Child, UpdatedOffsets); + } +} + void RewriteInstance::patchELFPHDRTable() { auto ELF64LEFile = dyn_cast(InputFile); if (!ELF64LEFile) { @@ -2047,12 +2128,17 @@ void RewriteInstance::updateDebugInfo() { if (!opts::UpdateDebugSections) return; + SectionPatchers[".debug_abbrev"] = llvm::make_unique(); + SectionPatchers[".debug_info"] = llvm::make_unique(); + updateFunctionRanges(); updateLexicalBlocksAddresses(); generateDebugRanges(); + updateLocationLists(); + auto &DebugInfoSI = SectionMM->NoteSectionInfo[".debug_info"]; for (const auto &CU : BC->DwCtx->compile_units()) { const auto CUID = CU->getOffset(); @@ -2080,9 +2166,6 @@ void RewriteInstance::updateDebugInfo() { } void RewriteInstance::updateDWARFAddressRanges() { - SectionPatchers[".debug_abbrev"] = llvm::make_unique(); - SectionPatchers[".debug_info"] = llvm::make_unique(); - // Update address ranges of functions. 
for (const auto &BFI : BinaryFunctions) { const auto &Function = BFI.second; diff --git a/bolt/RewriteInstance.h b/bolt/RewriteInstance.h index dde0c4fc0dcd..ba8710d34ad8 100644 --- a/bolt/RewriteInstance.h +++ b/bolt/RewriteInstance.h @@ -15,6 +15,7 @@ #define LLVM_TOOLS_LLVM_BOLT_REWRITE_INSTANCE_H #include "BinaryPatcher.h" +#include "DebugLocWriter.h" #include "DebugRangesSectionsWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" @@ -210,6 +211,9 @@ private: /// Update lexical blocks ranges after optimizations. void updateLexicalBlocksAddresses(); + /// Generate new contents for .debug_loc and update pointers in .debug_info. + void updateLocationLists(); + /// Generate new contents for .debug_ranges and .debug_aranges section. void generateDebugRanges(); @@ -228,6 +232,12 @@ private: const DWARFUnit *Unit, const DWARFDebugInfoEntryMinimal *DIE); + /// Updates pointers in .debug_info to location lists in .debug_loc. + void updateLocationListPointers( + const DWARFUnit *Unit, + const DWARFDebugInfoEntryMinimal *DIE, + const std::map &UpdatedOffsets); + /// Return file offset corresponding to a given virtual address. uint64_t getFileOffsetFor(uint64_t Address) { assert(Address >= NewTextSegmentAddress && @@ -297,6 +307,9 @@ private: /// Size of the .debug_line section on input. uint32_t DebugLineSize{0}; + /// Size of the .debug_loc section on input. + uint32_t DebugLocSize{0}; + /// Size of the .debug_ranges section on input. uint32_t DebugRangesSize{0};