Update DWARF location lists after optimization.

Summary:

    Update DWARF location lists in .debug_loc and pointers to
    them in .debug_info so that gdb can print variables which change
    location during their lifetime.

    The following changes were made:

    - Refactored BasicBlockOffsetRanges to allow ranges to be tied to binary information (so that we can reuse it for location lists)
    - Implemented range compression optimization in BasicBlockOffsetRanges (needed because otherwise too much data was being generated).
    - Added representation for location lists (LocationList.h, BinaryContext.h)
    - Implemented .debug_loc serializer that keeps the updated offsets (DebugLocWriter.{h,cpp})
    - After disassembly, traverse entries in .debug_loc and save them in context (BinaryContext.cpp)
    - After optimizations, serialize .debug_loc and update pointers in .debug_info (RewriteInstance.cpp)

(cherry picked from FBD3130682)
This commit is contained in:
Gabriel Poesia 2016-04-01 11:37:28 -07:00 committed by Maksim Panchenko
parent 4349b63144
commit 4b4db40174
11 changed files with 344 additions and 23 deletions

View File

@ -12,13 +12,15 @@
#include "BasicBlockOffsetRanges.h"
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include <algorithm>
namespace llvm {
namespace bolt {
void BasicBlockOffsetRanges::addAddressRange(BinaryFunction &Function,
uint64_t BeginAddress,
uint64_t EndAddress) {
uint64_t EndAddress,
const BinaryData *Data) {
auto FirstBB = Function.getBasicBlockContainingOffset(
BeginAddress - Function.getAddress());
assert(FirstBB && "No basic blocks in the function intersect given range.");
@ -40,13 +42,14 @@ void BasicBlockOffsetRanges::addAddressRange(BinaryFunction &Function,
BBAddressRange{
BB,
static_cast<uint16_t>(InternalAddressRangeBegin - BBAddress),
static_cast<uint16_t>(InternalAddressRangeEnd - BBAddress)});
static_cast<uint16_t>(InternalAddressRangeEnd - BBAddress),
Data});
}
}
std::vector<std::pair<uint64_t, uint64_t>>
std::vector<BasicBlockOffsetRanges::AbsoluteRange>
BasicBlockOffsetRanges::getAbsoluteAddressRanges() const {
std::vector<std::pair<uint64_t, uint64_t>> AbsoluteRanges;
std::vector<AbsoluteRange> AbsoluteRanges;
for (const auto &BBAddressRange : AddressRanges) {
auto BBOutputAddressRange =
BBAddressRange.BasicBlock->getOutputAddressRange();
@ -61,10 +64,33 @@ BasicBlockOffsetRanges::getAbsoluteAddressRanges() const {
BBFunction->getBasicBlockOriginalSize(BBAddressRange.BasicBlock))
? BBOutputAddressRange.second
: (BBOutputAddressRange.first + BBAddressRange.RangeEndOffset);
AbsoluteRanges.emplace_back(NewRangeBegin, NewRangeEnd);
AbsoluteRanges.emplace_back(AbsoluteRange{NewRangeBegin, NewRangeEnd,
BBAddressRange.Data});
}
if (AbsoluteRanges.empty()) {
return AbsoluteRanges;
}
// Merge adjacent ranges that have the same data.
std::sort(AbsoluteRanges.begin(), AbsoluteRanges.end(),
[](const AbsoluteRange &A, const AbsoluteRange &B) {
return A.Begin < B.Begin;
});
decltype(AbsoluteRanges) MergedRanges;
MergedRanges.emplace_back(AbsoluteRanges[0]);
for (unsigned I = 1, S = AbsoluteRanges.size(); I != S; ++I) {
// If this range complements the last one and they point to the same
// (possibly null) data, merge them instead of creating another one.
if (AbsoluteRanges[I].Begin == MergedRanges.back().End &&
AbsoluteRanges[I].Data == MergedRanges.back().Data) {
MergedRanges.back().End = AbsoluteRanges[I].End;
} else {
MergedRanges.emplace_back(AbsoluteRanges[I]);
}
}
return MergedRanges;
}
} // namespace bolt
} // namespace llvm

View File

@ -16,7 +16,9 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H
#define LLVM_TOOLS_LLVM_BOLT_BASIC_BLOCK_OFFSET_RANGES_H
#include "llvm/ADT/SmallVector.h"
#include <map>
#include <string>
#include <utility>
#include <vector>
@ -27,6 +29,26 @@ class BinaryFunction;
class BinaryBasicBlock;
class BasicBlockOffsetRanges {
public:
typedef SmallVectorImpl<unsigned char> BinaryData;
struct AbsoluteRange {
uint64_t Begin;
uint64_t End;
const BinaryData *Data;
};
/// Add range [BeginAddress, EndAddress) to the address ranges list.
/// \p Function is the function that contains the given address range.
void addAddressRange(BinaryFunction &Function,
uint64_t BeginAddress,
uint64_t EndAddress,
const BinaryData *Data = nullptr);
/// Returns the list of absolute addresses calculated using the output address
/// of the basic blocks, i.e. the input ranges updated after basic block
/// addresses might have changed, together with the data associated to them.
std::vector<AbsoluteRange> getAbsoluteAddressRanges() const;
private:
/// An address range inside one basic block.
struct BBAddressRange {
@ -35,21 +57,11 @@ private:
uint16_t RangeBeginOffset;
/// (Exclusive) end of the range counting from BB's start address.
uint16_t RangeEndOffset;
/// Binary data associated with this range.
const BinaryData *Data;
};
std::vector<BBAddressRange> AddressRanges;
public:
/// Add range [BeginAddress, EndAddress) to the address ranges list.
/// \p Function is the function that contains the given address range.
void addAddressRange(BinaryFunction &Function,
uint64_t BeginAddress,
uint64_t EndAddress);
/// Returns the list of absolute addresses calculated using the output address
/// of the basic blocks, i.e. the input ranges updated after basic block
/// addresses might have changed.
std::vector<std::pair<uint64_t, uint64_t>> getAbsoluteAddressRanges() const;
};
} // namespace bolt

View File

@ -157,6 +157,21 @@ void BinaryContext::preprocessFunctionDebugInfo(
findLexicalBlocks(CU.get(), CU->getUnitDIE(false), BinaryFunctions,
LexicalBlocks);
}
// Iterate over location lists and save them in LocationLists.
auto DebugLoc = DwCtx->getDebugLoc();
for (const auto &DebugLocEntry : DebugLoc->getLocationLists()) {
LocationLists.emplace_back(DebugLocEntry.Offset);
auto &LocationList = LocationLists.back();
for (const auto &Location : DebugLocEntry.Entries) {
auto *Function = getBinaryFunctionContainingAddress(Location.Begin,
BinaryFunctions);
if (Function && Function->isSimple()) {
LocationList.addLocation(&Location.Loc, *Function, Location.Begin,
Location.End);
}
}
}
}
} // namespace bolt

View File

@ -15,6 +15,7 @@
#define LLVM_TOOLS_LLVM_BOLT_BINARY_CONTEXT_H
#include "LexicalBlock.h"
#include "LocationList.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@ -36,6 +37,7 @@
#include <set>
#include <string>
#include <system_error>
#include <vector>
namespace llvm {
namespace bolt {
@ -73,6 +75,9 @@ public:
/// List of DWARF lexical blocks in .debug_info.
std::vector<LexicalBlock> LexicalBlocks;
/// List of DWARF location lists in .debug_loc.
std::vector<LocationList> LocationLists;
std::unique_ptr<MCContext> Ctx;
std::unique_ptr<DWARFContext> DwCtx;

View File

@ -20,6 +20,7 @@ add_llvm_tool(llvm-bolt
BinaryPatcher.cpp
DataReader.cpp
DebugLineTableRowRef.cpp
DebugLocWriter.cpp
DebugRangesSectionsWriter.cpp
Exceptions.cpp
RewriteInstance.cpp

45
bolt/DebugLocWriter.cpp Normal file
View File

@ -0,0 +1,45 @@
//===-- DebugLocWriter.cpp - Writes the DWARF .debug_loc section. ----------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "DebugLocWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCObjectWriter.h"
#include <algorithm>
namespace llvm {
namespace bolt {
/// Serializes \p LocList through \p Writer and records, in UpdatedOffsets, the
/// output offset at which the list starts, keyed by the list's original
/// offset in the input .debug_loc section.
///
/// Each entry is emitted as: 8-byte begin address, 8-byte end address, 2-byte
/// expression length, then the expression bytes. The list is closed by a pair
/// of 8-byte zeros. SectionOffset is advanced by the number of bytes written.
void DebugLocWriter::write(const LocationList &LocList,
                           MCObjectWriter *Writer) {
  // Reference: DWARF 4 specification section 7.7.3.
  UpdatedOffsets[LocList.getOriginalOffset()] = SectionOffset;

  // getAbsoluteAddressRanges() returns a freshly built vector by value, so
  // compute it exactly once and iterate over that copy.
  const auto AbsoluteRanges = LocList.getAbsoluteAddressRanges();

  for (const auto &Entry : AbsoluteRanges) {
    Writer->writeLE64(Entry.Begin);
    Writer->writeLE64(Entry.End);
    assert(Entry.Data && "Entry with null location expression.");
    // The length field is only 16 bits wide; a longer expression would be
    // silently truncated and corrupt the section.
    assert(Entry.Data->size() <= 0xFFFF &&
           "Location expression does not fit in a 16-bit length field.");
    Writer->writeLE16(Entry.Data->size());

    // Need to convert binary data from unsigned char to char.
    Writer->writeBytes(
        StringRef(reinterpret_cast<const char *>(Entry.Data->data()),
                  Entry.Data->size()));

    // Two 8-byte addresses + 2-byte length + the expression itself.
    SectionOffset += 2 * 8 + 2 + Entry.Data->size();
  }

  // End-of-list marker: a (0, 0) address pair.
  Writer->writeLE64(0);
  Writer->writeLE64(0);
  SectionOffset += 2 * 8;
}
} // namespace bolt
} // namespace llvm

53
bolt/DebugLocWriter.h Normal file
View File

@ -0,0 +1,53 @@
//===-- DebugLocWriter.h - Writes the DWARF .debug_loc section -------------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Class that serializes the .debug_loc section given LocationLists.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_DEBUG_LOC_WRITER_H
#define LLVM_TOOLS_LLVM_BOLT_DEBUG_LOC_WRITER_H
#include "LocationList.h"
#include <map>
#include <vector>
namespace llvm {
class MCObjectWriter;
namespace bolt {
/// Emits location lists into a .debug_loc stream while remembering where each
/// list was placed in the output.
class DebugLocWriter {
public:
  /// Maps an offset in the input .debug_loc to an offset in the output one.
  using UpdatedOffsetMapType = std::map<uint32_t, uint32_t>;

  /// Writes the given location list to the writer.
  void write(const LocationList &LocList, MCObjectWriter *Writer);

  /// Returns the input-offset to output-offset mapping accumulated by the
  /// write() calls made so far, one entry per location list written.
  const UpdatedOffsetMapType &getUpdatedLocationListOffsets() const {
    return UpdatedOffsets;
  }

private:
  /// Running offset inside the output section; grows as entries are written.
  uint32_t SectionOffset = 0;

  /// Input offset -> output offset for every location list passed to write().
  UpdatedOffsetMapType UpdatedOffsets;
};
} // namespace bolt
} // namespace llvm
#endif

View File

@ -35,7 +35,7 @@ public:
const DWARFDebugInfoEntryMinimal *DIE)
: CU(CU), DIE(DIE) { }
// Add range [BeginAddress, EndAddress) to lexical block.
/// Add range [BeginAddress, EndAddress) to lexical block.
void addAddressRange(BinaryFunction &Function,
uint64_t BeginAddress,
uint64_t EndAddress) {
@ -43,7 +43,14 @@ public:
}
std::vector<std::pair<uint64_t, uint64_t>> getAbsoluteAddressRanges() const {
return BBOffsetRanges.getAbsoluteAddressRanges();
auto AddressRangesWithData = BBOffsetRanges.getAbsoluteAddressRanges();
std::vector<std::pair<uint64_t, uint64_t>> AddressRanges(
AddressRangesWithData.size());
for (unsigned I = 0, S = AddressRanges.size(); I != S; ++I) {
AddressRanges[I] = std::make_pair(AddressRangesWithData[I].Begin,
AddressRangesWithData[I].End);
}
return AddressRanges;
}
void setAddressRangesOffset(uint32_t Offset) { AddressRangesOffset = Offset; }

61
bolt/LocationList.h Normal file
View File

@ -0,0 +1,61 @@
//===--- LocationList.h - DWARF location lists ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Represents DWARF location lists, maintaining their list of location
// expressions and the address ranges in which they are valid to be updated in
// the output debugging information.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_BOLT_LOCATION_LIST_H
#define LLVM_TOOLS_LLVM_BOLT_LOCATION_LIST_H
#include "BasicBlockOffsetRanges.h"
namespace llvm {
class DWARFCompileUnit;
class DWARFDebugInfoEntryMinimal;
namespace bolt {
class BinaryBasicBlock;
/// One DWARF location list, identified by its offset in the input .debug_loc
/// section. The ranges and their location expressions are kept in a
/// BasicBlockOffsetRanges object.
class LocationList {
public:
  LocationList(uint32_t Offset) : DebugLocOffset(Offset) {}

  /// Registers \p Expression as the location expression that holds over
  /// [BeginAddress, EndAddress) inside \p Function.
  void addLocation(const BasicBlockOffsetRanges::BinaryData *Expression,
                   BinaryFunction &Function,
                   uint64_t BeginAddress,
                   uint64_t EndAddress) {
    BBOffsetRanges.addAddressRange(Function, BeginAddress, EndAddress,
                                   Expression);
  }

  /// Returns the absolute address ranges reported by the underlying
  /// BasicBlockOffsetRanges, each carrying its associated expression data.
  std::vector<BasicBlockOffsetRanges::AbsoluteRange>
  getAbsoluteAddressRanges() const {
    return BBOffsetRanges.getAbsoluteAddressRanges();
  }

  /// Offset this list had in the input .debug_loc section.
  uint32_t getOriginalOffset() const { return DebugLocOffset; }

private:
  /// Address ranges of this list together with their location expressions.
  BasicBlockOffsetRanges BBOffsetRanges;

  /// Offset of this location list in the input .debug_loc section.
  uint32_t DebugLocOffset;
};
} // namespace bolt
} // namespace llvm
#endif

View File

@ -732,6 +732,8 @@ void RewriteInstance::readSpecialSections() {
DebugLineSize = Section.getSize();
} else if (SectionName == ".debug_ranges") {
DebugRangesSize = Section.getSize();
} else if (SectionName == ".debug_loc") {
DebugLocSize = Section.getSize();
}
}
@ -1502,6 +1504,85 @@ void RewriteInstance::generateDebugRanges() {
}
}
void RewriteInstance::updateLocationLists() {
// Write new contents to .debug_loc.
SmallVector<char, 16> DebugLocBuffer;
raw_svector_ostream OS(DebugLocBuffer);
auto MAB = BC->TheTarget->createMCAsmBackend(*BC->MRI, BC->TripleName, "");
auto Writer = MAB->createObjectWriter(OS);
DebugLocWriter LocationListsWriter;
for (const auto &Loc : BC->LocationLists) {
LocationListsWriter.write(Loc, Writer);
}
const auto &DebugLocContents = OS.str();
// Free'd by SectionMM.
uint8_t *SectionData = new uint8_t[DebugLocContents.size()];
memcpy(SectionData, DebugLocContents.data(), DebugLocContents.size());
SectionMM->NoteSectionInfo[".debug_loc"] = SectionInfo(
reinterpret_cast<uint64_t>(SectionData),
DebugLocContents.size(),
/*Alignment=*/0,
/*IsCode=*/false,
/*IsReadOnly=*/true);
// For each CU, update pointers into .debug_loc.
for (const auto &CU : BC->DwCtx->compile_units()) {
updateLocationListPointers(
CU.get(),
CU->getUnitDIE(false),
LocationListsWriter.getUpdatedLocationListOffsets());
}
}
/// Walks the DIE tree rooted at \p DIE and, for every DW_AT_location attribute
/// whose value is an offset found in \p UpdatedOffsets, queues a 32-bit patch
/// in the .debug_info patcher with the new offset plus DebugLocSize.
/// Subtrees rooted at a subprogram whose function is not simple are skipped.
void RewriteInstance::updateLocationListPointers(
    const DWARFUnit *Unit,
    const DWARFDebugInfoEntryMinimal *DIE,
    const std::map<uint32_t, uint32_t> &UpdatedOffsets) {
  // Non-simple functions are not rewritten, so leave their DIEs untouched.
  if (DIE->getTag() == dwarf::DW_TAG_subprogram) {
    uint64_t LowPC = -1ULL, HighPC = -1ULL;
    DIE->getLowAndHighPC(Unit, LowPC, HighPC);
    if (LowPC != -1ULL) {
      auto FunctionIt = BinaryFunctions.find(LowPC);
      const bool IsKnown = FunctionIt != BinaryFunctions.end();
      if (IsKnown && !FunctionIt->second.isSimple())
        return;
    }
  }

  // A DW_AT_location holding a section offset (or a plain constant, as
  // produced for DWARF 3) refers to a location list and may need patching.
  DWARFFormValue Value;
  uint32_t AttrOffset;
  if (DIE->getAttributeValue(Unit, dwarf::DW_AT_location, Value,
                             &AttrOffset)) {
    const bool IsConstant = Value.isFormClass(DWARFFormValue::FC_Constant);
    const bool IsSectionOffset =
        Value.isFormClass(DWARFFormValue::FC_SectionOffset);

    if (IsConstant || IsSectionOffset) {
      const uint64_t DebugLocOffset =
          IsSectionOffset ? Value.getAsSectionOffset().getValue()
                          : Value.getAsUnsignedConstant().getValue();

      auto UpdatedIt = UpdatedOffsets.find(DebugLocOffset);
      if (UpdatedIt != UpdatedOffsets.end()) {
        auto *Patcher = static_cast<SimpleBinaryPatcher *>(
            SectionPatchers[".debug_info"].get());
        Patcher->addLE32Patch(AttrOffset, UpdatedIt->second + DebugLocSize);
      }
    }
  }

  // Descend into the children of this DIE.
  auto Child = DIE->getFirstChild();
  while (Child) {
    updateLocationListPointers(Unit, Child, UpdatedOffsets);
    Child = Child->getSibling();
  }
}
void RewriteInstance::patchELFPHDRTable() {
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
if (!ELF64LEFile) {
@ -2047,12 +2128,17 @@ void RewriteInstance::updateDebugInfo() {
if (!opts::UpdateDebugSections)
return;
SectionPatchers[".debug_abbrev"] = llvm::make_unique<DebugAbbrevPatcher>();
SectionPatchers[".debug_info"] = llvm::make_unique<SimpleBinaryPatcher>();
updateFunctionRanges();
updateLexicalBlocksAddresses();
generateDebugRanges();
updateLocationLists();
auto &DebugInfoSI = SectionMM->NoteSectionInfo[".debug_info"];
for (const auto &CU : BC->DwCtx->compile_units()) {
const auto CUID = CU->getOffset();
@ -2080,9 +2166,6 @@ void RewriteInstance::updateDebugInfo() {
}
void RewriteInstance::updateDWARFAddressRanges() {
SectionPatchers[".debug_abbrev"] = llvm::make_unique<DebugAbbrevPatcher>();
SectionPatchers[".debug_info"] = llvm::make_unique<SimpleBinaryPatcher>();
// Update address ranges of functions.
for (const auto &BFI : BinaryFunctions) {
const auto &Function = BFI.second;

View File

@ -15,6 +15,7 @@
#define LLVM_TOOLS_LLVM_BOLT_REWRITE_INSTANCE_H
#include "BinaryPatcher.h"
#include "DebugLocWriter.h"
#include "DebugRangesSectionsWriter.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
@ -210,6 +211,9 @@ private:
/// Update lexical blocks ranges after optimizations.
void updateLexicalBlocksAddresses();
/// Generate new contents for .debug_loc.
void updateLocationLists();
/// Generate new contents for .debug_ranges and .debug_aranges section.
void generateDebugRanges();
@ -228,6 +232,12 @@ private:
const DWARFUnit *Unit,
const DWARFDebugInfoEntryMinimal *DIE);
/// Updates pointers in .debug_info to location lists in .debug_loc.
void updateLocationListPointers(
const DWARFUnit *Unit,
const DWARFDebugInfoEntryMinimal *DIE,
const std::map<uint32_t, uint32_t> &UpdatedOffsets);
/// Return file offset corresponding to a given virtual address.
uint64_t getFileOffsetFor(uint64_t Address) {
assert(Address >= NewTextSegmentAddress &&
@ -297,6 +307,9 @@ private:
/// Size of the .debug_line section on input.
uint32_t DebugLineSize{0};
/// Size of the .debug_loc section on input.
uint32_t DebugLocSize{0};
/// Size of the .debug_ranges section on input.
uint32_t DebugRangesSize{0};