llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

336 lines
12 KiB
C
Raw Normal View History

//===-- DWARFUnit.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef SymbolFileDWARF_DWARFUnit_h_
#define SymbolFileDWARF_DWARFUnit_h_
#include "DWARFDIE.h"
#include "DWARFDebugInfoEntry.h"
#include "lldb/lldb-enumerations.h"
#include "llvm/Support/RWMutex.h"
#include <atomic>
class DWARFUnit;
class DWARFCompileUnit;
class NameToDIE;
class SymbolFileDWARF;
class SymbolFileDWARFDwo;
typedef std::shared_ptr<DWARFUnit> DWARFUnitSP;
enum DWARFProducer {
eProducerInvalid = 0,
eProducerClang,
eProducerGCC,
eProducerLLVMGCC,
eProcucerOther
};
/// Base class describing the header of any kind of "unit." Some information
/// is specific to certain unit types. We separate this class out so we can
/// parse the header before deciding what specific kind of unit to construct.
class DWARFUnitHeader {
dw_offset_t m_offset = 0;
dw_offset_t m_length = 0;
uint16_t m_version = 0;
dw_offset_t m_abbr_offset = 0;
uint8_t m_unit_type = 0;
uint8_t m_addr_size = 0;
uint64_t m_type_hash = 0;
uint32_t m_type_offset = 0;
uint64_t m_dwo_id = 0;
DWARFUnitHeader() = default;
public:
dw_offset_t GetOffset() const { return m_offset; }
uint16_t GetVersion() const { return m_version; }
uint16_t GetAddressByteSize() const { return m_addr_size; }
dw_offset_t GetLength() const { return m_length; }
dw_offset_t GetAbbrOffset() const { return m_abbr_offset; }
uint8_t GetUnitType() const { return m_unit_type; }
uint64_t GetTypeHash() const { return m_type_hash; }
dw_offset_t GetTypeOffset() const { return m_type_offset; }
bool IsTypeUnit() const {
return m_unit_type == DW_UT_type || m_unit_type == DW_UT_split_type;
}
uint32_t GetNextUnitOffset() const { return m_offset + m_length + 4; }
static llvm::Expected<DWARFUnitHeader>
extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section,
lldb::offset_t *offset_ptr);
};
class DWARFUnit : public lldb_private::UserID {
using die_iterator_range =
llvm::iterator_range<DWARFDebugInfoEntry::collection::iterator>;
public:
static llvm::Expected<DWARFUnitSP>
extract(SymbolFileDWARF &dwarf2Data, lldb::user_id_t uid,
const lldb_private::DWARFDataExtractor &debug_info,
DIERef::Section section, lldb::offset_t *offset_ptr);
virtual ~DWARFUnit();
void ExtractUnitDIEIfNeeded();
void ExtractDIEsIfNeeded();
class ScopedExtractDIEs {
DWARFUnit *m_cu;
public:
bool m_clear_dies = false;
ScopedExtractDIEs(DWARFUnit &cu);
~ScopedExtractDIEs();
DISALLOW_COPY_AND_ASSIGN(ScopedExtractDIEs);
ScopedExtractDIEs(ScopedExtractDIEs &&rhs);
ScopedExtractDIEs &operator=(ScopedExtractDIEs &&rhs);
};
ScopedExtractDIEs ExtractDIEsScoped();
DWARFDIE LookupAddress(const dw_addr_t address);
bool Verify(lldb_private::Stream *s) const;
virtual void Dump(lldb_private::Stream *s) const = 0;
/// Get the data that contains the DIE information for this unit.
///
/// This will return the correct bytes that contain the data for
/// this DWARFUnit. It could be .debug_info or .debug_types
/// depending on where the data for this unit originates.
///
/// \return
/// The correct data for the DIE information in this unit.
const lldb_private::DWARFDataExtractor &GetData() const;
/// Get the size in bytes of the unit header.
///
/// \return
/// Byte size of the unit header
uint32_t GetHeaderByteSize() const;
// Offset of the initial length field.
dw_offset_t GetOffset() const { return m_header.GetOffset(); }
/// Get the size in bytes of the length field in the header.
///
/// In DWARF32 this is just 4 bytes
///
/// \return
/// Byte size of the compile unit header length field
size_t GetLengthByteSize() const { return 4; }
bool ContainsDIEOffset(dw_offset_t die_offset) const {
return die_offset >= GetFirstDIEOffset() &&
die_offset < GetNextUnitOffset();
}
dw_offset_t GetFirstDIEOffset() const {
return GetOffset() + GetHeaderByteSize();
}
dw_offset_t GetNextUnitOffset() const { return m_header.GetNextUnitOffset(); }
// Size of the CU data (without initial length and without header).
size_t GetDebugInfoSize() const;
// Size of the CU data incl. header but without initial length.
uint32_t GetLength() const { return m_header.GetLength(); }
uint16_t GetVersion() const { return m_header.GetVersion(); }
const DWARFAbbreviationDeclarationSet *GetAbbreviations() const;
dw_offset_t GetAbbrevOffset() const;
uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); }
dw_addr_t GetAddrBase() const { return m_addr_base; }
dw_addr_t GetBaseAddress() const { return m_base_addr; }
dw_offset_t GetLineTableOffset();
dw_addr_t GetRangesBase() const { return m_ranges_base; }
dw_addr_t GetStrOffsetsBase() const { return m_str_offsets_base; }
void SetAddrBase(dw_addr_t addr_base);
void SetLoclistsBase(dw_addr_t loclists_base);
void SetRangesBase(dw_addr_t ranges_base);
void SetStrOffsetsBase(dw_offset_t str_offsets_base);
virtual void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) = 0;
lldb::ByteOrder GetByteOrder() const;
const DWARFDebugAranges &GetFunctionAranges();
void SetBaseAddress(dw_addr_t base_addr);
DWARFBaseDIE GetUnitDIEOnly() { return DWARFDIE(this, GetUnitDIEPtrOnly()); }
DWARFDIE DIE() { return DWARFDIE(this, DIEPtr()); }
DWARFDIE GetDIE(dw_offset_t die_offset);
DWARF: Add "dwo_num" field to the DIERef class Summary: When dwo support was introduced, it used a trick where debug info entries were referenced by the offset of the compile unit in the main file, but the die offset was relative to the dwo file. Although there was some elegance to it, this representation was starting to reach its breaking point: - the fact that the skeleton compile unit owned the DWO file meant that it was impossible (or at least hard and unintuitive) to support DWO files containing more than one compile unit. These kinds of files are produced by LTO for example. - it made it impossible to reference any DIEs in the skeleton compile unit (although the skeleton units are generally empty, clang still puts some info into them with -fsplit-dwarf-inlining). - (current motivation) it made it very hard to support type units placed in DWO files, as type units don't have any skeleton units which could be referenced in the main file This patch addresses this problem by introducing an new "dwo_num" field to the DIERef class, whose purpose is to identify the dwo file. It's kind of similar to the dwo_id field in DWARF5 unit headers, but while this is a 64bit hash whose main purpose is to catch file mismatches, this is just a smaller integer used to indentify a loaded dwo file. Currently, this is based on the index of the skeleton compile unit which owns the dwo file, but it is intended to be eventually independent of that (to support the LTO use case). Simultaneously the cu_offset is dropped to conserve space, as it is no longer necessary. This means we can remove the "BaseObjectOffset" field from the DWARFUnit class. It also means we can remove some of the workarounds put in place to support the skeleton-unit+dwo-die combo. More work is needed to remove all of them, which is out of scope of this patch. Reviewers: JDevlieghere, clayborg, aprantl Subscribers: mehdi_amini, dexonsmith, arphaman, lldb-commits Differential Revision: https://reviews.llvm.org/D63428 llvm-svn: 364009
2019-06-21 15:56:50 +08:00
DWARFUnit &GetNonSkeletonUnit();
static uint8_t GetAddressByteSize(const DWARFUnit *cu);
static uint8_t GetDefaultAddressSize();
void *GetUserData() const;
void SetUserData(void *d);
bool Supports_DW_AT_APPLE_objc_complete_type();
bool DW_AT_decl_file_attributes_are_invalid();
bool Supports_unnamed_objc_bitfields();
SymbolFileDWARF &GetSymbolFileDWARF() const { return m_dwarf; }
DWARFProducer GetProducer();
uint32_t GetProducerVersionMajor();
uint32_t GetProducerVersionMinor();
uint32_t GetProducerVersionUpdate();
uint64_t GetDWARFLanguageType();
bool GetIsOptimized();
DWARF: Add some support for non-native directory separators Summary: If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. Reviewers: clayborg, zturner, JDevlieghere Subscribers: aprantl, lldb-commits Differential Revision: https://reviews.llvm.org/D56543 llvm-svn: 351328
2019-01-16 20:30:41 +08:00
const lldb_private::FileSpec &GetCompilationDirectory();
const lldb_private::FileSpec &GetAbsolutePath();
lldb_private::FileSpec GetFile(size_t file_idx);
DWARF: Add some support for non-native directory separators Summary: If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. Reviewers: clayborg, zturner, JDevlieghere Subscribers: aprantl, lldb-commits Differential Revision: https://reviews.llvm.org/D56543 llvm-svn: 351328
2019-01-16 20:30:41 +08:00
lldb_private::FileSpec::Style GetPathStyle();
SymbolFileDWARFDwo *GetDwoSymbolFile();
die_iterator_range dies() {
ExtractDIEsIfNeeded();
return die_iterator_range(m_die_array.begin(), m_die_array.end());
}
DIERef::Section GetDebugSection() const { return m_section; }
DWARF: Add ability to reference debug info coming from multiple sections Summary: This patch adds the ability to precisely address debug info in situations when a single file can have more than one debug-info-bearing sections (as is the case with type units in DWARF v4). The changes here can be classified into roughly three categories: - the code which addresses a debug info by offset gets an additional argument, which specifies the section one should look into. - the DIERef class also gets an additional member variable specifying the section. This way, code dealing with DIERefs can know which section is the object referring to. - the user_id_t encoding steals one bit from the dwarf_id field to store the section. This means the total number of separate object files (apple .o, or normal .dwo) is limited to 2 billion, but that is fine as it's not possible to hit that number without switching to DWARF64 anyway. This patch is functionally equivalent to (and inspired by) the two patches (D61503 and D61504) by Jan Kratochvil, but there are differences in the implementation: - it uses an enum instead of a bool flag to differentiate the sections - it increases the size of DIERef struct instead of reducing the amount of addressable debug info - it sets up DWARFDebugInfo to store the units in a single vector instead of two. This sets us up for the future in which type units can also live in the debug_info section, and I believe it's cleaner because there's no need for unit index remapping There are no tests with this patch as this is essentially NFC until we start parsing type units from the debug_types section. Reviewers: JDevlieghere, clayborg, aprantl Subscribers: arphaman, jankratochvil, lldb-commits Differential Revision: https://reviews.llvm.org/D61908 llvm-svn: 360872
2019-05-16 19:07:58 +08:00
uint8_t GetUnitType() const { return m_header.GetUnitType(); }
bool IsTypeUnit() const { return m_header.IsTypeUnit(); }
/// Return a list of address ranges resulting from a (possibly encoded)
/// range list starting at a given offset in the appropriate ranges section.
llvm::Expected<DWARFRangeList> FindRnglistFromOffset(dw_offset_t offset);
/// Return a list of address ranges retrieved from an encoded range
/// list whose offset is found via a table lookup given an index (DWARF v5
/// and later).
llvm::Expected<DWARFRangeList> FindRnglistFromIndex(uint32_t index);
/// Return a rangelist's offset based on an index. The index designates
/// an entry in the rangelist table's offset array and is supplied by
/// DW_FORM_rnglistx.
llvm::Optional<uint64_t> GetRnglistOffset(uint32_t Index) const {
if (!m_rnglist_table)
return llvm::None;
if (llvm::Optional<uint64_t> off = m_rnglist_table->getOffsetEntry(Index))
return *off + m_ranges_base;
return llvm::None;
}
llvm::Optional<uint64_t> GetLoclistOffset(uint32_t Index) {
if (!m_loclist_table_header)
return llvm::None;
llvm::Optional<uint64_t> Offset = m_loclist_table_header->getOffsetEntry(Index);
if (!Offset)
return llvm::None;
return *Offset + m_loclists_base;
}
/// Return the location table for parsing the given location list data. The
/// format is chosen according to the unit type. Never returns null.
std::unique_ptr<llvm::DWARFLocationTable>
GetLocationTable(const lldb_private::DataExtractor &data) const;
const lldb_private::DWARFDataExtractor &GetLocationData() const;
protected:
DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid,
const DWARFUnitHeader &header,
const DWARFAbbreviationDeclarationSet &abbrevs,
DIERef::Section section, bool is_dwo);
llvm::Error ExtractHeader(SymbolFileDWARF &dwarf,
const lldb_private::DWARFDataExtractor &data,
lldb::offset_t *offset_ptr);
// Get the DWARF unit DWARF debug information entry. Parse the single DIE
// if needed.
const DWARFDebugInfoEntry *GetUnitDIEPtrOnly() {
ExtractUnitDIEIfNeeded();
// m_first_die_mutex is not required as m_first_die is never cleared.
if (!m_first_die)
return NULL;
return &m_first_die;
}
// Get all DWARF debug informration entries. Parse all DIEs if needed.
const DWARFDebugInfoEntry *DIEPtr() {
ExtractDIEsIfNeeded();
if (m_die_array.empty())
return NULL;
return &m_die_array[0];
}
SymbolFileDWARF &m_dwarf;
std::unique_ptr<SymbolFileDWARFDwo> m_dwo_symbol_file;
DWARFUnitHeader m_header;
const DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr;
void *m_user_data = nullptr;
// The compile unit debug information entry item
DWARFDebugInfoEntry::collection m_die_array;
mutable llvm::sys::RWMutex m_die_array_mutex;
// It is used for tracking of ScopedExtractDIEs instances.
mutable llvm::sys::RWMutex m_die_array_scoped_mutex;
// ScopedExtractDIEs instances should not call ClearDIEsRWLocked()
// as someone called ExtractDIEsIfNeeded().
std::atomic<bool> m_cancel_scopes;
// GetUnitDIEPtrOnly() needs to return pointer to the first DIE.
// But the first element of m_die_array after ExtractUnitDIEIfNeeded()
// would possibly move in memory after later ExtractDIEsIfNeeded().
DWARFDebugInfoEntry m_first_die;
llvm::sys::RWMutex m_first_die_mutex;
// A table similar to the .debug_aranges table, but this one points to the
// exact DW_TAG_subprogram DIEs
std::unique_ptr<DWARFDebugAranges> m_func_aranges_up;
dw_addr_t m_base_addr = 0;
DWARFProducer m_producer = eProducerInvalid;
uint32_t m_producer_version_major = 0;
uint32_t m_producer_version_minor = 0;
uint32_t m_producer_version_update = 0;
llvm::Optional<uint64_t> m_language_type;
lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate;
DWARF: Add some support for non-native directory separators Summary: If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. Reviewers: clayborg, zturner, JDevlieghere Subscribers: aprantl, lldb-commits Differential Revision: https://reviews.llvm.org/D56543 llvm-svn: 351328
2019-01-16 20:30:41 +08:00
llvm::Optional<lldb_private::FileSpec> m_comp_dir;
llvm::Optional<lldb_private::FileSpec> m_file_spec;
dw_addr_t m_addr_base = 0; ///< Value of DW_AT_addr_base.
dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base.
dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base.
/// Value of DW_AT_stmt_list.
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base.
llvm::Optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
llvm::Optional<llvm::DWARFListTableHeader> m_loclist_table_header;
const DIERef::Section m_section;
bool m_is_dwo;
private:
void ParseProducerInfo();
void ExtractDIEsRWLocked();
void ClearDIEsRWLocked();
void AddUnitDIE(const DWARFDebugInfoEntry &cu_die);
DWARF: Add some support for non-native directory separators Summary: If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. Reviewers: clayborg, zturner, JDevlieghere Subscribers: aprantl, lldb-commits Differential Revision: https://reviews.llvm.org/D56543 llvm-svn: 351328
2019-01-16 20:30:41 +08:00
void ComputeCompDirAndGuessPathStyle();
void ComputeAbsolutePath();
DWARF: Add some support for non-native directory separators Summary: If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. Reviewers: clayborg, zturner, JDevlieghere Subscribers: aprantl, lldb-commits Differential Revision: https://reviews.llvm.org/D56543 llvm-svn: 351328
2019-01-16 20:30:41 +08:00
DISALLOW_COPY_AND_ASSIGN(DWARFUnit);
};
#endif // SymbolFileDWARF_DWARFUnit_h_