2018-03-19 04:09:02 +08:00
|
|
|
//===-- DWARFUnit.h ---------------------------------------------*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2018-03-19 04:09:02 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef SymbolFileDWARF_DWARFUnit_h_
|
|
|
|
#define SymbolFileDWARF_DWARFUnit_h_
|
|
|
|
|
|
|
|
#include "DWARFDIE.h"
|
|
|
|
#include "DWARFDebugInfoEntry.h"
|
|
|
|
#include "lldb/lldb-enumerations.h"
|
2018-06-05 16:52:18 +08:00
|
|
|
#include "llvm/Support/RWMutex.h"
|
2018-06-05 17:56:14 +08:00
|
|
|
#include <atomic>
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
class DWARFUnit;
|
|
|
|
class DWARFCompileUnit;
|
|
|
|
class NameToDIE;
|
|
|
|
class SymbolFileDWARF;
|
|
|
|
class SymbolFileDWARFDwo;
|
|
|
|
|
|
|
|
typedef std::shared_ptr<DWARFUnit> DWARFUnitSP;
|
|
|
|
|
|
|
|
/// Identifies the producer recorded for a unit (judging by the enumerator
/// names, the compiler family that emitted the debug info).
///
/// eProducerInvalid is zero and is the value DWARFUnit::m_producer starts
/// with before a producer has been determined.
enum DWARFProducer {
  eProducerInvalid = 0,
  eProducerClang,
  eProducerGCC,
  eProducerLLVMGCC,
  /// Historical misspelling; kept so that existing users keep compiling.
  eProcucerOther,
  /// Correctly spelled alias for eProcucerOther (same value).
  eProducerOther = eProcucerOther
};
|
|
|
|
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Base class describing the header of any kind of "unit." Some information
|
|
|
|
/// is specific to certain unit types. We separate this class out so we can
|
|
|
|
/// parse the header before deciding what specific kind of unit to construct.
|
|
|
|
class DWARFUnitHeader {
|
|
|
|
dw_offset_t m_offset = 0;
|
|
|
|
dw_offset_t m_length = 0;
|
|
|
|
uint16_t m_version = 0;
|
|
|
|
dw_offset_t m_abbr_offset = 0;
|
|
|
|
uint8_t m_unit_type = 0;
|
|
|
|
uint8_t m_addr_size = 0;
|
2019-05-24 16:11:12 +08:00
|
|
|
|
|
|
|
uint64_t m_type_hash = 0;
|
|
|
|
uint32_t m_type_offset = 0;
|
|
|
|
|
2019-05-21 15:22:34 +08:00
|
|
|
uint64_t m_dwo_id = 0;
|
|
|
|
|
|
|
|
DWARFUnitHeader() = default;
|
|
|
|
|
|
|
|
public:
|
|
|
|
dw_offset_t GetOffset() const { return m_offset; }
|
|
|
|
uint16_t GetVersion() const { return m_version; }
|
|
|
|
uint16_t GetAddressByteSize() const { return m_addr_size; }
|
|
|
|
dw_offset_t GetLength() const { return m_length; }
|
|
|
|
dw_offset_t GetAbbrOffset() const { return m_abbr_offset; }
|
|
|
|
uint8_t GetUnitType() const { return m_unit_type; }
|
2019-05-24 16:11:12 +08:00
|
|
|
uint64_t GetTypeHash() const { return m_type_hash; }
|
|
|
|
dw_offset_t GetTypeOffset() const { return m_type_offset; }
|
2019-05-21 15:22:34 +08:00
|
|
|
bool IsTypeUnit() const {
|
|
|
|
return m_unit_type == DW_UT_type || m_unit_type == DW_UT_split_type;
|
|
|
|
}
|
|
|
|
uint32_t GetNextUnitOffset() const { return m_offset + m_length + 4; }
|
|
|
|
|
|
|
|
static llvm::Expected<DWARFUnitHeader>
|
2019-05-22 17:09:39 +08:00
|
|
|
extract(const lldb_private::DWARFDataExtractor &data, DIERef::Section section,
|
2019-05-21 15:22:34 +08:00
|
|
|
lldb::offset_t *offset_ptr);
|
|
|
|
};
|
|
|
|
|
[DWARF] Reimplement/simplify DWARFUnit::GetID
Summary:
The implementation of GetID used a relatively complicated algorithm,
which returned some kind of an offset of the unit in some file
(depending on the debug info flavour). The only thing this ID was used
for was to enable subsequent retrieval of the unit from the SymbolFile.
This can be made simpler if we just make the "ID" of the unit an index
into the list of the units belonging to the symbol file. We already
support indexed access to the units, so each unit already has a well
"index" -- this just makes it accessible from within the unit.
To make the distinction between "id" and "offset" clearer (and help catch
any misuses), I also rename DWARFDebugInfo::GetCompileUnit (which
accesses by offset) into DWARFDebugInfo::GetCompileUnitAtOffset.
On its own, this only brings a minor simplification, but it enables
further simplifications in the DIERef class (coming in a follow-up
patch).
Reviewers: JDevlieghere, clayborg, aprantl
Subscribers: arphaman, jdoerfert, lldb-commits, tberghammer, jankratochvil
Differential Revision: https://reviews.llvm.org/D61481
llvm-svn: 360014
2019-05-06 15:45:28 +08:00
|
|
|
class DWARFUnit : public lldb_private::UserID {
|
2018-05-24 20:12:49 +08:00
|
|
|
using die_iterator_range =
|
|
|
|
llvm::iterator_range<DWARFDebugInfoEntry::collection::iterator>;
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
public:
|
2019-05-21 15:22:34 +08:00
|
|
|
static llvm::Expected<DWARFUnitSP>
|
2019-06-14 21:01:16 +08:00
|
|
|
extract(SymbolFileDWARF &dwarf2Data, lldb::user_id_t uid,
|
2019-05-21 15:22:34 +08:00
|
|
|
const lldb_private::DWARFDataExtractor &debug_info,
|
2019-05-22 17:09:39 +08:00
|
|
|
DIERef::Section section, lldb::offset_t *offset_ptr);
|
2018-03-19 04:09:02 +08:00
|
|
|
virtual ~DWARFUnit();
|
|
|
|
|
[lldb/DWARF] Don't assume that a SymbolFileDWARFDwo contains one compile unit
Summary:
This is a preparatory patch to re-enable DWP support in lldb (we already
have code claiming to do that, but it has been completely broken for a
while now).
The idea of the new approach is to make the SymbolFileDWARFDwo class
handle both dwo and dwp files, similar to how llvm uses one DWARFContext
to handle the two.
The first step is to remove the assumption that a SymbolFileDWARFDwo
holds just a single compile unit, i.e. the GetBaseCompileUnit method.
This requires changing the way how we reach the skeleton compile unit
(and the lldb_private::CompileUnit) from a dwo unit, which was
previously done via GetSymbolFile()->GetBaseCompileUnit() (and some
virtual dispatch).
The new approach reuses the "user data" mechanism of DWARFUnits, which
was used to link dwarf units (both skeleton and split) to their
lldb_private counterparts. Now, this is done only for non-dwo units, and
instead of that, the dwo units holds a pointer to the relevant skeleton
unit.
Reviewers: JDevlieghere, aprantl, clayborg
Reviewed By: JDevlieghere, clayborg
Subscribers: arphaman, lldb-commits
Tags: #lldb
Differential Revision: https://reviews.llvm.org/D73781
2020-02-06 11:45:22 +08:00
|
|
|
/// Return the m_is_dwo flag of this unit.
// NOTE(review): presumably set from the constructor's is_dwo argument --
// confirm in the implementation file.
bool IsDWOUnit() {
  return m_is_dwo;
}
|
|
|
|
|
2018-05-30 01:17:46 +08:00
|
|
|
void ExtractUnitDIEIfNeeded();
|
2018-06-05 16:52:18 +08:00
|
|
|
void ExtractDIEsIfNeeded();
|
|
|
|
|
|
|
|
class ScopedExtractDIEs {
|
|
|
|
DWARFUnit *m_cu;
|
|
|
|
public:
|
|
|
|
bool m_clear_dies = false;
|
2019-06-14 21:01:16 +08:00
|
|
|
ScopedExtractDIEs(DWARFUnit &cu);
|
2018-06-05 16:52:18 +08:00
|
|
|
~ScopedExtractDIEs();
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(ScopedExtractDIEs);
|
|
|
|
ScopedExtractDIEs(ScopedExtractDIEs &&rhs);
|
|
|
|
ScopedExtractDIEs &operator=(ScopedExtractDIEs &&rhs);
|
|
|
|
};
|
|
|
|
ScopedExtractDIEs ExtractDIEsScoped();
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
DWARFDIE LookupAddress(const dw_addr_t address);
|
|
|
|
bool Verify(lldb_private::Stream *s) const;
|
2018-04-14 19:12:52 +08:00
|
|
|
virtual void Dump(lldb_private::Stream *s) const = 0;
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Get the data that contains the DIE information for this unit.
|
|
|
|
///
|
|
|
|
/// This will return the correct bytes that contain the data for
|
|
|
|
/// this DWARFUnit. It could be .debug_info or .debug_types
|
|
|
|
/// depending on where the data for this unit originates.
|
|
|
|
///
|
2019-03-12 01:09:29 +08:00
|
|
|
/// \return
|
2018-05-10 00:42:53 +08:00
|
|
|
/// The correct data for the DIE information in this unit.
|
2019-05-21 15:22:34 +08:00
|
|
|
const lldb_private::DWARFDataExtractor &GetData() const;
|
|
|
|
|
|
|
|
/// Get the size in bytes of the unit header.
|
2018-05-10 00:42:53 +08:00
|
|
|
///
|
2019-03-12 01:09:29 +08:00
|
|
|
/// \return
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Byte size of the unit header
|
|
|
|
uint32_t GetHeaderByteSize() const;
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
// Offset of the initial length field.
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Forward to the unit header's GetOffset().
dw_offset_t GetOffset() const {
  return m_header.GetOffset();
}
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Get the size in bytes of the length field in the header.
|
|
|
|
///
|
2019-03-13 04:51:05 +08:00
|
|
|
/// In DWARF32 this is just 4 bytes
|
2018-05-10 00:42:53 +08:00
|
|
|
///
|
2019-03-12 01:09:29 +08:00
|
|
|
/// \return
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Byte size of the compile unit header length field
|
2019-03-13 04:51:05 +08:00
|
|
|
size_t GetLengthByteSize() const {
  // Fixed size of the initial length field (4 bytes, the DWARF32 encoding).
  return 4;
}
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
/// Check whether \a die_offset lies inside this unit's DIE area, i.e. in the
/// half-open range [GetFirstDIEOffset(), GetNextUnitOffset()).
bool ContainsDIEOffset(dw_offset_t die_offset) const {
  // Anything before the first DIE (the header included) is outside.
  if (die_offset < GetFirstDIEOffset())
    return false;
  return die_offset < GetNextUnitOffset();
}
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Offset of the first DIE: the unit's offset plus the size of its header.
dw_offset_t GetFirstDIEOffset() const {
  const dw_offset_t unit_start = GetOffset();
  return unit_start + GetHeaderByteSize();
}
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Forward to the unit header's GetNextUnitOffset().
dw_offset_t GetNextUnitOffset() const {
  return m_header.GetNextUnitOffset();
}
|
2018-03-19 04:09:02 +08:00
|
|
|
// Size of the CU data (without initial length and without header).
|
|
|
|
size_t GetDebugInfoSize() const;
|
|
|
|
// Size of the CU data incl. header but without initial length.
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Forward to the unit header's GetLength().
uint32_t GetLength() const {
  return m_header.GetLength();
}
|
|
|
|
/// Forward to the unit header's GetVersion().
uint16_t GetVersion() const {
  return m_header.GetVersion();
}
|
2018-03-19 04:09:02 +08:00
|
|
|
const DWARFAbbreviationDeclarationSet *GetAbbreviations() const;
|
|
|
|
dw_offset_t GetAbbrevOffset() const;
|
2019-05-21 15:22:34 +08:00
|
|
|
/// Forward to the unit header's GetAddressByteSize().
// NOTE(review): the header accessor returns uint16_t while this one returns
// uint8_t, so the value is implicitly narrowed here.
uint8_t GetAddressByteSize() const {
  return m_header.GetAddressByteSize();
}
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Return m_addr_base (the value of DW_AT_addr_base).
dw_addr_t GetAddrBase() const {
  return m_addr_base;
}
|
2019-06-12 19:29:50 +08:00
|
|
|
/// Return m_base_addr (zero until a value has been stored in it).
dw_addr_t GetBaseAddress() const {
  return m_base_addr;
}
|
|
|
|
dw_offset_t GetLineTableOffset();
|
2018-05-10 00:42:53 +08:00
|
|
|
/// Return m_ranges_base (the value of DW_AT_rnglists_base).
dw_addr_t GetRangesBase() const {
  return m_ranges_base;
}
|
2018-11-29 20:44:10 +08:00
|
|
|
/// Return m_str_offsets_base (the value of DW_AT_str_offsets_base).
// NOTE(review): the return type is dw_addr_t although the member and
// SetStrOffsetsBase() use dw_offset_t.
dw_addr_t GetStrOffsetsBase() const {
  return m_str_offsets_base;
}
|
2018-10-31 18:14:03 +08:00
|
|
|
void SetAddrBase(dw_addr_t addr_base);
|
2019-11-22 21:42:28 +08:00
|
|
|
void SetLoclistsBase(dw_addr_t loclists_base);
|
2018-10-31 18:14:03 +08:00
|
|
|
void SetRangesBase(dw_addr_t ranges_base);
|
2018-11-29 20:44:10 +08:00
|
|
|
void SetStrOffsetsBase(dw_offset_t str_offsets_base);
|
DWARF: Don't compute address ranges for type units
Summary:
Type units don't describe any code, so they should never be the result
of any address lookup queries.
Previously, we would compute the address ranges for the type units
via the line tables they reference because the type units looked a lot
like line-tables-only compile units. However, this is not correct, as
the line tables are only referenced from type units so that other
declarations can use the file names contained in them.
In this patch I make the BuildAddressRangeTable function virtual, and
implement it only for compile units.
Testing this was a bit tricky, because the behavior depends on the order
in which we add things to the address range map. This rarely caused a
problem with DWARF v4 type units, as they are always added after all
CUs. It happened more frequently with DWARF v5, as there clang emits the
type units first. However, this is still not something that it is
required to do, so for testing I've created an assembly file where I've
deliberately sandwiched a compile unit between two type units, which
should isolate us from both changes in how the compiler emits the units
and changes in the order we process them.
Reviewers: clayborg, aprantl, JDevlieghere
Subscribers: jdoerfert, lldb-commits
Differential Revision: https://reviews.llvm.org/D62178
llvm-svn: 361465
2019-05-23 17:07:51 +08:00
|
|
|
virtual void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) = 0;
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
lldb::ByteOrder GetByteOrder() const;
|
|
|
|
|
2018-04-14 19:12:52 +08:00
|
|
|
const DWARFDebugAranges &GetFunctionAranges();
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
void SetBaseAddress(dw_addr_t base_addr);
|
|
|
|
|
2018-05-25 04:51:13 +08:00
|
|
|
/// Wrap the unit DIE pointer obtained from GetUnitDIEPtrOnly() together with
/// this unit. The value is built as a DWARFDIE and returned as DWARFBaseDIE.
DWARFBaseDIE GetUnitDIEOnly() {
  const DWARFDebugInfoEntry *unit_die = GetUnitDIEPtrOnly();
  return DWARFDIE(this, unit_die);
}
|
2018-03-19 04:09:02 +08:00
|
|
|
|
2018-04-14 19:12:52 +08:00
|
|
|
/// Wrap the pointer returned by DIEPtr() together with this unit in a
/// DWARFDIE.
DWARFDIE DIE() {
  const DWARFDebugInfoEntry *first_die = DIEPtr();
  return DWARFDIE(this, first_die);
}
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
DWARFDIE GetDIE(dw_offset_t die_offset);
|
|
|
|
|
DWARF: Add "dwo_num" field to the DIERef class
Summary:
When dwo support was introduced, it used a trick where debug info
entries were referenced by the offset of the compile unit in the main
file, but the die offset was relative to the dwo file. Although there
was some elegance to it, this representation was starting to reach its
breaking point:
- the fact that the skeleton compile unit owned the DWO file meant that
it was impossible (or at least hard and unintuitive) to support DWO
files containing more than one compile unit. These kinds of files are
produced by LTO for example.
- it made it impossible to reference any DIEs in the skeleton compile
unit (although the skeleton units are generally empty, clang still
puts some info into them with -fsplit-dwarf-inlining).
- (current motivation) it made it very hard to support type units placed
in DWO files, as type units don't have any skeleton units which could
be referenced in the main file
This patch addresses this problem by introducing an new
"dwo_num" field to the DIERef class, whose purpose is to identify the
dwo file. It's kind of similar to the dwo_id field in DWARF5 unit
headers, but while this is a 64bit hash whose main purpose is to catch
file mismatches, this is just a smaller integer used to identify a
loaded dwo file. Currently, this is based on the index of the skeleton
compile unit which owns the dwo file, but it is intended to be
eventually independent of that (to support the LTO use case).
Simultaneously the cu_offset is dropped to conserve space, as it is no
longer necessary. This means we can remove the "BaseObjectOffset" field
from the DWARFUnit class. It also means we can remove some of the
workarounds put in place to support the skeleton-unit+dwo-die combo.
More work is needed to remove all of them, which is out of scope of this
patch.
Reviewers: JDevlieghere, clayborg, aprantl
Subscribers: mehdi_amini, dexonsmith, arphaman, lldb-commits
Differential Revision: https://reviews.llvm.org/D63428
llvm-svn: 364009
2019-06-21 15:56:50 +08:00
|
|
|
DWARFUnit &GetNonSkeletonUnit();
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
static uint8_t GetAddressByteSize(const DWARFUnit *cu);
|
|
|
|
|
|
|
|
static uint8_t GetDefaultAddressSize();
|
|
|
|
|
|
|
|
void *GetUserData() const;
|
|
|
|
|
|
|
|
void SetUserData(void *d);
|
|
|
|
|
|
|
|
bool Supports_DW_AT_APPLE_objc_complete_type();
|
|
|
|
|
|
|
|
bool DW_AT_decl_file_attributes_are_invalid();
|
|
|
|
|
|
|
|
bool Supports_unnamed_objc_bitfields();
|
|
|
|
|
2019-06-14 21:01:16 +08:00
|
|
|
/// Return the SymbolFileDWARF reference stored in m_dwarf.
SymbolFileDWARF &GetSymbolFileDWARF() const {
  return m_dwarf;
}
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
DWARFProducer GetProducer();
|
|
|
|
|
|
|
|
uint32_t GetProducerVersionMajor();
|
|
|
|
|
|
|
|
uint32_t GetProducerVersionMinor();
|
|
|
|
|
|
|
|
uint32_t GetProducerVersionUpdate();
|
|
|
|
|
2020-01-31 22:16:31 +08:00
|
|
|
uint64_t GetDWARFLanguageType();
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
bool GetIsOptimized();
|
|
|
|
|
DWARF: Add some support for non-native directory separators
Summary:
If we opened a file which was produced on system with different path
syntax, we would parse the paths from the debug info incorrectly.
The reason for that is that we would parse the paths as they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.
This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the producer, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.
This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.
Reviewers: clayborg, zturner, JDevlieghere
Subscribers: aprantl, lldb-commits
Differential Revision: https://reviews.llvm.org/D56543
llvm-svn: 351328
2019-01-16 20:30:41 +08:00
|
|
|
const lldb_private::FileSpec &GetCompilationDirectory();
|
2019-06-12 19:29:50 +08:00
|
|
|
const lldb_private::FileSpec &GetAbsolutePath();
|
|
|
|
lldb_private::FileSpec GetFile(size_t file_idx);
|
DWARF: Add some support for non-native directory separators
Summary:
If we opened a file which was produced on system with different path
syntax, we would parse the paths from the debug info incorrectly.
The reason for that is that we would parse the paths as they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.
This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the produces, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.
This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.
Reviewers: clayborg, zturner, JDevlieghere
Subscribers: aprantl, lldb-commits
Differential Revision: https://reviews.llvm.org/D56543
llvm-svn: 351328
2019-01-16 20:30:41 +08:00
|
|
|
lldb_private::FileSpec::Style GetPathStyle();
|
|
|
|
|
2019-12-21 22:29:57 +08:00
|
|
|
SymbolFileDWARFDwo *GetDwoSymbolFile();
|
2018-03-19 04:09:02 +08:00
|
|
|
|
2018-05-24 20:12:49 +08:00
|
|
|
/// Return an iterator range over m_die_array, after calling
/// ExtractDIEsIfNeeded().
die_iterator_range dies() {
  ExtractDIEsIfNeeded();
  auto first = m_die_array.begin();
  auto last = m_die_array.end();
  return die_iterator_range(first, last);
}
|
|
|
|
|
2019-05-22 17:09:39 +08:00
|
|
|
/// Return the DIERef::Section value stored in m_section.
DIERef::Section GetDebugSection() const {
  return m_section;
}
|
DWARF: Add ability to reference debug info coming from multiple sections
Summary:
This patch adds the ability to precisely address debug info in
situations when a single file can have more than one debug-info-bearing
sections (as is the case with type units in DWARF v4).
The changes here can be classified into roughly three categories:
- the code which addresses a debug info by offset gets an additional
argument, which specifies the section one should look into.
- the DIERef class also gets an additional member variable specifying
the section. This way, code dealing with DIERefs can know which
section is the object referring to.
- the user_id_t encoding steals one bit from the dwarf_id field to store
the section. This means the total number of separate object files
(apple .o, or normal .dwo) is limited to 2 billion, but that is fine
as it's not possible to hit that number without switching to DWARF64
anyway.
This patch is functionally equivalent to (and inspired by) the two
patches (D61503 and D61504) by Jan Kratochvil, but there are differences
in the implementation:
- it uses an enum instead of a bool flag to differentiate the sections
- it increases the size of DIERef struct instead of reducing the amount
of addressable debug info
- it sets up DWARFDebugInfo to store the units in a single vector
instead of two. This sets us up for the future in which type units can
also live in the debug_info section, and I believe it's cleaner
because there's no need for unit index remapping
There are no tests with this patch as this is essentially NFC until
we start parsing type units from the debug_types section.
Reviewers: JDevlieghere, clayborg, aprantl
Subscribers: arphaman, jankratochvil, lldb-commits
Differential Revision: https://reviews.llvm.org/D61908
llvm-svn: 360872
2019-05-16 19:07:58 +08:00
|
|
|
|
2019-05-24 16:11:12 +08:00
|
|
|
/// Forward to the unit header's GetUnitType().
uint8_t GetUnitType() const {
  return m_header.GetUnitType();
}
|
2019-06-12 19:29:50 +08:00
|
|
|
/// Forward to the unit header's IsTypeUnit().
bool IsTypeUnit() const {
  return m_header.IsTypeUnit();
}
|
2019-05-24 16:11:12 +08:00
|
|
|
|
2019-05-29 17:22:36 +08:00
|
|
|
/// Return a list of address ranges resulting from a (possibly encoded)
|
|
|
|
/// range list starting at a given offset in the appropriate ranges section.
|
[lldb/DWARF] Switch to llvm debug_rnglists parser
Summary:
Our rnglist support was working only for the trivial cases (one CU),
because we only ever parsed one contribution out of the debug_rnglists
section. This means we were never able to resolve range lists for the
second and subsequent units (DW_FORM_sec_offset references came out
blank, and DW_FORM_rnglistx references always used the ranges lists from
the first unit).
Since both llvm and lldb rnglist parsers are sufficiently
self-contained, and operate similarly, we can fix this problem by
switching to the llvm parser instead. Besides the changes which are due
to variations in the interface, the main thing is that now the range
list object is a member of the DWARFUnit, instead of the entire symbol
file. This ensures that each unit can get it's own private set of range
list indices, and is consistent with how llvm's DWARFUnit does it
(overall, I've tried to structure the code the same way as the llvm
version).
I've also added a test case for the two unit scenario.
Reviewers: JDevlieghere, aprantl, clayborg
Subscribers: dblaikie, lldb-commits
Tags: #lldb
Differential Revision: https://reviews.llvm.org/D71021
2019-12-04 23:06:44 +08:00
|
|
|
llvm::Expected<DWARFRangeList> FindRnglistFromOffset(dw_offset_t offset);
|
2019-05-29 17:22:36 +08:00
|
|
|
|
|
|
|
/// Return a list of address ranges retrieved from an encoded range
|
|
|
|
/// list whose offset is found via a table lookup given an index (DWARF v5
|
|
|
|
/// and later).
|
[lldb/DWARF] Switch to llvm debug_rnglists parser
Summary:
Our rnglist support was working only for the trivial cases (one CU),
because we only ever parsed one contribution out of the debug_rnglists
section. This means we were never able to resolve range lists for the
second and subsequent units (DW_FORM_sec_offset references came out
blang, and DW_FORM_rnglistx references always used the ranges lists from
the first unit).
Since both llvm and lldb rnglist parsers are sufficiently
self-contained, and operate similarly, we can fix this problem by
switching to the llvm parser instead. Besides the changes which are due
to variations in the interface, the main thing is that now the range
list object is a member of the DWARFUnit, instead of the entire symbol
file. This ensures that each unit can get it's own private set of range
list indices, and is consistent with how llvm's DWARFUnit does it
(overall, I've tried to structure the code the same way as the llvm
version).
I've also added a test case for the two unit scenario.
Reviewers: JDevlieghere, aprantl, clayborg
Subscribers: dblaikie, lldb-commits
Tags: #lldb
Differential Revision: https://reviews.llvm.org/D71021
2019-12-04 23:06:44 +08:00
|
|
|
llvm::Expected<DWARFRangeList> FindRnglistFromIndex(uint32_t index);
|
|
|
|
|
|
|
|
/// Return a rangelist's offset based on an index. The index designates
|
|
|
|
/// an entry in the rangelist table's offset array and is supplied by
|
|
|
|
/// DW_FORM_rnglistx.
|
|
|
|
llvm::Optional<uint64_t> GetRnglistOffset(uint32_t Index) const {
  // Without a rangelist table there is nothing to look the index up in.
  if (m_rnglist_table) {
    llvm::Optional<uint64_t> entry = m_rnglist_table->getOffsetEntry(Index);
    // The table entry is rebased by m_ranges_base before it is returned.
    if (entry)
      return *entry + m_ranges_base;
  }
  return llvm::None;
}
|
2019-05-29 17:22:36 +08:00
|
|
|
|
2019-11-22 21:42:28 +08:00
|
|
|
/// Return a location list's offset based on an index: the entry found in
/// m_loclist_table_header for \a Index, plus m_loclists_base. Returns
/// llvm::None when there is no table header or no entry for the index.
llvm::Optional<uint64_t> GetLoclistOffset(uint32_t Index) {
  if (m_loclist_table_header) {
    if (llvm::Optional<uint64_t> entry =
            m_loclist_table_header->getOffsetEntry(Index))
      return *entry + m_loclists_base;
  }
  return llvm::None;
}
|
|
|
|
|
2019-12-20 16:42:37 +08:00
|
|
|
/// Return the location table for parsing the given location list data. The
|
|
|
|
/// format is chosen according to the unit type. Never returns null.
|
|
|
|
std::unique_ptr<llvm::DWARFLocationTable>
|
|
|
|
GetLocationTable(const lldb_private::DataExtractor &data) const;
|
|
|
|
|
|
|
|
const lldb_private::DWARFDataExtractor &GetLocationData() const;
|
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
protected:
|
2019-06-14 21:01:16 +08:00
|
|
|
DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid,
|
2019-05-21 15:22:34 +08:00
|
|
|
const DWARFUnitHeader &header,
|
2019-05-22 17:09:39 +08:00
|
|
|
const DWARFAbbreviationDeclarationSet &abbrevs,
|
2019-12-18 00:12:29 +08:00
|
|
|
DIERef::Section section, bool is_dwo);
|
2019-05-22 17:09:39 +08:00
|
|
|
|
2019-06-14 21:01:16 +08:00
|
|
|
llvm::Error ExtractHeader(SymbolFileDWARF &dwarf,
|
2019-05-22 17:09:39 +08:00
|
|
|
const lldb_private::DWARFDataExtractor &data,
|
|
|
|
lldb::offset_t *offset_ptr);
|
2018-04-14 19:12:52 +08:00
|
|
|
|
DWARF: Don't compute address ranges for type units
Summary:
Type units don't describe any code, so they should never be the result
of any address lookup queries.
Previously, we would compute the address ranges for the type units for
via the line tables they reference because the type units looked a lot
like line-tables-only compile units. However, this is not correct, as
the line tables are only referenced from type units so that other
declarations can use the file names contained in them.
In this patch I make the BuildAddressRangeTable function virtual, and
implement it only for compile units.
Testing this was a bit tricky, because the behavior depends on the order
in which we add things to the address range map. This rarely caused a
problem with DWARF v4 type units, as they are always added after all
CUs. It happened more frequently with DWARF v5, as there clang emits the
type units first. However, this is still not something that it is
required to do, so for testing I've created an assembly file where I've
deliberately sandwiched a compile unit between two type units, which
should isolate us from both changes in how the compiler emits the units
and changes in the order we process them.
Reviewers: clayborg, aprantl, JDevlieghere
Subscribers: jdoerfert, lldb-commits
Differential Revision: https://reviews.llvm.org/D62178
llvm-svn: 361465
2019-05-23 17:07:51 +08:00
|
|
|
// Get the DWARF unit DWARF debug information entry. Parse the single DIE
|
|
|
|
// if needed.
|
|
|
|
/// Return a pointer to m_first_die after calling ExtractUnitDIEIfNeeded(),
/// or nullptr when m_first_die evaluates to false.
const DWARFDebugInfoEntry *GetUnitDIEPtrOnly() {
  ExtractUnitDIEIfNeeded();
  // m_first_die_mutex is not required as m_first_die is never cleared.
  if (!m_first_die)
    return nullptr;
  return &m_first_die;
}
|
|
|
|
|
|
|
|
// Get all DWARF debug information entries. Parse all DIEs if needed.
|
|
|
|
/// Return a pointer to the first element of m_die_array after calling
/// ExtractDIEsIfNeeded(), or nullptr when the array is empty.
const DWARFDebugInfoEntry *DIEPtr() {
  ExtractDIEsIfNeeded();
  if (m_die_array.empty())
    return nullptr;
  return &m_die_array[0];
}
|
|
|
|
|
2019-06-14 21:01:16 +08:00
|
|
|
SymbolFileDWARF &m_dwarf;
|
2018-04-14 19:12:52 +08:00
|
|
|
std::unique_ptr<SymbolFileDWARFDwo> m_dwo_symbol_file;
|
2019-05-21 15:22:34 +08:00
|
|
|
DWARFUnitHeader m_header;
|
2018-04-14 19:12:52 +08:00
|
|
|
const DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr;
|
|
|
|
void *m_user_data = nullptr;
|
|
|
|
// The compile unit debug information entry item
|
|
|
|
DWARFDebugInfoEntry::collection m_die_array;
|
2018-06-05 16:52:18 +08:00
|
|
|
mutable llvm::sys::RWMutex m_die_array_mutex;
|
|
|
|
// It is used for tracking of ScopedExtractDIEs instances.
|
|
|
|
mutable llvm::sys::RWMutex m_die_array_scoped_mutex;
|
|
|
|
// ScopedExtractDIEs instances should not call ClearDIEsRWLocked()
|
|
|
|
// as someone called ExtractDIEsIfNeeded().
|
|
|
|
std::atomic<bool> m_cancel_scopes;
|
2018-05-30 01:17:46 +08:00
|
|
|
// GetUnitDIEPtrOnly() needs to return pointer to the first DIE.
|
|
|
|
// But the first element of m_die_array after ExtractUnitDIEIfNeeded()
|
|
|
|
// would possibly move in memory after later ExtractDIEsIfNeeded().
|
|
|
|
DWARFDebugInfoEntry m_first_die;
|
2018-06-05 16:52:18 +08:00
|
|
|
llvm::sys::RWMutex m_first_die_mutex;
|
2018-04-14 19:12:52 +08:00
|
|
|
// A table similar to the .debug_aranges table, but this one points to the
|
|
|
|
// exact DW_TAG_subprogram DIEs
|
2019-02-13 14:25:41 +08:00
|
|
|
std::unique_ptr<DWARFDebugAranges> m_func_aranges_up;
|
2018-04-14 19:12:52 +08:00
|
|
|
dw_addr_t m_base_addr = 0;
|
|
|
|
DWARFProducer m_producer = eProducerInvalid;
|
|
|
|
uint32_t m_producer_version_major = 0;
|
|
|
|
uint32_t m_producer_version_minor = 0;
|
|
|
|
uint32_t m_producer_version_update = 0;
|
2020-01-31 22:16:31 +08:00
|
|
|
llvm::Optional<uint64_t> m_language_type;
|
2018-04-14 19:12:52 +08:00
|
|
|
lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate;
|
DWARF: Add some support for non-native directory separators
Summary:
If we opened a file which was produced on system with different path
syntax, we would parse the paths from the debug info incorrectly.
The reason for that is that we would parse the paths as they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.
This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the produces, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.
This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.
Reviewers: clayborg, zturner, JDevlieghere
Subscribers: aprantl, lldb-commits
Differential Revision: https://reviews.llvm.org/D56543
llvm-svn: 351328
2019-01-16 20:30:41 +08:00
|
|
|
llvm::Optional<lldb_private::FileSpec> m_comp_dir;
|
2019-06-12 19:29:50 +08:00
|
|
|
llvm::Optional<lldb_private::FileSpec> m_file_spec;
|
2019-11-22 21:42:28 +08:00
|
|
|
dw_addr_t m_addr_base = 0; ///< Value of DW_AT_addr_base.
|
|
|
|
dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base.
|
|
|
|
dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base.
|
2019-06-12 19:29:50 +08:00
|
|
|
|
|
|
|
/// Value of DW_AT_stmt_list.
|
|
|
|
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
|
|
|
|
|
2018-11-29 20:44:10 +08:00
|
|
|
dw_offset_t m_str_offsets_base = 0; ///< Value of DW_AT_str_offsets_base.
|
[lldb/DWARF] Switch to llvm debug_rnglists parser
Summary:
Our rnglist support was working only for the trivial cases (one CU),
because we only ever parsed one contribution out of the debug_rnglists
section. This means we were never able to resolve range lists for the
second and subsequent units (DW_FORM_sec_offset references came out
blank, and DW_FORM_rnglistx references always used the range lists from
the first unit).
Since both llvm and lldb rnglist parsers are sufficiently
self-contained, and operate similarly, we can fix this problem by
switching to the llvm parser instead. Besides the changes which are due
to variations in the interface, the main thing is that now the range
list object is a member of the DWARFUnit, instead of the entire symbol
file. This ensures that each unit can get its own private set of range
list indices, and is consistent with how llvm's DWARFUnit does it
(overall, I've tried to structure the code the same way as the llvm
version).
I've also added a test case for the two unit scenario.
Reviewers: JDevlieghere, aprantl, clayborg
Subscribers: dblaikie, lldb-commits
Tags: #lldb
Differential Revision: https://reviews.llvm.org/D71021
2019-12-04 23:06:44 +08:00
|
|
|
|
|
|
|
/// Range list table (llvm's debug_rnglists representation) owned by this
/// unit rather than by the symbol file, so that each unit has its own
/// private set of range list indices. Empty until a table is assigned.
llvm::Optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
|
2019-11-22 21:42:28 +08:00
|
|
|
/// Optional llvm list-table header; empty until a value is assigned.
/// NOTE(review): the name suggests this is the header of the location list
/// table used by this unit -- confirm against the implementation.
llvm::Optional<llvm::DWARFListTableHeader> m_loclist_table_header;
|
[lldb/DWARF] Switch to llvm debug_rnglists parser
Summary:
Our rnglist support was working only for the trivial cases (one CU),
because we only ever parsed one contribution out of the debug_rnglists
section. This means we were never able to resolve range lists for the
second and subsequent units (DW_FORM_sec_offset references came out
blank, and DW_FORM_rnglistx references always used the range lists from
the first unit).
Since both llvm and lldb rnglist parsers are sufficiently
self-contained, and operate similarly, we can fix this problem by
switching to the llvm parser instead. Besides the changes which are due
to variations in the interface, the main thing is that now the range
list object is a member of the DWARFUnit, instead of the entire symbol
file. This ensures that each unit can get its own private set of range
list indices, and is consistent with how llvm's DWARFUnit does it
(overall, I've tried to structure the code the same way as the llvm
version).
I've also added a test case for the two unit scenario.
Reviewers: JDevlieghere, aprantl, clayborg
Subscribers: dblaikie, lldb-commits
Tags: #lldb
Differential Revision: https://reviews.llvm.org/D71021
2019-12-04 23:06:44 +08:00
|
|
|
|
2019-05-22 17:09:39 +08:00
|
|
|
/// Immutable section tag for this unit (const, with no in-class initializer,
/// so it has to be supplied when the unit is constructed).
/// NOTE(review): presumably identifies which debug section the unit was
/// parsed from -- confirm against DIERef::Section.
const DIERef::Section m_section;
|
2019-12-18 00:12:29 +08:00
|
|
|
/// NOTE(review): presumably true when this unit is a DWO (split DWARF)
/// unit -- confirm. There is no in-class initializer here, so the value is
/// only meaningful once a constructor has set it.
bool m_is_dwo;
|
2018-03-19 04:09:02 +08:00
|
|
|
|
|
|
|
private:
|
2018-04-14 19:12:52 +08:00
|
|
|
/// NOTE(review): presumably works out which producer (see DWARFProducer)
/// emitted this unit and caches the result -- confirm against the
/// definition.
void ParseProducerInfo();
|
2018-06-05 16:52:18 +08:00
|
|
|
/// Extracts the DIEs of this unit.
/// NOTE(review): the "RWLocked" suffix suggests the caller must already
/// hold the relevant read/write lock -- confirm against the definition.
void ExtractDIEsRWLocked();
|
|
|
|
/// Clears the DIEs of this unit.
/// NOTE(review): the "RWLocked" suffix suggests the caller must already
/// hold the relevant read/write lock -- confirm against the definition.
void ClearDIEsRWLocked();
|
2018-04-14 19:12:52 +08:00
|
|
|
|
2018-05-30 01:17:46 +08:00
|
|
|
/// Takes the unit-level debug info entry \p cu_die.
/// NOTE(review): presumably records the unit DIE and reads unit-wide
/// attributes from it -- confirm against the definition.
void AddUnitDIE(const DWARFDebugInfoEntry &cu_die);
|
2018-03-19 04:09:02 +08:00
|
|
|
|
DWARF: Add some support for non-native directory separators
Summary:
If we opened a file which was produced on a system with a different path
syntax, we would parse the paths from the debug info incorrectly.
The reason for that is that we would parse the paths as if they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.
This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the producer, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.
This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name to work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.
Reviewers: clayborg, zturner, JDevlieghere
Subscribers: aprantl, lldb-commits
Differential Revision: https://reviews.llvm.org/D56543
llvm-svn: 351328
2019-01-16 20:30:41 +08:00
|
|
|
/// Computes the compilation directory of this unit and guesses the path
/// syntax used by its producer. DWARF carries no definitive path-style
/// information, so the style can only be guessed.
/// NOTE(review): per the introducing change the guess is based on the
/// unit's DW_AT_comp_dir attribute -- confirm this still holds.
void ComputeCompDirAndGuessPathStyle();
|
2019-06-12 19:29:50 +08:00
|
|
|
/// NOTE(review): presumably computes the absolute path of this unit's
/// primary file and stores it in m_file_spec -- confirm against the
/// definition.
void ComputeAbsolutePath();
|
DWARF: Add some support for non-native directory separators
Summary:
If we opened a file which was produced on a system with a different path
syntax, we would parse the paths from the debug info incorrectly.
The reason for that is that we would parse the paths as if they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.
This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the producer, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.
This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name to work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.
Reviewers: clayborg, zturner, JDevlieghere
Subscribers: aprantl, lldb-commits
Differential Revision: https://reviews.llvm.org/D56543
llvm-svn: 351328
2019-01-16 20:30:41 +08:00
|
|
|
|
2018-03-19 04:09:02 +08:00
|
|
|
// DWARFUnit objects are not meant to be copied or copy-assigned.
DISALLOW_COPY_AND_ASSIGN(DWARFUnit);
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif // SymbolFileDWARF_DWARFUnit_h_
|