llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

2069 lines
76 KiB
C++
Raw Normal View History

//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
#include "DebugLocEntry.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
2013-07-24 06:16:41 +08:00
static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
static cl::opt<bool>
GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate GNU-style pubnames and pubtypes"),
cl::init(false));
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
cl::init(false));
namespace {
2014-01-28 07:50:03 +08:00
enum DefaultOnOff { Default, Enable, Disable };
}
static cl::opt<DefaultOnOff> UnknownLocations(
"use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::values(clEnumVal(Default, "At top of block or after label"),
clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
cl::init(Default));
2013-07-24 06:16:41 +08:00
static cl::opt<DefaultOnOff>
DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::desc("Output prototype dwarf accelerator tables."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
2013-07-24 06:16:41 +08:00
cl::init(Default));
2013-07-24 06:16:41 +08:00
static cl::opt<DefaultOnOff>
SplitDwarf("split-dwarf", cl::Hidden,
2013-12-05 07:24:28 +08:00
cl::desc("Output DWARF5 split debug info."),
2013-07-24 06:16:41 +08:00
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
2013-07-24 06:16:41 +08:00
cl::init(Default));
static cl::opt<DefaultOnOff>
DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate DWARF pubnames and pubtypes sections"),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
cl::init(Default));
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
AbstractLinkageNames
};
static cl::opt<LinkageNameOption>
DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
cl::desc("Which DWARF linkage-name attributes to emit."),
cl::values(clEnumValN(DefaultLinkageNames, "Default",
"Default for platform"),
clEnumValN(AllLinkageNames, "All", "All"),
clEnumValN(AbstractLinkageNames, "Abstract",
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
static const char *const DWARFGroupName = "dwarf";
static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
BS.EmitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
//===----------------------------------------------------------------------===//
bool DbgVariable::isBlockByrefVariable() const {
assert(Var && "Invalid complex DbgVariable!");
return Var->getType().resolve()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
DIType *Ty = Var->getType().resolve();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
/* Byref variables, in Blocks, are declared by the programmer as
"SomeType VarName;", but the compiler creates a
__Block_byref_x_VarName struct, and gives the variable VarName
either the struct, or a pointer to the struct, as its type. This
is necessary for various behind-the-scenes things the compiler
needs to do with by-reference variables in blocks.
However, as far as the original *programmer* is concerned, the
variable should still have type 'SomeType', as originally declared.
The following function dives into the __Block_byref_x_VarName
struct to find the original type of the variable. This will be
passed back to the code generating the type for the Debug
Information Entry for the variable 'VarName'. 'VarName' will then
have the original type 'SomeType' in its debug information.
The original type 'SomeType' will be the type of the field named
'VarName' inside the __Block_byref_x_VarName struct.
NOTE: In order for this to not completely fail on the debugger
side, the Debug Information Entry for the variable VarName needs to
have a DW_AT_location that tells the debugger how to unwind through
the pointers and __Block_byref_x_VarName struct to find the actual
value of the variable. The function addBlockByrefType does this. */
DIType *subType = Ty;
uint16_t tag = Ty->getTag();
2013-09-05 03:53:21 +08:00
if (tag == dwarf::DW_TAG_pointer_type)
subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
auto Elements = cast<DICompositeType>(subType)->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
auto *DT = cast<DIDerivedType>(Elements[i]);
if (getName() == DT->getName())
return resolve(DT->getBaseType());
}
}
return Ty;
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
assert(all_of(FrameIndexExprs,
[](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
});
return FrameIndexExprs;
}
static const DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
2014-04-24 14:44:33 +08:00
CurFn = nullptr;
const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning." The target option takes
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
else if (TT.isPS4CPU())
DebuggerTuning = DebuggerKind::SCE;
else
DebuggerTuning = DebuggerKind::GDB;
// Turn on accelerator tables for LLDB by default.
if (DwarfAccelTables == Default)
HasDwarfAccelTables = tuneForLLDB();
else
2013-08-27 04:58:35 +08:00
HasDwarfAccelTables = DwarfAccelTables == Enable;
HasAppleExtensionAttributes = tuneForLLDB();
// Handle split DWARF. Off by default for now.
if (SplitDwarf == Default)
HasSplitDwarf = false;
else
HasSplitDwarf = SplitDwarf == Enable;
// Pubnames/pubtypes on by default for GDB.
if (DwarfPubSections == Default)
HasDwarfPubSections = tuneForGDB();
else
HasDwarfPubSections = DwarfPubSections == Enable;
// SCE defaults to linkage names only for abstract subprograms.
if (DwarfLinkageNames == DefaultLinkageNames)
UseAllLinkageNames = !tuneForSCE();
else
UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
// Use dwarf 4 by default if nothing is requested.
DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
// Work around a GDB bug. GDB doesn't support the standard opcode;
// SCE doesn't support GNU's; LLDB prefers the standard opcode, which
// is defined as of DWARF 3.
// See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
// https://sourceware.org/bugzilla/show_bug.cgi?id=11616
UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
// GDB does not fully support the DWARF 4 representation for bitfields.
UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() { }
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
static bool hasObjCCategory(StringRef Name) {
2013-11-19 17:04:36 +08:00
if (!isObjCClass(Name))
return false;
return Name.find(") ") != StringRef::npos;
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
StringRef &Category) {
if (!hasObjCCategory(In)) {
Class = In.slice(In.find('[') + 1, In.find(' '));
Category = "";
return;
}
Class = In.slice(In.find('[') + 1, In.find('('));
Category = In.slice(In.find('[') + 1, In.find(' '));
}
static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
// Add the various names to the Dwarf accelerator table names.
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
// is only slightly different than the lookup of non-standard ObjC names.
void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
if (!SP->isDefinition())
2013-11-19 17:04:36 +08:00
return;
addAccelName(SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
addAccelName(SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
addAccelObjC(Class, Die);
if (Category != "")
addAccelObjC(Category, Die);
// Also add the base method name to the name table.
addAccelName(getObjCMethodName(SP->getName()), Die);
}
}
/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
if (Scope->isAbstractScope())
return false;
// We don't create a DIE if there is no Range.
const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
if (Ranges.empty())
return true;
if (Ranges.size() > 1)
return false;
// We don't create a DIE if we have a single Range and the end label
// is null.
return !getLabelAfterInsn(Ranges.front().second);
}
template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(CU);
if (auto *SkelCU = CU.getSkeleton())
if (CU.getCUNode()->getSplitDebugInlining())
F(*SkelCU);
}
void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
ProcessedSPNodes.insert(SP);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
auto &CU = *CUMap.lookup(SP->getUnit());
forBothCUs(CU, [&](DwarfCompileUnit &CU) {
CU.constructAbstractSubprogramScopeDIE(Scope);
});
}
void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
if (!GenerateGnuPubSections)
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
StringRef FN = DIUnit->getFilename();
CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
DIE &Die = NewCU.getUnitDie();
InfoHolder.addUnit(std::move(OwnedUnit));
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
DIUnit->getSplitDebugFilename());
}
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
if (!CompilationDir.empty())
NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(NewCU, Die);
}
if (useAppleExtensionAttributes()) {
if (DIUnit->isOptimized())
NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
if (unsigned RVer = DIUnit->getRuntimeVersion())
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
}
if (useSplitDwarf())
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
DIUnit->getDWOId());
if (!DIUnit->getSplitDebugFilename().empty())
// This is a prefabricated skeleton CU.
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
DIUnit->getSplitDebugFilename());
}
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&Die, &NewCU});
return NewCU;
}
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N) {
if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
D->addChild(TheCU.constructImportedEntityDIE(N));
}
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
std::sort(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
if (A.Expr != B.Expr && A.Expr && B.Expr) {
auto FragmentA = A.Expr->getFragmentInfo();
auto FragmentB = B.Expr->getFragmentInfo();
if (FragmentA && FragmentB)
return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
}
return false;
});
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
return A.Expr == B.Expr;
}),
GVEs.end());
return GVEs;
}
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule() {
NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
DWARFGroupDescription, TimePassesIsEnabled);
if (DisableDebugInfoPrinting)
return;
const Module *M = MMI->getModule();
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
// Tell MMI whether we have debug info.
MMI->setDebugInfoAvailability(NumDebugCUs > 0);
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
for (const GlobalVariable &Global : M->globals()) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
Global.getDebugInfo(GVs);
for (auto *GVE : GVs)
GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
}
for (DICompileUnit *CUNode : M->debug_compile_units()) {
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
// Global Variables.
for (auto *GVE : CUNode->getGlobalVariables())
GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
if (Processed.insert(GV).second)
CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
}
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
CU.getOrCreateTypeDIE(cast<DIType>(Ty));
}
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
for (auto *IE : CUNode->getImportedEntities())
constructAndAddImportedEntityDIE(CU, IE);
}
}
void DwarfDebug::finishVariableDefinitions() {
for (const auto &Var : ConcreteVariables) {
DIE *VariableDie = Var->getDIE();
assert(VariableDie);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteVariables list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
assert(Unit);
DbgVariable *AbsVar = getExistingAbstractVariable(
InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
if (AbsVar && AbsVar->getDIE()) {
Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
*AbsVar->getDIE());
} else
Unit->applyVariableAttributes(*Var, *VariableDie);
}
}
void DwarfDebug::finishSubprogramDefinitions() {
for (const DISubprogram *SP : ProcessedSPNodes)
if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
CU.finishSubprogramDefinition(SP);
});
}
void DwarfDebug::finalizeModuleInfo() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
finishSubprogramDefinitions();
finishVariableDefinitions();
2013-12-05 07:24:38 +08:00
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
if (useSplitDwarf()) {
// Emit a unique identifier for this CU.
uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
if (!AddrPool.isEmpty()) {
const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
Sym, Sym);
}
if (!SkCU->getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
}
// If we have code split among multiple sections or non-contiguous
// ranges of code then emit a DW_AT_ranges attribute on the unit that will
// remain in the .o file, otherwise add a DW_AT_low_pc.
// FIXME: We should use ranges allow reordering of code ala
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1)
// A DW_AT_low_pc attribute may also be specified in combination with
// DW_AT_ranges to specify the default base address for use in
// location lists (see Section 2.6.2) and range lists (see Section
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
U.setBaseAddress(TheCU.getRanges().front().getStart());
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
if (CUNode->getMacros())
U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
U.getMacroLabelBegin(),
TLOF.getDwarfMacinfoSection()->getBeginSymbol());
}
// Compute DIE offsets and sizes.
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
SkeletonHolder.computeSizeAndOffsets();
}
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
2014-04-28 12:05:08 +08:00
assert(CurFn == nullptr);
assert(CurMI == nullptr);
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
// llvm.dbg.cu metadata node)
if (!MMI->hasDebugInfo())
2013-11-19 17:04:36 +08:00
return;
// Finalize the debug info for the module.
finalizeModuleInfo();
emitDebugStr();
if (useSplitDwarf())
emitDebugLocDWO();
else
// Emit info into a debug loc section.
emitDebugLoc();
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
// Emit all the DIEs into a debug info section.
emitDebugInfo();
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
// Emit info into a debug ranges section.
emitDebugRanges();
// Emit info into a debug macinfo section.
emitDebugMacinfo();
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
2012-08-23 15:32:06 +08:00
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
emitAccelNames();
emitAccelObjC();
emitAccelNamespaces();
emitAccelTypes();
}
// Emit the pubnames and pubtypes sections if requested.
if (HasDwarfPubSections) {
emitDebugPubNames(GenerateGnuPubSections);
emitDebugPubTypes(GenerateGnuPubSections);
}
2009-11-24 09:14:22 +08:00
// clean up.
AbstractVariables.clear();
}
// Find abstract variable, if any, associated with Var.
DbgVariable *
DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
const DILocalVariable *&Cleansed) {
// More then one inlined variable corresponds to one abstract variable.
Cleansed = IV.first;
auto I = AbstractVariables.find(Cleansed);
if (I != AbstractVariables.end())
return I->second.get();
return nullptr;
}
DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
const DILocalVariable *Cleansed;
return getExistingAbstractVariable(IV, Cleansed);
}
void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
if (getExistingAbstractVariable(IV, Cleansed))
return;
createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
InlinedVariable IV, const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
if (getExistingAbstractVariable(IV, Cleansed))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
createAbstractVariable(Cleansed, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DenseSet<InlinedVariable> &Processed) {
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
2013-11-19 17:04:36 +08:00
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
2014-04-24 14:44:33 +08:00
if (!Scope)
continue;
ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
ConcreteVariables.push_back(std::move(RegVar));
}
2010-05-21 03:57:06 +08:00
}
// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
2011-07-09 01:09:57 +08:00
Move the complex address expression out of DIVariable and into an extra argument of the llvm.dbg.declare/llvm.dbg.value intrinsics. Previously, DIVariable was a variable-length field that has an optional reference to a Metadata array consisting of a variable number of complex address expressions. In the case of OpPiece expressions this is wasting a lot of storage in IR, because when an aggregate type is, e.g., SROA'd into all of its n individual members, the IR will contain n copies of the DIVariable, all alike, only differing in the complex address reference at the end. By making the complex address into an extra argument of the dbg.value/dbg.declare intrinsics, all of the pieces can reference the same variable and the complex address expressions can be uniqued across the CU, too. Down the road, this will allow us to move other flags, such as "indirection" out of the DIVariable, too. The new intrinsics look like this: declare void @llvm.dbg.declare(metadata %storage, metadata %var, metadata %expr) declare void @llvm.dbg.value(metadata %storage, i64 %offset, metadata %var, metadata %expr) This patch adds a new LLVM-local tag to DIExpressions, so we can detect and pretty-print DIExpression metadata nodes. What this patch doesn't do: This patch does not touch the "Indirect" field in DIVariable; but moving that into the expression would be a natural next step. http://reviews.llvm.org/D4919 rdar://problem/17994491 Thanks to dblaikie and dexonsmith for reviewing this patch! Note: I accidentally committed a bogus older version of this patch previously. llvm-svn: 218787
2014-10-02 02:55:02 +08:00
assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
2011-07-09 01:09:57 +08:00
MachineLocation MLoc;
// If the second operand is an immediate, this is a
// register-indirect address.
if (!MI->getOperand(1).isImm())
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
return DebugLocEntry::Value(Expr, MLoc);
2011-07-09 01:09:57 +08:00
}
if (MI->getOperand(0).isImm())
return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
2011-07-09 01:09:57 +08:00
if (MI->getOperand(0).isFPImm())
return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
2011-07-09 01:09:57 +08:00
if (MI->getOperand(0).isCImm())
return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
2011-07-09 01:09:57 +08:00
Move the complex address expression out of DIVariable and into an extra argument of the llvm.dbg.declare/llvm.dbg.value intrinsics. Previously, DIVariable was a variable-length field that has an optional reference to a Metadata array consisting of a variable number of complex address expressions. In the case of OpPiece expressions this is wasting a lot of storage in IR, because when an aggregate type is, e.g., SROA'd into all of its n individual members, the IR will contain n copies of the DIVariable, all alike, only differing in the complex address reference at the end. By making the complex address into an extra argument of the dbg.value/dbg.declare intrinsics, all of the pieces can reference the same variable and the complex address expressions can be uniqued across the CU, too. Down the road, this will allow us to move other flags, such as "indirection" out of the DIVariable, too. The new intrinsics look like this: declare void @llvm.dbg.declare(metadata %storage, metadata %var, metadata %expr) declare void @llvm.dbg.value(metadata %storage, i64 %offset, metadata %var, metadata %expr) This patch adds a new LLVM-local tag to DIExpressions, so we can detect and pretty-print DIExpression metadata nodes. What this patch doesn't do: This patch does not touch the "Indirect" field in DIVariable; but moving that into the expression would be a natural next step. http://reviews.llvm.org/D4919 rdar://problem/17994491 Thanks to dblaikie and dexonsmith for reviewing this patch! Note: I accidentally committed a bogus older version of this patch previously. llvm-svn: 218787
2014-10-02 02:55:02 +08:00
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
2011-07-09 01:09:57 +08:00
}
/// \brief If this and Next are describing different fragments of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
return false;
// We can only merge entries if none of the fragments overlap any others.
// In doing so, we can take advantage of the fact that both lists are
// sorted.
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
for (; j < Next.Values.size(); ++j) {
int res = DebugHandlerBase::fragmentCmp(
cast<DIExpression>(Values[i].Expression),
cast<DIExpression>(Next.Values[j].Expression));
if (res == 0) // The two expressions overlap, we can't merge.
return false;
// Values[i] is entirely before Next.Values[j],
// so go back to the next entry of Values.
else if (res == -1)
break;
// Next.Values[j] is entirely before Values[i], so go on to the
// next entry of Next.Values.
}
}
addValues(Next.Values);
End = Next.End;
return true;
}
return false;
}
/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. If the ranges of several independent
/// fragments of the same variable overlap partially, split them up and
/// combine the ranges. The resulting DebugLocEntries are will have
/// strict monotonically increasing begin addresses and will never
/// overlap.
//
// Input:
//
// Ranges History [var, loc, fragment ofs size]
// 0 | [x, (reg0, fragment 0, 32)]
// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
// 2 | | ...
// 3 | [clobber reg0]
// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
2015-02-18 04:02:28 +08:00
// x.
//
// Output:
//
// [0-1] [x, (reg0, fragment 0, 32)]
// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4] [x, (reg1, fragment 32, 32)]
// [4- ] [x, (mem, fragment 0, 64)]
void
DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges) {
SmallVector<DebugLocEntry::Value, 4> OpenRanges;
for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
const MachineInstr *Begin = I->first;
const MachineInstr *End = I->second;
assert(Begin->isDebugValue() && "Invalid History entry");
// Check if a variable is inaccessible in this range.
if (Begin->getNumOperands() > 1 &&
Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
OpenRanges.clear();
continue;
}
// If this fragment overlaps with any open ranges, truncate them.
const DIExpression *DIExpr = Begin->getDebugExpression();
auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
return fragmentsOverlap(DIExpr, R.getExpression());
Move the complex address expression out of DIVariable and into an extra argument of the llvm.dbg.declare/llvm.dbg.value intrinsics. Previously, DIVariable was a variable-length field that has an optional reference to a Metadata array consisting of a variable number of complex address expressions. In the case of OpPiece expressions this is wasting a lot of storage in IR, because when an aggregate type is, e.g., SROA'd into all of its n individual members, the IR will contain n copies of the DIVariable, all alike, only differing in the complex address reference at the end. By making the complex address into an extra argument of the dbg.value/dbg.declare intrinsics, all of the pieces can reference the same variable and the complex address expressions can be uniqued across the CU, too. Down the road, this will allow us to move other flags, such as "indirection" out of the DIVariable, too. The new intrinsics look like this: declare void @llvm.dbg.declare(metadata %storage, metadata %var, metadata %expr) declare void @llvm.dbg.value(metadata %storage, i64 %offset, metadata %var, metadata %expr) This patch adds a new LLVM-local tag to DIExpressions, so we can detect and pretty-print DIExpression metadata nodes. What this patch doesn't do: This patch does not touch the "Indirect" field in DIVariable; but moving that into the expression would be a natural next step. http://reviews.llvm.org/D4919 rdar://problem/17994491 Thanks to dblaikie and dexonsmith for reviewing this patch! Note: I accidentally committed a bogus older version of this patch previously. llvm-svn: 218787
2014-10-02 02:55:02 +08:00
});
OpenRanges.erase(Last, OpenRanges.end());
const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
const MCSymbol *EndLabel;
if (End != nullptr)
EndLabel = getLabelAfterInsn(End);
else if (std::next(I) == Ranges.end())
EndLabel = Asm->getFunctionEnd();
else
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
auto Value = getDebugLocValue(Begin);
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
// If this is a fragment, it may belong to the current DebugLocEntry.
if (DIExpr->isFragment()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
// Attempt to add the fragment to the last entry.
if (!DebugLoc.empty())
if (DebugLoc.back().MergeValues(Loc))
couldMerge = true;
}
if (!couldMerge) {
// Need to add a new DebugLocEntry. Add all values from still
// valid non-overlapping fragments.
if (OpenRanges.size())
Loc.addValues(OpenRanges);
DebugLoc.push_back(std::move(Loc));
}
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
auto CurEntry = DebugLoc.rbegin();
Move the complex address expression out of DIVariable and into an extra argument of the llvm.dbg.declare/llvm.dbg.value intrinsics. Previously, DIVariable was a variable-length field that has an optional reference to a Metadata array consisting of a variable number of complex address expressions. In the case of OpPiece expressions this is wasting a lot of storage in IR, because when an aggregate type is, e.g., SROA'd into all of its n individual members, the IR will contain n copies of the DIVariable, all alike, only differing in the complex address reference at the end. By making the complex address into an extra argument of the dbg.value/dbg.declare intrinsics, all of the pieces can reference the same variable and the complex address expressions can be uniqued across the CU, too. Down the road, this will allow us to move other flags, such as "indirection" out of the DIVariable, too. The new intrinsics look like this: declare void @llvm.dbg.declare(metadata %storage, metadata %var, metadata %expr) declare void @llvm.dbg.value(metadata %storage, i64 %offset, metadata %var, metadata %expr) This patch adds a new LLVM-local tag to DIExpressions, so we can detect and pretty-print DIExpression metadata nodes. What this patch doesn't do: This patch does not touch the "Indirect" field in DIVariable; but moving that into the expression would be a natural next step. http://reviews.llvm.org/D4919 rdar://problem/17994491 Thanks to dblaikie and dexonsmith for reviewing this patch! Note: I accidentally committed a bogus older version of this patch previously. llvm-svn: 218787
2014-10-02 02:55:02 +08:00
DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
for (auto &Value : CurEntry->getValues())
Value.dump();
Move the complex address expression out of DIVariable and into an extra argument of the llvm.dbg.declare/llvm.dbg.value intrinsics. Previously, DIVariable was a variable-length field that has an optional reference to a Metadata array consisting of a variable number of complex address expressions. In the case of OpPiece expressions this is wasting a lot of storage in IR, because when an aggregate type is, e.g., SROA'd into all of its n individual members, the IR will contain n copies of the DIVariable, all alike, only differing in the complex address reference at the end. By making the complex address into an extra argument of the dbg.value/dbg.declare intrinsics, all of the pieces can reference the same variable and the complex address expressions can be uniqued across the CU, too. Down the road, this will allow us to move other flags, such as "indirection" out of the DIVariable, too. The new intrinsics look like this: declare void @llvm.dbg.declare(metadata %storage, metadata %var, metadata %expr) declare void @llvm.dbg.value(metadata %storage, i64 %offset, metadata %var, metadata %expr) This patch adds a new LLVM-local tag to DIExpressions, so we can detect and pretty-print DIExpression metadata nodes. What this patch doesn't do: This patch does not touch the "Indirect" field in DIVariable; but moving that into the expression would be a natural next step. http://reviews.llvm.org/D4919 rdar://problem/17994491 Thanks to dblaikie and dexonsmith for reviewing this patch! Note: I accidentally committed a bogus older version of this patch previously. llvm-svn: 218787
2014-10-02 02:55:02 +08:00
dbgs() << "-----\n";
});
auto PrevEntry = std::next(CurEntry);
if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
DebugLoc.pop_back();
}
}
DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
InlinedVariable IV) {
ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
return ConcreteVariables.back().get();
}
// Determine whether this DBG_VALUE is valid at the beginning of the function.
static bool validAtEntry(const MachineInstr *MInsn) {
auto MBB = MInsn->getParent();
// Is it in the entry basic block?
if (!MBB->pred_empty())
return false;
for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I)
if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
return false;
return true;
}
// Find variables for each lexical scope.
void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedVariable> &Processed) {
2013-07-04 05:37:03 +08:00
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
if (Processed.count(IV))
2010-05-21 03:57:06 +08:00
continue;
// Instruction ranges, specifying where IV is accessible.
const auto &Ranges = I.second;
if (Ranges.empty())
continue;
2014-04-24 14:44:33 +08:00
LexicalScope *Scope = nullptr;
if (const DILocation *IA = IV.second)
Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
else
Scope = LScopes.findLexicalScope(IV.first->getScope());
2010-05-21 03:57:06 +08:00
// If variable scope is not found then skip this variable.
2010-05-21 08:10:20 +08:00
if (!Scope)
2010-05-21 03:57:06 +08:00
continue;
Processed.insert(IV);
DbgVariable *RegVar = createConcreteVariable(*Scope, IV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
// Check if there is a single DBG_VALUE, valid throughout the function.
// A single constant is also considered valid for the entire function.
if (Ranges.size() == 1 &&
(MInsn->getOperand(0).isImm() ||
(validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
RegVar->initializeDbgValue(MInsn);
continue;
}
2013-01-29 01:33:26 +08:00
// Handle multiple DBG_VALUE instructions describing one variable.
DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
// Build the location list for this variable.
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
SmallVector<DebugLocEntry, 8> Entries;
buildLocationList(Entries, Ranges);
2016-03-01 01:06:46 +08:00
// If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
static_cast<const Metadata *>(IV.first->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT);
}
// Collect info for variables that were optimized out.
for (const DILocalVariable *DV : SP->getVariables()) {
if (Processed.insert(InlinedVariable(DV, nullptr)).second)
if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
createConcreteVariable(*Scope, InlinedVariable(DV, nullptr));
}
}
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
if (MI->isDebugValue() || MI->isCFIInstruction())
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
// the last line number actually emitted, to see if it was line 0.
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
// We have an explicit location, same as the previous location.
// But we might be coming back to it after a line 0 record.
if (LastAsmLine == 0 && DL.getLine() != 0) {
// Reinstate the source location but not marked as a statement.
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
}
return;
}
if (!DL) {
// We have an unspecified location, which might want to be line 0.
// If we have already emitted a line-0 record, don't repeat it.
if (LastAsmLine == 0)
return;
// If user said Don't Do That, don't do that.
if (UnknownLocations == Disable)
return;
// See if we have a reason to emit a line-0 record now.
// Reasons to emit a line-0 record include:
// - User asked for it (UnknownLocations).
// - Instruction has a label, so it's referenced from somewhere else,
// possibly debug information; we want it to have a source location.
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
(PrevInstBB && PrevInstBB != MI->getParent())) {
// Preserve the file and column numbers, if we can, to save space in
// the encoded line table.
// Do not update PrevInstLoc, it remembers the last non-0 line.
const MDNode *Scope = nullptr;
unsigned Column = 0;
if (PrevInstLoc) {
Scope = PrevInstLoc.getScope();
Column = PrevInstLoc.getCol();
}
recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
}
return;
}
// We have an explicit location, different from the previous location.
// Don't repeat a line-0 record, but otherwise emit the new location.
// (The new location might be an explicit line 0, which we do emit.)
if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
return;
unsigned Flags = 0;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
PrologEndLoc = DebugLoc();
}
// If the line changed, we call that a new statement; unless we went to
// line 0 and came back, in which case it is not a new statement.
unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
if (DL.getLine() && DL.getLine() != OldLine)
Flags |= DWARF2_FLAG_IS_STMT;
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
// If we're not at line 0, remember this location.
if (DL.getLine())
PrevInstLoc = DL;
}
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc())
return MI.getDebugLoc();
return DebugLoc();
}
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
if (LScopes.empty())
return;
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
Recommit r212203: Don't try to construct debug LexicalScopes hierarchy for functions that do not have top level debug information. Reverted by Eric Christopher (Thanks!) in r212203 after Bob Wilson reported LTO issues. Duncan Exon Smith and Aditya Nandakumar helped provide a reduced reproduction, though the failure wasn't too hard to guess, and even easier with the example to confirm. The assertion that the subprogram metadata associated with an llvm::Function matches the scope data referenced by the DbgLocs on the instructions in that function is not valid under LTO. In LTO, a C++ inline function might exist in multiple CUs and the subprogram metadata nodes will refer to the same llvm::Function. In this case, depending on the order of the CUs, the first intance of the subprogram metadata may not be the one referenced by the instructions in that function and the assertion will fail. A test case (test/DebugInfo/cross-cu-linkonce-distinct.ll) is added, the assertion removed and a comment added to explain this situation. This was then reverted again in r213581 as it caused PR20367. The root cause of this was the early exit in LiveDebugVariables meant that spurious DBG_VALUE intrinsics that referenced dead variables were not removed, causing an assertion/crash later on. The fix is to have LiveDebugVariables strip all DBG_VALUE intrinsics in functions without debug info as they're not needed anyway. Test case added to cover this situation (that occurs when a debug-having function is inlined into a nodebug function) in test/DebugInfo/X86/nodebug_with_debug_loc.ll Original commit message: If a function isn't actually in a CU's subprogram list in the debug info metadata, ignore all the DebugLocs and don't try to build scopes, track variables, etc. While this is possibly a minor optimization, it's also a correctness fix for an incoming patch that will add assertions to LexicalScopes and the debug info verifier to ensure that all scope chains lead to debug info for the current function. Fix up a few test cases that had broken/incomplete debug info that could violate this constraint. Add a test case where this occurs by design (inlining a debug-info-having function in an attribute nodebug function - we want this to work because /if/ the nodebug function is then inlined into a debug-info-having function, it should be fine (and will work fine - we just stitch the scopes up as usual), but should the inlining not happen we need to not assert fail either). llvm-svn: 213952
2014-07-26 00:10:16 +08:00
// FnScope->getScopeNode() and DI->second should represent the same function,
// though they may not be the same MDNode due to inline functions merged in
// LTO where the debug info metadata still differs (either due to distinct
// written differences - two versions of a linkonce_odr function
// written/copied into two separate files, or some sub-optimal metadata that
// isn't structurally identical (see: file path/name info from clang, which
// includes the directory of the cpp file being built, even when the file name
// is absolute (such as an <> lookup header)))
auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit());
if (!TheCU) {
assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug &&
"DICompileUnit missing from llvm.dbg.cu?");
return;
}
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
if (DILocation *L = PrologEndLoc) {
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
auto *SP = L->getInlinedAtScope()->getSubprogram();
recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
}
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// section to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
}
// Gather and emit post-function debug information.
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
const DISubprogram *SP = MF->getFunction()->getSubprogram();
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
DenseSet<InlinedVariable> ProcessedVars;
collectVariableInfo(TheCU, SP, ProcessedVars);
// Add the range of this function to the list of ranges for the CU.
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
Add -debug-info-for-profiling to emit more debug info for sample pgo profile collection Summary: SamplePGO binaries built with -gmlt to collect profile. The current -gmlt debug info is limited, and we need some additional info: * start line of all subprograms * linkage name of all subprograms * standalone subprograms (functions that has neither inlined nor been inlined) This patch adds these information to the -gmlt binary. The impact on speccpu2006 binary size (size increase comparing with -g0 binary, also includes data for -g binary, which does not change with this patch): -gmlt(orig) -gmlt(patched) -g 433.milc 4.68% 5.40% 19.73% 444.namd 8.45% 8.93% 45.99% 447.dealII 97.43% 115.21% 374.89% 450.soplex 27.75% 31.88% 126.04% 453.povray 21.81% 26.16% 92.03% 470.lbm 0.60% 0.67% 1.96% 482.sphinx3 5.77% 6.47% 26.17% 400.perlbench 17.81% 19.43% 73.08% 401.bzip2 3.73% 3.92% 12.18% 403.gcc 31.75% 34.48% 122.75% 429.mcf 0.78% 0.88% 3.89% 445.gobmk 6.08% 7.92% 42.27% 456.hmmer 10.36% 11.25% 35.23% 458.sjeng 5.08% 5.42% 14.36% 462.libquantum 1.71% 1.96% 6.36% 464.h264ref 15.61% 16.56% 43.92% 471.omnetpp 11.93% 15.84% 60.09% 473.astar 3.11% 3.69% 14.18% 483.xalancbmk 56.29% 81.63% 353.22% geomean 15.60% 18.30% 57.81% Debug info size change for -gmlt binary with this patch: 433.milc 13.46% 444.namd 5.35% 447.dealII 18.21% 450.soplex 14.68% 453.povray 19.65% 470.lbm 6.03% 482.sphinx3 11.21% 400.perlbench 8.91% 401.bzip2 4.41% 403.gcc 8.56% 429.mcf 8.24% 445.gobmk 29.47% 456.hmmer 8.19% 458.sjeng 6.05% 462.libquantum 11.23% 464.h264ref 5.93% 471.omnetpp 31.89% 473.astar 16.20% 483.xalancbmk 44.62% geomean 16.83% Reviewers: davidxl, echristo, dblaikie Reviewed By: echristo, dblaikie Subscribers: aprantl, probinson, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D25434 llvm-svn: 292457
2017-01-19 08:44:11 +08:00
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
// is still needed as we need its source location.
if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
Add -debug-info-for-profiling to emit more debug info for sample pgo profile collection Summary: SamplePGO binaries built with -gmlt to collect profile. The current -gmlt debug info is limited, and we need some additional info: * start line of all subprograms * linkage name of all subprograms * standalone subprograms (functions that has neither inlined nor been inlined) This patch adds these information to the -gmlt binary. The impact on speccpu2006 binary size (size increase comparing with -g0 binary, also includes data for -g binary, which does not change with this patch): -gmlt(orig) -gmlt(patched) -g 433.milc 4.68% 5.40% 19.73% 444.namd 8.45% 8.93% 45.99% 447.dealII 97.43% 115.21% 374.89% 450.soplex 27.75% 31.88% 126.04% 453.povray 21.81% 26.16% 92.03% 470.lbm 0.60% 0.67% 1.96% 482.sphinx3 5.77% 6.47% 26.17% 400.perlbench 17.81% 19.43% 73.08% 401.bzip2 3.73% 3.92% 12.18% 403.gcc 31.75% 34.48% 122.75% 429.mcf 0.78% 0.88% 3.89% 445.gobmk 6.08% 7.92% 42.27% 456.hmmer 10.36% 11.25% 35.23% 458.sjeng 5.08% 5.42% 14.36% 462.libquantum 1.71% 1.96% 6.36% 464.h264ref 15.61% 16.56% 43.92% 471.omnetpp 11.93% 15.84% 60.09% 473.astar 3.11% 3.69% 14.18% 483.xalancbmk 56.29% 81.63% 353.22% geomean 15.60% 18.30% 57.81% Debug info size change for -gmlt binary with this patch: 433.milc 13.46% 444.namd 5.35% 447.dealII 18.21% 450.soplex 14.68% 453.povray 19.65% 470.lbm 6.03% 482.sphinx3 11.21% 400.perlbench 8.91% 401.bzip2 4.41% 403.gcc 8.56% 429.mcf 8.24% 445.gobmk 29.47% 456.hmmer 8.19% 458.sjeng 6.05% 462.libquantum 11.23% 464.h264ref 5.93% 471.omnetpp 31.89% 473.astar 16.20% 483.xalancbmk 44.62% geomean 16.83% Reviewers: davidxl, echristo, dblaikie Reviewed By: echristo, dblaikie Subscribers: aprantl, probinson, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D25434 llvm-svn: 292457
2017-01-19 08:44:11 +08:00
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
PrevLabel = nullptr;
CurFn = nullptr;
return;
}
#ifndef NDEBUG
size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
// Collect info for variables that were optimized out.
for (const DILocalVariable *DV : SP->getVariables()) {
if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
continue;
ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
DV->getScope());
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractVariableIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(AScope);
}
ProcessedSPNodes.insert(SP);
TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
2014-04-24 14:44:33 +08:00
PrevLabel = nullptr;
CurFn = nullptr;
}
// Register a source line with debug info. Returns the unique label that was
// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
StringRef Fn;
StringRef Dir;
unsigned Src = 1;
unsigned Discriminator = 0;
if (auto *Scope = cast_or_null<DIScope>(S)) {
Fn = Scope->getFilename();
Dir = Scope->getDirectory();
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
if (getDwarfVersion() >= 4)
Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
.getOrCreateSourceID(Fn, Dir);
}
Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
Discriminator, Fn);
}
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
void DwarfDebug::emitAbbreviations() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
StringRef TableName) {
Accel.FinalizeTable(Asm, TableName);
Asm->OutStreamer->SwitchSection(Section);
// Emit the full data.
Accel.emit(Asm, Section->getBeginSymbol(), this);
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
"Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
"ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
"namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
"types");
}
// Public name handling.
// The format for the various pubnames:
//
// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
// for the DIE that is named.
//
// gnu pubnames - offset/index value/name tuples where the offset is the offset
// into the CU and the index value is computed according to the type of value
// for the DIE that is named.
//
// For type units the offset is the offset of the skeleton DIE. For split dwarf
// it's the offset within the debug_info/debug_types dwo section, however, the
// reference in the pubname header doesn't change.
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
// Entities that ended up only in a Type Unit reference the CU instead (since
// the pub entry has offsets within the CU there's no real offset that can be
// provided anyway). As it happens all such entities (namespaces and types,
// types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
// not to be true it would be necessary to persist this information from the
// point at which the entry is added to the index data structure - since by
// the time the index is built from that, the original type/namespace DIE in a
// type unit has already been destroyed so it can't be queried for properties
// like tag, etc.
if (Die->getTag() == dwarf::DW_TAG_compile_unit)
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has our most of our knowledge,
// look for that now.
Reapply "AsmPrinter: Change DIEValue to be stored by value" This reverts commit r238350, effectively reapplying r238349 after fixing (all?) the problems, all somehow related to how I was using `AlignedArrayCharUnion<>` inside `DIEValue`: - MSVC can only handle `sizeof()` on types, not values. Change the assert. - GCC doesn't know the `is_trivially_copyable` type trait. Instead of asserting it, add destructors. - Call placement new even when constructing POD (i.e., the pointers). - Instead of copying the char buffer, copy the casted classes. I've left in a couple of `static_assert`s that I think both MSVC and GCC know how to handle. If the bots disagree with me, I'll remove them. - Check that the constructed type is either standard layout or a pointer. This protects against a programming error: we really want the "small" `DIEValue`s to be small and simple, so don't accidentally change them not to be. - Similarly, check that the size of the buffer is no bigger than a `uint64_t` or a pointer. (I thought checking against `sizeof(uint64_t)` would be good enough, but Chandler suggested that pointers might sometimes be bigger than that in the context of sanitizers.) I've also committed r238359 in the meantime, which introduces a DIEValue.def to simplify dispatching between the various types (thanks to a review comment by David Blaikie). Without that, this commit would be almost unintelligible. Here's the original commit message: -- Change `DIEValue` to be stored/passed/etc. by value, instead of reference. It's now a discriminated union, with a `Val` field storing the actual type. The classes that used to inherit from `DIEValue` no longer do. There are two categories of these: - Small values fit in a single pointer and are stored by value. - Large values require auxiliary storage, and are stored by reference. The only non-mechanical change is to tools/dsymutil/DwarfLinker.cpp. It was relying on `DIEInteger`s being passed around by reference, so I replaced that assumption with a `PatchLocation` type that stores a safe reference to where the `DIEInteger` lives instead. This commit causes a temporary regression in memory usage, since I've left merging `DIEAbbrevData` into `DIEValue` for a follow-up commit. I measured an increase from 845 MB to 879 MB, around 3.9%. The follow-up drops it lower than the starting point, and I've only recently brought the memory this low anyway, so I'm committing these changes separately to keep them incremental. (I also considered swapping the commits, but the other one first would cause a lot more code churn.) (I'm looking at `llc` memory usage on `verify-uselistorder.lto.opt.bc`; see r236629 for details.) -- llvm-svn: 238362
2015-05-28 06:14:58 +08:00
if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
if (SpecDIE.findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
} else if (Die->findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
switch (Die->getTag()) {
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_enumeration_type:
return dwarf::PubIndexEntryDescriptor(
dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
? dwarf::GIEL_STATIC
: dwarf::GIEL_EXTERNAL);
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
case dwarf::DW_TAG_namespace:
return dwarf::GIEK_TYPE;
case dwarf::DW_TAG_subprogram:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
case dwarf::DW_TAG_variable:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
case dwarf::DW_TAG_enumerator:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
dwarf::GIEL_STATIC);
default:
return dwarf::GIEK_NONE;
}
}
/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
MCSection *PSec = GnuStyle
? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection();
emitDebugPubSection(GnuStyle, PSec, "Names",
&DwarfCompileUnit::getGlobalNames);
}
void DwarfDebug::emitDebugPubSection(
bool GnuStyle, MCSection *PSec, StringRef Name,
const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
const auto &Globals = (TheU->*Accessor)();
if (Globals.empty())
continue;
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
// Start the dwarf pubnames section.
Asm->OutStreamer->SwitchSection(PSec);
// Emit the header.
Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
Asm->OutStreamer->EmitLabel(BeginLabel);
Asm->OutStreamer->AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
Asm->OutStreamer->AddComment("Compilation Unit Length");
Asm->EmitInt32(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
const char *Name = GI.getKeyData();
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->EmitInt8(Desc.toBits());
}
Asm->OutStreamer->AddComment("External Name");
Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer->EmitLabel(EndLabel);
}
}
void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
MCSection *PSec = GnuStyle
? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection();
emitDebugPubSection(GnuStyle, PSec, "Types",
&DwarfCompileUnit::getGlobalTypes);
2009-11-24 09:14:22 +08:00
}
2016-01-24 16:18:55 +08:00
/// Emit null-terminated strings into a debug str section.
void DwarfDebug::emitDebugStr() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
const DebugLocStream::Entry &Entry) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
for (uint8_t Byte : DebugLocs.getBytes(Entry))
Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
}
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.addSignedConstant(Value.getInt());
else
DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Location = Value.getLoc();
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
PR32382: Fix emitting complex DWARF expressions. The DWARF specification knows 3 kinds of non-empty simple location descriptions: 1. Register location descriptions - describe a variable in a register - consist of only a DW_OP_reg 2. Memory location descriptions - describe the address of a variable 3. Implicit location descriptions - describe the value of a variable - end with DW_OP_stack_value & friends The existing DwarfExpression code is pretty much ignorant of these restrictions. This used to not matter because we only emitted very short expressions that we happened to get right by accident. This patch makes DwarfExpression aware of the rules defined by the DWARF standard and now chooses the right kind of location description for each expression being emitted. This would have been an NFC commit (for the existing testsuite) if not for the way that clang describes captured block variables. Based on how the previous code in LLVM emitted locations, DW_OP_deref operations that should have come at the end of the expression are put at its beginning. Fixing this means changing the semantics of DIExpression, so this patch bumps the version number of DIExpression and implements a bitcode upgrade. There are two major changes in this patch: I had to fix the semantics of dbg.declare for describing function arguments. After this patch a dbg.declare always takes the *address* of a variable as the first argument, even if the argument is not an alloca. When lowering a DBG_VALUE, the decision of whether to emit a register location description or a memory location description depends on the MachineLocation — register machine locations may get promoted to memory locations based on their DIExpression. (Future) optimization passes that want to salvage implicit debug location for variables may do so by appending a DW_OP_stack_value. For example: DBG_VALUE, [RBP-8] --> DW_OP_fbreg -8 DBG_VALUE, RAX --> DW_OP_reg0 +0 DBG_VALUE, RAX, DIExpression(DW_OP_deref) --> DW_OP_reg0 +0 All testcases that were modified were regenerated from clang. I also added source-based testcases for each of these to the debuginfo-tests repository over the last week to make sure that no synchronized bugs slip in. The debuginfo-tests compile from source and run the debugger. https://bugs.llvm.org/show_bug.cgi?id=32382 <rdar://problem/31205000> Differential Revision: https://reviews.llvm.org/D31439 llvm-svn: 300522
2017-04-18 09:21:53 +08:00
if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpressionCursor Cursor(Ops);
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
return DwarfExpr.addExpression(std::move(Cursor));
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
}
DwarfExpr.addExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT) {
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
const DebugLocEntry::Value &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
assert(all_of(Values, [](DebugLocEntry::Value P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
"fragments are expected to be sorted");
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
for (auto Fragment : Values)
emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr);
}
Fix LLVM's use of DW_OP_bit_piece in DWARF expressions. LLVM's use of DW_OP_bit_piece is incorrect and a based on a misunderstanding of the wording in the DWARF specification. The offset argument of DW_OP_bit_piece refers to the offset into the location that is on the top of the DWARF expression stack, and not an offset into the source variable. This has since also been clarified in the DWARF specification. This patch fixes all uses of DW_OP_bit_piece to emit the correct offset and simplifies the DwarfExpression class to semi-automaticaly emit empty DW_OP_pieces to adjust the offset of the source variable, thus simplifying the code using DwarfExpression. While this is an incompatible bugfix, in practice I don't expect this to be much of a problem since LLVM's old interpretation and the correct interpretation of DW_OP_bit_piece differ only when there are gaps in the fragmented locations of the described variables or if individual fragments are smaller than a byte. LLDB at least won't interpret locations with gaps in them because is has no way to present undefined bits in a variable, and there is a high probability that an old-form expression will be malformed when interpreted correctly, because the DW_OP_bit_piece offset will be outside of the location at the top of the stack. As a nice side-effect, this patch enables us to use a more efficient encoding for subregisters: In order to express a sub-register at a non-zero offset we now use a DW_OP_bit_piece instead of shifting the value into place manually. This patch also adds missing test coverage for code paths that weren't exercised before. <rdar://problem/29335809> Differential Revision: https://reviews.llvm.org/D27550 llvm-svn: 289266
2016-12-10 04:43:40 +08:00
DwarfExpr.finalize();
}
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
Asm->EmitInt16(DebugLocs.getBytes(Entry).size());
// Emit the entry.
APByteStreamer Streamer(*Asm);
emitDebugLocEntry(Streamer, Entry);
}
2013-07-03 05:36:07 +08:00
// Emit locations into the debug loc section.
void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
unsigned char Size = Asm->MAI->getCodePointerSize();
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
const DwarfCompileUnit *CU = List.CU;
for (const auto &Entry : DebugLocs.getEntries(List)) {
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
if (auto *Base = CU->getBaseAddress()) {
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
} else {
Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
}
emitDebugLocEntryLocation(Entry);
}
Asm->OutStreamer->EmitIntValue(0, Size);
Asm->OutStreamer->EmitIntValue(0, Size);
}
}
void DwarfDebug::emitDebugLocDWO() {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
for (const auto &Entry : DebugLocs.getEntries(List)) {
// Just always use start_length for now - at least that's one address
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
Asm->EmitInt8(dwarf::DW_LLE_startx_length);
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
AsmPrinter: Create a unified .debug_loc stream This commit removes `DebugLocList` and replaces it with `DebugLocStream`. - `DebugLocEntry` no longer contains its byte/comment streams. - The `DebugLocEntry` list for a variable/inlined-at pair is allocated on the stack, and released right after `DebugLocEntry::finalize()` (possible because of the refactoring in r231023). Now, only one list is in memory at a time now. - There's a single unified stream for the `.debug_loc` section that persists, stored in the new `DebugLocStream` data structure. The last point is important: this collapses the nested `SmallVector<>`s from `DebugLocList` into unified streams. We previously had something like the following: vec<tuple<Label, CU, vec<tuple<BeginSym, EndSym, vec<Value>, vec<char>, vec<string>>>>> A `SmallVector` can avoid allocations, but is statically fairly large for a vector: three pointers plus the size of the small storage, which is the number of elements in small mode times the element size). Nesting these is expensive, since an inner vector's size contributes to the element size of an outer one. (Nesting any vector is expensive...) In the old data structure, the outer vector's *element* size was 632B, excluding allocation costs for when the middle and inner vectors exceeded their small sizes. 312B of this was for the "three" pointers in the vector-tree beneath it. If you assume 1M functions with an average of 10 variable/inlined-at pairs each (in an LTO scenario), that's almost 6GB (besides inner allocations), with almost 3GB for the "three" pointers. This came up in a heap profile a little while ago of a `clang -flto -g` bootstrap, with `DwarfDebug::collectVariableInfo()` using something like 10-15% of the total memory. With this commit, we have: tuple<vec<tuple<Label, CU, Offset>>, vec<tuple<BeginSym, EndSym, Offset, Offset>>, vec<char>, vec<string>> The offsets are used to create `ArrayRef` slices of adjacent `SmallVector`s. This reduces the number of vectors to four (unrelated to the number of variable/inlined-at pairs), and caps the number of allocations at the same number. Besides saving memory and limiting allocations, this is NFC. I don't know my way around this code very well yet, but I wonder if we could go further: why stream to a side-table, instead of directly to the output stream? llvm-svn: 235229
2015-04-18 05:34:47 +08:00
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}
Asm->EmitInt8(dwarf::DW_LLE_end_of_list);
}
}
struct ArangeSpan {
const MCSymbol *Start, *End;
};
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
// Provides a unique id per text section.
MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
// Filter labels by section.
for (const SymbolCU &SCU : ArangeLabels) {
if (SCU.Sym->isInSection()) {
// Make a note of this symbol and it's section.
MCSection *Section = &SCU.Sym->getSection();
if (!Section->getKind().isMetadata())
SectionMap[Section].push_back(SCU);
} else {
// Some symbols (e.g. common/bss on mach-o) can have no section but still
// appear in the output. This sucks as we rely on sections to build
// arange spans. We can do it without, but it's icky.
SectionMap[nullptr].push_back(SCU);
}
}
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
for (auto &I : SectionMap) {
MCSection *Section = I.first;
SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 1)
continue;
// If we have no section (e.g. common), just write out
// individual spans for each symbol.
if (!Section) {
for (const SymbolCU &Cur : List) {
ArangeSpan Span;
Span.Start = Cur.Sym;
Span.End = nullptr;
assert(Cur.CU);
Spans[Cur.CU].push_back(Span);
}
continue;
}
// Sort the symbols by offset within the section.
2016-05-20 07:17:37 +08:00
std::sort(
List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
if (IA == 0)
return false;
if (IB == 0)
return true;
return IA < IB;
});
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
// Build spans between each label.
const MCSymbol *StartSym = List[0].Sym;
for (size_t n = 1, e = List.size(); n < e; n++) {
const SymbolCU &Prev = List[n - 1];
const SymbolCU &Cur = List[n];
// Try and build the longest span we can within the same CU.
if (Cur.CU != Prev.CU) {
ArangeSpan Span;
Span.Start = StartSym;
Span.End = Cur.Sym;
assert(Prev.CU);
Spans[Prev.CU].push_back(Span);
StartSym = Cur.Sym;
}
}
}
// Start the dwarf aranges section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
for (const auto &it : Spans) {
DwarfCompileUnit *CU = it.first;
CUs.push_back(CU);
}
// Sort the CU list (again, to ensure consistent output order).
std::sort(CUs.begin(), CUs.end(),
[](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
return A->getUniqueID() < B->getUniqueID();
});
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
std::vector<ArangeSpan> &List = Spans[CU];
// Describe the skeleton CU's offset and length, not the dwo file's.
if (auto *Skel = CU->getSkeleton())
CU = Skel;
// Emit size of content not including length itself.
2013-11-19 17:04:36 +08:00
unsigned ContentSize =
sizeof(int16_t) + // DWARF ARange version number
sizeof(int32_t) + // Offset of CU in the .debug_info section
sizeof(int8_t) + // Pointer Size (in bytes)
sizeof(int8_t); // Segment Size (in bytes)
unsigned TupleSize = PtrSize * 2;
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
unsigned Padding =
OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
// For each compile unit, write the list of spans it covers.
Asm->OutStreamer->AddComment("Length of ARange Set");
Asm->EmitInt32(ContentSize);
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
Asm->emitDwarfSymbolReference(CU->getLabelBegin());
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
Asm->EmitInt8(0);
Asm->OutStreamer->emitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
Asm->EmitLabelReference(Span.Start, PtrSize);
// Calculate the size as being from the span start to it's end.
if (Span.End) {
Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
uint64_t Size = SymSize[Span.Start];
if (Size == 0)
Size = 1;
Asm->OutStreamer->EmitIntValue(Size, PtrSize);
}
}
Asm->OutStreamer->AddComment("ARange terminator");
Asm->OutStreamer->EmitIntValue(0, PtrSize);
Asm->OutStreamer->EmitIntValue(0, PtrSize);
}
}
2016-01-24 16:18:55 +08:00
/// Emit address ranges into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer->SwitchSection(
2013-11-19 17:04:36 +08:00
Asm->getObjFileLowering().getDwarfRangesSection());
// Size for our labels.
unsigned char Size = Asm->MAI->getCodePointerSize();
// Grab the specific ranges for the compile units in the module.
for (const auto &I : CUMap) {
DwarfCompileUnit *TheCU = I.second;
if (auto *Skel = TheCU->getSkeleton())
TheCU = Skel;
// Iterate over the misc ranges for the compile units in the module.
for (const RangeSpanList &List : TheCU->getRangeLists()) {
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->EmitLabel(List.getSym());
for (const RangeSpan &Range : List.getRanges()) {
const MCSymbol *Begin = Range.getStart();
const MCSymbol *End = Range.getEnd();
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
if (auto *Base = TheCU->getBaseAddress()) {
Asm->EmitLabelDifference(Begin, Base, Size);
Asm->EmitLabelDifference(End, Base, Size);
} else {
Asm->OutStreamer->EmitSymbolValue(Begin, Size);
Asm->OutStreamer->EmitSymbolValue(End, Size);
}
}
// And terminate the list with two 0 values.
Asm->OutStreamer->EmitIntValue(0, Size);
Asm->OutStreamer->EmitIntValue(0, Size);
}
}
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
for (auto *MN : Nodes) {
if (auto *M = dyn_cast<DIMacro>(MN))
emitMacro(*M);
else if (auto *F = dyn_cast<DIMacroFile>(MN))
emitMacroFile(*F, U);
else
llvm_unreachable("Unexpected DI type!");
}
}
void DwarfDebug::emitMacro(DIMacro &M) {
Asm->EmitULEB128(M.getMacinfoType());
Asm->EmitULEB128(M.getLine());
StringRef Name = M.getName();
StringRef Value = M.getValue();
Asm->OutStreamer->EmitBytes(Name);
if (!Value.empty()) {
// There should be one space between macro name and macro value.
Asm->EmitInt8(' ');
Asm->OutStreamer->EmitBytes(Value);
}
Asm->EmitInt8('\0');
}
void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
Asm->EmitULEB128(F.getLine());
DIFile *File = F.getFile();
unsigned FID =
U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
Asm->EmitULEB128(FID);
handleMacroNodes(F.getElements(), U);
Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
}
2016-01-24 16:18:55 +08:00
/// Emit macros into a debug macinfo section.
void DwarfDebug::emitDebugMacinfo() {
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
handleMacroNodes(CUNode->getMacros(), U);
}
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->EmitInt8(0);
}
2012-12-12 03:42:09 +08:00
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
U.getCUNode()->getSplitDebugFilename());
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(*NewU, Die);
SkeletonHolder.addUnit(std::move(NewU));
}
// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
// DW_AT_addr_base, DW_AT_ranges_base.
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
NewCU.initStmtList();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
return NewCU;
}
2012-12-12 03:42:09 +08:00
// Emit the .debug_info.dwo section for separated dwarf. This contains the
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't emit relocations into the dwo file.
InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
// abbreviations for the .debug_info.dwo section.
void DwarfDebug::emitDebugAbbrevDWO() {
assert(useSplitDwarf() && "No split dwarf?");
InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
}
void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLineDWOSection());
SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
// string section and is identical in format to traditional .debug_str
// sections.
void DwarfDebug::emitDebugStrDWO() {
assert(useSplitDwarf() && "No split dwarf?");
MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
OffSec);
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
if (SingleCU)
SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
return &SplitTypeUnitFileTable;
}
uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, so we actually
// need the "high" word.
MD5::MD5Result Result;
Hash.final(Result);
return Result.high();
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
StringRef Identifier, DIE &RefDie,
const DICompositeType *CTy) {
// Fast path if we're building some type units and one has already used the
// address pool we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
if (!Ins.second) {
CU.addDIETypeSignature(RefDie, Ins.first->second);
return;
}
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
CU.getLanguage());
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
if (useSplitDwarf())
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
else {
CU.applyStmtList(UnitDie);
This change removes the dependency on DwarfDebug that was used for DW_FORM_ref_addr by making a new DIEUnit class in DIE.cpp. The DIEUnit class represents a compile or type unit and it owns the unit DIE as an instance variable. This allows anyone with a DIE, to get the unit DIE, and then get back to its DIEUnit without adding any new ivars to the DIE class. Why was this needed? The DIE class has an Offset that is always the CU relative DIE offset, not the "offset in debug info section" as was commented in the header file (the comment has been corrected). This is great for performance because most DIE references are compile unit relative and this means most code that accessed the DIE's offset didn't need to make it into a compile unit relative offset because it already was. When we needed to emit a DW_FORM_ref_addr though, we needed to find the absolute offset of the DIE by finding the DIE's compile/type unit. This class did have the absolute debug info/type offset and could be added to the CU relative offset to compute the absolute offset. With this change we can easily get back to a DIE's DIEUnit which will have this needed offset. Prior to this is required having a DwarfDebug and required calling: DwarfCompileUnit *DwarfDebug::lookupUnit(const DIE *CU) const; Now we can use the DIEUnit class to do so without needing DwarfDebug. All clients now use DIEUnit objects (the DwarfDebug stack and the DwarfLinker). A follow on patch for the DWARF generator will also take advantage of this. Differential Revision: https://reviews.llvm.org/D27170 llvm-svn: 288399
2016-12-02 02:56:29 +08:00
NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
}
NewTU.setType(NewTU.createTypeDIE(CTy));
if (TopLevelType) {
auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
TypeUnitsUnderConstruction.clear();
// Types referencing entries in the address table cannot be placed in type
// units.
if (AddrPool.hasBeenUsed()) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
// the type that used an address.
for (const auto &TU : TypeUnitsToAdd)
TypeSignatures.erase(TU.second);
// Construct this type in the CU directly.
// This is inefficient because all the dependent types will be rebuilt
// from scratch, including building them in type units, discovering that
// they depend on addresses, throwing them out and rebuilding them.
CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
return;
}
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
for (auto &TU : TypeUnitsToAdd) {
InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
}
}
CU.addDIETypeSignature(RefDie, Signature);
}
// Accelerator table mutators - add each name along with its companion
// DIE to the proper table while ensuring that the name that we're going
// to reference is in the string table. We do this since the names we
// add may not only be identical to the names in the DIE.
void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
if (!useDwarfAccelTables())
return;
AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}