Revert "Revert "Debug Info: Type Units: Simplify type hashing using IR-provided unique names.""

This reverts commit r198398, thus reapplying r198397.

I had accidentally introduced an endianness issue when applying the hash
to the type unit. Using support::ulittle64_t in the reinterpret_cast in
addDwarfTypeUnitType fixes this issue.

Original commit message:

Debug Info: Type Units: Simplify type hashing using IR-provided unique
names.

What's good for LTO metadata size problems ought to be good for non-LTO
debug info size too, so let's rely on the same uniqueness in both cases.
If it's insufficient for non-LTO for whatever reason (since we now won't
be uniquing CU-local types or any C types - but these are likely to not
be the most significant contributors to type bloat) we should consider a
frontend solution that'll help both LTO and non-LTO alike, rather than
using DWARF-level DIE-hashing that only helps non-LTO debug info size.

It's also much simpler this way and benefits C++ even more since we can
deduplicate lexically separate definitions of the same C++ type since
they have the same mangled name.

llvm-svn: 198436
This commit is contained in:
David Blaikie 2014-01-03 18:59:42 +00:00
parent 1d17bde708
commit cfb2115e66
5 changed files with 50 additions and 124 deletions

View File

@ -58,11 +58,6 @@ static cl::opt<bool> UnknownLocations(
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
/// Hidden boolean command-line option "generate-odr-hash"; initialised to
/// false. Its help text is "Add an ODR hash to external type DIEs."
static cl::opt<bool>
GenerateODRHash("generate-odr-hash", cl::Hidden,
cl::desc("Add an ODR hash to external type DIEs."),
cl::init(false));
/// Hidden boolean command-line option "generate-cu-hash"; initialised to
/// false. Its help text is "Add the CU hash as the dwo_id."
static cl::opt<bool> GenerateCUHash("generate-cu-hash", cl::Hidden,
cl::desc("Add the CU hash as the dwo_id."),
cl::init(false));
@ -1019,41 +1014,6 @@ void DwarfDebug::collectDeadVariables() {
}
}
// Type Signature [7.27] and ODR Hash code.
/// \brief Look up attribute \p Attr on \p Die and return its string value.
///
/// An empty StringRef is returned when the attribute is not present on the
/// DIE, or when its value is not a DIEString.
static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) {
  DIEString *Str = dyn_cast_or_null<DIEString>(Die->findAttribute(Attr));
  if (!Str)
    return StringRef("");
  return Str->getString();
}
/// Walk the parent chain of \p Die and report whether any enclosing DIE is a
/// DW_TAG_namespace whose DW_AT_name string is empty (i.e. an anonymous
/// namespace). \p Die itself is not examined, only its ancestors.
static bool isContainedInAnonNamespace(DIE *Die) {
  for (DIE *Ancestor = Die->getParent(); Ancestor;
       Ancestor = Ancestor->getParent()) {
    // Only namespace DIEs are of interest; skip every other kind of scope.
    if (Ancestor->getTag() != dwarf::DW_TAG_namespace)
      continue;
    if (getDIEStringAttr(Ancestor, dwarf::DW_AT_name) == "")
      return true;
  }
  return false;
}
/// Decide whether an ODR hash applies to \p Die within type unit \p CU.
///
/// All three conditions must hold: the unit's language is
/// DW_LANG_C_plus_plus, the DIE has a non-empty DW_AT_name, and the DIE is
/// not nested inside an anonymous namespace.
static bool shouldAddODRHash(DwarfTypeUnit *CU, DIE *Die) {
  if (CU->getLanguage() != dwarf::DW_LANG_C_plus_plus)
    return false;
  // Unnamed types are rejected before the (costlier) parent-chain walk.
  if (getDIEStringAttr(Die, dwarf::DW_AT_name) == "")
    return false;
  return !isContainedInAnonNamespace(Die);
}
void DwarfDebug::finalizeModuleInfo() {
// Collect info for variables that were optimized out.
collectDeadVariables();
@ -3041,8 +3001,8 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, StrSym);
}
void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, DIE *RefDie,
DICompositeType CTy) {
void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, StringRef Identifier,
DIE *RefDie, DICompositeType CTy) {
const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
if (!TU) {
DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit);
@ -3057,16 +3017,14 @@ void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, DIE *RefDie,
DIE *Die = NewTU->createTypeDIE(CTy);
if (GenerateODRHash && shouldAddODRHash(NewTU, Die))
NewTU->addUInt(UnitDie, dwarf::DW_AT_GNU_odr_signature,
dwarf::DW_FORM_data8,
DIEHash().computeDIEODRSignature(*Die));
// FIXME: This won't handle circularly referential structures, as the DIE
// may have references to other DIEs still under construction and missing
// their signature. Hashing should walk through the signatures to their
// referenced type, or possibly walk the precomputed hashes of related types
// at the end.
uint64_t Signature = DIEHash().computeTypeSignature(*Die);
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, swap bytes
// appropriately.
MD5::MD5Result Result;
Hash.final(Result);
uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
NewTU->setTypeSignature(Signature);
NewTU->setType(Die);

View File

@ -695,7 +695,8 @@ public:
/// \brief Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(uint16_t Language, DIE *Die, DICompositeType CTy);
void addDwarfTypeUnitType(uint16_t Language, StringRef Identifier, DIE *Die,
DICompositeType CTy);
/// \brief Add a label so that arange data can be generated for it.
///
/// Appends \p SCU to the ArangeLabels container.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }

View File

@ -928,41 +928,6 @@ DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
return TyDIE;
}
/// Report whether \p Ty is scoped such that it may live in its own type unit.
///
/// The chain of enclosing scopes is resolved through \p DD, starting from the
/// type's context; the answer is false as soon as any enclosing scope is a
/// subprogram, and true if the chain ends without meeting one.
static bool isDwarfTypeUnitScoped(DIType Ty, const DwarfDebug *DD) {
  for (DIScope Scope = DD->resolve(Ty.getContext()); Scope;
       Scope = DD->resolve(Scope.getContext())) {
    // Don't generate a hash for anything scoped inside a function.
    if (Scope.isSubprogram())
      return false;
  }
  return true;
}
/// Decide whether composite type \p CTy should be split out into a type unit.
///
/// Requires GenerateDwarfTypeUnits to be set, the tag to be one of
/// structure/union/enumeration/class, the type to be a definition rather than
/// a forward declaration, and the type not to be scoped inside a function.
static bool shouldCreateDwarfTypeUnit(DICompositeType CTy,
                                      const DwarfDebug *DD) {
  if (!GenerateDwarfTypeUnits)
    return false;

  const uint16_t Tag = CTy.getTag();
  const bool IsCandidateTag = Tag == dwarf::DW_TAG_structure_type ||
                              Tag == dwarf::DW_TAG_union_type ||
                              Tag == dwarf::DW_TAG_enumeration_type ||
                              Tag == dwarf::DW_TAG_class_type;
  if (!IsCandidateTag)
    return false;

  // A class, structure, union, or enumeration type that is a definition (not
  // a declaration) and is not scoped inside a function gets its own type unit.
  return !CTy.isForwardDecl() && isDwarfTypeUnitScoped(CTy, DD);
}
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
@ -989,11 +954,13 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
constructTypeDIE(*TyDIE, DIBasicType(Ty));
else if (Ty.isCompositeType()) {
DICompositeType CTy(Ty);
if (shouldCreateDwarfTypeUnit(CTy, DD)) {
DD->addDwarfTypeUnitType(getLanguage(), TyDIE, CTy);
// Skip updating the accelerator tables since this is not the full type
return TyDIE;
}
if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
if (MDString *TypeId = CTy.getIdentifier()) {
DD->addDwarfTypeUnitType(getLanguage(), TypeId->getString(), TyDIE,
CTy);
// Skip updating the accelerator tables since this is not the full type
return TyDIE;
}
constructTypeDIE(*TyDIE, CTy);
} else {
assert(Ty.isDerivedType() && "Unknown kind of DIType");

View File

@ -5,8 +5,9 @@
; struct foo {
; } f;
; CHECK: DW_TAG_type_unit
; CHECK-NEXT: DW_AT_language [DW_FORM_data2] (0x000c)
; No known LLVM frontends produce appropriate unique identifiers for C types,
; so we don't produce type units for them.
; CHECK-NOT: DW_TAG_type_unit
%struct.foo = type {}

View File

@ -1,6 +1,6 @@
; REQUIRES: object-emission
; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -generate-odr-hash -mtriple=x86_64-unknown-linux-gnu
; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
; RUN: llvm-dwarfdump %t | FileCheck %s
; Generated from:
@ -46,18 +46,31 @@
; CHECK-LABEL: .debug_info contents:
; CHECK: Compile Unit: length = [[CU_SIZE:[0-9a-f]+]]
; CHECK: DW_TAG_structure_type
; CHECK-NEXT: DW_AT_signature
; CHECK: DW_TAG_class_type
; CHECK-NEXT: DW_AT_signature
; Ensure the CU-local type 'walrus' is not placed in a type unit.
; CHECK: DW_TAG_structure_type
; CHECK-NEXT: debug_str{{.*}}"walrus"
; CHECK-NEXT: DW_AT_byte_size
; CHECK-NEXT: DW_AT_decl_file
; CHECK-NEXT: DW_AT_decl_line
; CHECK-LABEL: .debug_types contents:
; Check that we generate a hash for bar and the value.
; CHECK-LABEL: type_signature = 0x6a7ee3d400662e88
; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x200520c0d5b90eff)
; CHECK-NOT: type_signature
; CHECK-LABEL: type_signature = 0x1d02f3be30cc5688
; CHECK: DW_TAG_structure_type
; CHECK-NEXT: debug_str{{.*}}"bar"
; Check that we generate a hash for fluffy and the value.
; CHECK-LABEL: type_signature = 0x139b2e1ea94afec7
; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x9a0124d5a0c21c52)
; CHECK-NOT: type_signature
; CHECK-LABEL: type_signature = 0xb04af47397402e77
; CHECK-NOT: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x9a0124d5a0c21c52)
; CHECK: DW_TAG_namespace
; CHECK-NEXT: debug_str{{.*}}"echidna"
; CHECK: DW_TAG_namespace
@ -67,34 +80,22 @@
; CHECK: DW_TAG_class_type
; CHECK-NEXT: debug_str{{.*}}"fluffy"
; namespace and won't violate any ODR-ness.
; CHECK-LABEL: type_signature = 0xc0d031d6449dbca7
; CHECK: DW_TAG_type_unit
; CHECK-NOT: NULL
; We emit no hash for walrus since the type is contained in an anonymous
; CHECK-NOT: DW_AT_GNU_odr_signature
; CHECK: DW_TAG_structure_type
; CHECK-NEXT: debug_str{{.*}}"walrus"
; CHECK-NEXT: DW_AT_byte_size
; CHECK-NEXT: DW_AT_decl_file
; CHECK-NEXT: DW_AT_decl_line
; CHECK: DW_TAG_subprogram
; Check that we generate a hash for wombat and the value, but not for the
; anonymous type contained within.
; CHECK-LABEL: type_signature = 0x73776f130648b986
; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x685bcc220141e9d7)
; CHECK-NOT: type_signature
; CHECK-LABEL: type_signature = 0xfd756cee88f8a118
; CHECK-NOT: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x685bcc220141e9d7)
; CHECK: DW_TAG_structure_type
; CHECK-NEXT: debug_str{{.*}}"wombat"
; CHECK-LABEL: type_signature = 0xbf6fc40e82583d7c
; CHECK-NOT: type_signature
; CHECK-LABEL: type_signature = 0xe94f6d3843e62d6b
; CHECK: DW_TAG_type_unit
; CHECK-NOT: NULL
; Check that we generate no ODR hash for the anonymous type nested inside 'wombat'
; CHECK-NOT: DW_AT_GNU_odr_signature
; CHECK: DW_TAG_structure_type
; The signature for the outer 'wombat' type
; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0x73776f130648b986)
; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0xfd756cee88f8a118)
; CHECK: DW_TAG_structure_type
; CHECK-NOT: DW_AT_name
; CHECK-NOT: DW_AT_GNU_odr_signature
@ -107,18 +108,16 @@
; Don't emit pubtype entries for type DIEs in the compile unit that just indirect to a type unit.
; CHECK-NEXT: unit_size = [[CU_SIZE]]
; CHECK-NEXT: Offset Name
; CHECK-NEXT: "walrus"
; Type unit for 'bar'
; CHECK-NEXT: unit_size = 0x0000002b
; CHECK-NEXT: unit_size = 0x00000023
; CHECK-NEXT: Offset Name
; CHECK-NEXT: "bar"
; CHECK-NEXT: unit_size = 0x00000065
; CHECK-NEXT: unit_size = 0x0000005d
; CHECK-NEXT: Offset Name
; CHECK-NEXT: "int"
; CHECK-NEXT: "echidna::capybara::mongoose::fluffy"
; CHECK-NEXT: unit_size = 0x0000003b
; CHECK-NEXT: Offset Name
; CHECK-NEXT: "walrus"
; CHECK-NEXT: unit_size = 0x00000042
; CHECK-NEXT: unit_size = 0x0000003a
; CHECK-NEXT: Offset Name
; CHECK-NEXT: "wombat"
; CHECK-NEXT: unit_size = 0x0000004b