From ac69af7ad6560978883ce08ebf837b49046778c8 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 19 Dec 2018 19:34:24 +0000 Subject: [PATCH] llvm-dwarfdump: Improve/fix pretty printing of array dimensions This is to address post-commit feedback from Paul Robinson on r348954. The original commit misinterprets count and upper bound as the same thing (I thought I saw GCC producing an upper bound the same as Clang's count, but GCC correctly produces an upper bound that's one less than the count (in C, that is, where arrays are zero indexed)). I want to preserve the C-like output for the common case, so in the absence of a lower bound the count (or one greater than the upper bound) is rendered between []. In the trickier cases, where a lower bound is specified, a half-open range is used (eg: lower bound 1, count 2 would be "[1, 3)") and any unknown parts use a '?' (eg: "[1, ?)" or "[?, 7)" or "[?, ? + 3)"). Reviewers: aprantl, probinson, JDevlieghere Differential Revision: https://reviews.llvm.org/D55721 llvm-svn: 349670 --- llvm/include/llvm/BinaryFormat/Dwarf.def | 82 +++++++++---------- llvm/include/llvm/BinaryFormat/Dwarf.h | 5 +- llvm/lib/BinaryFormat/Dwarf.cpp | 19 ++++- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 65 +++++++++++---- llvm/lib/IR/DebugInfo.cpp | 5 +- .../llvm-dwarfdump/X86/prettyprint_types.s | 62 +++++++++++++- 6 files changed, 169 insertions(+), 69 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index cb9f7f5145d1..6ad3cb57f62f 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -43,7 +43,7 @@ #endif #ifndef HANDLE_DW_LANG -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) #endif #ifndef HANDLE_DW_ATE @@ -632,50 +632,50 @@ HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) // DWARF languages. 
-HANDLE_DW_LANG(0x0001, C89, 2, DWARF) -HANDLE_DW_LANG(0x0002, C, 2, DWARF) -HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF) -HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF) -HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF) -HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF) -HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF) -HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF) -HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF) -HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF) +HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF) +HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF) +HANDLE_DW_LANG(0x0003, Ada83, 1, 2, DWARF) +HANDLE_DW_LANG(0x0004, C_plus_plus, 0, 2, DWARF) +HANDLE_DW_LANG(0x0005, Cobol74, 1, 2, DWARF) +HANDLE_DW_LANG(0x0006, Cobol85, 1, 2, DWARF) +HANDLE_DW_LANG(0x0007, Fortran77, 1, 2, DWARF) +HANDLE_DW_LANG(0x0008, Fortran90, 1, 2, DWARF) +HANDLE_DW_LANG(0x0009, Pascal83, 1, 2, DWARF) +HANDLE_DW_LANG(0x000a, Modula2, 1, 2, DWARF) // New in DWARF v3: -HANDLE_DW_LANG(0x000b, Java, 3, DWARF) -HANDLE_DW_LANG(0x000c, C99, 3, DWARF) -HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF) -HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF) -HANDLE_DW_LANG(0x000f, PLI, 3, DWARF) -HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF) -HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF) -HANDLE_DW_LANG(0x0012, UPC, 3, DWARF) -HANDLE_DW_LANG(0x0013, D, 3, DWARF) +HANDLE_DW_LANG(0x000b, Java, 0, 3, DWARF) +HANDLE_DW_LANG(0x000c, C99, 0, 3, DWARF) +HANDLE_DW_LANG(0x000d, Ada95, 1, 3, DWARF) +HANDLE_DW_LANG(0x000e, Fortran95, 1, 3, DWARF) +HANDLE_DW_LANG(0x000f, PLI, 1, 3, DWARF) +HANDLE_DW_LANG(0x0010, ObjC, 0, 3, DWARF) +HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 0, 3, DWARF) +HANDLE_DW_LANG(0x0012, UPC, 0, 3, DWARF) +HANDLE_DW_LANG(0x0013, D, 0, 3, DWARF) // New in DWARF v4: -HANDLE_DW_LANG(0x0014, Python, 4, DWARF) +HANDLE_DW_LANG(0x0014, Python, 0, 4, DWARF) // New in DWARF v5: -HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF) -HANDLE_DW_LANG(0x0016, Go, 5, DWARF) -HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF) -HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF) -HANDLE_DW_LANG(0x0019, 
C_plus_plus_03, 5, DWARF) -HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF) -HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF) -HANDLE_DW_LANG(0x001c, Rust, 5, DWARF) -HANDLE_DW_LANG(0x001d, C11, 5, DWARF) -HANDLE_DW_LANG(0x001e, Swift, 5, DWARF) -HANDLE_DW_LANG(0x001f, Julia, 5, DWARF) -HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF) -HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF) -HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF) -HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF) -HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF) -HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF) +HANDLE_DW_LANG(0x0015, OpenCL, 0, 5, DWARF) +HANDLE_DW_LANG(0x0016, Go, 0, 5, DWARF) +HANDLE_DW_LANG(0x0017, Modula3, 1, 5, DWARF) +HANDLE_DW_LANG(0x0018, Haskell, 0, 5, DWARF) +HANDLE_DW_LANG(0x0019, C_plus_plus_03, 0, 5, DWARF) +HANDLE_DW_LANG(0x001a, C_plus_plus_11, 0, 5, DWARF) +HANDLE_DW_LANG(0x001b, OCaml, 0, 5, DWARF) +HANDLE_DW_LANG(0x001c, Rust, 0, 5, DWARF) +HANDLE_DW_LANG(0x001d, C11, 0, 5, DWARF) +HANDLE_DW_LANG(0x001e, Swift, 0, 5, DWARF) +HANDLE_DW_LANG(0x001f, Julia, 1, 5, DWARF) +HANDLE_DW_LANG(0x0020, Dylan, 0, 5, DWARF) +HANDLE_DW_LANG(0x0021, C_plus_plus_14, 0, 5, DWARF) +HANDLE_DW_LANG(0x0022, Fortran03, 1, 5, DWARF) +HANDLE_DW_LANG(0x0023, Fortran08, 1, 5, DWARF) +HANDLE_DW_LANG(0x0024, RenderScript, 0, 5, DWARF) +HANDLE_DW_LANG(0x0025, BLISS, 0, 5, DWARF) // Vendor extensions: -HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS) -HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE) -HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND) +HANDLE_DW_LANG(0x8001, Mips_Assembler, None, 0, MIPS) +HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, 0, BORLAND) // DWARF attribute type encodings. 
HANDLE_DW_ATE(0x01, address, 2, DWARF) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index c25b6cd0966b..525a04d5e6cf 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -184,7 +184,8 @@ enum DefaultedMemberAttribute { }; enum SourceLanguage { -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ + DW_LANG_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" DW_LANG_lo_user = 0x8000, DW_LANG_hi_user = 0xffff @@ -490,6 +491,8 @@ unsigned AttributeEncodingVendor(TypeKind E); unsigned LanguageVendor(SourceLanguage L); /// @} +Optional LanguageLowerBound(SourceLanguage L); + /// A helper struct providing information about the byte size of DW_FORM /// values that vary in size depending on the DWARF version, address byte /// size, or DWARF32/DWARF64. diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp index fe8ce2bd7aa3..46f8056774b7 100644 --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -301,7 +301,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { switch (Language) { default: return StringRef(); -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; #include "llvm/BinaryFormat/Dwarf.def" @@ -310,7 +310,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ .Case("DW_LANG_" #NAME, DW_LANG_##NAME) #include "llvm/BinaryFormat/Dwarf.def" .Default(0); @@ -320,7 +320,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { switch (Lang) { default: return 0; -#define 
HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return VERSION; #include "llvm/BinaryFormat/Dwarf.def" @@ -331,13 +331,24 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { switch (Lang) { default: return 0; -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return DWARF_VENDOR_##VENDOR; #include "llvm/BinaryFormat/Dwarf.def" } } +Optional llvm::dwarf::LanguageLowerBound(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return None; +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return LOWER_BOUND; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { case DW_ID_case_sensitive: diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 551e292fb032..e6018d9cf22e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -154,6 +154,52 @@ static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) { OS << TagStr.substr(7, TagStr.size() - 12) << " "; } +static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) { + Optional Bound; + for (const DWARFDie &C : D.children()) + if (C.getTag() == DW_TAG_subrange_type) { + Optional LB; + Optional Count; + Optional UB; + Optional DefaultLB; + if (Optional L = C.find(DW_AT_lower_bound)) + LB = L->getAsUnsignedConstant(); + if (Optional CountV = C.find(DW_AT_count)) + Count = CountV->getAsUnsignedConstant(); + if (Optional UpperV = C.find(DW_AT_upper_bound)) + UB = UpperV->getAsUnsignedConstant(); + if (Optional LV = + D.getDwarfUnit()->getUnitDIE().find(DW_AT_language)) + if (Optional LC = LV->getAsUnsignedConstant()) + if ((DefaultLB = + LanguageLowerBound(static_cast(*LC)))) + if (LB && *LB == *DefaultLB) + LB = None; + if (!LB && !Count 
&& !UB) + OS << "[]"; + else if (!LB && (Count || UB) && DefaultLB) + OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']'; + else { + OS << "[["; + if (LB) + OS << *LB; + else + OS << '?'; + OS << ", "; + if (Count) + if (LB) + OS << *LB + *Count; + else + OS << "? + " << *Count; + else if (UB) + OS << *UB + 1; + else + OS << '?'; + OS << ")]"; + } + } +} + /// Recursively dump the DIE type name when applicable. static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { if (!D.isValid()) @@ -201,24 +247,7 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { break; } case DW_TAG_array_type: { - Optional Bound; - for (const DWARFDie &C : D.children()) - if (C.getTag() == DW_TAG_subrange_type) { - OS << '['; - uint64_t LowerBound = 0; - if (Optional L = C.find(DW_AT_lower_bound)) - if (Optional LB = L->getAsUnsignedConstant()) { - LowerBound = *LB; - OS << LowerBound << '-'; - } - if (Optional CountV = C.find(DW_AT_count)) { - if (Optional C = CountV->getAsUnsignedConstant()) - OS << (*C + LowerBound); - } else if (Optional UpperV = C.find(DW_AT_upper_bound)) - if (Optional U = UpperV->getAsUnsignedConstant()) - OS << *U; - OS << ']'; - } + dumpArrayType(OS, D); break; } case DW_TAG_pointer_type: diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index d1ff54571782..9fa31773b598 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -697,8 +697,9 @@ void Instruction::applyMergedLocation(const DILocation *LocA, static unsigned map_from_llvmDWARFsourcelanguage(LLVMDWARFSourceLanguage lang) { switch (lang) { -#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ -case LLVMDWARFSourceLanguage##NAME: return ID; +#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ + case LLVMDWARFSourceLanguage##NAME: \ + return ID; #include "llvm/BinaryFormat/Dwarf.def" #undef HANDLE_DW_LANG } diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s 
index b3e871e7c82e..afeee4cfba64 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s @@ -19,13 +19,41 @@ # CHECK: DW_AT_type{{.*}}"int foo::*" # array_type -# Testing lower_bound, upper_bound, lower and upper, lower and count, and count separately. -# CHECK: DW_AT_type{{.*}}"int[1-][2][1-2][1-3][2]" +# CHECK: DW_AT_type{{.*}}"int +# Testing with a default lower bound of 0 and the following explicit bounds: +# lower_bound(1) +# CHECK-NOT: {{.}} +# CHECK-SAME: {{\[}}[1, ?)] +# upper_bound(2) +# CHECK-NOT: {{.}} +# CHECK-SAME: [3] +# lower(1) and upper(2) +# CHECK-NOT: {{.}} +# CHECK-SAME: {{\[}}[1, 3)] +# lower(1) and count(3) +# CHECK-NOT: {{.}} +# CHECK-SAME: {{\[}}[1, 4)] +# lower(0) and count(4) - testing that the lower bound matching language +# default is not rendered +# CHECK-NOT: {{.}} +# CHECK-SAME: [4] +# count(2) +# CHECK-SAME: [2] +# no attributes +# CHECK-NOT: {{.}} +# CHECK-SAME: []{{"\)$}} + # subroutine types # CHECK: DW_AT_type{{.*}}"int()" # CHECK: DW_AT_type{{.*}}"void(int)" # CHECK: DW_AT_type{{.*}}"void(int, int)" + +# array_type with a language with a default lower bound of 1 instead of 0 and +# an upper bound of 2. 
This describes an array with 2 elements (whereas with a +# default lower bound of 0 it would be an array of 3 elements) +# CHECK: DW_AT_type{{.*}}"int[2]" + .section .debug_str,"MS",@progbits,1 .Lint_name: .asciz "int" @@ -155,6 +183,11 @@ .byte 19 # DW_FORM_ref4 .byte 0 # EOM(1) .byte 0 # EOM(2) + .byte 18 # Abbreviation Code + .byte 0x21 # DW_TAG_subrange_type + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) .byte 0 # EOM(3) .section .debug_info,"",@progbits .Lcu_begin: @@ -196,9 +229,13 @@ .byte 2 # DW_AT_upper_bound .byte 12 # DW_AT_subrange_type .byte 1 # DW_AT_lower_bound - .byte 2 # DW_AT_count + .byte 3 # DW_AT_count + .byte 12 # DW_AT_subrange_type + .byte 0 # DW_AT_lower_bound + .byte 4 # DW_AT_count .byte 13 # DW_AT_subrange_type .byte 2 # DW_AT_count + .byte 18 # DW_AT_subrange_type .byte 0 # End Of Children Mark .Lsub_int_empty_type: .byte 15 # DW_TAG_subroutine_type @@ -236,3 +273,22 @@ .long .Lsub_void_int_int_type - .Lcu_begin # DW_AT_type .byte 0 # End Of Children Mark .Lunit_end: +.Lcu2_begin: + .long .Lcu2_unit_end - .Lcu2_unit_start # Length of Unit +.Lcu2_unit_start: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # DW_TAG_compile_unit + .short 13 # DW_AT_language +.Lcu2_int_type: + .byte 2 # DW_TAG_base_type + .long .Lint_name # DW_AT_name +.Lcu2_array_type: + .byte 8 # DW_TAG_array_type + .long .Lcu2_int_type - .Lcu2_begin # DW_AT_type + .byte 10 # DW_AT_subrange_type + .byte 2 # DW_AT_upper_bound + .byte 3 # DW_TAG_variable + .long .Lcu2_array_type - .Lcu2_begin # DW_AT_type +.Lcu2_unit_end: