forked from OSchip/llvm-project
DebugInfo: Use base address selection entries in debug_ranges to reduce relocations
(from comments in the test) Group ranges in a range list that apply to the same section and use a base address selection entry to reduce the number of relocations to one reloc per section per range list. DWARF5 debug_rnglists will be more efficient than this in terms of relocations, but it's still better than one reloc per entry in a range list. This is an object/executable size tradeoff - shrinking objects, but growing the linked executable. In one large binary tested, total object size (not just debug info) shrank by 16%, entirely relocation entries. Linked executable grew by 4%. This was with compressed debug info in the objects, uncompressed in the linked executable. Without compression in the objects, the win would be smaller (the growth of debug_ranges itself would be more significant). llvm-svn: 309526
This commit is contained in:
parent
16a2f5ac8e
commit
89c81a0b91
|
@ -1865,12 +1865,36 @@ void DwarfDebug::emitDebugRanges() {
|
||||||
// Emit our symbol so we can find the beginning of the range.
|
// Emit our symbol so we can find the beginning of the range.
|
||||||
Asm->OutStreamer->EmitLabel(List.getSym());
|
Asm->OutStreamer->EmitLabel(List.getSym());
|
||||||
|
|
||||||
|
// Gather all the ranges that apply to the same section so they can share
|
||||||
|
// a base address entry.
|
||||||
|
MapVector<const MCSection *, std::vector<const RangeSpan *>> MV;
|
||||||
for (const RangeSpan &Range : List.getRanges()) {
|
for (const RangeSpan &Range : List.getRanges()) {
|
||||||
const MCSymbol *Begin = Range.getStart();
|
MV[&Range.getStart()->getSection()].push_back(&Range);
|
||||||
const MCSymbol *End = Range.getEnd();
|
}
|
||||||
|
|
||||||
|
auto *CUBase = TheCU->getBaseAddress();
|
||||||
|
for (const auto &P : MV) {
|
||||||
|
// Don't bother with a base address entry if there's only one range in
|
||||||
|
// this section in this range list - for example ranges for a CU will
|
||||||
|
// usually consist of single regions from each of many sections
|
||||||
|
// (-ffunction-sections, or just C++ inline functions) except under LTO
|
||||||
|
// or optnone where there may be holes in a single CU's section
|
||||||
|
// contributions.
|
||||||
|
auto *Base = CUBase;
|
||||||
|
if (!Base && P.second.size() > 1) {
|
||||||
|
// FIXME/use care: This may not be a useful base address if it's not
|
||||||
|
// the lowest address/range in this object.
|
||||||
|
Base = P.second.front()->getStart();
|
||||||
|
Asm->OutStreamer->EmitIntValue(-1, Size);
|
||||||
|
Asm->OutStreamer->EmitSymbolValue(Base, Size);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto *RS : P.second) {
|
||||||
|
const MCSymbol *Begin = RS->getStart();
|
||||||
|
const MCSymbol *End = RS->getEnd();
|
||||||
assert(Begin && "Range without a begin symbol?");
|
assert(Begin && "Range without a begin symbol?");
|
||||||
assert(End && "Range without an end symbol?");
|
assert(End && "Range without an end symbol?");
|
||||||
if (auto *Base = TheCU->getBaseAddress()) {
|
if (Base) {
|
||||||
Asm->EmitLabelDifference(Begin, Base, Size);
|
Asm->EmitLabelDifference(Begin, Base, Size);
|
||||||
Asm->EmitLabelDifference(End, Base, Size);
|
Asm->EmitLabelDifference(End, Base, Size);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1878,6 +1902,7 @@ void DwarfDebug::emitDebugRanges() {
|
||||||
Asm->OutStreamer->EmitSymbolValue(End, Size);
|
Asm->OutStreamer->EmitSymbolValue(End, Size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// And terminate the list with two 0 values.
|
// And terminate the list with two 0 values.
|
||||||
Asm->OutStreamer->EmitIntValue(0, Size);
|
Asm->OutStreamer->EmitIntValue(0, Size);
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
; RUN: llc -filetype=asm -mtriple=x86_64-pc-linux-gnu %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
; Group ranges in a range list that apply to the same section and use a base
|
||||||
|
; address selection entry to reduce the number of relocations to one reloc per
|
||||||
|
; section per range list. DWARF5 debug_rnglists will be more efficient than this
|
||||||
|
; in terms of relocations, but it's still better than one reloc per entry in a
|
||||||
|
; range list.
|
||||||
|
|
||||||
|
; This is an object/executable size tradeoff - shrinking objects, but growing
|
||||||
|
; the linked executable. In one large binary tested, total object size (not just
|
||||||
|
; debug info) shrank by 16%, entirely relocation entries. Linked executable
|
||||||
|
; grew by 4%. This was with compressed debug info in the objects, uncompressed
|
||||||
|
; in the linked executable. Without compression in the objects, the win would be
|
||||||
|
; smaller (the growth of debug_ranges itself would be more significant).
|
||||||
|
|
||||||
|
; CHECK: {{^.Ldebug_ranges0}}
|
||||||
|
; CHECK: .quad -1
|
||||||
|
; CHECK: .quad .Lfunc_begin0
|
||||||
|
; CHECK: .quad .Lfunc_begin0-.Lfunc_begin0
|
||||||
|
; CHECK: .quad .Lfunc_end0-.Lfunc_begin0
|
||||||
|
; CHECK: .quad .Lfunc_begin2-.Lfunc_begin0
|
||||||
|
; CHECK: .quad .Lfunc_end2-.Lfunc_begin0
|
||||||
|
; CHECK: .quad .Lfunc_begin1
|
||||||
|
; CHECK: .quad .Lfunc_end1
|
||||||
|
; CHECK: .quad 0
|
||||||
|
; CHECK: .quad 0
|
||||||
|
|
||||||
|
|
||||||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||||||
|
define void @_Z2f1v() #0 !dbg !7 {
|
||||||
|
entry:
|
||||||
|
ret void, !dbg !10
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||||||
|
define void @_Z2f2v() #0 section "narf" !dbg !11 {
|
||||||
|
entry:
|
||||||
|
ret void, !dbg !12
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||||||
|
define void @_Z2f3v() #0 !dbg !13 {
|
||||||
|
entry:
|
||||||
|
ret void, !dbg !14
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||||
|
|
||||||
|
!llvm.dbg.cu = !{!0}
|
||||||
|
!llvm.module.flags = !{!3, !4, !5}
|
||||||
|
!llvm.ident = !{!6}
|
||||||
|
|
||||||
|
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 309510) (llvm/trunk 309516)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||||
|
!1 = !DIFile(filename: "range.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
|
||||||
|
!2 = !{}
|
||||||
|
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||||
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||||
|
!5 = !{i32 1, !"wchar_size", i32 4}
|
||||||
|
!6 = !{!"clang version 6.0.0 (trunk 309510) (llvm/trunk 309516)"}
|
||||||
|
!7 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||||
|
!8 = !DISubroutineType(types: !9)
|
||||||
|
!9 = !{null}
|
||||||
|
!10 = !DILocation(line: 2, column: 1, scope: !7)
|
||||||
|
!11 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||||
|
!12 = !DILocation(line: 4, column: 1, scope: !11)
|
||||||
|
!13 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 5, type: !8, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||||
|
!14 = !DILocation(line: 6, column: 1, scope: !13)
|
Loading…
Reference in New Issue