[lld-macho] Emit STABS symbols for debugging, and drop debug sections

Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.

With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.

Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.

Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_via_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:

1. We can split up subsections by symbol even if `.subsections_via_symbols`
   is not set, but include constraints to ensure those subsections retain
   their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
   and I'm more inclined toward it, but I'm not sure if there are use cases
   that it doesn't handle well. As such I'm punting on the decision for now.

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D89257
This commit is contained in:
Jez Ng 2020-12-01 14:45:01 -08:00
parent ba4e45a0aa
commit 3fcb0eeb15
11 changed files with 383 additions and 8 deletions

View File

@ -9,6 +9,7 @@ add_lld_library(lldMachO2
UnwindInfoSection.cpp
Driver.cpp
DriverUtils.cpp
Dwarf.cpp
ExportTrie.cpp
InputFiles.cpp
InputSection.cpp

49
lld/MachO/Dwarf.cpp Normal file
View File

@ -0,0 +1,49 @@
//===- Dwarf.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Dwarf.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "OutputSegment.h"
#include <memory>
using namespace lld;
using namespace lld::macho;
using namespace llvm;
// Builds a DwarfObject view over the DWARF input sections of `obj`.
//
// Every subsection of every section in `obj` is scanned; only sections that
// carry the debug attribute and live in the __DWARF segment are considered.
// The contents of __debug_info, __debug_abbrev and __debug_str are recorded
// (as non-owning references to the input section data).
//
// Returns the populated object if at least one of those three sections was
// found, and nullptr otherwise. Note that a non-null result therefore does
// not guarantee that __debug_info itself is present.
std::unique_ptr<DwarfObject> DwarfObject::create(ObjFile *obj) {
  auto dObj = std::make_unique<DwarfObject>();
  bool hasDwarfInfo = false;
  // Iterate by const reference: copying each SubsectionMap (and each of its
  // entries) per iteration is pure overhead since we only read from them.
  for (const SubsectionMap &subsecMap : obj->subsections) {
    for (const auto &it : subsecMap) {
      InputSection *isec = it.second;
      if (!(isDebugSection(isec->flags) &&
            isec->segname == segment_names::dwarf))
        continue;

      // __debug_info is stored in a DWARFSection rather than a plain
      // StringRef, so it is handled separately from the other sections.
      if (isec->name == "__debug_info") {
        dObj->infoSection.Data = toStringRef(isec->data);
        hasDwarfInfo = true;
        continue;
      }

      if (StringRef *s = StringSwitch<StringRef *>(isec->name)
                             .Case("__debug_abbrev", &dObj->abbrevSection)
                             .Case("__debug_str", &dObj->strSection)
                             .Default(nullptr)) {
        *s = toStringRef(isec->data);
        hasDwarfInfo = true;
      }
    }
  }

  if (hasDwarfInfo)
    return dObj;
  return nullptr;
}

53
lld/MachO/Dwarf.h Normal file
View File

@ -0,0 +1,53 @@
//===- Dwarf.h -----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------------===//
#ifndef LLD_MACHO_DWARF_H
#define LLD_MACHO_DWARF_H
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFObject.h"
namespace lld {
namespace macho {
class ObjFile;
// Implements the interface between LLVM's DWARF-parsing utilities and LLD's
// InputSection structures.
class DwarfObject final : public llvm::DWARFObject {
public:
bool isLittleEndian() const override { return true; }
llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &sec,
uint64_t pos) const override {
// TODO: implement this
return llvm::None;
}
void forEachInfoSections(
llvm::function_ref<void(const llvm::DWARFSection &)> f) const override {
f(infoSection);
}
llvm::StringRef getAbbrevSection() const override { return abbrevSection; }
llvm::StringRef getStrSection() const override { return strSection; }
// Returns an instance of DwarfObject if the given object file has the
// relevant DWARF debug sections.
static std::unique_ptr<DwarfObject> create(ObjFile *);
private:
llvm::DWARFSection infoSection;
llvm::StringRef abbrevSection;
llvm::StringRef strSection;
};
} // namespace macho
} // namespace lld
#endif

View File

@ -44,6 +44,7 @@
#include "InputFiles.h"
#include "Config.h"
#include "Driver.h"
#include "Dwarf.h"
#include "ExportTrie.h"
#include "InputSection.h"
#include "MachOStructs.h"
@ -54,6 +55,7 @@
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/DWARF.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Reproduce.h"
@ -387,6 +389,28 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) {
// parsed all the symbols.
for (size_t i = 0, n = subsections.size(); i < n; ++i)
parseRelocations(sectionHeaders[i], subsections[i]);
parseDebugInfo();
}
// Parses this object file's DWARF (if any) and records its compile unit in
// `compileUnit`. `compileUnit` is left untouched (nullptr by default) when
// the file has no DWARF sections or when no compile unit could be parsed.
//
// Errors and warnings reported by the DWARF parser are surfaced as linker
// warnings prefixed with this file's name.
void ObjFile::parseDebugInfo() {
  std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
  if (!dObj)
    return;

  auto *ctx = make<DWARFContext>(
      std::move(dObj), "",
      [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
      [&](Error warning) {
        warn(getName() + ": " + toString(std::move(warning)));
      });

  // TODO: Since object files can contain a lot of DWARF info, we should verify
  // that we are parsing just the info we need
  const DWARFContext::compile_unit_range &units = ctx->compile_units();
  auto it = units.begin();
  // DwarfObject::create() succeeds as soon as any one of __debug_info,
  // __debug_abbrev or __debug_str is present, and parsing may also fail, so
  // the range can be empty. Dereferencing begin() would then be undefined.
  if (it == units.end())
    return;
  compileUnit = it->get();
  assert(std::next(it) == units.end());
}
// The path can point to either a dylib or a .tbd file.

View File

@ -15,6 +15,7 @@
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TextAPI/MachO/InterfaceFile.h"
@ -91,6 +92,11 @@ class ObjFile : public InputFile {
public:
explicit ObjFile(MemoryBufferRef mb);
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
llvm::DWARFUnit *compileUnit = nullptr;
private:
void parseDebugInfo();
};
// command-line -sectcreate file

View File

@ -35,15 +35,20 @@ struct Reloc {
llvm::PointerUnion<Symbol *, InputSection *> referent;
};
inline bool isZeroFill(uint8_t flags) {
inline bool isZeroFill(uint32_t flags) {
return llvm::MachO::isVirtualSection(flags & llvm::MachO::SECTION_TYPE);
}
inline bool isThreadLocalVariables(uint8_t flags) {
inline bool isThreadLocalVariables(uint32_t flags) {
return (flags & llvm::MachO::SECTION_TYPE) ==
llvm::MachO::S_THREAD_LOCAL_VARIABLES;
}
// Returns true if, among the user-settable attribute bits of `flags`
// (SECTION_ATTRIBUTES_USR), exactly S_ATTR_DEBUG is set.
inline bool isDebugSection(uint32_t flags) {
  const uint32_t userAttrs = flags & llvm::MachO::SECTION_ATTRIBUTES_USR;
  return userAttrs == llvm::MachO::S_ATTR_DEBUG;
}
class InputSection {
public:
virtual ~InputSection() = default;

View File

@ -23,6 +23,7 @@ constexpr const char data[] = "__DATA";
constexpr const char linkEdit[] = "__LINKEDIT";
constexpr const char dataConst[] = "__DATA_CONST";
constexpr const char ld[] = "__LD"; // output only with -r
constexpr const char dwarf[] = "__DWARF";
} // namespace segment_names

View File

@ -20,7 +20,9 @@
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::support;
@ -574,17 +576,100 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
stringTableSection(stringTableSection) {}
// Returns the size in bytes of the symbol table: one nlist_64 record per
// entry reported by getNumSymbols(). The count must cover every record that
// writeTo() emits, including the STABS entries it appends after the regular
// symbols; sizing by `symbols.size()` alone would under-allocate.
uint64_t SymtabSection::getRawSize() const {
  return getNumSymbols() * sizeof(structs::nlist_64);
}
// Appends an N_SO stab that opens the range belonging to one source file.
// Its string is the compile unit's compilation directory joined with the
// short name of its unit DIE.
void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
  SmallString<261> sourcePath(compileUnit->getCompilationDir());
  StringRef separator = sys::path::get_separator();
  // We don't use `path::append` here because we want an empty directory to
  // result in an absolute path. `append` would give us a relative path for
  // that case.
  if (!sourcePath.endswith(separator))
    sourcePath += separator;

  StabsEntry entry(MachO::N_SO);
  entry.strx = stringTableSection.addString(
      saver.save(sourcePath + compileUnit->getUnitDIE().getShortName()));
  stabs.push_back(entry);
}
void SymtabSection::emitEndSourceStab() {
StabsEntry stab(MachO::N_SO);
stab.sect = 1;
stabs.emplace_back(std::move(stab));
}
// Appends an N_OSO stab naming the object file that the following stabs
// originate from. The recorded path is always absolute, even when the input
// was given as a relative path; failing to absolutize it is a fatal error.
void SymtabSection::emitObjectFileStab(ObjFile *file) {
  SmallString<261> absPath(file->getName());
  if (sys::fs::make_absolute(absPath))
    fatal("failed to get absolute path for " + file->getName());

  StabsEntry entry(MachO::N_OSO);
  entry.sect = target->cpuSubtype;
  entry.strx = stringTableSection.addString(saver.save(absPath.str()));
  entry.desc = 1;
  stabs.push_back(entry);
}
// Appends the pair of N_FUN stabs describing one function symbol: the first
// carries the symbol's name and virtual address, the second (nameless) one
// carries its size.
void SymtabSection::emitFunStabs(Defined *defined) {
  StabsEntry funcStart(MachO::N_FUN);
  funcStart.sect = 1;
  funcStart.strx = stringTableSection.addString(defined->getName());
  funcStart.value = defined->getVA();
  stabs.push_back(funcStart);

  StabsEntry funcSize(MachO::N_FUN);
  // FIXME this should be the size of the symbol. Using the section size in
  // lieu is only correct if .subsections_via_symbols is set.
  funcSize.value = defined->isec->getSize();
  stabs.push_back(funcSize);
}
// Populates the symbol table.
//
// For every symbol known to the global symbol table this:
//  1. assigns a symtab index and string-table entry to symbols that are
//     Defined or referenced through the GOT or stubs, and
//  2. emits STABS entries for Defined, non-absolute symbols that live in a
//     `__text` section of an object file with a parsed compile unit.
//
// STABS for consecutive symbols of the same object file are grouped between
// one opening N_SO/N_OSO pair and one closing N_SO. A new group is started
// whenever the originating file changes.
void SymtabSection::finalizeContents() {
  InputFile *lastFile = nullptr;
  for (Symbol *sym : symtab->getSymbols()) {
    // TODO support other symbol types
    if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) {
      sym->symtabIndex = symbols.size();
      symbols.push_back({sym, stringTableSection.addString(sym->getName())});
    }

    // Emit STABS symbols so that dsymutil and/or the debugger can map address
    // regions in the final binary to the source and object files from which
    // they originated.
    auto *defined = dyn_cast<Defined>(sym);
    if (!defined || defined->isAbsolute())
      continue;

    InputSection *isec = defined->isec;
    // XXX is it right to assume that all symbols in __text are function
    // symbols?
    // TODO emit stabs for non-function symbols too
    if (isec->name != "__text")
      continue;

    // Skip sections that do not originate from an object file instead of
    // relying on an assert, which compiles away in release builds and would
    // leave a null dereference behind.
    auto *file = dyn_cast_or_null<ObjFile>(isec->file);
    if (!file || !file->compileUnit)
      continue;

    if (lastFile != file) {
      // Close the previous file's range before opening a new one.
      if (lastFile != nullptr)
        emitEndSourceStab();
      lastFile = file;
      emitBeginSourceStab(file->compileUnit);
      emitObjectFileStab(file);
    }
    emitFunStabs(defined);
  }

  // Close the last open source-file range, if any stabs were emitted at all.
  if (!stabs.empty())
    emitEndSourceStab();
}
void SymtabSection::writeTo(uint8_t *buf) const {
@ -602,12 +687,23 @@ void SymtabSection::writeTo(uint8_t *buf) const {
nList->n_type = MachO::N_EXT | MachO::N_SECT;
nList->n_sect = defined->isec->parent->index;
// For the N_SECT symbol type, n_value is the address of the symbol
nList->n_value = defined->value + defined->isec->getVA();
nList->n_value = defined->getVA();
}
nList->n_desc |= defined->isWeakDef() ? MachO::N_WEAK_DEF : 0;
}
++nList;
}
// Emit the stabs entries after the "real" symbols. We cannot emit them
// before as that would render Symbol::symtabIndex inaccurate.
for (const StabsEntry &entry : stabs) {
nList->n_strx = entry.strx;
nList->n_type = entry.type;
nList->n_sect = entry.sect;
nList->n_desc = entry.desc;
nList->n_value = entry.value;
++nList;
}
}
IndirectSymtabSection::IndirectSymtabSection()
@ -656,7 +752,7 @@ StringTableSection::StringTableSection()
// Appends `str` to the string table and returns the offset at which it
// starts. Each string is recorded exactly once per call so that the running
// `size`, which advances by the string length plus its null terminator,
// stays in sync with the stored strings and with the offsets handed out.
uint32_t StringTableSection::addString(StringRef str) {
  uint32_t strx = size;
  strings.push_back(str); // TODO: consider deduplicating strings
  size += str.size() + 1; // account for null terminator
  return strx;
}

View File

@ -20,6 +20,10 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class DWARFUnit;
} // namespace llvm
namespace lld {
namespace macho {
@ -48,6 +52,7 @@ constexpr const char ehFrame[] = "__eh_frame";
class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
class SyntheticSection : public OutputSection {
public:
@ -405,16 +410,32 @@ struct SymtabEntry {
size_t strx;
};
// One pending STABS record. Each field is copied verbatim into the
// like-named nlist field (n_type, n_strx, n_sect, n_desc, n_value) when the
// symbol table is written. Everything except `type` defaults to zero.
struct StabsEntry {
  uint8_t type;       // stab kind; always supplied by the constructor
  uint32_t strx{0};   // string-table offset of the entry's name
  uint8_t sect{0};
  uint16_t desc{0};
  uint64_t value{0};

  explicit StabsEntry(uint8_t type) : type{type} {}
};
// The output symbol table. It holds the regular symbol entries plus the
// STABS entries that let dsymutil and the debugger find debug info in the
// original object files.
class SymtabSection : public LinkEditSection {
public:
  SymtabSection(StringTableSection &);
  void finalizeContents();
  // Total number of records in the table: STABS entries plus regular symbols.
  // Defined exactly once; a second, stabs-unaware definition would be a
  // redefinition error and would under-count the emitted records.
  size_t getNumSymbols() const { return stabs.size() + symbols.size(); }
  uint64_t getRawSize() const override;
  void writeTo(uint8_t *buf) const override;

private:
  // Helpers that append individual STABS records to `stabs`.
  void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
  void emitEndSourceStab();
  void emitObjectFileStab(ObjFile *);
  void emitFunStabs(Defined *);

  StringTableSection &stringTableSection;
  std::vector<StabsEntry> stabs;
  std::vector<SymtabEntry> symbols;
};

View File

@ -578,6 +578,10 @@ void Writer::createOutputSections() {
MapVector<std::pair<StringRef, StringRef>, MergedOutputSection *>
mergedOutputSections;
for (InputSection *isec : inputSections) {
// Instead of emitting DWARF sections, we emit STABS symbols to the object
// files that contain them.
if (isDebugSection(isec->flags) && isec->segname == segment_names::dwarf)
continue;
MergedOutputSection *&osec =
mergedOutputSections[{isec->segname, isec->name}];
if (osec == nullptr)
@ -591,8 +595,9 @@ void Writer::createOutputSections() {
if (unwindInfoSection && segname == segment_names::ld) {
assert(osec->name == section_names::compactUnwind);
unwindInfoSection->setCompactUnwindSection(osec);
} else
} else {
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
}
for (SyntheticSection *ssec : syntheticSections) {

114
lld/test/MachO/stabs.s Normal file
View File

@ -0,0 +1,114 @@
# REQUIRES: x86
# UNSUPPORTED: system-windows
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
# RUN: %lld -lSystem %t/test.o %t/foo.o -o %t/test
# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t
## Check that we emit absolute paths to the object files in our OSO entries
## even if our inputs are relative paths.
# RUN: cd %t && %lld -lSystem test.o foo.o -o test
# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t
# CHECK-DAG: [[#%x, MAIN:]] T _main
# CHECK-DAG: [[#%x, FOO: ]] T _foo
# CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp
# CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/test.o
# CHECK-NEXT: [[#MAIN]] - 01 0000 FUN _main
# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
# CHECK-NEXT: 0000000000000000 - 01 0000 SO
# CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp
# CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/foo.o
# CHECK-NEXT: [[#FOO]] - 01 0000 FUN _foo
# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
# CHECK-NEXT: 0000000000000000 - 01 0000 SO
#--- test.s
## First input: defines _main (a single retq) together with a minimal,
## hand-written DWARF version 4 compile unit. DW_AT_name refers to "test.cpp"
## and DW_AT_comp_dir refers to "/tmp" in __debug_str; the CHECK lines above
## expect the two to be joined into the N_SO path /tmp/test.cpp.
.text
.globl _main
_main:
Lfunc_begin0:
retq
Lfunc_end0:
.section __DWARF,__debug_str,regular,debug
.asciz "test.cpp" ## string offset=0
.asciz "/tmp" ## string offset=9
.section __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
.byte 1 ## Abbreviation Code
.byte 17 ## DW_TAG_compile_unit
.byte 1 ## DW_CHILDREN_yes
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 27 ## DW_AT_comp_dir
.byte 14 ## DW_FORM_strp
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Ldebug_info_start0:
.short 4 ## DWARF version number
.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
.long Lset1
.byte 8 ## Address Size (in bytes)
.byte 1 ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
.long 0 ## DW_AT_name
.long 9 ## DW_AT_comp_dir
.quad Lfunc_begin0 ## DW_AT_low_pc
.set Lset3, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
.long Lset3
.byte 0 ## End Of Children Mark
Ldebug_info_end0:
.subsections_via_symbols
.section __DWARF,__debug_line,regular,debug
#--- foo.s
## Second input: defines _foo (a single retq) with the same minimal DWARF
## layout as the first input, except that DW_AT_comp_dir refers to the empty
## string at offset 8 of __debug_str. This exercises the empty-directory case:
## the CHECK lines above expect the N_SO path to be the absolute /foo.cpp.
.text
.globl _foo
_foo:
Lfunc_begin0:
retq
Lfunc_end0:
.section __DWARF,__debug_str,regular,debug
.asciz "foo.cpp" ## string offset=0
.asciz "" ## string offset=8
.section __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
.byte 1 ## Abbreviation Code
.byte 17 ## DW_TAG_compile_unit
.byte 1 ## DW_CHILDREN_yes
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 27 ## DW_AT_comp_dir
.byte 14 ## DW_FORM_strp
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.section __DWARF,__debug_info,regular,debug
.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
.long Lset0
Ldebug_info_start0:
.short 4 ## DWARF version number
.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
.long Lset1
.byte 8 ## Address Size (in bytes)
.byte 1 ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
.long 0 ## DW_AT_name
.long 8 ## DW_AT_comp_dir
.quad Lfunc_begin0 ## DW_AT_low_pc
.set Lset3, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
.long Lset3
.byte 0 ## End Of Children Mark
Ldebug_info_end0:
.subsections_via_symbols
.section __DWARF,__debug_line,regular,debug