[lld-macho] Add basic symbol table output

This diff implements basic support for writing a symbol table.

- Attributes are loosely supported for extern symbols and not at all for
  other types

Immediate future work will involve implementing section merging.

Initial version by Kellie Medlin <kelliem@fb.com>

Differential Revision: https://reviews.llvm.org/D76742
This commit is contained in:
Jez Ng 2020-04-27 13:28:49 -07:00 committed by Shoaib Meenai
parent e4872d7f08
commit a3d95a50ee
4 changed files with 176 additions and 2 deletions

View File

@ -9,6 +9,7 @@
#include "SyntheticSections.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
@ -128,6 +129,59 @@ void BindingSection::writeTo(uint8_t *buf) {
memcpy(buf, contents.data(), contents.size());
}
// Builds the symbol table section. It is placed in the linkEdit segment and
// keeps a reference to the string table that will hold its symbol names.
SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : stringTableSection(stringTableSection) {
  name = section_names::symbolTable;
  segname = segment_names::linkEdit;
}
// Size in bytes of the emitted table: one nlist_64 record per collected
// symbol.
size_t SymtabSection::getSize() const {
  const size_t entryCount = symbols.size();
  return entryCount * sizeof(nlist_64);
}
void SymtabSection::finalizeContents() {
// TODO: We should filter out some symbols.
for (Symbol *sym : symtab->getSymbols())
symbols.push_back({sym, stringTableSection.addString(sym->getName())});
}
// Emits one nlist_64 record per collected symbol into `buf`, which must have
// room for getSize() bytes.
void SymtabSection::writeTo(uint8_t *buf) {
  auto *nList = reinterpret_cast<nlist_64 *>(buf);
  for (const SymtabEntry &entry : symbols) {
    // Every entry occupies a slot and is counted by getNumSymbols(), so record
    // its name even when none of the remaining fields are populated yet.
    nList->n_strx = entry.strx;
    // TODO support other symbol types
    // TODO populate n_desc
    if (auto *defined = dyn_cast<Defined>(entry.sym)) {
      nList->n_type = N_EXT | N_SECT;
      nList->n_sect = defined->isec->sectionIndex;
      // For the N_SECT symbol type, n_value is the address of the symbol
      nList->n_value = defined->value + defined->isec->addr;
    }
    ++nList;
  }
}
// Builds the string table section; like the symbol table it is placed in the
// linkEdit segment.
StringTableSection::StringTableSection() {
  name = section_names::stringTable;
  segname = segment_names::linkEdit;
}
// Appends `str` to the table and returns the byte offset at which it will be
// emitted. The running size grows by the string's length plus one byte for
// its terminator.
uint32_t StringTableSection::addString(StringRef str) {
  const uint32_t startOffset = size;
  size += str.size() + 1; // account for null terminator
  strings.push_back(str);
  return startOffset;
}
// Serializes the table into `buf`, which must have room for getSize() bytes.
// Strings are laid out back to back in insertion order, each followed by a
// NUL byte, so the offsets handed out by addString() stay valid.
void StringTableSection::writeTo(uint8_t *buf) {
  uint8_t *cursor = buf;
  for (StringRef str : strings) {
    memcpy(cursor, str.data(), str.size());
    cursor += str.size();
    // Write the terminator explicitly instead of assuming the caller handed
    // us a zero-filled buffer.
    *cursor++ = '\0';
  }
}
// Definition of the global InStruct object `in`.
InStruct in;
} // namespace macho

View File

@ -23,6 +23,8 @@ namespace section_names {
constexpr const char *pageZero = "__pagezero";
constexpr const char *header = "__mach_header";
constexpr const char *binding = "__binding";
// Names assigned to the synthetic symbol table and string table sections.
constexpr const char *symbolTable = "__symbol_table";
constexpr const char *stringTable = "__string_table";
} // namespace section_names
@ -93,6 +95,49 @@ public:
SmallVector<char, 128> contents;
};
// Stores the strings referenced by the symbol table.
// Strings are appended with addString() and emitted by writeTo() in the order
// in which they were added.
class StringTableSection : public InputSection {
public:
StringTableSection();
// Returns the start offset of the added string.
uint32_t addString(StringRef);
// Total size of the table in bytes: the leading dummy byte plus, for every
// added string, its length and one terminator byte.
size_t getSize() const override { return size; }
// Like other sections in __LINKEDIT, the string table section is special: its
// offsets are recorded in the LC_SYMTAB load command, instead of in section
// headers.
bool isHidden() const override { return true; }
// Copies the stored strings into `buf` at the offsets addString() returned.
void writeTo(uint8_t *buf) override;
private:
// An n_strx value of 0 always indicates the empty string, so we must locate
// our non-empty string values at positive offsets in the string table.
// Therefore we insert a dummy value at position zero.
// NOTE(review): the dummy entry is presumably an empty StringRef (length taken
// up to the literal's first NUL), so it contributes only the single byte
// already counted in `size` -- confirm.
std::vector<StringRef> strings{"\0"};
// Running size of the table in bytes; starts at 1 to cover the dummy entry.
size_t size = 1;
};
// Pairs a symbol with the string table offset of its name.
struct SymtabEntry {
Symbol *sym;
// Offset returned by StringTableSection::addString() for the symbol's name;
// it is what gets stored in the n_strx field of the symbol's nlist_64 record.
size_t strx;
};
// Synthetic section holding the output symbol table: one nlist_64 record per
// collected symbol, with the symbol names stored in the associated
// StringTableSection.
class SymtabSection : public InputSection {
public:
  // `explicit` so that a StringTableSection can never be implicitly converted
  // into a SymtabSection.
  explicit SymtabSection(StringTableSection &);
  // Gathers the symbols to emit and registers their names in the string
  // table. getNumSymbols() and getSize() report zero until this has run.
  void finalizeContents();
  // Number of entries collected by finalizeContents().
  size_t getNumSymbols() const { return symbols.size(); }
  size_t getSize() const override;
  // Like other sections in __LINKEDIT, the symtab section is special: its
  // offsets are recorded in the LC_SYMTAB load command, instead of in section
  // headers.
  bool isHidden() const override { return true; }
  void writeTo(uint8_t *buf) override;

private:
  // String table that receives the name of every collected symbol.
  StringTableSection &stringTableSection;
  // Symbols to emit, in emission order, with their name offsets.
  std::vector<SymtabEntry> symbols;
};
// Holds a GotSection pointer, which is null until something assigns it.
struct InStruct {
GotSection *got = nullptr;
};

View File

@ -52,6 +52,8 @@ public:
uint64_t fileOff = 0;
MachHeaderSection *headerSection = nullptr;
BindingSection *bindingSection = nullptr;
SymtabSection *symtabSection = nullptr;
StringTableSection *stringTableSection = nullptr;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@ -163,13 +165,23 @@ class LCMain : public LoadCommand {
// LC_SYMTAB load command: records the file offsets of the symbol table and
// string table sections, together with the symbol count and string table size.
class LCSymtab : public LoadCommand {
public:
  LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
      : symtabSection(symtabSection), stringTableSection(stringTableSection) {}

  uint32_t getSize() const override { return sizeof(symtab_command); }

  // Fills in the symtab_command located at `buf`.
  void writeTo(uint8_t *buf) const override {
    auto *cmd = reinterpret_cast<symtab_command *>(buf);
    cmd->cmd = LC_SYMTAB;
    cmd->cmdsize = getSize();

    // Symbol table: file offset and number of entries.
    cmd->symoff = symtabSection->getFileOffset();
    cmd->nsyms = symtabSection->getNumSymbols();

    // String table: file offset and size in the file.
    cmd->stroff = stringTableSection->getFileOffset();
    cmd->strsize = stringTableSection->getFileSize();
  }

  SymtabSection *symtabSection = nullptr;
  StringTableSection *stringTableSection = nullptr;
};
class LCLoadDylib : public LoadCommand {
@ -238,7 +250,12 @@ public:
{defaultPosition, {}},
// Make sure __LINKEDIT is the last segment (i.e. all its hidden
// sections must be ordered after other sections).
{segment_names::linkEdit, {section_names::binding}},
{segment_names::linkEdit,
{
section_names::binding,
section_names::symbolTable,
section_names::stringTable,
}},
};
for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
@ -294,7 +311,8 @@ void Writer::scanRelocations() {
void Writer::createLoadCommands() {
headerSection->addLoadCommand(make<LCDyldInfo>(bindingSection));
headerSection->addLoadCommand(make<LCLoadDylinker>());
headerSection->addLoadCommand(make<LCSymtab>());
headerSection->addLoadCommand(
make<LCSymtab>(symtabSection, stringTableSection));
headerSection->addLoadCommand(make<LCDysymtab>());
headerSection->addLoadCommand(make<LCMain>());
@ -323,6 +341,8 @@ void Writer::createLoadCommands() {
// Creates the linker-synthesized sections and keeps pointers to the ones the
// writer refers to later (header, binding, string table and symbol table).
void Writer::createHiddenSections() {
headerSection = createInputSection<MachHeaderSection>();
bindingSection = createInputSection<BindingSection>();
// The string table must be created first: the symbol table section is
// constructed with a reference to it.
stringTableSection = createInputSection<StringTableSection>();
symtabSection = createInputSection<SymtabSection>(*stringTableSection);
// The page-zero section is created but no pointer to it is retained here.
createInputSection<PageZeroSection>();
}
@ -405,6 +425,7 @@ void Writer::run() {
// Fill __LINKEDIT contents.
bindingSection->finalizeContents();
symtabSection->finalizeContents();
// Now that __LINKEDIT is filled out, do a proper calculation of its
// addresses and offsets. We don't have to recalculate the other segments

54
lld/test/MachO/symtab.s Normal file
View File

@ -0,0 +1,54 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: lld -flavor darwinnew -o %t %t.o
# RUN: llvm-readobj -symbols %t | FileCheck %s
# CHECK: Symbols [
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: _main
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __text (0x1)
# CHECK-NEXT: RefType:
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]
# CHECK-NEXT: Value:
# CHECK-NEXT: }
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: bar
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __text (0x1)
# CHECK-NEXT: RefType:
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]
# CHECK-NEXT: Value:
# CHECK-NEXT: }
# CHECK-NEXT: Symbol {
# CHECK-NEXT: Name: foo
# CHECK-NEXT: Extern
# CHECK-NEXT: Type: Section (0xE)
# CHECK-NEXT: Section: __data
# CHECK-NEXT: RefType:
# CHECK-NEXT: Flags [ (0x0)
# CHECK-NEXT: ]
# CHECK-NEXT: Value:
# CHECK-NEXT: }
# CHECK-NEXT: ]
# Test input: one global data symbol (foo) and two global text symbols
# (_main and bar), matching the three symbol entries expected above.
.data
.global foo
foo:
.asciz "Hello world!\n"
.text
.global bar
.global _main
# bar is declared global before _main but defined after it.
_main:
mov $0, %rax
ret
bar:
mov $2, %rax
ret