[lld-macho] Support binding dysyms to any section

Previously, we only supported binding dysyms (dylib symbols) to the GOT.
This diff adds support for binding them to arbitrary sections. C++
programs appear to use this, I believe for vtables and type_info.

This diff also makes our bind opcode encoding a bit smarter -- we now
encode just the differences between bindings, which will make things
more compact.

I was initially concerned about the performance overhead of iterating
over these relocations, but it turns out that the number of such
relocations is small. A quick analysis of my llvm-project build
directory showed that fewer than 1.3% of its ~7M relocations are
RELOC_UNSIGNED bindings to symbols (counting both dynamic and static
symbols).

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D83103
This commit is contained in:
Jez Ng 2020-07-02 21:19:55 -07:00
parent 7ec6927bad
commit 53eb7fda51
6 changed files with 143 additions and 47 deletions

View File

@ -34,7 +34,8 @@ struct X86_64 : TargetInfo {
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const override;
void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
const Reloc &) override;
uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
};
@ -208,8 +209,9 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
in.stubHelper->addr);
}
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
switch (type) {
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
const InputSection *isec, const Reloc &r) {
switch (r.type) {
case X86_64_RELOC_GOT_LOAD:
// TODO: implement mov -> lea relaxation for non-dynamic symbols
case X86_64_RELOC_GOT:
@ -220,7 +222,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
in.stubs->addEntry(*dysym);
break;
}
case X86_64_RELOC_UNSIGNED:
case X86_64_RELOC_UNSIGNED: {
if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
if (r.length != 3) {
error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
dysym->getName() + " must have r_length = 3");
return;
}
in.binding->addEntry(dysym, isec, r.offset, r.addend);
}
break;
}
case X86_64_RELOC_SIGNED:
case X86_64_RELOC_SIGNED_1:
case X86_64_RELOC_SIGNED_2:
@ -228,7 +240,7 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
break;
case X86_64_RELOC_SUBTRACTOR:
case X86_64_RELOC_TLV:
fatal("TODO: handle relocation type " + std::to_string(type));
fatal("TODO: handle relocation type " + std::to_string(r.type));
break;
default:
llvm_unreachable("unexpected relocation type");

View File

@ -11,6 +11,7 @@
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
#include "MergedOutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
@ -95,7 +96,68 @@ void GotSection::writeTo(uint8_t *buf) const {
BindingSection::BindingSection()
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
// The binding section is needed when at least one explicit binding entry has
// been recorded, or when the GOT section reports that it is needed.
bool BindingSection::isNeeded() const {
  if (!bindings.empty())
    return true;
  return in.got->isNeeded();
}
namespace {
// Mirror of the bind-opcode record as of the most recently encoded binding.
// encodeBinding() compares each new binding against these fields and emits
// opcodes only for the ones that changed.
struct Binding {
  // Segment selected by the last SET_SEGMENT_AND_OFFSET opcode, or nullptr if
  // none has been emitted yet.
  OutputSegment *segment{nullptr};
  // Offset within `segment` that the next DO_BIND would write to.
  uint64_t offset{0};
  // Addend set by the last SET_ADDEND opcode.
  int64_t addend{0};
  // Dylib ordinal set by the last SET_DYLIB_ORDINAL opcode.
  uint8_t ordinal{0};
};
} // namespace
// Encode a sequence of opcodes that tell dyld to write the address of dysym +
// addend at osec->addr + outSecOff.
//
// The bind opcode "interpreter" remembers the values of each binding field, so
// we only need to encode the differences between bindings. Hence the use of
// lastBinding.
//
// Parameters:
//   dysym       - the dylib symbol to bind; its name and its file's ordinal
//                 are written into the opcode stream.
//   osec        - output section containing the location to be bound.
//   outSecOff   - offset of that location from the start of osec.
//   addend      - constant added to the symbol's address.
//   lastBinding - in/out: record state left by the previously encoded binding;
//                 updated to reflect the opcodes emitted here.
//   os          - stream that receives the encoded opcodes.
static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
uint64_t outSecOff, int64_t addend,
Binding &lastBinding, raw_svector_ostream &os) {
using namespace llvm::MachO;
OutputSegment *seg = osec->parent;
// Locations are addressed as (segment index, offset within that segment).
uint64_t offset = osec->getSegmentOffset() + outSecOff;
if (lastBinding.segment != seg) {
// New segment: emit the segment index together with an absolute offset.
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
seg->index);
encodeULEB128(offset, os);
lastBinding.segment = seg;
lastBinding.offset = offset;
} else if (lastBinding.offset != offset) {
// Same segment: emit only the forward distance from the current offset. The
// delta is encoded as an unsigned value, so bindings within one segment are
// expected to arrive in non-decreasing offset order.
assert(lastBinding.offset <= offset);
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
encodeULEB128(offset - lastBinding.offset, os);
lastBinding.offset = offset;
}
if (lastBinding.ordinal != dysym.file->ordinal) {
// The ordinal is packed into the opcode byte's immediate bits, so it must
// fit within BIND_IMMEDIATE_MASK.
if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
dysym.file->ordinal);
} else {
// Note: any segment/offset opcodes above have already been written to `os`
// at this point; no DO_BIND is emitted for this symbol.
error("TODO: Support larger dylib symbol ordinals");
return;
}
lastBinding.ordinal = dysym.file->ordinal;
}
if (lastBinding.addend != addend) {
os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
encodeSLEB128(addend, os);
lastBinding.addend = addend;
}
// The symbol name (NUL-terminated) and the pointer bind type are emitted for
// every binding; unlike the fields above they are not diffed against
// lastBinding.
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
<< dysym.getName() << '\0'
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
// DO_BIND causes dyld to both perform the binding and increment the offset
lastBinding.offset += WordSize;
}
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
@ -111,43 +173,39 @@ bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
// entry. It does *not* clear the record state after doing the bind, so
// subsequent opcodes only need to encode the differences between bindings.
void BindingSection::finalizeContents() {
if (!isNeeded())
return;
raw_svector_ostream os{contents};
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
in.got->parent->index);
encodeULEB128(in.got->getSegmentOffset(), os);
uint32_t entries_to_skip = 0;
Binding lastBinding;
bool didEncode = false;
size_t gotIdx = 0;
for (const Symbol *sym : in.got->getEntries()) {
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (entries_to_skip != 0) {
os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
encodeULEB128(WordSize * entries_to_skip, os);
entries_to_skip = 0;
didEncode = true;
encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
}
++gotIdx;
}
// TODO: Implement compact encoding -- we only need to encode the
// differences between consecutive symbol entries.
if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
dysym->file->ordinal);
} else {
error("TODO: Support larger dylib symbol ordinals");
continue;
// Sorting the relocations by segment and address allows us to encode them
// more compactly.
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
OutputSegment *segA = a.isec->parent->parent;
OutputSegment *segB = b.isec->parent->parent;
if (segA != segB)
return segA->fileOff < segB->fileOff;
OutputSection *osecA = a.isec->parent;
OutputSection *osecB = b.isec->parent;
if (osecA != osecB)
return osecA->addr < osecB->addr;
if (a.isec != b.isec)
return a.isec->outSecOff < b.isec->outSecOff;
return a.offset < b.offset;
});
for (const BindingEntry &b : bindings) {
didEncode = true;
encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
b.addend, lastBinding, os);
}
os << static_cast<uint8_t>(
MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
<< dysym->getName() << '\0'
<< static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
MachO::BIND_TYPE_POINTER)
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
} else {
// We have a defined symbol with a pre-populated address; skip over it.
++entries_to_skip;
}
}
if (didEncode)
os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
}

View File

@ -94,6 +94,16 @@ private:
llvm::SetVector<const Symbol *> entries;
};
// One pending non-lazy binding: dyld is to write the address of `dysym` plus
// `addend` at `offset` bytes into `isec`.
struct BindingEntry {
  // Dylib symbol whose address is bound.
  const DylibSymbol *dysym;
  // Input section containing the location to be written.
  const InputSection *isec;
  // Offset of that location from the start of `isec`.
  uint64_t offset;
  // Constant added to the symbol's address.
  int64_t addend;

  BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
               uint64_t offset, int64_t addend)
      : dysym{dysym}, isec{isec}, offset{offset}, addend{addend} {}
};
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
class BindingSection : public SyntheticSection {
public:
@ -107,6 +117,13 @@ public:
bool isNeeded() const override;
void writeTo(uint8_t *buf) const override;
// Records a binding of `dysym` (plus `addend`) at `offset` bytes into `isec`.
// The entry is only stored here; the recorded entries are turned into bind
// opcodes later by finalizeContents().
void addEntry(const DylibSymbol *dysym, const InputSection *isec,
uint64_t offset, int64_t addend) {
bindings.emplace_back(dysym, isec, offset, addend);
}
private:
std::vector<BindingEntry> bindings;
SmallVector<char, 128> contents;
};
@ -256,6 +273,7 @@ private:
};
struct InStruct {
BindingSection *binding = nullptr;
GotSection *got = nullptr;
LazyPointerSection *lazyPointers = nullptr;
StubsSection *stubs = nullptr;

View File

@ -53,7 +53,8 @@ public:
// depending on the relocation type. prepareSymbolRelocation() will set up the
// GOT/stubs entries, and getSymbolVA() will return the addresses of those
// entries.
virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
const Reloc &) = 0;
virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
uint32_t cpuType;

View File

@ -54,7 +54,6 @@ public:
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *headerSection = nullptr;
BindingSection *bindingSection = nullptr;
LazyBindingSection *lazyBindingSection = nullptr;
ExportSection *exportSection = nullptr;
StringTableSection *stringTableSection = nullptr;
@ -254,7 +253,7 @@ void Writer::scanRelocations() {
error("undefined symbol " + s->getName() + ", referenced from " +
sys::path::filename(isec->file->getName()));
else
target->prepareSymbolRelocation(*s, r.type);
target->prepareSymbolRelocation(*s, isec, r);
}
}
}
@ -262,7 +261,7 @@ void Writer::scanRelocations() {
void Writer::createLoadCommands() {
headerSection->addLoadCommand(
make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
headerSection->addLoadCommand(
make<LCSymtab>(symtabSection, stringTableSection));
headerSection->addLoadCommand(make<LCDysymtab>());
@ -404,7 +403,6 @@ static void sortSegmentsAndSections() {
void Writer::createOutputSections() {
// First, create hidden sections
headerSection = make<MachHeaderSection>();
bindingSection = make<BindingSection>();
lazyBindingSection = make<LazyBindingSection>();
stringTableSection = make<StringTableSection>();
symtabSection = make<SymtabSection>(*stringTableSection);
@ -513,7 +511,7 @@ void Writer::run() {
assignAddresses(seg);
// Fill __LINKEDIT contents.
bindingSection->finalizeContents();
in.binding->finalizeContents();
lazyBindingSection->finalizeContents();
exportSection->finalizeContents();
symtabSection->finalizeContents();
@ -535,6 +533,7 @@ void Writer::run() {
void macho::writeResult() { Writer().run(); }
void macho::createSyntheticSections() {
in.binding = make<BindingSection>();
in.got = make<GotSection>();
in.lazyPointers = make<LazyPointerSection>();
in.stubs = make<StubsSection>();

View File

@ -34,6 +34,9 @@
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
# CHECK-DAG: __DATA __data 0x[[#%x, DATA_ADDR:]] pointer 0 libhello _hello_world
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 8]] pointer 8 libhello _hello_its_me
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 16]] pointer -15 libgoodbye _goodbye_world
.section __TEXT,__text
.globl _main
@ -59,3 +62,8 @@ _main:
syscall
mov $0, %rax
ret
.data
.quad _hello_world
.quad _hello_its_me + 0x8
.quad _goodbye_world - 0xf