[lld-macho] Implement weak binding for branch relocations

Since there is no "weak lazy" lookup, function calls to weak symbols are
always non-lazily bound. We emit both regular non-lazy bindings and weak
bindings, so that the weak bindings can overwrite the non-lazy bindings
if an appropriate symbol is found at runtime. However, the bound
addresses will still be written (non-lazily) into the
LazyPointerSection.

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D86573
This commit is contained in:
Jez Ng 2020-08-27 15:54:42 -07:00
parent 7f717b6d1f
commit e263287c79
8 changed files with 143 additions and 67 deletions

View File

@ -29,7 +29,7 @@ struct X86_64 : TargetInfo {
const relocation_info &) const override;
void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override;
void writeStub(uint8_t *buf, const DylibSymbol &) const override;
void writeStub(uint8_t *buf, const macho::Symbol &) const override;
void writeStubHelperHeader(uint8_t *buf) const override;
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const override;
@ -182,7 +182,7 @@ static constexpr uint8_t stub[] = {
0xff, 0x25, 0, 0, 0, 0, // jmpq *__la_symbol_ptr(%rip)
};
void X86_64::writeStub(uint8_t *buf, const DylibSymbol &sym) const {
void X86_64::writeStub(uint8_t *buf, const macho::Symbol &sym) const {
memcpy(buf, stub, 2); // just copy the two nonzero bytes
uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
writeRipRelative(buf, stubAddr, sizeof(stub),
@ -231,9 +231,23 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol *sym,
break;
}
case X86_64_RELOC_BRANCH: {
// TODO: weak dysyms should go into the weak binding section instead
if (auto *dysym = dyn_cast<DylibSymbol>(sym))
in.stubs->addEntry(dysym);
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (in.stubs->addEntry(dysym)) {
if (sym->isWeakDef()) {
in.binding->addEntry(dysym, in.lazyPointers,
sym->stubsIndex * WordSize);
in.weakBinding->addEntry(sym, in.lazyPointers,
sym->stubsIndex * WordSize);
} else {
in.lazyBinding->addEntry(dysym);
}
}
} else if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isWeakDef() && defined->isExternal())
if (in.stubs->addEntry(sym))
in.weakBinding->addEntry(sym, in.lazyPointers,
sym->stubsIndex * WordSize);
}
break;
}
case X86_64_RELOC_UNSIGNED: {
@ -277,10 +291,11 @@ uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym,
case X86_64_RELOC_GOT_LOAD:
case X86_64_RELOC_GOT:
return in.got->addr + sym.gotIndex * WordSize;
case X86_64_RELOC_BRANCH:
if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
return in.stubs->addr + dysym->stubsIndex * sizeof(stub);
case X86_64_RELOC_BRANCH: {
if (sym.isInStubs())
return in.stubs->addr + sym.stubsIndex * sizeof(stub);
return sym.getVA();
}
case X86_64_RELOC_UNSIGNED:
case X86_64_RELOC_SIGNED:
case X86_64_RELOC_SIGNED_1:

View File

@ -60,11 +60,16 @@ public:
// Whether this symbol is in the GOT or TLVPointer sections.
bool isInGot() const { return gotIndex != UINT32_MAX; }
// Whether this symbol is in the StubsSection.
bool isInStubs() const { return stubsIndex != UINT32_MAX; }
// The index of this symbol in the GOT or the TLVPointer section, depending
// on whether it is a thread-local. A given symbol cannot be referenced by
// both these sections at once.
uint32_t gotIndex = UINT32_MAX;
uint32_t stubsIndex = UINT32_MAX;
protected:
Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}
@ -114,13 +119,13 @@ public:
: Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {}
bool isWeakDef() const override { return weakDef; }
bool isTlv() const override { return tlv; }
bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
DylibFile *file;
uint32_t stubsIndex = UINT32_MAX;
uint32_t stubsHelperIndex = UINT32_MAX;
uint32_t lazyBindOffset = UINT32_MAX;
private:

View File

@ -277,15 +277,17 @@ uint64_t StubsSection::getSize() const {
void StubsSection::writeTo(uint8_t *buf) const {
size_t off = 0;
for (const DylibSymbol *sym : in.stubs->getEntries()) {
for (const Symbol *sym : entries) {
target->writeStub(buf + off, *sym);
off += target->stubSize;
}
}
void StubsSection::addEntry(DylibSymbol *sym) {
if (entries.insert(sym))
bool StubsSection::addEntry(Symbol *sym) {
bool inserted = entries.insert(sym);
if (inserted)
sym->stubsIndex = entries.size() - 1;
return inserted;
}
StubHelperSection::StubHelperSection()
@ -293,17 +295,15 @@ StubHelperSection::StubHelperSection()
uint64_t StubHelperSection::getSize() const {
return target->stubHelperHeaderSize +
in.stubs->getEntries().size() * target->stubHelperEntrySize;
in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
}
bool StubHelperSection::isNeeded() const {
return !in.stubs->getEntries().empty();
}
bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }
void StubHelperSection::writeTo(uint8_t *buf) const {
target->writeStubHelperHeader(buf);
size_t off = target->stubHelperHeaderSize;
for (const DylibSymbol *sym : in.stubs->getEntries()) {
for (const DylibSymbol *sym : in.lazyBinding->getEntries()) {
target->writeStubHelperEntry(buf + off, *sym, addr + off);
off += target->stubHelperEntrySize;
}
@ -347,10 +347,17 @@ bool LazyPointerSection::isNeeded() const {
void LazyPointerSection::writeTo(uint8_t *buf) const {
size_t off = 0;
for (const DylibSymbol *sym : in.stubs->getEntries()) {
uint64_t stubHelperOffset = target->stubHelperHeaderSize +
sym->stubsIndex * target->stubHelperEntrySize;
for (const Symbol *sym : in.stubs->getEntries()) {
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (dysym->hasStubsHelper()) {
uint64_t stubHelperOffset =
target->stubHelperHeaderSize +
dysym->stubsHelperIndex * target->stubHelperEntrySize;
write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
}
} else {
write64le(buf + off, sym->getVA());
}
off += WordSize;
}
}
@ -358,12 +365,10 @@ void LazyPointerSection::writeTo(uint8_t *buf) const {
LazyBindingSection::LazyBindingSection()
: LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {}
bool LazyBindingSection::isNeeded() const { return in.stubs->isNeeded(); }
void LazyBindingSection::finalizeContents() {
// TODO: Just precompute output size here instead of writing to a temporary
// buffer
for (DylibSymbol *sym : in.stubs->getEntries())
for (DylibSymbol *sym : entries)
sym->lazyBindOffset = encode(*sym);
}
@ -371,6 +376,11 @@ void LazyBindingSection::writeTo(uint8_t *buf) const {
memcpy(buf, contents.data(), contents.size());
}
// Registers `dysym` for lazy binding. The first time a symbol is added it is
// assigned a stubsHelperIndex equal to its position in `entries`; adding the
// same symbol again leaves the previously assigned index untouched.
void LazyBindingSection::addEntry(DylibSymbol *dysym) {
  const bool isNewEntry = entries.insert(dysym);
  if (!isNewEntry)
    return;
  dysym->stubsHelperIndex = entries.size() - 1;
}
// Unlike the non-lazy binding section, the bind opcodes in this section aren't
// interpreted all at once. Rather, dyld will start interpreting opcodes at a
// given offset, typically only binding a single symbol before it finds a

View File

@ -222,13 +222,15 @@ void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion,
// The following sections implement lazy symbol binding -- very similar to the
// PLT mechanism in ELF.
//
// ELF's .plt section is broken up into two sections in Mach-O: StubsSection and
// StubHelperSection. Calls to functions in dylibs will end up calling into
// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
// and StubHelperSection. Calls to functions in dylibs will end up calling into
// StubsSection, which contains indirect jumps to addresses stored in the
// LazyPointerSection (the counterpart to ELF's .plt.got).
//
// Initially, the LazyPointerSection contains addresses that point into one of
// the entry points in the middle of the StubHelperSection. The code in
// We will first describe how non-weak symbols are handled.
//
// At program start, the LazyPointerSection contains addresses that point into
// one of the entry points in the middle of the StubHelperSection. The code in
// StubHelperSection will push on the stack an offset into the
// LazyBindingSection. The push is followed by a jump to the beginning of the
// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
@ -236,10 +238,17 @@ void addNonLazyBindingEntries(const Symbol *, SectionPointerUnion,
// the GOT.
//
// The stub binder will look up the bind opcodes in the LazyBindingSection at
// the given offset. The bind opcodes will tell the binder to update the address
// in the LazyPointerSection to point to the symbol, so that subsequent calls
// don't have to redo the symbol resolution. The binder will then jump to the
// resolved symbol.
// the given offset. The bind opcodes will tell the binder to update the
// address in the LazyPointerSection to point to the symbol, so that subsequent
// calls don't have to redo the symbol resolution. The binder will then jump to
// the resolved symbol.
//
// With weak symbols, the situation is slightly different. Since there is no
// "weak lazy" lookup, function calls to weak symbols are always non-lazily
// bound. We emit both regular non-lazy bindings and weak bindings, so that
// the weak bindings can overwrite the non-lazy bindings if an appropriate
// symbol is found at runtime. However, the bound addresses will still be
// written (non-lazily) into the LazyPointerSection.
class StubsSection : public SyntheticSection {
public:
@ -247,13 +256,13 @@ public:
uint64_t getSize() const override;
bool isNeeded() const override { return !entries.empty(); }
void writeTo(uint8_t *buf) const override;
const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }
void addEntry(DylibSymbol *sym);
const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
// Returns whether the symbol was added. Note that every stubs entry will
// have a corresponding entry in the LazyPointerSection.
bool addEntry(Symbol *);
private:
llvm::SetVector<DylibSymbol *> entries;
llvm::SetVector<Symbol *> entries;
};
class StubHelperSection : public SyntheticSection {
@ -278,6 +287,8 @@ public:
uint64_t getSize() const override { return WordSize; }
};
// Note that this section may also be targeted by non-lazy bindings. In
// particular, this happens when branch relocations target weak symbols.
class LazyPointerSection : public SyntheticSection {
public:
LazyPointerSection();
@ -291,15 +302,21 @@ public:
LazyBindingSection();
void finalizeContents();
uint64_t getRawSize() const override { return contents.size(); }
uint32_t encode(const DylibSymbol &);
// Like other sections in __LINKEDIT, the lazy binding section is special: its
// offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
// section headers.
bool isHidden() const override { return true; }
bool isNeeded() const override;
bool isNeeded() const override { return !entries.empty(); }
void writeTo(uint8_t *buf) const override;
// Note that every entry here will be referenced by a corresponding entry in
// the StubHelperSection.
void addEntry(DylibSymbol *dysym);
const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }
private:
uint32_t encode(const DylibSymbol &);
llvm::SetVector<DylibSymbol *> entries;
SmallVector<char, 128> contents;
llvm::raw_svector_ostream os{contents};
};
@ -368,6 +385,7 @@ struct InStruct {
MachHeaderSection *header = nullptr;
BindingSection *binding = nullptr;
WeakBindingSection *weakBinding = nullptr;
LazyBindingSection *lazyBinding = nullptr;
GotSection *got = nullptr;
TlvPointerSection *tlvPointers = nullptr;
LazyPointerSection *lazyPointers = nullptr;

View File

@ -44,7 +44,7 @@ public:
// Write code for lazy binding. See the comments on StubsSection for more
// details.
virtual void writeStub(uint8_t *buf, const DylibSymbol &) const = 0;
virtual void writeStub(uint8_t *buf, const Symbol &) const = 0;
virtual void writeStubHelperHeader(uint8_t *buf) const = 0;
virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
uint64_t entryAddr) const = 0;

View File

@ -55,7 +55,6 @@ public:
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *header = nullptr;
LazyBindingSection *lazyBindingSection = nullptr;
ExportSection *exportSection = nullptr;
StringTableSection *stringTableSection = nullptr;
SymtabSection *symtabSection = nullptr;
@ -327,8 +326,8 @@ void Writer::scanRelocations() {
}
void Writer::createLoadCommands() {
in.header->addLoadCommand(make<LCDyldInfo>(
in.binding, in.weakBinding, lazyBindingSection, exportSection));
in.header->addLoadCommand(make<LCDyldInfo>(in.binding, in.weakBinding,
in.lazyBinding, exportSection));
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
in.header->addLoadCommand(make<LCDysymtab>());
for (StringRef path : config->runtimePaths)
@ -473,7 +472,6 @@ static void sortSegmentsAndSections() {
void Writer::createOutputSections() {
// First, create hidden sections
lazyBindingSection = make<LazyBindingSection>();
stringTableSection = make<StringTableSection>();
symtabSection = make<SymtabSection>(*stringTableSection);
exportSection = make<ExportSection>();
@ -585,7 +583,7 @@ void Writer::run() {
// Fill __LINKEDIT contents.
in.binding->finalizeContents();
in.weakBinding->finalizeContents();
lazyBindingSection->finalizeContents();
in.lazyBinding->finalizeContents();
exportSection->finalizeContents();
symtabSection->finalizeContents();
@ -609,6 +607,7 @@ void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
in.binding = make<BindingSection>();
in.weakBinding = make<WeakBindingSection>();
in.lazyBinding = make<LazyBindingSection>();
in.got = make<GotSection>();
in.tlvPointers = make<TlvPointerSection>();
in.lazyPointers = make<LazyPointerSection>();

View File

@ -4,13 +4,18 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o
# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk -dylib %t/libfoo.o -o %t/libfoo.dylib
# RUN: lld -flavor darwinnew -syslibroot %S/Inputs/MacOSX.sdk %t/test.o -L%t -lfoo -o %t/test -lSystem
# RUN: llvm-objdump -d --no-show-raw-insn --bind --weak-bind --full-contents %t/test | \
# RUN: llvm-objdump -d --no-show-raw-insn --bind --lazy-bind --weak-bind --full-contents %t/test | \
# RUN: FileCheck %s
# CHECK: Contents of section __la_symbol_ptr:
## Check that this section contains a nonzero pointer. It should point to
## _weak_external_fn, but we don't have a good way of testing the exact value as
## the bytes here are in little-endian order.
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}}
# CHECK: Contents of section __got:
## Check that this section contains a nonzero pointer. It should point to
## _weak_external_for_gotpcrel, but we don't have a good way of testing the exact
## value as the bytes here are in little-endian order.
## _weak_external_for_gotpcrel.
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}}
# CHECK: <_main>:
@ -19,15 +24,23 @@
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_TLV_ADDR:]]
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_TLV_ADDR:]]
# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_INT_TLV_ADDR:]]
# CHECK-NEXT: callq 0x{{[0-9a-f]*}}
# CHECK-NEXT: callq 0x{{[0-9a-f]*}}
# CHECK-NEXT: callq 0x{{[0-9a-f]*}}
# CHECK-LABEL: Bind table:
# CHECK-DAG: __DATA __data 0x[[#%x,WEAK_DY:]] pointer 0 libfoo _weak_dysym
# CHECK-DAG: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap
# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 libfoo _weak_dysym_tlv
# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 libfoo _weak_dysym_for_gotpcrel
# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,WEAK_DY_FN:]] pointer 0 libfoo _weak_dysym_fn
## Check that we don't have any other bindings
# CHECK-NOT: pointer
# CHECK-LABEL: Lazy bind table:
## Verify that we have no lazy bindings
# CHECK-NOT: pointer
# CHECK-LABEL: Weak bind table:
# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_DY_GOT_ADDR]] pointer 0 _weak_dysym_for_gotpcrel
# CHECK-DAG: __DATA_CONST __got 0x[[#WEAK_EXT_GOT_ADDR]] pointer 0 _weak_external_for_gotpcrel
@ -35,12 +48,15 @@
# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_TLV_ADDR]] pointer 0 _weak_tlv
# CHECK-DAG: __DATA __thread_ptrs 0x[[#WEAK_DY_TLV_ADDR]] pointer 0 _weak_dysym_tlv
# CHECK-DAG: __DATA __data 0x{{[0-9a-f]*}} pointer 2 _weak_external
# CHECK-DAG: __DATA __la_symbol_ptr 0x[[#WEAK_DY_FN]] pointer 0 _weak_dysym_fn
# CHECK-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} pointer 0 _weak_external_fn
## Check that we don't have any other bindings
# CHECK-NOT: pointer
## Weak internal symbols don't get bindings
# RUN: llvm-objdump --macho --bind --weak-bind %t/test | FileCheck %s --check-prefix=WEAK-INTERNAL
# RUN: llvm-objdump --macho --bind --lazy-bind --weak-bind %t/test | FileCheck %s --check-prefix=WEAK-INTERNAL
# WEAK-INTERNAL-NOT: _weak_internal
# WEAK-INTERNAL-NOT: _weak_internal_fn
# WEAK-INTERNAL-NOT: _weak_internal_tlv
#--- libfoo.s
@ -55,6 +71,11 @@ _weak_dysym:
_weak_dysym_for_gotpcrel:
.quad 0x1234
.globl _weak_dysym_fn
.weak_definition _weak_dysym_fn
_weak_dysym_fn:
ret
.section __DATA,__thread_vars,thread_local_variables
.globl _weak_dysym_tlv
@ -64,8 +85,8 @@ _weak_dysym_tlv:
#--- test.s
.globl _main, _weak_external, _weak_external_for_gotpcrel
.weak_definition _weak_external, _weak_external_for_gotpcrel, _weak_internal
.globl _main, _weak_external, _weak_external_for_gotpcrel, _weak_external_fn
.weak_definition _weak_external, _weak_external_for_gotpcrel, _weak_external_fn, _weak_internal, _weak_internal_fn
_main:
mov _weak_dysym_for_gotpcrel@GOTPCREL(%rip), %rax
@ -73,6 +94,9 @@ _main:
mov _weak_tlv@TLVP(%rip), %rax
mov _weak_dysym_tlv@TLVP(%rip), %rax
mov _weak_internal_tlv@TLVP(%rip), %rax
callq _weak_dysym_fn
callq _weak_external_fn
callq _weak_internal_fn
mov $0, %rax
ret
@ -82,9 +106,15 @@ _weak_external:
_weak_external_for_gotpcrel:
.quad 0x1234
_weak_external_fn:
ret
_weak_internal:
.quad 0x1234
_weak_internal_fn:
ret
.data
.quad _weak_dysym
.quad _weak_external + 2

View File

@ -23,12 +23,11 @@
# RUN: @executable_path/libweak2.dylib %t/weak2.o -o %t/libweak2.dylib
# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/dylib12 -Z -L%t -lweak1 -lweak2 %t/test.o
# RUN: llvm-objdump --macho --lazy-bind %t/dylib12 | FileCheck %s --check-prefix=DYLIB1
# RUN: llvm-objdump --macho --bind %t/dylib12 | FileCheck %s --check-prefix=DYLIB1
# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/dylib21 -Z -L%t -lweak2 -lweak1 %t/test.o
# RUN: llvm-objdump --macho --lazy-bind %t/dylib21 | FileCheck %s --check-prefix=DYLIB2
## TODO: these should really be in the weak binding section, not the lazy binding section
# DYLIB1: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} libweak1 _foo
# DYLIB2: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} libweak2 _foo
# RUN: llvm-objdump --macho --bind %t/dylib21 | FileCheck %s --check-prefix=DYLIB2
# DYLIB1: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} pointer 0 libweak1 _foo
# DYLIB2: __DATA __la_symbol_ptr 0x{{[0-9a-f]*}} pointer 0 libweak2 _foo
.globl _main
_main: