forked from OSchip/llvm-project
[lld-macho] Support binding dysyms to any section
Previously, we only supported binding dysyms to the GOT. This diff adds support for binding them to an arbitrary section. C++ programs appear to use this, I believe for vtables and type_info. This diff also makes our bind opcode encoding a bit smarter -- we now encode just the differences between consecutive bindings, which makes the output more compact. I was initially concerned about the performance overhead of iterating over these relocations, but it turns out that the number of such relocations is small. A quick analysis of my llvm-project build directory showed that fewer than 1.3% of ~7M relocations are RELOC_UNSIGNED bindings to symbols (including both dynamic and static symbols). Reviewed By: #lld-macho, smeenai Differential Revision: https://reviews.llvm.org/D83103
This commit is contained in:
parent
7ec6927bad
commit
53eb7fda51
|
@ -34,7 +34,8 @@ struct X86_64 : TargetInfo {
|
||||||
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
|
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
|
||||||
uint64_t entryAddr) const override;
|
uint64_t entryAddr) const override;
|
||||||
|
|
||||||
void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
|
void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
|
||||||
|
const Reloc &) override;
|
||||||
uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
|
uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -208,8 +209,9 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
|
||||||
in.stubHelper->addr);
|
in.stubHelper->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
|
||||||
switch (type) {
|
const InputSection *isec, const Reloc &r) {
|
||||||
|
switch (r.type) {
|
||||||
case X86_64_RELOC_GOT_LOAD:
|
case X86_64_RELOC_GOT_LOAD:
|
||||||
// TODO: implement mov -> lea relaxation for non-dynamic symbols
|
// TODO: implement mov -> lea relaxation for non-dynamic symbols
|
||||||
case X86_64_RELOC_GOT:
|
case X86_64_RELOC_GOT:
|
||||||
|
@ -220,7 +222,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
||||||
in.stubs->addEntry(*dysym);
|
in.stubs->addEntry(*dysym);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case X86_64_RELOC_UNSIGNED:
|
case X86_64_RELOC_UNSIGNED: {
|
||||||
|
if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
|
||||||
|
if (r.length != 3) {
|
||||||
|
error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
|
||||||
|
dysym->getName() + " must have r_length = 3");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
in.binding->addEntry(dysym, isec, r.offset, r.addend);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case X86_64_RELOC_SIGNED:
|
case X86_64_RELOC_SIGNED:
|
||||||
case X86_64_RELOC_SIGNED_1:
|
case X86_64_RELOC_SIGNED_1:
|
||||||
case X86_64_RELOC_SIGNED_2:
|
case X86_64_RELOC_SIGNED_2:
|
||||||
|
@ -228,7 +240,7 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
||||||
break;
|
break;
|
||||||
case X86_64_RELOC_SUBTRACTOR:
|
case X86_64_RELOC_SUBTRACTOR:
|
||||||
case X86_64_RELOC_TLV:
|
case X86_64_RELOC_TLV:
|
||||||
fatal("TODO: handle relocation type " + std::to_string(type));
|
fatal("TODO: handle relocation type " + std::to_string(r.type));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("unexpected relocation type");
|
llvm_unreachable("unexpected relocation type");
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "ExportTrie.h"
|
#include "ExportTrie.h"
|
||||||
#include "InputFiles.h"
|
#include "InputFiles.h"
|
||||||
#include "MachOStructs.h"
|
#include "MachOStructs.h"
|
||||||
|
#include "MergedOutputSection.h"
|
||||||
#include "OutputSegment.h"
|
#include "OutputSegment.h"
|
||||||
#include "SymbolTable.h"
|
#include "SymbolTable.h"
|
||||||
#include "Symbols.h"
|
#include "Symbols.h"
|
||||||
|
@ -95,7 +96,68 @@ void GotSection::writeTo(uint8_t *buf) const {
|
||||||
BindingSection::BindingSection()
|
BindingSection::BindingSection()
|
||||||
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
|
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
|
||||||
|
|
||||||
bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
|
bool BindingSection::isNeeded() const {
|
||||||
|
return bindings.size() != 0 || in.got->isNeeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct Binding {
|
||||||
|
OutputSegment *segment = nullptr;
|
||||||
|
uint64_t offset = 0;
|
||||||
|
int64_t addend = 0;
|
||||||
|
uint8_t ordinal = 0;
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Encode a sequence of opcodes that tell dyld to write the address of dysym +
|
||||||
|
// addend at osec->addr + outSecOff.
|
||||||
|
//
|
||||||
|
// The bind opcode "interpreter" remembers the values of each binding field, so
|
||||||
|
// we only need to encode the differences between bindings. Hence the use of
|
||||||
|
// lastBinding.
|
||||||
|
static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
|
||||||
|
uint64_t outSecOff, int64_t addend,
|
||||||
|
Binding &lastBinding, raw_svector_ostream &os) {
|
||||||
|
using namespace llvm::MachO;
|
||||||
|
OutputSegment *seg = osec->parent;
|
||||||
|
uint64_t offset = osec->getSegmentOffset() + outSecOff;
|
||||||
|
if (lastBinding.segment != seg) {
|
||||||
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
||||||
|
seg->index);
|
||||||
|
encodeULEB128(offset, os);
|
||||||
|
lastBinding.segment = seg;
|
||||||
|
lastBinding.offset = offset;
|
||||||
|
} else if (lastBinding.offset != offset) {
|
||||||
|
assert(lastBinding.offset <= offset);
|
||||||
|
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
|
||||||
|
encodeULEB128(offset - lastBinding.offset, os);
|
||||||
|
lastBinding.offset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastBinding.ordinal != dysym.file->ordinal) {
|
||||||
|
if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
|
||||||
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
||||||
|
dysym.file->ordinal);
|
||||||
|
} else {
|
||||||
|
error("TODO: Support larger dylib symbol ordinals");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
lastBinding.ordinal = dysym.file->ordinal;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastBinding.addend != addend) {
|
||||||
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
|
||||||
|
encodeSLEB128(addend, os);
|
||||||
|
lastBinding.addend = addend;
|
||||||
|
}
|
||||||
|
|
||||||
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
||||||
|
<< dysym.getName() << '\0'
|
||||||
|
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
|
||||||
|
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
|
||||||
|
// DO_BIND causes dyld to both perform the binding and increment the offset
|
||||||
|
lastBinding.offset += WordSize;
|
||||||
|
}
|
||||||
|
|
||||||
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
|
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
|
||||||
// interprets to update a record with the following fields:
|
// interprets to update a record with the following fields:
|
||||||
|
@ -111,44 +173,40 @@ bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
|
||||||
// entry. It does *not* clear the record state after doing the bind, so
|
// entry. It does *not* clear the record state after doing the bind, so
|
||||||
// subsequent opcodes only need to encode the differences between bindings.
|
// subsequent opcodes only need to encode the differences between bindings.
|
||||||
void BindingSection::finalizeContents() {
|
void BindingSection::finalizeContents() {
|
||||||
if (!isNeeded())
|
|
||||||
return;
|
|
||||||
|
|
||||||
raw_svector_ostream os{contents};
|
raw_svector_ostream os{contents};
|
||||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
Binding lastBinding;
|
||||||
in.got->parent->index);
|
bool didEncode = false;
|
||||||
encodeULEB128(in.got->getSegmentOffset(), os);
|
size_t gotIdx = 0;
|
||||||
uint32_t entries_to_skip = 0;
|
|
||||||
for (const Symbol *sym : in.got->getEntries()) {
|
for (const Symbol *sym : in.got->getEntries()) {
|
||||||
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
||||||
if (entries_to_skip != 0) {
|
didEncode = true;
|
||||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
|
encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
|
||||||
encodeULEB128(WordSize * entries_to_skip, os);
|
|
||||||
entries_to_skip = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Implement compact encoding -- we only need to encode the
|
|
||||||
// differences between consecutive symbol entries.
|
|
||||||
if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
|
|
||||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
|
||||||
dysym->file->ordinal);
|
|
||||||
} else {
|
|
||||||
error("TODO: Support larger dylib symbol ordinals");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
os << static_cast<uint8_t>(
|
|
||||||
MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
|
||||||
<< dysym->getName() << '\0'
|
|
||||||
<< static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
|
|
||||||
MachO::BIND_TYPE_POINTER)
|
|
||||||
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
|
|
||||||
} else {
|
|
||||||
// We have a defined symbol with a pre-populated address; skip over it.
|
|
||||||
++entries_to_skip;
|
|
||||||
}
|
}
|
||||||
|
++gotIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
|
// Sorting the relocations by segment and address allows us to encode them
|
||||||
|
// more compactly.
|
||||||
|
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
|
||||||
|
OutputSegment *segA = a.isec->parent->parent;
|
||||||
|
OutputSegment *segB = b.isec->parent->parent;
|
||||||
|
if (segA != segB)
|
||||||
|
return segA->fileOff < segB->fileOff;
|
||||||
|
OutputSection *osecA = a.isec->parent;
|
||||||
|
OutputSection *osecB = b.isec->parent;
|
||||||
|
if (osecA != osecB)
|
||||||
|
return osecA->addr < osecB->addr;
|
||||||
|
if (a.isec != b.isec)
|
||||||
|
return a.isec->outSecOff < b.isec->outSecOff;
|
||||||
|
return a.offset < b.offset;
|
||||||
|
});
|
||||||
|
for (const BindingEntry &b : bindings) {
|
||||||
|
didEncode = true;
|
||||||
|
encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
|
||||||
|
b.addend, lastBinding, os);
|
||||||
|
}
|
||||||
|
if (didEncode)
|
||||||
|
os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BindingSection::writeTo(uint8_t *buf) const {
|
void BindingSection::writeTo(uint8_t *buf) const {
|
||||||
|
|
|
@ -94,6 +94,16 @@ private:
|
||||||
llvm::SetVector<const Symbol *> entries;
|
llvm::SetVector<const Symbol *> entries;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct BindingEntry {
|
||||||
|
const DylibSymbol *dysym;
|
||||||
|
const InputSection *isec;
|
||||||
|
uint64_t offset;
|
||||||
|
int64_t addend;
|
||||||
|
BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
|
||||||
|
uint64_t offset, int64_t addend)
|
||||||
|
: dysym(dysym), isec(isec), offset(offset), addend(addend) {}
|
||||||
|
};
|
||||||
|
|
||||||
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
|
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
|
||||||
class BindingSection : public SyntheticSection {
|
class BindingSection : public SyntheticSection {
|
||||||
public:
|
public:
|
||||||
|
@ -107,6 +117,13 @@ public:
|
||||||
bool isNeeded() const override;
|
bool isNeeded() const override;
|
||||||
void writeTo(uint8_t *buf) const override;
|
void writeTo(uint8_t *buf) const override;
|
||||||
|
|
||||||
|
void addEntry(const DylibSymbol *dysym, const InputSection *isec,
|
||||||
|
uint64_t offset, int64_t addend) {
|
||||||
|
bindings.emplace_back(dysym, isec, offset, addend);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<BindingEntry> bindings;
|
||||||
SmallVector<char, 128> contents;
|
SmallVector<char, 128> contents;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -256,6 +273,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
struct InStruct {
|
struct InStruct {
|
||||||
|
BindingSection *binding = nullptr;
|
||||||
GotSection *got = nullptr;
|
GotSection *got = nullptr;
|
||||||
LazyPointerSection *lazyPointers = nullptr;
|
LazyPointerSection *lazyPointers = nullptr;
|
||||||
StubsSection *stubs = nullptr;
|
StubsSection *stubs = nullptr;
|
||||||
|
|
|
@ -53,7 +53,8 @@ public:
|
||||||
// depending on the relocation type. prepareSymbolRelocation() will set up the
|
// depending on the relocation type. prepareSymbolRelocation() will set up the
|
||||||
// GOT/stubs entries, and getSymbolVA() will return the addresses of those
|
// GOT/stubs entries, and getSymbolVA() will return the addresses of those
|
||||||
// entries.
|
// entries.
|
||||||
virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
|
virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
|
||||||
|
const Reloc &) = 0;
|
||||||
virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
|
virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
|
||||||
|
|
||||||
uint32_t cpuType;
|
uint32_t cpuType;
|
||||||
|
|
|
@ -54,7 +54,6 @@ public:
|
||||||
uint64_t addr = 0;
|
uint64_t addr = 0;
|
||||||
uint64_t fileOff = 0;
|
uint64_t fileOff = 0;
|
||||||
MachHeaderSection *headerSection = nullptr;
|
MachHeaderSection *headerSection = nullptr;
|
||||||
BindingSection *bindingSection = nullptr;
|
|
||||||
LazyBindingSection *lazyBindingSection = nullptr;
|
LazyBindingSection *lazyBindingSection = nullptr;
|
||||||
ExportSection *exportSection = nullptr;
|
ExportSection *exportSection = nullptr;
|
||||||
StringTableSection *stringTableSection = nullptr;
|
StringTableSection *stringTableSection = nullptr;
|
||||||
|
@ -254,7 +253,7 @@ void Writer::scanRelocations() {
|
||||||
error("undefined symbol " + s->getName() + ", referenced from " +
|
error("undefined symbol " + s->getName() + ", referenced from " +
|
||||||
sys::path::filename(isec->file->getName()));
|
sys::path::filename(isec->file->getName()));
|
||||||
else
|
else
|
||||||
target->prepareSymbolRelocation(*s, r.type);
|
target->prepareSymbolRelocation(*s, isec, r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -262,7 +261,7 @@ void Writer::scanRelocations() {
|
||||||
|
|
||||||
void Writer::createLoadCommands() {
|
void Writer::createLoadCommands() {
|
||||||
headerSection->addLoadCommand(
|
headerSection->addLoadCommand(
|
||||||
make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
|
make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
|
||||||
headerSection->addLoadCommand(
|
headerSection->addLoadCommand(
|
||||||
make<LCSymtab>(symtabSection, stringTableSection));
|
make<LCSymtab>(symtabSection, stringTableSection));
|
||||||
headerSection->addLoadCommand(make<LCDysymtab>());
|
headerSection->addLoadCommand(make<LCDysymtab>());
|
||||||
|
@ -404,7 +403,6 @@ static void sortSegmentsAndSections() {
|
||||||
void Writer::createOutputSections() {
|
void Writer::createOutputSections() {
|
||||||
// First, create hidden sections
|
// First, create hidden sections
|
||||||
headerSection = make<MachHeaderSection>();
|
headerSection = make<MachHeaderSection>();
|
||||||
bindingSection = make<BindingSection>();
|
|
||||||
lazyBindingSection = make<LazyBindingSection>();
|
lazyBindingSection = make<LazyBindingSection>();
|
||||||
stringTableSection = make<StringTableSection>();
|
stringTableSection = make<StringTableSection>();
|
||||||
symtabSection = make<SymtabSection>(*stringTableSection);
|
symtabSection = make<SymtabSection>(*stringTableSection);
|
||||||
|
@ -513,7 +511,7 @@ void Writer::run() {
|
||||||
assignAddresses(seg);
|
assignAddresses(seg);
|
||||||
|
|
||||||
// Fill __LINKEDIT contents.
|
// Fill __LINKEDIT contents.
|
||||||
bindingSection->finalizeContents();
|
in.binding->finalizeContents();
|
||||||
lazyBindingSection->finalizeContents();
|
lazyBindingSection->finalizeContents();
|
||||||
exportSection->finalizeContents();
|
exportSection->finalizeContents();
|
||||||
symtabSection->finalizeContents();
|
symtabSection->finalizeContents();
|
||||||
|
@ -535,6 +533,7 @@ void Writer::run() {
|
||||||
void macho::writeResult() { Writer().run(); }
|
void macho::writeResult() { Writer().run(); }
|
||||||
|
|
||||||
void macho::createSyntheticSections() {
|
void macho::createSyntheticSections() {
|
||||||
|
in.binding = make<BindingSection>();
|
||||||
in.got = make<GotSection>();
|
in.got = make<GotSection>();
|
||||||
in.lazyPointers = make<LazyPointerSection>();
|
in.lazyPointers = make<LazyPointerSection>();
|
||||||
in.stubs = make<StubsSection>();
|
in.stubs = make<StubsSection>();
|
||||||
|
|
|
@ -31,9 +31,12 @@
|
||||||
# CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: popq %rsi
|
# CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: popq %rsi
|
||||||
|
|
||||||
# CHECK-LABEL: Bind table:
|
# CHECK-LABEL: Bind table:
|
||||||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
|
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
|
||||||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
|
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
|
||||||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
|
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
|
||||||
|
# CHECK-DAG: __DATA __data 0x[[#%x, DATA_ADDR:]] pointer 0 libhello _hello_world
|
||||||
|
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 8]] pointer 8 libhello _hello_its_me
|
||||||
|
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 16]] pointer -15 libgoodbye _goodbye_world
|
||||||
|
|
||||||
.section __TEXT,__text
|
.section __TEXT,__text
|
||||||
.globl _main
|
.globl _main
|
||||||
|
@ -59,3 +62,8 @@ _main:
|
||||||
syscall
|
syscall
|
||||||
mov $0, %rax
|
mov $0, %rax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
.data
|
||||||
|
.quad _hello_world
|
||||||
|
.quad _hello_its_me + 0x8
|
||||||
|
.quad _goodbye_world - 0xf
|
||||||
|
|
Loading…
Reference in New Issue