forked from OSchip/llvm-project
[lld-macho] Support binding dysyms to any section
Previously, we only supported binding dysyms to the GOT. This diff adds support for binding them to any arbitrary section. C++ programs appear to use this, I believe for vtables and type_info. This diff also makes our bind opcode encoding a bit smarter -- we now encode just the differences between bindings, which will make things more compact. I was initially concerned about the performance overhead of iterating over these relocations, but it turns out that the number of such relocations is small. A quick analysis of my llvm-project build directory showed that < 1.3% out of ~7M relocations are RELOC_UNSIGNED bindings to symbols (including both dynamic and static symbols). Reviewed By: #lld-macho, smeenai Differential Revision: https://reviews.llvm.org/D83103
This commit is contained in:
parent
7ec6927bad
commit
53eb7fda51
|
@ -34,7 +34,8 @@ struct X86_64 : TargetInfo {
|
|||
void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
|
||||
uint64_t entryAddr) const override;
|
||||
|
||||
void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
|
||||
void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
|
||||
const Reloc &) override;
|
||||
uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
|
||||
};
|
||||
|
||||
|
@ -208,8 +209,9 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
|
|||
in.stubHelper->addr);
|
||||
}
|
||||
|
||||
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
||||
switch (type) {
|
||||
void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
|
||||
const InputSection *isec, const Reloc &r) {
|
||||
switch (r.type) {
|
||||
case X86_64_RELOC_GOT_LOAD:
|
||||
// TODO: implement mov -> lea relaxation for non-dynamic symbols
|
||||
case X86_64_RELOC_GOT:
|
||||
|
@ -220,7 +222,17 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
|||
in.stubs->addEntry(*dysym);
|
||||
break;
|
||||
}
|
||||
case X86_64_RELOC_UNSIGNED:
|
||||
case X86_64_RELOC_UNSIGNED: {
|
||||
if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
|
||||
if (r.length != 3) {
|
||||
error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
|
||||
dysym->getName() + " must have r_length = 3");
|
||||
return;
|
||||
}
|
||||
in.binding->addEntry(dysym, isec, r.offset, r.addend);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86_64_RELOC_SIGNED:
|
||||
case X86_64_RELOC_SIGNED_1:
|
||||
case X86_64_RELOC_SIGNED_2:
|
||||
|
@ -228,7 +240,7 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
|
|||
break;
|
||||
case X86_64_RELOC_SUBTRACTOR:
|
||||
case X86_64_RELOC_TLV:
|
||||
fatal("TODO: handle relocation type " + std::to_string(type));
|
||||
fatal("TODO: handle relocation type " + std::to_string(r.type));
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unexpected relocation type");
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "ExportTrie.h"
|
||||
#include "InputFiles.h"
|
||||
#include "MachOStructs.h"
|
||||
#include "MergedOutputSection.h"
|
||||
#include "OutputSegment.h"
|
||||
#include "SymbolTable.h"
|
||||
#include "Symbols.h"
|
||||
|
@ -95,7 +96,68 @@ void GotSection::writeTo(uint8_t *buf) const {
|
|||
BindingSection::BindingSection()
|
||||
: SyntheticSection(segment_names::linkEdit, section_names::binding) {}
|
||||
|
||||
bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
|
||||
bool BindingSection::isNeeded() const {
|
||||
return bindings.size() != 0 || in.got->isNeeded();
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct Binding {
|
||||
OutputSegment *segment = nullptr;
|
||||
uint64_t offset = 0;
|
||||
int64_t addend = 0;
|
||||
uint8_t ordinal = 0;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Encode a sequence of opcodes that tell dyld to write the address of dysym +
|
||||
// addend at osec->addr + outSecOff.
|
||||
//
|
||||
// The bind opcode "interpreter" remembers the values of each binding field, so
|
||||
// we only need to encode the differences between bindings. Hence the use of
|
||||
// lastBinding.
|
||||
static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
|
||||
uint64_t outSecOff, int64_t addend,
|
||||
Binding &lastBinding, raw_svector_ostream &os) {
|
||||
using namespace llvm::MachO;
|
||||
OutputSegment *seg = osec->parent;
|
||||
uint64_t offset = osec->getSegmentOffset() + outSecOff;
|
||||
if (lastBinding.segment != seg) {
|
||||
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
||||
seg->index);
|
||||
encodeULEB128(offset, os);
|
||||
lastBinding.segment = seg;
|
||||
lastBinding.offset = offset;
|
||||
} else if (lastBinding.offset != offset) {
|
||||
assert(lastBinding.offset <= offset);
|
||||
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
|
||||
encodeULEB128(offset - lastBinding.offset, os);
|
||||
lastBinding.offset = offset;
|
||||
}
|
||||
|
||||
if (lastBinding.ordinal != dysym.file->ordinal) {
|
||||
if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
|
||||
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
||||
dysym.file->ordinal);
|
||||
} else {
|
||||
error("TODO: Support larger dylib symbol ordinals");
|
||||
return;
|
||||
}
|
||||
lastBinding.ordinal = dysym.file->ordinal;
|
||||
}
|
||||
|
||||
if (lastBinding.addend != addend) {
|
||||
os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
|
||||
encodeSLEB128(addend, os);
|
||||
lastBinding.addend = addend;
|
||||
}
|
||||
|
||||
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
||||
<< dysym.getName() << '\0'
|
||||
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
|
||||
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
|
||||
// DO_BIND causes dyld to both perform the binding and increment the offset
|
||||
lastBinding.offset += WordSize;
|
||||
}
|
||||
|
||||
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
|
||||
// interprets to update a record with the following fields:
|
||||
|
@ -111,43 +173,39 @@ bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
|
|||
// entry. It does *not* clear the record state after doing the bind, so
|
||||
// subsequent opcodes only need to encode the differences between bindings.
|
||||
void BindingSection::finalizeContents() {
|
||||
if (!isNeeded())
|
||||
return;
|
||||
|
||||
raw_svector_ostream os{contents};
|
||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
||||
in.got->parent->index);
|
||||
encodeULEB128(in.got->getSegmentOffset(), os);
|
||||
uint32_t entries_to_skip = 0;
|
||||
Binding lastBinding;
|
||||
bool didEncode = false;
|
||||
size_t gotIdx = 0;
|
||||
for (const Symbol *sym : in.got->getEntries()) {
|
||||
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
||||
if (entries_to_skip != 0) {
|
||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_ADD_ADDR_ULEB);
|
||||
encodeULEB128(WordSize * entries_to_skip, os);
|
||||
entries_to_skip = 0;
|
||||
didEncode = true;
|
||||
encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
|
||||
}
|
||||
++gotIdx;
|
||||
}
|
||||
|
||||
// TODO: Implement compact encoding -- we only need to encode the
|
||||
// differences between consecutive symbol entries.
|
||||
if (dysym->file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
|
||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
||||
dysym->file->ordinal);
|
||||
} else {
|
||||
error("TODO: Support larger dylib symbol ordinals");
|
||||
continue;
|
||||
// Sorting the relocations by segment and address allows us to encode them
|
||||
// more compactly.
|
||||
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
|
||||
OutputSegment *segA = a.isec->parent->parent;
|
||||
OutputSegment *segB = b.isec->parent->parent;
|
||||
if (segA != segB)
|
||||
return segA->fileOff < segB->fileOff;
|
||||
OutputSection *osecA = a.isec->parent;
|
||||
OutputSection *osecB = b.isec->parent;
|
||||
if (osecA != osecB)
|
||||
return osecA->addr < osecB->addr;
|
||||
if (a.isec != b.isec)
|
||||
return a.isec->outSecOff < b.isec->outSecOff;
|
||||
return a.offset < b.offset;
|
||||
});
|
||||
for (const BindingEntry &b : bindings) {
|
||||
didEncode = true;
|
||||
encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
|
||||
b.addend, lastBinding, os);
|
||||
}
|
||||
os << static_cast<uint8_t>(
|
||||
MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
||||
<< dysym->getName() << '\0'
|
||||
<< static_cast<uint8_t>(MachO::BIND_OPCODE_SET_TYPE_IMM |
|
||||
MachO::BIND_TYPE_POINTER)
|
||||
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
|
||||
} else {
|
||||
// We have a defined symbol with a pre-populated address; skip over it.
|
||||
++entries_to_skip;
|
||||
}
|
||||
}
|
||||
|
||||
if (didEncode)
|
||||
os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
|
||||
}
|
||||
|
||||
|
|
|
@ -94,6 +94,16 @@ private:
|
|||
llvm::SetVector<const Symbol *> entries;
|
||||
};
|
||||
|
||||
struct BindingEntry {
|
||||
const DylibSymbol *dysym;
|
||||
const InputSection *isec;
|
||||
uint64_t offset;
|
||||
int64_t addend;
|
||||
BindingEntry(const DylibSymbol *dysym, const InputSection *isec,
|
||||
uint64_t offset, int64_t addend)
|
||||
: dysym(dysym), isec(isec), offset(offset), addend(addend) {}
|
||||
};
|
||||
|
||||
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
|
||||
class BindingSection : public SyntheticSection {
|
||||
public:
|
||||
|
@ -107,6 +117,13 @@ public:
|
|||
bool isNeeded() const override;
|
||||
void writeTo(uint8_t *buf) const override;
|
||||
|
||||
void addEntry(const DylibSymbol *dysym, const InputSection *isec,
|
||||
uint64_t offset, int64_t addend) {
|
||||
bindings.emplace_back(dysym, isec, offset, addend);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<BindingEntry> bindings;
|
||||
SmallVector<char, 128> contents;
|
||||
};
|
||||
|
||||
|
@ -256,6 +273,7 @@ private:
|
|||
};
|
||||
|
||||
struct InStruct {
|
||||
BindingSection *binding = nullptr;
|
||||
GotSection *got = nullptr;
|
||||
LazyPointerSection *lazyPointers = nullptr;
|
||||
StubsSection *stubs = nullptr;
|
||||
|
|
|
@ -53,7 +53,8 @@ public:
|
|||
// depending on the relocation type. prepareSymbolRelocation() will set up the
|
||||
// GOT/stubs entries, and getSymbolVA() will return the addresses of those
|
||||
// entries.
|
||||
virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
|
||||
virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
|
||||
const Reloc &) = 0;
|
||||
virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
|
||||
|
||||
uint32_t cpuType;
|
||||
|
|
|
@ -54,7 +54,6 @@ public:
|
|||
uint64_t addr = 0;
|
||||
uint64_t fileOff = 0;
|
||||
MachHeaderSection *headerSection = nullptr;
|
||||
BindingSection *bindingSection = nullptr;
|
||||
LazyBindingSection *lazyBindingSection = nullptr;
|
||||
ExportSection *exportSection = nullptr;
|
||||
StringTableSection *stringTableSection = nullptr;
|
||||
|
@ -254,7 +253,7 @@ void Writer::scanRelocations() {
|
|||
error("undefined symbol " + s->getName() + ", referenced from " +
|
||||
sys::path::filename(isec->file->getName()));
|
||||
else
|
||||
target->prepareSymbolRelocation(*s, r.type);
|
||||
target->prepareSymbolRelocation(*s, isec, r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -262,7 +261,7 @@ void Writer::scanRelocations() {
|
|||
|
||||
void Writer::createLoadCommands() {
|
||||
headerSection->addLoadCommand(
|
||||
make<LCDyldInfo>(bindingSection, lazyBindingSection, exportSection));
|
||||
make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
|
||||
headerSection->addLoadCommand(
|
||||
make<LCSymtab>(symtabSection, stringTableSection));
|
||||
headerSection->addLoadCommand(make<LCDysymtab>());
|
||||
|
@ -404,7 +403,6 @@ static void sortSegmentsAndSections() {
|
|||
void Writer::createOutputSections() {
|
||||
// First, create hidden sections
|
||||
headerSection = make<MachHeaderSection>();
|
||||
bindingSection = make<BindingSection>();
|
||||
lazyBindingSection = make<LazyBindingSection>();
|
||||
stringTableSection = make<StringTableSection>();
|
||||
symtabSection = make<SymtabSection>(*stringTableSection);
|
||||
|
@ -513,7 +511,7 @@ void Writer::run() {
|
|||
assignAddresses(seg);
|
||||
|
||||
// Fill __LINKEDIT contents.
|
||||
bindingSection->finalizeContents();
|
||||
in.binding->finalizeContents();
|
||||
lazyBindingSection->finalizeContents();
|
||||
exportSection->finalizeContents();
|
||||
symtabSection->finalizeContents();
|
||||
|
@ -535,6 +533,7 @@ void Writer::run() {
|
|||
void macho::writeResult() { Writer().run(); }
|
||||
|
||||
void macho::createSyntheticSections() {
|
||||
in.binding = make<BindingSection>();
|
||||
in.got = make<GotSection>();
|
||||
in.lazyPointers = make<LazyPointerSection>();
|
||||
in.stubs = make<StubsSection>();
|
||||
|
|
|
@ -34,6 +34,9 @@
|
|||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world
|
||||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me
|
||||
# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world
|
||||
# CHECK-DAG: __DATA __data 0x[[#%x, DATA_ADDR:]] pointer 0 libhello _hello_world
|
||||
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 8]] pointer 8 libhello _hello_its_me
|
||||
# CHECK-DAG: __DATA __data 0x{{0*}}[[#%x, DATA_ADDR + 16]] pointer -15 libgoodbye _goodbye_world
|
||||
|
||||
.section __TEXT,__text
|
||||
.globl _main
|
||||
|
@ -59,3 +62,8 @@ _main:
|
|||
syscall
|
||||
mov $0, %rax
|
||||
ret
|
||||
|
||||
.data
|
||||
.quad _hello_world
|
||||
.quad _hello_its_me + 0x8
|
||||
.quad _goodbye_world - 0xf
|
||||
|
|
Loading…
Reference in New Issue