2020-04-22 04:37:57 +08:00
|
|
|
//===- SyntheticSections.cpp ---------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SyntheticSections.h"
|
2020-04-29 07:58:22 +08:00
|
|
|
#include "Config.h"
|
2020-04-30 06:42:19 +08:00
|
|
|
#include "ExportTrie.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "InputFiles.h"
|
2020-05-22 06:26:35 +08:00
|
|
|
#include "MachOStructs.h"
|
2020-07-03 12:19:55 +08:00
|
|
|
#include "MergedOutputSection.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "OutputSegment.h"
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
#include "SymbolTable.h"
|
2020-04-22 04:37:57 +08:00
|
|
|
#include "Symbols.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "Writer.h"
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
[lld-macho] Ensure __bss sections we output have file offset of zero
Summary:
llvm-mc emits `__bss` sections with an offset of zero, but we weren't expecting
that in our input, so we were copying non-zero data from the start of the file and
putting it in `__bss`, with obviously undesirable runtime results. (It appears that
the kernel will copy those nonzero bytes as long as the offset is nonzero, regardless
of whether S_ZERO_FILL is set.)
I debated on whether to make a special ZeroFillSection -- separate from a
regular InputSection -- but it seemed like too much work for now. But I'm happy
to refactor if anyone feels strongly about having it as a separate class.
Depends on D80857.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80859
2020-06-14 11:00:36 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "llvm/Support/EndianStream.h"
|
|
|
|
#include "llvm/Support/LEB128.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::support;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
using namespace llvm::support::endian;
|
2020-06-25 03:22:13 +08:00
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-06-25 03:22:13 +08:00
|
|
|
InStruct macho::in;
|
|
|
|
std::vector<SyntheticSection *> macho::syntheticSections;
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Creates a synthetic output section called `name` that belongs to the
// segment called `segname`. Every instance records itself in the global
// `syntheticSections` list as part of construction.
SyntheticSection::SyntheticSection(const char *segname, const char *name)
    : OutputSection(SyntheticKind, name), segname(segname) {
  macho::syntheticSections.push_back(this);
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
|
|
|
|
// from the beginning of the file (i.e. the header).
|
|
|
|
MachHeaderSection::MachHeaderSection()
|
|
|
|
: SyntheticSection(segment_names::text, section_names::header) {}
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
// Registers `lc` for emission after the header and grows the running total
// of load-command bytes by the command's size.
void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
  loadCommands.push_back(lc);
  const auto cmdSize = lc->getSize();
  sizeOfCmds += cmdSize;
}
|
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t MachHeaderSection::getSize() const {
|
2020-07-31 05:28:45 +08:00
|
|
|
return sizeof(MachO::mach_header_64) + sizeOfCmds + config->headerPad;
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void MachHeaderSection::writeTo(uint8_t *buf) const {
|
2020-06-25 04:45:41 +08:00
|
|
|
auto *hdr = reinterpret_cast<MachO::mach_header_64 *>(buf);
|
|
|
|
hdr->magic = MachO::MH_MAGIC_64;
|
|
|
|
hdr->cputype = MachO::CPU_TYPE_X86_64;
|
|
|
|
hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64;
|
2020-04-29 07:58:22 +08:00
|
|
|
hdr->filetype = config->outputType;
|
2020-04-28 03:50:59 +08:00
|
|
|
hdr->ncmds = loadCommands.size();
|
|
|
|
hdr->sizeofcmds = sizeOfCmds;
|
2020-06-25 04:45:41 +08:00
|
|
|
hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL;
|
|
|
|
if (config->outputType == MachO::MH_DYLIB && !config->hasReexports)
|
|
|
|
hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS;
|
2020-04-28 03:50:59 +08:00
|
|
|
|
|
|
|
uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
|
|
|
|
for (LoadCommand *lc : loadCommands) {
|
|
|
|
lc->writeTo(p);
|
|
|
|
p += lc->getSize();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The page-zero section takes both its segment name and its section name
// from the corresponding `pageZero` constants.
PageZeroSection::PageZeroSection()
    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {
}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The GOT is emitted into the dataConst segment, flagged as a section of
// non-lazy symbol pointers and aligned to 8 bytes.
GotSection::GotSection()
    : SyntheticSection(segment_names::dataConst, section_names::got) {
  flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
  align = 8;

  // TODO: section_64::reserved1 should be an index into the indirect symbol
  // table, which we do not currently emit
}
|
|
|
|
|
2020-06-14 11:00:06 +08:00
|
|
|
// Gives `sym` a GOT slot if it does not have one yet and stores the slot's
// index in `sym.gotIndex`. A symbol that is already present is left as is.
void GotSection::addEntry(Symbol &sym) {
  const bool newlyInserted = entries.insert(&sym);
  if (!newlyInserted)
    return;
  // The symbol was just appended, so its slot is the last one.
  sym.gotIndex = entries.size() - 1;
}
|
|
|
|
|
2020-06-14 11:00:06 +08:00
|
|
|
void GotSection::writeTo(uint8_t *buf) const {
|
|
|
|
for (size_t i = 0, n = entries.size(); i < n; ++i)
|
|
|
|
if (auto *defined = dyn_cast<Defined>(entries[i]))
|
|
|
|
write64le(&buf[i * WordSize], defined->getVA());
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The binding section is a link-edit section placed in the linkEdit segment
// under the `binding` section name.
BindingSection::BindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::binding) {
}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-07-03 12:19:55 +08:00
|
|
|
bool BindingSection::isNeeded() const {
|
|
|
|
return bindings.size() != 0 || in.got->isNeeded();
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
struct Binding {
|
|
|
|
OutputSegment *segment = nullptr;
|
|
|
|
uint64_t offset = 0;
|
|
|
|
int64_t addend = 0;
|
|
|
|
uint8_t ordinal = 0;
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
// Encode a sequence of opcodes that tell dyld to write the address of dysym +
|
|
|
|
// addend at osec->addr + outSecOff.
|
|
|
|
//
|
|
|
|
// The bind opcode "interpreter" remembers the values of each binding field, so
|
|
|
|
// we only need to encode the differences between bindings. Hence the use of
|
|
|
|
// lastBinding.
|
|
|
|
static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
|
|
|
|
uint64_t outSecOff, int64_t addend,
|
|
|
|
Binding &lastBinding, raw_svector_ostream &os) {
|
|
|
|
using namespace llvm::MachO;
|
|
|
|
OutputSegment *seg = osec->parent;
|
|
|
|
uint64_t offset = osec->getSegmentOffset() + outSecOff;
|
|
|
|
if (lastBinding.segment != seg) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
|
|
|
seg->index);
|
|
|
|
encodeULEB128(offset, os);
|
|
|
|
lastBinding.segment = seg;
|
|
|
|
lastBinding.offset = offset;
|
|
|
|
} else if (lastBinding.offset != offset) {
|
|
|
|
assert(lastBinding.offset <= offset);
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
|
|
|
|
encodeULEB128(offset - lastBinding.offset, os);
|
|
|
|
lastBinding.offset = offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lastBinding.ordinal != dysym.file->ordinal) {
|
|
|
|
if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
|
|
|
dysym.file->ordinal);
|
|
|
|
} else {
|
|
|
|
error("TODO: Support larger dylib symbol ordinals");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
lastBinding.ordinal = dysym.file->ordinal;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lastBinding.addend != addend) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
|
|
|
|
encodeSLEB128(addend, os);
|
|
|
|
lastBinding.addend = addend;
|
|
|
|
}
|
|
|
|
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
|
|
|
<< dysym.getName() << '\0'
|
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
|
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
|
|
|
|
// DO_BIND causes dyld to both perform the binding and increment the offset
|
|
|
|
lastBinding.offset += WordSize;
|
|
|
|
}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
|
|
|
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
|
|
|
|
// interprets to update a record with the following fields:
|
|
|
|
// * segment index (of the segment to write the symbol addresses to, typically
|
|
|
|
// the __DATA_CONST segment which contains the GOT)
|
|
|
|
// * offset within the segment, indicating the next location to write a binding
|
|
|
|
// * symbol type
|
|
|
|
// * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
|
|
|
|
// * symbol name
|
|
|
|
// * addend
|
|
|
|
// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
|
|
|
|
// a symbol in the GOT, and increments the segment offset to point to the next
|
|
|
|
// entry. It does *not* clear the record state after doing the bind, so
|
|
|
|
// subsequent opcodes only need to encode the differences between bindings.
|
|
|
|
void BindingSection::finalizeContents() {
|
|
|
|
raw_svector_ostream os{contents};
|
2020-07-03 12:19:55 +08:00
|
|
|
Binding lastBinding;
|
|
|
|
bool didEncode = false;
|
|
|
|
size_t gotIdx = 0;
|
2020-06-14 11:00:06 +08:00
|
|
|
for (const Symbol *sym : in.got->getEntries()) {
|
|
|
|
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
2020-07-03 12:19:55 +08:00
|
|
|
didEncode = true;
|
|
|
|
encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
2020-07-03 12:19:55 +08:00
|
|
|
++gotIdx;
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
|
|
|
|
2020-07-03 12:19:55 +08:00
|
|
|
// Sorting the relocations by segment and address allows us to encode them
|
|
|
|
// more compactly.
|
|
|
|
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
|
|
|
|
OutputSegment *segA = a.isec->parent->parent;
|
|
|
|
OutputSegment *segB = b.isec->parent->parent;
|
|
|
|
if (segA != segB)
|
|
|
|
return segA->fileOff < segB->fileOff;
|
|
|
|
OutputSection *osecA = a.isec->parent;
|
|
|
|
OutputSection *osecB = b.isec->parent;
|
|
|
|
if (osecA != osecB)
|
|
|
|
return osecA->addr < osecB->addr;
|
|
|
|
if (a.isec != b.isec)
|
|
|
|
return a.isec->outSecOff < b.isec->outSecOff;
|
|
|
|
return a.offset < b.offset;
|
|
|
|
});
|
|
|
|
for (const BindingEntry &b : bindings) {
|
|
|
|
didEncode = true;
|
|
|
|
encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
|
|
|
|
b.addend, lastBinding, os);
|
|
|
|
}
|
|
|
|
if (didEncode)
|
|
|
|
os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void BindingSection::writeTo(uint8_t *buf) const {
|
2020-04-28 03:50:59 +08:00
|
|
|
memcpy(buf, contents.data(), contents.size());
|
|
|
|
}
|
|
|
|
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
// The stubs section is named "__stubs" and lives in the text segment.
StubsSection::StubsSection()
    : SyntheticSection(segment_names::text, "__stubs") {
}
|
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t StubsSection::getSize() const {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return entries.size() * target->stubSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
void StubsSection::writeTo(uint8_t *buf) const {
|
|
|
|
size_t off = 0;
|
|
|
|
for (const DylibSymbol *sym : in.stubs->getEntries()) {
|
|
|
|
target->writeStub(buf + off, *sym);
|
|
|
|
off += target->stubSize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Gives `sym` a stub if it does not have one yet and stores the stub's index
// in `sym.stubsIndex`. A symbol that is already present is left as is.
void StubsSection::addEntry(DylibSymbol &sym) {
  const bool newlyInserted = entries.insert(&sym);
  if (!newlyInserted)
    return;
  // The symbol was just appended, so its stub is the last one.
  sym.stubsIndex = entries.size() - 1;
}
|
|
|
|
|
|
|
|
// The stub helper section is named "__stub_helper" and lives in the text
// segment.
StubHelperSection::StubHelperSection()
    : SyntheticSection(segment_names::text, "__stub_helper") {
}
|
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t StubHelperSection::getSize() const {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return target->stubHelperHeaderSize +
|
|
|
|
in.stubs->getEntries().size() * target->stubHelperEntrySize;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool StubHelperSection::isNeeded() const {
|
|
|
|
return !in.stubs->getEntries().empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
void StubHelperSection::writeTo(uint8_t *buf) const {
|
|
|
|
target->writeStubHelperHeader(buf);
|
|
|
|
size_t off = target->stubHelperHeaderSize;
|
|
|
|
for (const DylibSymbol *sym : in.stubs->getEntries()) {
|
|
|
|
target->writeStubHelperEntry(buf + off, *sym, addr + off);
|
|
|
|
off += target->stubHelperEntrySize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void StubHelperSection::setup() {
|
|
|
|
stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder"));
|
|
|
|
if (stubBinder == nullptr) {
|
|
|
|
error("symbol dyld_stub_binder not found (normally in libSystem.dylib). "
|
|
|
|
"Needed to perform lazy binding.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
in.got->addEntry(*stubBinder);
|
|
|
|
|
|
|
|
inputSections.push_back(in.imageLoaderCache);
|
2020-07-25 06:55:25 +08:00
|
|
|
symtab->addDefined("__dyld_private", in.imageLoaderCache, 0,
|
|
|
|
/*isWeakDef=*/false);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sets the section up as "__data" in the data segment, with one word of
// zero-initialized storage from `bAlloc` as its contents.
ImageLoaderCacheSection::ImageLoaderCacheSection() {
  segname = segment_names::data;
  name = "__data";

  uint8_t *zeroedWord = bAlloc.Allocate<uint8_t>(WordSize);
  memset(zeroedWord, 0, WordSize);
  data = {zeroedWord, WordSize};
}
|
|
|
|
|
|
|
|
// The lazy pointer section is named "__la_symbol_ptr", lives in the data
// segment, is flagged as a section of lazy symbol pointers and is aligned to
// 8 bytes.
LazyPointerSection::LazyPointerSection()
    : SyntheticSection(segment_names::data, "__la_symbol_ptr") {
  flags = MachO::S_LAZY_SYMBOL_POINTERS;
  align = 8;
}
|
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t LazyPointerSection::getSize() const {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return in.stubs->getEntries().size() * WordSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool LazyPointerSection::isNeeded() const {
|
|
|
|
return !in.stubs->getEntries().empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
void LazyPointerSection::writeTo(uint8_t *buf) const {
|
|
|
|
size_t off = 0;
|
|
|
|
for (const DylibSymbol *sym : in.stubs->getEntries()) {
|
|
|
|
uint64_t stubHelperOffset = target->stubHelperHeaderSize +
|
|
|
|
sym->stubsIndex * target->stubHelperEntrySize;
|
|
|
|
write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
|
|
|
|
off += WordSize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// The lazy binding section is a link-edit section placed in the linkEdit
// segment under the `lazyBinding` section name.
LazyBindingSection::LazyBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
|
|
|
// Lazy binding info is needed whenever the stubs section is needed.
bool LazyBindingSection::isNeeded() const {
  return in.stubs->isNeeded();
}
|
|
|
|
|
|
|
|
void LazyBindingSection::finalizeContents() {
|
|
|
|
// TODO: Just precompute output size here instead of writing to a temporary
|
|
|
|
// buffer
|
|
|
|
for (DylibSymbol *sym : in.stubs->getEntries())
|
|
|
|
sym->lazyBindOffset = encode(*sym);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies the previously generated opcode bytes held in `contents` verbatim
// into the output buffer at `buf`.
void LazyBindingSection::writeTo(uint8_t *buf) const {
  const size_t numBytes = contents.size();
  memcpy(buf, contents.data(), numBytes);
}
|
|
|
|
|
|
|
|
// Unlike the non-lazy binding section, the bind opcodes in this section aren't
|
|
|
|
// interpreted all at once. Rather, dyld will start interpreting opcodes at a
|
|
|
|
// given offset, typically only binding a single symbol before it finds a
|
|
|
|
// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
|
|
|
|
// we cannot encode just the differences between symbols; we have to emit the
|
|
|
|
// complete bind information for each symbol.
|
|
|
|
uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
|
|
|
|
uint32_t opstreamOffset = contents.size();
|
|
|
|
OutputSegment *dataSeg = in.lazyPointers->parent;
|
2020-06-25 04:45:41 +08:00
|
|
|
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
dataSeg->index);
|
|
|
|
uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
|
|
|
|
sym.stubsIndex * WordSize;
|
|
|
|
encodeULEB128(offset, os);
|
2020-06-25 04:45:41 +08:00
|
|
|
if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK)
|
|
|
|
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
sym.file->ordinal);
|
|
|
|
else
|
|
|
|
fatal("TODO: Support larger dylib symbol ordinals");
|
|
|
|
|
2020-06-25 04:45:41 +08:00
|
|
|
os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
|
|
|
|
<< sym.getName() << '\0'
|
|
|
|
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND)
|
|
|
|
<< static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return opstreamOffset;
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Constructs the section under the linkEdit segment with the export_ section
// name. The export trie is built later, in finalizeContents().
ExportSection::ExportSection()
    : LinkEditSection(segment_names::linkEdit, section_names::export_) {
}
|
2020-04-29 07:58:22 +08:00
|
|
|
|
|
|
|
void ExportSection::finalizeContents() {
|
|
|
|
// TODO: We should check symbol visibility.
|
|
|
|
for (const Symbol *sym : symtab->getSymbols())
|
|
|
|
if (auto *defined = dyn_cast<Defined>(sym))
|
2020-04-30 06:42:19 +08:00
|
|
|
trieBuilder.addSymbol(*defined);
|
|
|
|
size = trieBuilder.build();
|
2020-04-29 07:58:22 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Delegates serialization to the trie builder, which writes into `buf`.
void ExportSection::writeTo(uint8_t *buf) const {
  trieBuilder.writeTo(buf);
}
|
2020-04-29 07:58:22 +08:00
|
|
|
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
// Constructs the section under the linkEdit segment with the symbolTable
// section name, and keeps a reference to the string table that
// finalizeContents() adds symbol names to.
SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : SyntheticSection(segment_names::linkEdit, section_names::symbolTable),
      stringTableSection(stringTableSection) {
}
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t SymtabSection::getSize() const {
|
2020-05-22 06:26:35 +08:00
|
|
|
return symbols.size() * sizeof(structs::nlist_64);
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void SymtabSection::finalizeContents() {
|
2020-04-29 07:58:22 +08:00
|
|
|
// TODO support other symbol types
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
for (Symbol *sym : symtab->getSymbols())
|
2020-04-29 07:58:22 +08:00
|
|
|
if (isa<Defined>(sym))
|
|
|
|
symbols.push_back({sym, stringTableSection.addString(sym->getName())});
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void SymtabSection::writeTo(uint8_t *buf) const {
|
2020-05-22 06:26:35 +08:00
|
|
|
auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
for (const SymtabEntry &entry : symbols) {
|
2020-04-29 07:58:22 +08:00
|
|
|
nList->n_strx = entry.strx;
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
// TODO support other symbol types
|
|
|
|
// TODO populate n_desc
|
2020-05-12 06:50:22 +08:00
|
|
|
if (auto *defined = dyn_cast<Defined>(entry.sym)) {
|
2020-06-25 04:45:41 +08:00
|
|
|
nList->n_type = MachO::N_EXT | MachO::N_SECT;
|
2020-05-02 07:29:06 +08:00
|
|
|
nList->n_sect = defined->isec->parent->index;
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
// For the N_SECT symbol type, n_value is the address of the symbol
|
2020-05-02 07:29:06 +08:00
|
|
|
nList->n_value = defined->value + defined->isec->getVA();
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
++nList;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Constructs the section under the linkEdit segment with the stringTable
// section name. Strings are registered afterwards through addString().
StringTableSection::StringTableSection()
    : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {
}
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
|
|
|
|
// Appends `str` to the list of strings to be written and returns the value of
// `size` from before the call, which is the offset assigned to this string.
// `size` then grows by the string's length plus one byte for its terminator.
uint32_t StringTableSection::addString(StringRef str) {
  const uint32_t assignedOffset = size;
  strings.push_back(str);
  // One extra byte per string for the null terminator.
  size += str.size() + 1;
  return assignedOffset;
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void StringTableSection::writeTo(uint8_t *buf) const {
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
uint32_t off = 0;
|
|
|
|
for (StringRef str : strings) {
|
|
|
|
memcpy(buf + off, str.data(), str.size());
|
|
|
|
off += str.size() + 1; // account for null terminator
|
|
|
|
}
|
|
|
|
}
|