2020-04-22 04:37:57 +08:00
|
|
|
//===- SyntheticSections.cpp ---------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SyntheticSections.h"
|
2020-04-29 07:58:22 +08:00
|
|
|
#include "Config.h"
|
2020-04-30 06:42:19 +08:00
|
|
|
#include "ExportTrie.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "InputFiles.h"
|
2020-05-22 06:26:35 +08:00
|
|
|
#include "MachOStructs.h"
|
2020-07-03 12:19:55 +08:00
|
|
|
#include "MergedOutputSection.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "OutputSegment.h"
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
#include "SymbolTable.h"
|
2020-04-22 04:37:57 +08:00
|
|
|
#include "Symbols.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "Writer.h"
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
[lld-macho] Ensure __bss sections we output have file offset of zero
Summary:
llvm-mc emits `__bss` sections with an offset of zero, but we weren't expecting
that in our input, so we were copying non-zero data from the start of the file and
putting it in `__bss`, with obviously undesirable runtime results. (It appears that
the kernel will copy those nonzero bytes as long as the offset is nonzero, regardless
of whether S_ZERO_FILL is set.)
I debated on whether to make a special ZeroFillSection -- separate from a
regular InputSection -- but it seemed like too much work for now. But I'm happy
to refactor if anyone feels strongly about having it as a separate class.
Depends on D80857.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80859
2020-06-14 11:00:36 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2020-12-02 06:45:09 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2021-04-17 04:46:45 +08:00
|
|
|
#include "llvm/Config/config.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "llvm/Support/EndianStream.h"
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2020-04-28 03:50:59 +08:00
|
|
|
#include "llvm/Support/LEB128.h"
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
#include "llvm/Support/Path.h"
|
2021-01-07 10:11:44 +08:00
|
|
|
#include "llvm/Support/SHA256.h"
|
|
|
|
|
|
|
|
#if defined(__APPLE__)
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2021-04-17 04:46:45 +08:00
|
|
|
#ifdef HAVE_LIBXAR
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <xar/xar.h>
|
|
|
|
#endif
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
using namespace llvm;
|
2021-03-12 02:28:08 +08:00
|
|
|
using namespace llvm::MachO;
|
2020-04-28 03:50:59 +08:00
|
|
|
using namespace llvm::support;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
using namespace llvm::support::endian;
|
2020-06-25 03:22:13 +08:00
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-06-25 03:22:13 +08:00
|
|
|
// Storage for the global `in` object. Code in this file reaches individual
// sections through it, e.g. in.rebase, in.binding, in.weakBinding, in.exports.
InStruct macho::in;
|
|
|
|
// List of synthetic sections; the SyntheticSection constructor appends each
// newly created section to it.
std::vector<SyntheticSection *> macho::syntheticSections;
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Creates a synthetic output section called `name` that belongs to the
// segment `segname`, and records it in the global `syntheticSections` list.
//
// Every synthetic section also gets a placeholder InputSection (`isec`) that
// carries the same names, points back at this section as its parent, and sits
// at offset 0 within it. Code such as NonLazyPointerSectionBase::addEntry
// passes that `isec` along when registering binding/rebase locations.
SyntheticSection::SyntheticSection(const char *segname, const char *name)
    : OutputSection(SyntheticKind, name), segname(segname) {
  syntheticSections.push_back(this);

  isec = make<InputSection>();
  isec->parent = this;
  isec->outSecOff = 0;
  isec->name = name;
  isec->segname = segname;
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
|
|
|
|
// from the beginning of the file (i.e. the header).
|
|
|
|
MachHeaderSection::MachHeaderSection()
|
2021-03-19 06:49:45 +08:00
|
|
|
: SyntheticSection(segment_names::text, section_names::header) {
|
|
|
|
// XXX: This is a hack. (See D97007)
|
|
|
|
// Setting the index to 1 to pretend that this section is the text
|
|
|
|
// section.
|
|
|
|
index = 1;
|
|
|
|
}
|
2020-05-02 07:29:06 +08:00
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
// Registers `lc` for emission after the header and adds its size to the
// running total that is later written out as the header's `sizeofcmds`.
void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
  sizeOfCmds += lc->getSize();
  loadCommands.push_back(lc);
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
// This serves to hide (type-erase) the template parameter from
|
|
|
|
// MachHeaderSection.
|
|
|
|
template <class LP> class MachHeaderSectionImpl : public MachHeaderSection {
|
|
|
|
public:
|
|
|
|
MachHeaderSectionImpl() = default;
|
|
|
|
uint64_t getSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Factory for the header section: instantiates the `LP`-specific
// implementation via make<> and hands it back as the type-erased base class.
template <class LP> MachHeaderSection *macho::makeMachHeaderSection() {
  MachHeaderSection *header = make<MachHeaderSectionImpl<LP>>();
  return header;
}
|
|
|
|
|
|
|
|
// Returns the number of bytes taken up by the mach_header structure, all load
// commands and the configured header padding.
template <class LP> uint64_t MachHeaderSectionImpl<LP>::getSize() const {
  const uint64_t unpaddedSize =
      sizeof(typename LP::mach_header) + sizeOfCmds + config->headerPad;
  if (!config->emitEncryptionInfo)
    return unpaddedSize;
  // An encryptable binary needs its load commands on a separate
  // (non-encrypted) page of their own, so round up to a page boundary.
  return alignTo(unpaddedSize, target->getPageSize());
}
|
|
|
|
|
2021-02-23 01:12:39 +08:00
|
|
|
static uint32_t cpuSubtype() {
|
|
|
|
uint32_t subtype = target->cpuSubtype;
|
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
if (config->outputType == MH_EXECUTE && !config->staticLink &&
|
|
|
|
target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
|
2021-04-22 03:43:38 +08:00
|
|
|
config->platform() == PlatformKind::macOS &&
|
2021-03-05 03:36:47 +08:00
|
|
|
config->platformInfo.minimum >= VersionTuple(10, 5))
|
2021-03-12 02:28:08 +08:00
|
|
|
subtype |= CPU_SUBTYPE_LIB64;
|
2021-02-23 01:12:39 +08:00
|
|
|
|
|
|
|
return subtype;
|
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
template <class LP>
|
|
|
|
void MachHeaderSectionImpl<LP>::writeTo(uint8_t *buf) const {
|
|
|
|
auto *hdr = reinterpret_cast<typename LP::mach_header *>(buf);
|
|
|
|
hdr->magic = LP::magic;
|
2020-09-27 04:00:22 +08:00
|
|
|
hdr->cputype = target->cpuType;
|
2021-02-23 01:12:39 +08:00
|
|
|
hdr->cpusubtype = cpuSubtype();
|
2020-04-29 07:58:22 +08:00
|
|
|
hdr->filetype = config->outputType;
|
2020-04-28 03:50:59 +08:00
|
|
|
hdr->ncmds = loadCommands.size();
|
|
|
|
hdr->sizeofcmds = sizeOfCmds;
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags = MH_DYLDLINK;
|
2021-03-02 04:25:10 +08:00
|
|
|
|
|
|
|
if (config->namespaceKind == NamespaceKind::twolevel)
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL;
|
2020-08-08 02:04:52 +08:00
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
if (config->outputType == MH_DYLIB && !config->hasReexports)
|
|
|
|
hdr->flags |= MH_NO_REEXPORTED_DYLIBS;
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2021-03-09 23:17:01 +08:00
|
|
|
if (config->markDeadStrippableDylib)
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB;
|
2021-03-09 23:17:01 +08:00
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
if (config->outputType == MH_EXECUTE && config->isPic)
|
|
|
|
hdr->flags |= MH_PIE;
|
2020-09-06 01:55:33 +08:00
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition())
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags |= MH_WEAK_DEFINES;
|
2020-08-28 06:59:15 +08:00
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry())
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags |= MH_BINDS_TO_WEAK;
|
2020-08-28 06:59:15 +08:00
|
|
|
|
2021-03-10 13:41:34 +08:00
|
|
|
for (const OutputSegment *seg : outputSegments) {
|
|
|
|
for (const OutputSection *osec : seg->getSections()) {
|
2020-08-08 02:04:52 +08:00
|
|
|
if (isThreadLocalVariables(osec->flags)) {
|
2021-03-12 02:28:08 +08:00
|
|
|
hdr->flags |= MH_HAS_TLV_DESCRIPTORS;
|
2020-08-08 02:04:52 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
|
2021-03-10 13:41:34 +08:00
|
|
|
for (const LoadCommand *lc : loadCommands) {
|
2020-04-28 03:50:59 +08:00
|
|
|
lc->writeTo(p);
|
|
|
|
p += lc->getSize();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The page-zero section uses the dedicated page-zero segment and section
// names; construction needs no further setup.
PageZeroSection::PageZeroSection()
    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-09-06 01:55:33 +08:00
|
|
|
// The rebase opcode stream is emitted as a link-edit section named
// section_names::rebase inside the link-edit segment.
RebaseSection::RebaseSection()
    : LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
struct Rebase {
|
|
|
|
OutputSegment *segment = nullptr;
|
|
|
|
uint64_t offset = 0;
|
|
|
|
uint64_t consecutiveCount = 0;
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
// Rebase opcodes allow us to describe a contiguous sequence of rebase location
|
|
|
|
// using a single DO_REBASE opcode. To take advantage of it, we delay emitting
|
|
|
|
// `DO_REBASE` until we have reached the end of a contiguous sequence.
|
|
|
|
static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) {
|
|
|
|
assert(rebase.consecutiveCount != 0);
|
|
|
|
if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) {
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
|
|
|
|
rebase.consecutiveCount);
|
|
|
|
} else {
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
|
|
|
|
encodeULEB128(rebase.consecutiveCount, os);
|
|
|
|
}
|
|
|
|
rebase.consecutiveCount = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void encodeRebase(const OutputSection *osec, uint64_t outSecOff,
|
|
|
|
Rebase &lastRebase, raw_svector_ostream &os) {
|
|
|
|
OutputSegment *seg = osec->parent;
|
|
|
|
uint64_t offset = osec->getSegmentOffset() + outSecOff;
|
|
|
|
if (lastRebase.segment != seg || lastRebase.offset != offset) {
|
|
|
|
if (lastRebase.consecutiveCount != 0)
|
|
|
|
encodeDoRebase(lastRebase, os);
|
|
|
|
|
|
|
|
if (lastRebase.segment != seg) {
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
|
|
|
seg->index);
|
|
|
|
encodeULEB128(offset, os);
|
|
|
|
lastRebase.segment = seg;
|
|
|
|
lastRebase.offset = offset;
|
|
|
|
} else {
|
|
|
|
assert(lastRebase.offset != offset);
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
|
|
|
|
encodeULEB128(offset - lastRebase.offset, os);
|
|
|
|
lastRebase.offset = offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++lastRebase.consecutiveCount;
|
|
|
|
// DO_REBASE causes dyld to both perform the binding and increment the offset
|
2021-04-03 06:46:18 +08:00
|
|
|
lastRebase.offset += target->wordSize;
|
2020-09-06 01:55:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void RebaseSection::finalizeContents() {
|
|
|
|
if (locations.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
raw_svector_ostream os{contents};
|
|
|
|
Rebase lastRebase;
|
|
|
|
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
|
|
|
|
|
|
|
|
llvm::sort(locations, [](const Location &a, const Location &b) {
|
2021-03-13 06:26:12 +08:00
|
|
|
return a.isec->getVA() < b.isec->getVA();
|
2020-09-06 01:55:33 +08:00
|
|
|
});
|
2021-03-13 06:26:12 +08:00
|
|
|
for (const Location &loc : locations)
|
|
|
|
encodeRebase(loc.isec->parent, loc.isec->outSecOff + loc.offset, lastRebase,
|
|
|
|
os);
|
2020-09-06 01:55:33 +08:00
|
|
|
if (lastRebase.consecutiveCount != 0)
|
|
|
|
encodeDoRebase(lastRebase, os);
|
|
|
|
|
|
|
|
os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies the opcode bytes prepared by finalizeContents() into `buf`.
void RebaseSection::writeTo(uint8_t *buf) const {
  const size_t numBytes = contents.size();
  memcpy(buf, contents.data(), numBytes);
}
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
// Common setup for sections of non-lazy symbol pointers: they are flagged as
// S_NON_LAZY_SYMBOL_POINTERS and aligned to the target's word size.
NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
                                                     const char *name)
    : SyntheticSection(segname, name) {
  flags = S_NON_LAZY_SYMBOL_POINTERS;
  align = target->wordSize;
}
|
|
|
|
|
2021-03-15 06:35:27 +08:00
|
|
|
void macho::addNonLazyBindingEntries(const Symbol *sym,
|
|
|
|
const InputSection *isec, uint64_t offset,
|
|
|
|
int64_t addend) {
|
|
|
|
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
|
|
|
in.binding->addEntry(dysym, isec, offset, addend);
|
|
|
|
if (dysym->isWeakDef())
|
|
|
|
in.weakBinding->addEntry(sym, isec, offset, addend);
|
|
|
|
} else if (const auto *defined = dyn_cast<Defined>(sym)) {
|
|
|
|
in.rebase->addEntry(isec, offset);
|
|
|
|
if (defined->isExternalWeakDef())
|
|
|
|
in.weakBinding->addEntry(sym, isec, offset, addend);
|
|
|
|
} else {
|
|
|
|
// Undefined symbols are filtered out in scanRelocations(); we should never
|
|
|
|
// get here
|
|
|
|
llvm_unreachable("cannot bind to an undefined symbol");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-21 05:45:51 +08:00
|
|
|
// Gives `sym` a slot in this section unless it already has one. A new slot is
// appended at the end, its index is stored in `sym->gotIndex`, and the
// matching rebase/bind entries are registered for the slot's location.
void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
  // insert() reports false when the symbol is already present.
  if (!entries.insert(sym))
    return;

  assert(!sym->isInGot());
  sym->gotIndex = entries.size() - 1;
  addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize);
}
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
|
2020-06-14 11:00:06 +08:00
|
|
|
for (size_t i = 0, n = entries.size(); i < n; ++i)
|
|
|
|
if (auto *defined = dyn_cast<Defined>(entries[i]))
|
2021-04-03 06:46:18 +08:00
|
|
|
write64le(&buf[i * target->wordSize], defined->getVA());
|
2020-06-14 11:00:06 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The (non-lazy) bind opcode stream is emitted as a link-edit section named
// section_names::binding inside the link-edit segment.
BindingSection::BindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::binding) {}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-07-03 12:19:55 +08:00
|
|
|
namespace {
|
|
|
|
struct Binding {
|
|
|
|
OutputSegment *segment = nullptr;
|
|
|
|
uint64_t offset = 0;
|
|
|
|
int64_t addend = 0;
|
2021-02-23 02:03:02 +08:00
|
|
|
int16_t ordinal = 0;
|
2020-07-03 12:19:55 +08:00
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
// Encode a sequence of opcodes that tell dyld to write the address of symbol +
|
2020-07-03 12:19:55 +08:00
|
|
|
// addend at osec->addr + outSecOff.
|
|
|
|
//
|
|
|
|
// The bind opcode "interpreter" remembers the values of each binding field, so
|
|
|
|
// we only need to encode the differences between bindings. Hence the use of
|
|
|
|
// lastBinding.
|
2020-08-25 12:57:59 +08:00
|
|
|
static void encodeBinding(const Symbol *sym, const OutputSection *osec,
|
2020-07-03 12:19:55 +08:00
|
|
|
uint64_t outSecOff, int64_t addend,
|
2020-12-16 10:05:06 +08:00
|
|
|
bool isWeakBinding, Binding &lastBinding,
|
|
|
|
raw_svector_ostream &os) {
|
2020-07-03 12:19:55 +08:00
|
|
|
OutputSegment *seg = osec->parent;
|
|
|
|
uint64_t offset = osec->getSegmentOffset() + outSecOff;
|
|
|
|
if (lastBinding.segment != seg) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
|
|
|
seg->index);
|
|
|
|
encodeULEB128(offset, os);
|
|
|
|
lastBinding.segment = seg;
|
|
|
|
lastBinding.offset = offset;
|
|
|
|
} else if (lastBinding.offset != offset) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
|
|
|
|
encodeULEB128(offset - lastBinding.offset, os);
|
|
|
|
lastBinding.offset = offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lastBinding.addend != addend) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
|
|
|
|
encodeSLEB128(addend, os);
|
|
|
|
lastBinding.addend = addend;
|
|
|
|
}
|
|
|
|
|
2020-12-16 10:05:06 +08:00
|
|
|
uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
|
|
|
|
if (!isWeakBinding && sym->isWeakRef())
|
|
|
|
flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
|
|
|
|
|
|
|
|
os << flags << sym->getName() << '\0'
|
2020-07-03 12:19:55 +08:00
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
|
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
|
|
|
|
// DO_BIND causes dyld to both perform the binding and increment the offset
|
2021-04-03 06:46:18 +08:00
|
|
|
lastBinding.offset += target->wordSize;
|
2020-07-03 12:19:55 +08:00
|
|
|
}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
// Non-weak bindings need to have their dylib ordinal encoded as well.
|
2021-02-27 08:13:48 +08:00
|
|
|
static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
|
2021-03-02 04:25:10 +08:00
|
|
|
return config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup()
|
2021-03-12 02:28:08 +08:00
|
|
|
? static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
|
2021-03-02 04:25:10 +08:00
|
|
|
: dysym.getFile()->ordinal;
|
2021-02-27 08:13:48 +08:00
|
|
|
}
|
2021-02-26 08:56:31 +08:00
|
|
|
|
2021-02-27 08:13:48 +08:00
|
|
|
static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
|
|
|
|
if (ordinal <= 0) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
|
|
|
|
(ordinal & BIND_IMMEDIATE_MASK));
|
|
|
|
} else if (ordinal <= BIND_IMMEDIATE_MASK) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal);
|
|
|
|
} else {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
|
|
|
|
encodeULEB128(ordinal, os);
|
2020-08-25 12:57:59 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
static void encodeWeakOverride(const Defined *defined,
|
|
|
|
raw_svector_ostream &os) {
|
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM |
|
|
|
|
BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
|
|
|
|
<< defined->getName() << '\0';
|
|
|
|
}
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
|
|
|
|
// interprets to update a record with the following fields:
|
|
|
|
// * segment index (of the segment to write the symbol addresses to, typically
|
|
|
|
// the __DATA_CONST segment which contains the GOT)
|
|
|
|
// * offset within the segment, indicating the next location to write a binding
|
|
|
|
// * symbol type
|
|
|
|
// * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
|
|
|
|
// * symbol name
|
|
|
|
// * addend
|
|
|
|
// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
|
|
|
|
// a symbol in the GOT, and increments the segment offset to point to the next
|
|
|
|
// entry. It does *not* clear the record state after doing the bind, so
|
|
|
|
// subsequent opcodes only need to encode the differences between bindings.
|
|
|
|
void BindingSection::finalizeContents() {
|
|
|
|
raw_svector_ostream os{contents};
|
2020-07-03 12:19:55 +08:00
|
|
|
Binding lastBinding;
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-08-21 05:45:51 +08:00
|
|
|
// Since bindings are delta-encoded, sorting them allows for a more compact
|
|
|
|
// result. Note that sorting by address alone ensures that bindings for the
|
|
|
|
// same segment / section are located together.
|
2020-07-03 12:19:55 +08:00
|
|
|
llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
|
2020-08-25 12:57:59 +08:00
|
|
|
return a.target.getVA() < b.target.getVA();
|
2020-07-03 12:19:55 +08:00
|
|
|
});
|
|
|
|
for (const BindingEntry &b : bindings) {
|
2021-02-27 08:13:48 +08:00
|
|
|
int16_t ordinal = ordinalForDylibSymbol(*b.dysym);
|
|
|
|
if (ordinal != lastBinding.ordinal) {
|
|
|
|
encodeDylibOrdinal(ordinal, os);
|
|
|
|
lastBinding.ordinal = ordinal;
|
|
|
|
}
|
2021-03-13 06:26:12 +08:00
|
|
|
encodeBinding(b.dysym, b.target.isec->parent,
|
|
|
|
b.target.isec->outSecOff + b.target.offset, b.addend,
|
|
|
|
/*isWeakBinding=*/false, lastBinding, os);
|
2020-07-03 12:19:55 +08:00
|
|
|
}
|
2020-08-21 05:45:51 +08:00
|
|
|
if (!bindings.empty())
|
2021-03-12 02:28:08 +08:00
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
|
2020-04-28 03:50:59 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void BindingSection::writeTo(uint8_t *buf) const {
|
2020-04-28 03:50:59 +08:00
|
|
|
memcpy(buf, contents.data(), contents.size());
|
|
|
|
}
|
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
// The weak-bind opcode stream is emitted as a link-edit section named
// section_names::weakBinding inside the link-edit segment.
WeakBindingSection::WeakBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {}
|
|
|
|
|
|
|
|
void WeakBindingSection::finalizeContents() {
|
|
|
|
raw_svector_ostream os{contents};
|
|
|
|
Binding lastBinding;
|
|
|
|
|
2020-08-28 06:59:30 +08:00
|
|
|
for (const Defined *defined : definitions)
|
|
|
|
encodeWeakOverride(defined, os);
|
|
|
|
|
2020-08-25 12:57:59 +08:00
|
|
|
// Since bindings are delta-encoded, sorting them allows for a more compact
|
|
|
|
// result.
|
|
|
|
llvm::sort(bindings,
|
|
|
|
[](const WeakBindingEntry &a, const WeakBindingEntry &b) {
|
|
|
|
return a.target.getVA() < b.target.getVA();
|
|
|
|
});
|
2021-03-13 06:26:12 +08:00
|
|
|
for (const WeakBindingEntry &b : bindings)
|
|
|
|
encodeBinding(b.symbol, b.target.isec->parent,
|
|
|
|
b.target.isec->outSecOff + b.target.offset, b.addend,
|
|
|
|
/*isWeakBinding=*/true, lastBinding, os);
|
2020-08-28 06:59:30 +08:00
|
|
|
if (!bindings.empty() || !definitions.empty())
|
2021-03-12 02:28:08 +08:00
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_DONE);
|
2020-08-25 12:57:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Copies the opcode stream accumulated in `contents` into the output buffer.
void WeakBindingSection::writeTo(uint8_t *buf) const {
  const auto numBytes = contents.size();
  memcpy(buf, contents.data(), numBytes);
}
|
|
|
|
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
// Sets up the "__stubs" section in the text segment.
StubsSection::StubsSection()
    : SyntheticSection(segment_names::text, "__stubs") {
  // Stubs are machine instructions, which are aligned to 4 bytes on the archs
  // we care about.
  align = 4;
  flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS | S_SYMBOL_STUBS;
  // reserved2 records the size of a single stub for the current target.
  reserved2 = target->stubSize;
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t StubsSection::getSize() const {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return entries.size() * target->stubSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
void StubsSection::writeTo(uint8_t *buf) const {
|
|
|
|
size_t off = 0;
|
2020-08-28 06:54:42 +08:00
|
|
|
for (const Symbol *sym : entries) {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
target->writeStub(buf + off, *sym);
|
|
|
|
off += target->stubSize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-28 06:54:42 +08:00
|
|
|
// Registers `sym` as needing a stub. Returns true if the symbol was newly
// added (in which case its stubsIndex is set to its position in `entries`),
// false if it already had an entry.
bool StubsSection::addEntry(Symbol *sym) {
  if (!entries.insert(sym))
    return false;

  // The symbol was just appended, so it occupies the last slot.
  sym->stubsIndex = entries.size() - 1;
  return true;
}
|
|
|
|
|
|
|
|
// Sets up the "__stub_helper" section in the text segment.
StubHelperSection::StubHelperSection()
    : SyntheticSection(segment_names::text, "__stub_helper") {
  // This section comprises machine instructions.
  align = 4;
  flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t StubHelperSection::getSize() const {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return target->stubHelperHeaderSize +
|
2020-08-28 06:54:42 +08:00
|
|
|
in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
|
|
|
|
2020-08-28 06:54:42 +08:00
|
|
|
// The stub helper is needed exactly when the lazy binding section is.
bool StubHelperSection::isNeeded() const {
  return in.lazyBinding->isNeeded();
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
|
|
|
void StubHelperSection::writeTo(uint8_t *buf) const {
|
|
|
|
target->writeStubHelperHeader(buf);
|
|
|
|
size_t off = target->stubHelperHeaderSize;
|
2020-08-28 06:54:42 +08:00
|
|
|
for (const DylibSymbol *sym : in.lazyBinding->getEntries()) {
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
target->writeStubHelperEntry(buf + off, *sym, addr + off);
|
|
|
|
off += target->stubHelperEntrySize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void StubHelperSection::setup() {
|
|
|
|
stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder"));
|
|
|
|
if (stubBinder == nullptr) {
|
|
|
|
error("symbol dyld_stub_binder not found (normally in libSystem.dylib). "
|
|
|
|
"Needed to perform lazy binding.");
|
|
|
|
return;
|
|
|
|
}
|
2020-12-16 10:05:06 +08:00
|
|
|
stubBinder->refState = RefState::Strong;
|
2020-08-21 05:45:51 +08:00
|
|
|
in.got->addEntry(stubBinder);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
|
|
|
inputSections.push_back(in.imageLoaderCache);
|
2021-04-02 08:48:09 +08:00
|
|
|
dyldPrivate =
|
|
|
|
make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
|
|
|
|
/*isWeakDef=*/false,
|
|
|
|
/*isExternal=*/false, /*isPrivateExtern=*/false);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a single zero-filled, word-sized and word-aligned chunk of data
// placed in the data segment under the section name "__data".
ImageLoaderCacheSection::ImageLoaderCacheSection() {
  segname = segment_names::data;
  name = "__data";
  align = target->wordSize;

  // Backing storage comes from the bump allocator and is zero-initialized.
  uint8_t *zeroedWord = bAlloc.Allocate<uint8_t>(target->wordSize);
  memset(zeroedWord, 0, target->wordSize);
  data = {zeroedWord, target->wordSize};
}
|
|
|
|
|
|
|
|
// Sets up the "__la_symbol_ptr" section in the data segment; its slots are
// word-aligned lazy symbol pointers.
LazyPointerSection::LazyPointerSection()
    : SyntheticSection(segment_names::data, "__la_symbol_ptr") {
  flags = S_LAZY_SYMBOL_POINTERS;
  align = target->wordSize;
}
|
|
|
|
|
2020-06-17 08:27:28 +08:00
|
|
|
uint64_t LazyPointerSection::getSize() const {
|
2021-04-03 06:46:18 +08:00
|
|
|
return in.stubs->getEntries().size() * target->wordSize;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool LazyPointerSection::isNeeded() const {
|
|
|
|
return !in.stubs->getEntries().empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
void LazyPointerSection::writeTo(uint8_t *buf) const {
|
|
|
|
size_t off = 0;
|
2020-08-28 06:54:42 +08:00
|
|
|
for (const Symbol *sym : in.stubs->getEntries()) {
|
|
|
|
if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
|
|
|
if (dysym->hasStubsHelper()) {
|
|
|
|
uint64_t stubHelperOffset =
|
|
|
|
target->stubHelperHeaderSize +
|
|
|
|
dysym->stubsHelperIndex * target->stubHelperEntrySize;
|
|
|
|
write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
write64le(buf + off, sym->getVA());
|
|
|
|
}
|
2021-04-03 06:46:18 +08:00
|
|
|
off += target->wordSize;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// The lazy-binding section is a link-edit section named
// section_names::lazyBinding in segment_names::linkEdit.
LazyBindingSection::LazyBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
|
|
|
void LazyBindingSection::finalizeContents() {
|
|
|
|
// TODO: Just precompute output size here instead of writing to a temporary
|
|
|
|
// buffer
|
2020-08-28 06:54:42 +08:00
|
|
|
for (DylibSymbol *sym : entries)
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
sym->lazyBindOffset = encode(*sym);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies the opcode stream accumulated in `contents` into the output buffer.
void LazyBindingSection::writeTo(uint8_t *buf) const {
  const auto numBytes = contents.size();
  memcpy(buf, contents.data(), numBytes);
}
|
|
|
|
|
2020-08-28 06:54:42 +08:00
|
|
|
// Registers `dysym` for lazy binding. On first insertion the symbol is
// assigned its stub helper index and a rebase entry is added for its slot in
// the lazy pointer section; repeated insertions are no-ops.
void LazyBindingSection::addEntry(DylibSymbol *dysym) {
  if (!entries.insert(dysym))
    return;

  // The symbol was just appended, so it occupies the last slot.
  dysym->stubsHelperIndex = entries.size() - 1;
  in.rebase->addEntry(in.lazyPointers->isec,
                      dysym->stubsIndex * target->wordSize);
}
|
|
|
|
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
// Unlike the non-lazy binding section, the bind opcodes in this section aren't
|
|
|
|
// interpreted all at once. Rather, dyld will start interpreting opcodes at a
|
|
|
|
// given offset, typically only binding a single symbol before it finds a
|
|
|
|
// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
|
|
|
|
// we cannot encode just the differences between symbols; we have to emit the
|
|
|
|
// complete bind information for each symbol.
|
|
|
|
uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
|
|
|
|
uint32_t opstreamOffset = contents.size();
|
|
|
|
OutputSegment *dataSeg = in.lazyPointers->parent;
|
2021-03-12 02:28:08 +08:00
|
|
|
os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
dataSeg->index);
|
|
|
|
uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
|
2021-04-03 06:46:18 +08:00
|
|
|
sym.stubsIndex * target->wordSize;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
encodeULEB128(offset, os);
|
2021-02-27 08:13:48 +08:00
|
|
|
encodeDylibOrdinal(ordinalForDylibSymbol(sym), os);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
|
2020-12-16 10:05:06 +08:00
|
|
|
if (sym.isWeakRef())
|
2021-03-12 02:28:08 +08:00
|
|
|
flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
|
2020-12-16 10:05:06 +08:00
|
|
|
|
|
|
|
os << flags << sym.getName() << '\0'
|
2021-03-12 02:28:08 +08:00
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
|
|
|
|
<< static_cast<uint8_t>(BIND_OPCODE_DONE);
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
return opstreamOffset;
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Constructs the export section as a link-edit section named
// section_names::export_ in segment segment_names::linkEdit.
ExportSection::ExportSection()
    : LinkEditSection(segment_names::linkEdit, section_names::export_) {
}
|
2020-04-29 07:58:22 +08:00
|
|
|
|
2021-03-11 08:45:18 +08:00
|
|
|
static void validateExportSymbol(const Defined *defined) {
|
|
|
|
StringRef symbolName = defined->getName();
|
|
|
|
if (defined->privateExtern && config->exportedSymbols.match(symbolName))
|
|
|
|
error("cannot export hidden symbol " + symbolName + "\n>>> defined in " +
|
|
|
|
toString(defined->getFile()));
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool shouldExportSymbol(const Defined *defined) {
|
|
|
|
if (defined->privateExtern)
|
|
|
|
return false;
|
|
|
|
// TODO: Is this a performance bottleneck? If a build has mostly
|
|
|
|
// global symbols in the input but uses -exported_symbols to filter
|
|
|
|
// out most of them, then it would be better to set the value of
|
|
|
|
// privateExtern at parse time instead of calling
|
|
|
|
// exportedSymbols.match() more than once.
|
|
|
|
//
|
|
|
|
// Measurements show that symbol ordering (which again looks up
|
|
|
|
// every symbol in a hashmap) is the biggest bottleneck when linking
|
|
|
|
// chromium_framework, so this will likely be worth optimizing.
|
|
|
|
return config->exportedSymbols.empty()
|
|
|
|
? !config->unexportedSymbols.match(defined->getName())
|
|
|
|
: config->exportedSymbols.match(defined->getName());
|
|
|
|
}
|
|
|
|
|
2020-04-29 07:58:22 +08:00
|
|
|
void ExportSection::finalizeContents() {
|
2020-09-17 02:20:10 +08:00
|
|
|
trieBuilder.setImageBase(in.header->addr);
|
2020-08-28 06:59:15 +08:00
|
|
|
for (const Symbol *sym : symtab->getSymbols()) {
|
|
|
|
if (const auto *defined = dyn_cast<Defined>(sym)) {
|
2021-03-11 08:45:18 +08:00
|
|
|
validateExportSymbol(defined);
|
|
|
|
if (!shouldExportSymbol(defined))
|
|
|
|
continue;
|
2020-04-30 06:42:19 +08:00
|
|
|
trieBuilder.addSymbol(*defined);
|
2020-08-28 06:59:15 +08:00
|
|
|
hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
|
|
|
|
}
|
|
|
|
}
|
2020-04-30 06:42:19 +08:00
|
|
|
size = trieBuilder.build();
|
2020-04-29 07:58:22 +08:00
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// Emits this section by delegating to the trie builder.
void ExportSection::writeTo(uint8_t *buf) const {
  trieBuilder.writeTo(buf);
}
|
2020-04-29 07:58:22 +08:00
|
|
|
|
2021-03-09 14:00:37 +08:00
|
|
|
// Constructs the function-starts section as a link-edit section named
// section_names::functionStarts in segment segment_names::linkEdit.
FunctionStartsSection::FunctionStartsSection()
    : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {
}
|
2021-03-09 14:00:37 +08:00
|
|
|
|
|
|
|
void FunctionStartsSection::finalizeContents() {
|
|
|
|
raw_svector_ostream os{contents};
|
|
|
|
uint64_t addr = in.header->addr;
|
|
|
|
for (const Symbol *sym : symtab->getSymbols()) {
|
|
|
|
if (const auto *defined = dyn_cast<Defined>(sym)) {
|
|
|
|
if (!defined->isec || !isCodeSection(defined->isec))
|
|
|
|
continue;
|
|
|
|
// TODO: Add support for thumbs, in that case
|
|
|
|
// the lowest bit of nextAddr needs to be set to 1.
|
|
|
|
uint64_t nextAddr = defined->getVA();
|
|
|
|
uint64_t delta = nextAddr - addr;
|
|
|
|
if (delta == 0)
|
|
|
|
continue;
|
|
|
|
encodeULEB128(delta, os);
|
|
|
|
addr = nextAddr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
os << '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits this section by copying the bytes held in `contents` to the start of
// `buf`.
void FunctionStartsSection::writeTo(uint8_t *buf) const {
  const size_t numBytes = contents.size();
  memcpy(buf, contents.data(), numBytes);
}
|
|
|
|
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
// Constructs the symbol table as a link-edit section named
// section_names::symbolTable in segment segment_names::linkEdit, keeping a
// reference to the string table section passed in.
SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
      stringTableSection(stringTableSection) {
}
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
|
2021-03-12 02:28:08 +08:00
|
|
|
StabsEntry stab(N_SO);
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
SmallString<261> dir(compileUnit->getCompilationDir());
|
|
|
|
StringRef sep = sys::path::get_separator();
|
|
|
|
// We don't use `path::append` here because we want an empty `dir` to result
|
|
|
|
// in an absolute path. `append` would give us a relative path for that case.
|
|
|
|
if (!dir.endswith(sep))
|
|
|
|
dir += sep;
|
|
|
|
stab.strx = stringTableSection.addString(
|
|
|
|
saver.save(dir + compileUnit->getUnitDIE().getShortName()));
|
|
|
|
stabs.emplace_back(std::move(stab));
|
|
|
|
}
|
|
|
|
|
|
|
|
void SymtabSection::emitEndSourceStab() {
|
2021-03-12 02:28:08 +08:00
|
|
|
StabsEntry stab(N_SO);
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
stab.sect = 1;
|
|
|
|
stabs.emplace_back(std::move(stab));
|
|
|
|
}
|
|
|
|
|
|
|
|
void SymtabSection::emitObjectFileStab(ObjFile *file) {
|
2021-03-12 02:28:08 +08:00
|
|
|
StabsEntry stab(N_OSO);
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
stab.sect = target->cpuSubtype;
|
2020-12-02 06:45:11 +08:00
|
|
|
SmallString<261> path(!file->archiveName.empty() ? file->archiveName
|
|
|
|
: file->getName());
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
std::error_code ec = sys::fs::make_absolute(path);
|
|
|
|
if (ec)
|
2020-12-02 06:45:11 +08:00
|
|
|
fatal("failed to get absolute path for " + path);
|
|
|
|
|
|
|
|
if (!file->archiveName.empty())
|
|
|
|
path.append({"(", file->getName(), ")"});
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
|
|
|
|
stab.strx = stringTableSection.addString(saver.save(path.str()));
|
|
|
|
stab.desc = 1;
|
2020-12-02 06:45:11 +08:00
|
|
|
stab.value = file->modTime;
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
stabs.emplace_back(std::move(stab));
|
|
|
|
}
|
|
|
|
|
2020-12-02 06:45:12 +08:00
|
|
|
// Appends an N_FUN stab whose `value` is the size of `defined`. No string is
// assigned to it.
void SymtabSection::emitEndFunStab(Defined *defined) {
  StabsEntry endStab(N_FUN);
  endStab.value = defined->size;
  stabs.emplace_back(std::move(endStab));
}
|
|
|
|
|
|
|
|
void SymtabSection::emitStabs() {
|
2021-04-09 02:12:20 +08:00
|
|
|
for (const std::string &s : config->astPaths) {
|
|
|
|
StabsEntry astStab(N_AST);
|
|
|
|
astStab.strx = stringTableSection.addString(s);
|
|
|
|
stabs.emplace_back(std::move(astStab));
|
|
|
|
}
|
|
|
|
|
2020-12-02 06:45:12 +08:00
|
|
|
std::vector<Defined *> symbolsNeedingStabs;
|
|
|
|
for (const SymtabEntry &entry :
|
|
|
|
concat<SymtabEntry>(localSymbols, externalSymbols)) {
|
|
|
|
Symbol *sym = entry.sym;
|
|
|
|
if (auto *defined = dyn_cast<Defined>(sym)) {
|
|
|
|
if (defined->isAbsolute())
|
|
|
|
continue;
|
|
|
|
InputSection *isec = defined->isec;
|
|
|
|
ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file);
|
|
|
|
if (!file || !file->compileUnit)
|
|
|
|
continue;
|
|
|
|
symbolsNeedingStabs.push_back(defined);
|
|
|
|
}
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
}
|
|
|
|
|
2020-12-02 06:45:12 +08:00
|
|
|
llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) {
|
|
|
|
return a->isec->file->id < b->isec->file->id;
|
|
|
|
});
|
|
|
|
|
|
|
|
// Emit STABS symbols so that dsymutil and/or the debugger can map address
|
|
|
|
// regions in the final binary to the source and object files from which they
|
|
|
|
// originated.
|
|
|
|
InputFile *lastFile = nullptr;
|
|
|
|
for (Defined *defined : symbolsNeedingStabs) {
|
|
|
|
InputSection *isec = defined->isec;
|
2021-04-09 02:12:20 +08:00
|
|
|
ObjFile *file = cast<ObjFile>(isec->file);
|
2020-12-02 06:45:12 +08:00
|
|
|
|
|
|
|
if (lastFile == nullptr || lastFile != file) {
|
|
|
|
if (lastFile != nullptr)
|
|
|
|
emitEndSourceStab();
|
|
|
|
lastFile = file;
|
|
|
|
|
|
|
|
emitBeginSourceStab(file->compileUnit);
|
|
|
|
emitObjectFileStab(file);
|
|
|
|
}
|
|
|
|
|
|
|
|
StabsEntry symStab;
|
|
|
|
symStab.sect = defined->isec->parent->index;
|
|
|
|
symStab.strx = stringTableSection.addString(defined->getName());
|
|
|
|
symStab.value = defined->getVA();
|
|
|
|
|
2020-12-02 06:45:13 +08:00
|
|
|
if (isCodeSection(isec)) {
|
2021-03-12 02:28:08 +08:00
|
|
|
symStab.type = N_FUN;
|
2020-12-02 06:45:12 +08:00
|
|
|
stabs.emplace_back(std::move(symStab));
|
|
|
|
emitEndFunStab(defined);
|
|
|
|
} else {
|
2021-03-12 02:28:08 +08:00
|
|
|
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
|
2020-12-02 06:45:12 +08:00
|
|
|
stabs.emplace_back(std::move(symStab));
|
|
|
|
}
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
}
|
2020-12-02 06:45:12 +08:00
|
|
|
|
|
|
|
if (!stabs.empty())
|
|
|
|
emitEndSourceStab();
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void SymtabSection::finalizeContents() {
|
2020-12-23 00:00:57 +08:00
|
|
|
auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
|
|
|
|
uint32_t strx = stringTableSection.addString(sym->getName());
|
|
|
|
symbols.push_back({sym, strx});
|
|
|
|
};
|
|
|
|
|
2020-12-02 06:45:09 +08:00
|
|
|
// Local symbols aren't in the SymbolTable, so we walk the list of object
|
|
|
|
// files to gather them.
|
2021-03-10 13:41:34 +08:00
|
|
|
for (const InputFile *file : inputFiles) {
|
2020-12-02 06:45:09 +08:00
|
|
|
if (auto *objFile = dyn_cast<ObjFile>(file)) {
|
|
|
|
for (Symbol *sym : objFile->symbols) {
|
2021-03-06 06:22:57 +08:00
|
|
|
if (sym == nullptr)
|
|
|
|
continue;
|
2020-12-02 06:45:09 +08:00
|
|
|
// TODO: when we implement -dead_strip, we should filter out symbols
|
|
|
|
// that belong to dead sections.
|
|
|
|
if (auto *defined = dyn_cast<Defined>(sym)) {
|
2020-09-27 04:00:22 +08:00
|
|
|
if (!defined->isExternal()) {
|
|
|
|
StringRef name = defined->getName();
|
|
|
|
if (!name.startswith("l") && !name.startswith("L"))
|
|
|
|
addSymbol(localSymbols, sym);
|
|
|
|
}
|
2020-12-02 06:45:09 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
// __dyld_private is a local symbol too. It's linker-created and doesn't
|
|
|
|
// exist in any object file.
|
2021-04-07 02:05:15 +08:00
|
|
|
if (Defined *dyldPrivate = in.stubHelper->dyldPrivate)
|
2020-12-23 00:00:57 +08:00
|
|
|
addSymbol(localSymbols, dyldPrivate);
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
|
2020-09-05 09:02:07 +08:00
|
|
|
for (Symbol *sym : symtab->getSymbols()) {
|
2020-12-02 06:45:09 +08:00
|
|
|
if (auto *defined = dyn_cast<Defined>(sym)) {
|
2021-03-19 06:49:45 +08:00
|
|
|
if (!defined->includeInSymtab)
|
2021-03-13 06:26:12 +08:00
|
|
|
continue;
|
2020-12-02 06:45:09 +08:00
|
|
|
assert(defined->isExternal());
|
2021-03-13 06:26:12 +08:00
|
|
|
addSymbol(externalSymbols, defined);
|
2020-12-16 10:05:06 +08:00
|
|
|
} else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
|
|
|
if (dysym->isReferenced())
|
2020-12-23 00:00:57 +08:00
|
|
|
addSymbol(undefinedSymbols, sym);
|
2020-09-05 09:02:07 +08:00
|
|
|
}
|
2020-12-02 06:45:09 +08:00
|
|
|
}
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
|
2020-12-02 06:45:12 +08:00
|
|
|
emitStabs();
|
2020-12-02 06:45:09 +08:00
|
|
|
uint32_t symtabIndex = stabs.size();
|
|
|
|
for (const SymtabEntry &entry :
|
|
|
|
concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
|
|
|
|
entry.sym->symtabIndex = symtabIndex++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t SymtabSection::getNumSymbols() const {
|
|
|
|
return stabs.size() + localSymbols.size() + externalSymbols.size() +
|
|
|
|
undefinedSymbols.size();
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
// This serves to hide (type-erase) the template parameter from SymtabSection.
|
|
|
|
template <class LP> class SymtabSectionImpl : public SymtabSection {
|
|
|
|
public:
|
|
|
|
SymtabSectionImpl(StringTableSection &stringTableSection)
|
|
|
|
: SymtabSection(stringTableSection) {}
|
|
|
|
uint64_t getRawSize() const override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Returns the size in bytes of the symbol table: one LP::nlist record per
// entry counted by getNumSymbols().
template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
  using NList = typename LP::nlist;
  const uint32_t numEntries = getNumSymbols();
  return numEntries * sizeof(NList);
}
|
|
|
|
|
|
|
|
template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
|
|
|
|
auto *nList = reinterpret_cast<typename LP::nlist *>(buf);
|
2020-12-02 06:45:09 +08:00
|
|
|
// Emit the stabs entries before the "real" symbols. We cannot emit them
|
|
|
|
// after as that would render Symbol::symtabIndex inaccurate.
|
|
|
|
for (const StabsEntry &entry : stabs) {
|
|
|
|
nList->n_strx = entry.strx;
|
|
|
|
nList->n_type = entry.type;
|
|
|
|
nList->n_sect = entry.sect;
|
|
|
|
nList->n_desc = entry.desc;
|
|
|
|
nList->n_value = entry.value;
|
|
|
|
++nList;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const SymtabEntry &entry : concat<const SymtabEntry>(
|
|
|
|
localSymbols, externalSymbols, undefinedSymbols)) {
|
2020-04-29 07:58:22 +08:00
|
|
|
nList->n_strx = entry.strx;
|
2020-09-01 11:32:39 +08:00
|
|
|
// TODO populate n_desc with more flags
|
2020-05-12 06:50:22 +08:00
|
|
|
if (auto *defined = dyn_cast<Defined>(entry.sym)) {
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
uint8_t scope = 0;
|
2021-03-11 08:45:18 +08:00
|
|
|
if (!shouldExportSymbol(defined)) {
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
// Private external -- dylib scoped symbol.
|
|
|
|
// Promote to non-external at link time.
|
|
|
|
assert(defined->isExternal() && "invalid input file");
|
2021-03-12 02:28:08 +08:00
|
|
|
scope = N_PEXT;
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
} else if (defined->isExternal()) {
|
|
|
|
// Normal global symbol.
|
2021-03-12 02:28:08 +08:00
|
|
|
scope = N_EXT;
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
} else {
|
|
|
|
// TU-local symbol from localSymbols.
|
|
|
|
scope = 0;
|
|
|
|
}
|
|
|
|
|
2020-09-18 23:40:46 +08:00
|
|
|
if (defined->isAbsolute()) {
|
2021-03-12 02:28:08 +08:00
|
|
|
nList->n_type = scope | N_ABS;
|
|
|
|
nList->n_sect = NO_SECT;
|
2020-09-18 23:40:46 +08:00
|
|
|
nList->n_value = defined->value;
|
|
|
|
} else {
|
2021-03-12 02:28:08 +08:00
|
|
|
nList->n_type = scope | N_SECT;
|
2020-09-18 23:40:46 +08:00
|
|
|
nList->n_sect = defined->isec->parent->index;
|
|
|
|
// For the N_SECT symbol type, n_value is the address of the symbol
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
nList->n_value = defined->getVA();
|
2020-09-18 23:40:46 +08:00
|
|
|
}
|
2021-03-12 02:28:08 +08:00
|
|
|
nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0;
|
2020-12-16 02:36:15 +08:00
|
|
|
} else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
|
|
|
|
uint16_t n_desc = nList->n_desc;
|
2021-03-02 04:25:10 +08:00
|
|
|
int16_t ordinal = ordinalForDylibSymbol(*dysym);
|
2021-03-12 02:28:08 +08:00
|
|
|
if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
|
|
|
|
SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL);
|
|
|
|
else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
|
|
|
|
SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL);
|
2021-03-02 04:25:10 +08:00
|
|
|
else {
|
|
|
|
assert(ordinal > 0);
|
2021-03-12 02:28:08 +08:00
|
|
|
SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal));
|
2021-03-02 04:25:10 +08:00
|
|
|
}
|
2021-02-23 02:03:02 +08:00
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
nList->n_type = N_EXT;
|
|
|
|
n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0;
|
|
|
|
n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0;
|
2020-12-16 02:36:15 +08:00
|
|
|
nList->n_desc = n_desc;
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
}
|
|
|
|
++nList;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
template <class LP>
|
|
|
|
SymtabSection *
|
|
|
|
macho::makeSymtabSection(StringTableSection &stringTableSection) {
|
|
|
|
return make<SymtabSectionImpl<LP>>(stringTableSection);
|
|
|
|
}
|
|
|
|
|
2020-09-05 09:02:07 +08:00
|
|
|
// The indirect symbol table is a link-edit section identified by the
// linkEdit segment name and the indirectSymbolTable section name.
IndirectSymtabSection::IndirectSymtabSection()
    : LinkEditSection(
          /*segname=*/segment_names::linkEdit,
          /*name=*/section_names::indirectSymbolTable) {
}
|
|
|
|
|
|
|
|
uint32_t IndirectSymtabSection::getNumSymbols() const {
|
|
|
|
return in.got->getEntries().size() + in.tlvPointers->getEntries().size() +
|
|
|
|
in.stubs->getEntries().size();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool IndirectSymtabSection::isNeeded() const {
|
|
|
|
return in.got->isNeeded() || in.tlvPointers->isNeeded() ||
|
|
|
|
in.stubs->isNeeded();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Records, in each indexed section's reserved1 field, the index of that
// section's first slot in the indirect symbol table. Slots are laid out as
// GOT entries, then TLV pointer entries, then stub entries.
void IndirectSymtabSection::finalizeContents() {
  const uint32_t gotStart = 0;
  const uint32_t tlvStart = gotStart + in.got->getEntries().size();
  const uint32_t stubsStart = tlvStart + in.tlvPointers->getEntries().size();

  in.got->reserved1 = gotStart;
  in.tlvPointers->reserved1 = tlvStart;
  // There is a 1:1 correspondence between stubs and LazyPointerSection
  // entries, so both sections refer to the same sub-array of the table.
  in.lazyPointers->reserved1 = stubsStart;
  in.stubs->reserved1 = stubsStart;
}
|
|
|
|
|
2021-02-09 02:47:33 +08:00
|
|
|
static uint32_t indirectValue(const Symbol *sym) {
|
|
|
|
return sym->symtabIndex != UINT32_MAX ? sym->symtabIndex
|
2021-03-12 02:28:08 +08:00
|
|
|
: INDIRECT_SYMBOL_LOCAL;
|
2021-02-09 02:47:33 +08:00
|
|
|
}
|
|
|
|
|
2020-09-05 09:02:07 +08:00
|
|
|
void IndirectSymtabSection::writeTo(uint8_t *buf) const {
|
|
|
|
uint32_t off = 0;
|
|
|
|
for (const Symbol *sym : in.got->getEntries()) {
|
2021-02-09 02:47:33 +08:00
|
|
|
write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
|
2020-09-05 09:02:07 +08:00
|
|
|
++off;
|
|
|
|
}
|
|
|
|
for (const Symbol *sym : in.tlvPointers->getEntries()) {
|
2021-02-09 02:47:33 +08:00
|
|
|
write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
|
2020-09-05 09:02:07 +08:00
|
|
|
++off;
|
|
|
|
}
|
|
|
|
for (const Symbol *sym : in.stubs->getEntries()) {
|
2021-02-09 02:47:33 +08:00
|
|
|
write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
|
2020-09-05 09:02:07 +08:00
|
|
|
++off;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
// The string table is a link-edit section identified by the linkEdit segment
// name and the stringTable section name.
StringTableSection::StringTableSection()
    : LinkEditSection(/*segname=*/segment_names::linkEdit,
                      /*name=*/section_names::stringTable) {
}
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto &sect : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
|
|
|
|
uint32_t StringTableSection::addString(StringRef str) {
|
|
|
|
uint32_t strx = size;
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one of the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sectioins as a proxy,
but that is only accurate if `.subsections_with_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_with_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
strings.push_back(str); // TODO: consider deduplicating strings
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
size += str.size() + 1; // account for null terminator
|
|
|
|
return strx;
|
|
|
|
}
|
|
|
|
|
2020-05-02 07:29:06 +08:00
|
|
|
void StringTableSection::writeTo(uint8_t *buf) const {
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
uint32_t off = 0;
|
|
|
|
for (StringRef str : strings) {
|
|
|
|
memcpy(buf + off, str.data(), str.size());
|
|
|
|
off += str.size() + 1; // account for null terminator
|
|
|
|
}
|
|
|
|
}
|
2021-01-07 10:11:44 +08:00
|
|
|
|
|
|
|
// Sets up the code signature section and precomputes the header sizes that
// depend on the output file's name.
CodeSignatureSection::CodeSignatureSection()
    : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) {
  align = 16; // required by libstuff

  // The identifier stored in the signature is the output path with any
  // leading directories (everything up to the last '/') removed.
  fileName = config->outputFile;
  const size_t lastSlash = fileName.rfind('/');
  if (lastSlash != StringRef::npos)
    fileName = fileName.substr(lastSlash + 1);

  // Headers are the fixed part plus the null-terminated identifier, rounded
  // up to a multiple of 16. fileNamePad is everything after the identifier's
  // characters up to that boundary, so it includes the terminator byte.
  const auto unpaddedHeadersSize = fixedHeadersSize + fileName.size();
  allHeadersSize = alignTo<16>(unpaddedHeadersSize + 1);
  fileNamePad = allHeadersSize - unpaddedHeadersSize;
}
|
|
|
|
|
|
|
|
uint32_t CodeSignatureSection::getBlockCount() const {
|
|
|
|
return (fileOff + blockSize - 1) / blockSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t CodeSignatureSection::getRawSize() const {
|
|
|
|
return allHeadersSize + getBlockCount() * hashSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeSignatureSection::writeHashes(uint8_t *buf) const {
|
|
|
|
uint8_t *code = buf;
|
|
|
|
uint8_t *codeEnd = buf + fileOff;
|
|
|
|
uint8_t *hashes = codeEnd + allHeadersSize;
|
|
|
|
while (code < codeEnd) {
|
|
|
|
StringRef block(reinterpret_cast<char *>(code),
|
|
|
|
std::min(codeEnd - code, static_cast<ssize_t>(blockSize)));
|
|
|
|
SHA256 hasher;
|
|
|
|
hasher.update(block);
|
|
|
|
StringRef hash = hasher.final();
|
|
|
|
assert(hash.size() == hashSize);
|
|
|
|
memcpy(hashes, hash.data(), hashSize);
|
|
|
|
code += blockSize;
|
|
|
|
hashes += hashSize;
|
|
|
|
}
|
|
|
|
#if defined(__APPLE__)
|
|
|
|
// This is macOS-specific work-around and makes no sense for any
|
|
|
|
// other host OS. See https://openradar.appspot.com/FB8914231
|
|
|
|
//
|
|
|
|
// The macOS kernel maintains a signature-verification cache to
|
|
|
|
// quickly validate applications at time of execve(2). The trouble
|
|
|
|
// is that for the kernel creates the cache entry at the time of the
|
|
|
|
// mmap(2) call, before we have a chance to write either the code to
|
|
|
|
// sign or the signature header+hashes. The fix is to invalidate
|
|
|
|
// all cached data associated with the output file, thus discarding
|
|
|
|
// the bogus prematurely-cached signature.
|
|
|
|
msync(buf, fileOff + getSize(), MS_INVALIDATE);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeSignatureSection::writeTo(uint8_t *buf) const {
|
|
|
|
uint32_t signatureSize = static_cast<uint32_t>(getSize());
|
|
|
|
auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf);
|
|
|
|
write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE);
|
|
|
|
write32be(&superBlob->length, signatureSize);
|
|
|
|
write32be(&superBlob->count, 1);
|
|
|
|
auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]);
|
|
|
|
write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY);
|
|
|
|
write32be(&blobIndex->offset, blobHeadersSize);
|
|
|
|
auto *codeDirectory =
|
|
|
|
reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
|
|
|
|
write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY);
|
|
|
|
write32be(&codeDirectory->length, signatureSize - blobHeadersSize);
|
|
|
|
write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG);
|
|
|
|
write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED);
|
|
|
|
write32be(&codeDirectory->hashOffset,
|
|
|
|
sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
|
|
|
|
write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory));
|
|
|
|
codeDirectory->nSpecialSlots = 0;
|
|
|
|
write32be(&codeDirectory->nCodeSlots, getBlockCount());
|
|
|
|
write32be(&codeDirectory->codeLimit, fileOff);
|
|
|
|
codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
|
|
|
|
codeDirectory->hashType = kSecCodeSignatureHashSHA256;
|
|
|
|
codeDirectory->platform = 0;
|
|
|
|
codeDirectory->pageSize = blockSizeShift;
|
|
|
|
codeDirectory->spare2 = 0;
|
|
|
|
codeDirectory->scatterOffset = 0;
|
|
|
|
codeDirectory->teamOffset = 0;
|
|
|
|
codeDirectory->spare3 = 0;
|
|
|
|
codeDirectory->codeLimit64 = 0;
|
|
|
|
OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text);
|
|
|
|
write64be(&codeDirectory->execSegBase, textSeg->fileOff);
|
|
|
|
write64be(&codeDirectory->execSegLimit, textSeg->fileSize);
|
|
|
|
write64be(&codeDirectory->execSegFlags,
|
|
|
|
config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0);
|
|
|
|
auto *id = reinterpret_cast<char *>(&codeDirectory[1]);
|
|
|
|
memcpy(id, fileName.begin(), fileName.size());
|
|
|
|
memset(id + fileName.size(), 0, fileNamePad);
|
|
|
|
}
|
2021-03-19 06:49:45 +08:00
|
|
|
|
2021-04-17 04:46:45 +08:00
|
|
|
// The bitcode bundle is a synthetic section identified by the llvm segment
// name and the bitcodeBundle section name.
BitcodeBundleSection::BitcodeBundleSection()
    : SyntheticSection(/*segname=*/segment_names::llvm,
                       /*name=*/section_names::bitcodeBundle) {
}
|
|
|
|
|
|
|
|
// Small adapter used by CHECK_EC so that a std::error_code result and a plain
// integer status can be handled uniformly: either one is reduced to a single
// int, which the wrapper converts back to implicitly.
class ErrorCodeWrapper {
  int code;

public:
  // Wraps a raw integer status as-is.
  ErrorCodeWrapper(int rawCode) : code{rawCode} {}
  // Wraps a std::error_code by keeping only its numeric value().
  ErrorCodeWrapper(std::error_code status)
      : ErrorCodeWrapper(status.value()) {}

  // Yields the stored status.
  operator int() const { return code; }
};
|
|
|
|
|
|
|
|
// Evaluates `exp` exactly once -- it may yield either a std::error_code or a
// plain integer status, both accepted through ErrorCodeWrapper -- and calls
// fatal() with the numeric code and the stringified expression when the
// result is nonzero.
//
// The expansion intentionally carries no trailing semicolon: the caller's own
// `;` completes the do/while, so `CHECK_EC(x);` is exactly one statement and
// remains valid as the unbraced body of an if that has an else branch.
#define CHECK_EC(exp)                                                          \
  do {                                                                         \
    ErrorCodeWrapper ec(exp);                                                  \
    if (ec)                                                                    \
      fatal(Twine("operation failed with error code ") + Twine(ec) + ": " +    \
            #exp);                                                             \
  } while (0)
|
|
|
|
|
|
|
|
// Builds the XAR archive that backs this section and records its size.
//
// When built with libxar, this creates a temporary ".xar" file (its path is
// stored in xarPath), opens it as an archive with compression disabled,
// closes it again and stores the resulting file size in xarSize. Any failing
// step is reported through fatal(). Without HAVE_LIBXAR this does nothing.
void BitcodeBundleSection::finalize() {
#ifdef HAVE_LIBXAR
  using namespace llvm::sys::fs;
  CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath));

  xar_t xar(xar_open(xarPath.data(), O_RDWR));
  if (!xar)
    fatal("failed to open XAR temporary file at " + xarPath);
  CHECK_EC(xar_opt_set(xar, XAR_OPT_COMPRESSION, XAR_OPT_VAL_NONE));
  // FIXME: add more data to XAR
  CHECK_EC(xar_close(xar));

  // file_size() reports failure through its returned error code. Check it
  // rather than dropping it, so that a failed query cannot leave xarSize
  // unset.
  CHECK_EC(file_size(xarPath, xarSize));
#endif // defined(HAVE_LIBXAR)
}
|
|
|
|
|
|
|
|
void BitcodeBundleSection::writeTo(uint8_t *buf) const {
|
|
|
|
using namespace llvm::sys::fs;
|
|
|
|
file_t handle =
|
|
|
|
CHECK(openNativeFile(xarPath, CD_OpenExisting, FA_Read, OF_None),
|
|
|
|
"failed to open XAR file");
|
|
|
|
std::error_code ec;
|
|
|
|
mapped_file_region xarMap(handle, mapped_file_region::mapmode::readonly,
|
|
|
|
xarSize, 0, ec);
|
|
|
|
if (ec)
|
|
|
|
fatal("failed to map XAR file");
|
|
|
|
memcpy(buf, xarMap.const_data(), xarSize);
|
|
|
|
|
|
|
|
closeFile(handle);
|
|
|
|
remove(xarPath);
|
|
|
|
}
|
|
|
|
|
2021-03-19 06:49:45 +08:00
|
|
|
void macho::createSyntheticSymbols() {
|
|
|
|
auto addHeaderSymbol = [](const char *name) {
|
|
|
|
symtab->addSynthetic(name, in.header->isec, 0,
|
|
|
|
/*privateExtern=*/true,
|
2021-04-22 22:44:56 +08:00
|
|
|
/*includeInSymtab=*/false);
|
2021-03-19 06:49:45 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
switch (config->outputType) {
|
2021-03-20 02:17:51 +08:00
|
|
|
// FIXME: Assign the right address value for these symbols
|
2021-03-19 06:49:45 +08:00
|
|
|
// (rather than 0). But we need to do that after assignAddresses().
|
|
|
|
case MH_EXECUTE:
|
|
|
|
// If linking PIE, __mh_execute_header is a defined symbol in
|
|
|
|
// __TEXT, __text)
|
|
|
|
// Otherwise, it's an absolute symbol.
|
|
|
|
if (config->isPic)
|
|
|
|
symtab->addSynthetic("__mh_execute_header", in.header->isec, 0,
|
2021-04-22 22:44:56 +08:00
|
|
|
/*privateExtern=*/false,
|
|
|
|
/*includeInSymbtab=*/true);
|
2021-03-19 06:49:45 +08:00
|
|
|
else
|
|
|
|
symtab->addSynthetic("__mh_execute_header",
|
|
|
|
/*isec*/ nullptr, 0,
|
2021-04-22 22:44:56 +08:00
|
|
|
/*privateExtern=*/false,
|
|
|
|
/*includeInSymbtab=*/true);
|
2021-03-19 06:49:45 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
// The following symbols are N_SECT symbols, even though the header is not
|
|
|
|
// part of any section and that they are private to the bundle/dylib/object
|
|
|
|
// they are part of.
|
|
|
|
case MH_BUNDLE:
|
|
|
|
addHeaderSymbol("__mh_bundle_header");
|
|
|
|
break;
|
|
|
|
case MH_DYLIB:
|
|
|
|
addHeaderSymbol("__mh_dylib_header");
|
|
|
|
break;
|
|
|
|
case MH_DYLINKER:
|
|
|
|
addHeaderSymbol("__mh_dylinker_header");
|
|
|
|
break;
|
|
|
|
case MH_OBJECT:
|
|
|
|
addHeaderSymbol("__mh_object_header");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("unexpected outputType");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
|
|
|
|
// which does e.g. cleanup of static global variables. The ABI document
|
|
|
|
// says that the pointer can point to any address in one of the dylib's
|
|
|
|
// segments, but in practice ld64 seems to set it to point to the header,
|
|
|
|
// so that's what's implemented here.
|
|
|
|
addHeaderSymbol("___dso_handle");
|
|
|
|
}
|
2021-04-03 06:46:18 +08:00
|
|
|
|
|
|
|
template MachHeaderSection *macho::makeMachHeaderSection<LP64>();
|
|
|
|
template MachHeaderSection *macho::makeMachHeaderSection<ILP32>();
|
|
|
|
template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
|
|
|
|
template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);
|