2020-04-03 02:54:05 +08:00
|
|
|
//===- Writer.cpp ---------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "Writer.h"
|
2021-05-26 02:57:16 +08:00
|
|
|
#include "ConcatOutputSection.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "Config.h"
|
|
|
|
#include "InputFiles.h"
|
|
|
|
#include "InputSection.h"
|
2021-03-18 22:38:30 +08:00
|
|
|
#include "MapFile.h"
|
2020-05-06 07:37:34 +08:00
|
|
|
#include "OutputSection.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "OutputSegment.h"
|
2022-01-25 05:51:51 +08:00
|
|
|
#include "SectionPriorities.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "SymbolTable.h"
|
|
|
|
#include "Symbols.h"
|
2020-04-22 04:37:57 +08:00
|
|
|
#include "SyntheticSections.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "Target.h"
|
2020-08-21 04:05:13 +08:00
|
|
|
#include "UnwindInfoSection.h"
|
2022-06-09 08:09:48 +08:00
|
|
|
#include "llvm/Support/Parallel.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2021-04-01 03:48:18 +08:00
|
|
|
#include "lld/Common/Arrays.h"
|
2022-01-21 03:53:18 +08:00
|
|
|
#include "lld/Common/CommonLinkerContext.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
2020-08-15 03:35:31 +08:00
|
|
|
#include "llvm/Config/llvm-config.h"
|
2020-04-29 07:58:22 +08:00
|
|
|
#include "llvm/Support/LEB128.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2021-04-01 03:48:18 +08:00
|
|
|
#include "llvm/Support/Parallel.h"
|
2020-05-06 07:37:34 +08:00
|
|
|
#include "llvm/Support/Path.h"
|
2021-12-09 10:25:20 +08:00
|
|
|
#include "llvm/Support/ThreadPool.h"
|
2021-03-26 02:39:44 +08:00
|
|
|
#include "llvm/Support/TimeProfiler.h"
|
2020-12-10 10:04:22 +08:00
|
|
|
#include "llvm/Support/xxhash.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2020-09-22 02:04:13 +08:00
|
|
|
#include <algorithm>
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::MachO;
|
2020-12-19 06:58:07 +08:00
|
|
|
using namespace llvm::sys;
|
2020-04-03 02:54:05 +08:00
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
|
|
|
namespace {
|
2020-10-15 02:03:34 +08:00
|
|
|
class LCUuid;
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
// Drives the entire output-file production: scans inputs, lays out
// segments/sections, then writes and finalizes the Mach-O file.
class Writer {
public:
  // The output buffer lives on the global error handler so it can be
  // flushed or discarded consistently if we bail out with an error.
  Writer() : buffer(errorHandler().outputBuffer) {}

  void treatSpecialUndefineds();
  void scanRelocations();
  void scanSymbols();
  // LP selects the 32- vs 64-bit Mach-O layout (segment/section structs).
  template <class LP> void createOutputSections();
  template <class LP> void createLoadCommands();
  void finalizeAddresses();
  void finalizeLinkEditSegment();
  void assignAddresses(OutputSegment *);

  void openFile();
  void writeSections();
  void writeUuid();
  void writeCodeSignature();
  void writeOutputFile();

  // Entry point: runs the full pipeline above.
  template <class LP> void run();

  ThreadPool threadPool;
  std::unique_ptr<FileOutputBuffer> &buffer;
  // Running cursors used while assigning addresses/file offsets to
  // sections; both start at zero and advance during layout.
  uint64_t addr = 0;
  uint64_t fileOff = 0;
  MachHeaderSection *header = nullptr;
  StringTableSection *stringTableSection = nullptr;
  SymtabSection *symtabSection = nullptr;
  IndirectSymtabSection *indirectSymtabSection = nullptr;
  CodeSignatureSection *codeSignatureSection = nullptr;
  DataInCodeSection *dataInCodeSection = nullptr;
  FunctionStartsSection *functionStartsSection = nullptr;

  // Retained so writeUuid() can fill in the digest after the rest of the
  // file content is known.
  LCUuid *uuidCommand = nullptr;
  OutputSegment *linkEditSegment = nullptr;
};
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCDyldInfo final : public LoadCommand {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
2020-09-06 01:55:33 +08:00
|
|
|
LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
|
2020-08-25 12:57:59 +08:00
|
|
|
WeakBindingSection *weakBindingSection,
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
LazyBindingSection *lazyBindingSection,
|
|
|
|
ExportSection *exportSection)
|
2020-09-06 01:55:33 +08:00
|
|
|
: rebaseSection(rebaseSection), bindingSection(bindingSection),
|
|
|
|
weakBindingSection(weakBindingSection),
|
2020-08-25 12:57:59 +08:00
|
|
|
lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
uint32_t getSize() const override { return sizeof(dyld_info_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<dyld_info_command *>(buf);
|
|
|
|
c->cmd = LC_DYLD_INFO_ONLY;
|
|
|
|
c->cmdsize = getSize();
|
2020-09-06 01:55:33 +08:00
|
|
|
if (rebaseSection->isNeeded()) {
|
|
|
|
c->rebase_off = rebaseSection->fileOff;
|
|
|
|
c->rebase_size = rebaseSection->getFileSize();
|
|
|
|
}
|
2020-04-28 03:50:59 +08:00
|
|
|
if (bindingSection->isNeeded()) {
|
2020-05-02 07:29:06 +08:00
|
|
|
c->bind_off = bindingSection->fileOff;
|
2020-04-28 03:50:59 +08:00
|
|
|
c->bind_size = bindingSection->getFileSize();
|
|
|
|
}
|
2020-08-25 12:57:59 +08:00
|
|
|
if (weakBindingSection->isNeeded()) {
|
|
|
|
c->weak_bind_off = weakBindingSection->fileOff;
|
|
|
|
c->weak_bind_size = weakBindingSection->getFileSize();
|
|
|
|
}
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
if (lazyBindingSection->isNeeded()) {
|
|
|
|
c->lazy_bind_off = lazyBindingSection->fileOff;
|
|
|
|
c->lazy_bind_size = lazyBindingSection->getFileSize();
|
|
|
|
}
|
2020-04-29 07:58:22 +08:00
|
|
|
if (exportSection->isNeeded()) {
|
2020-05-02 07:29:06 +08:00
|
|
|
c->export_off = exportSection->fileOff;
|
2020-04-29 07:58:22 +08:00
|
|
|
c->export_size = exportSection->getFileSize();
|
|
|
|
}
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
2020-09-06 01:55:33 +08:00
|
|
|
RebaseSection *rebaseSection;
|
2020-04-28 03:50:59 +08:00
|
|
|
BindingSection *bindingSection;
|
2020-08-25 12:57:59 +08:00
|
|
|
WeakBindingSection *weakBindingSection;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
LazyBindingSection *lazyBindingSection;
|
2020-04-29 07:58:22 +08:00
|
|
|
ExportSection *exportSection;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2021-07-06 02:40:52 +08:00
|
|
|
class LCSubFramework final : public LoadCommand {
|
|
|
|
public:
|
|
|
|
LCSubFramework(StringRef umbrella) : umbrella(umbrella) {}
|
|
|
|
|
|
|
|
uint32_t getSize() const override {
|
|
|
|
return alignTo(sizeof(sub_framework_command) + umbrella.size() + 1,
|
|
|
|
target->wordSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<sub_framework_command *>(buf);
|
|
|
|
buf += sizeof(sub_framework_command);
|
|
|
|
|
|
|
|
c->cmd = LC_SUB_FRAMEWORK;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->umbrella = sizeof(sub_framework_command);
|
|
|
|
|
|
|
|
memcpy(buf, umbrella.data(), umbrella.size());
|
|
|
|
buf[umbrella.size()] = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const StringRef umbrella;
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCFunctionStarts final : public LoadCommand {
|
2021-03-09 14:00:37 +08:00
|
|
|
public:
|
2021-03-23 05:38:52 +08:00
|
|
|
explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
|
|
|
|
: functionStartsSection(functionStartsSection) {}
|
2021-03-09 14:00:37 +08:00
|
|
|
|
|
|
|
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
|
|
|
|
c->cmd = LC_FUNCTION_STARTS;
|
|
|
|
c->cmdsize = getSize();
|
2021-03-23 05:38:52 +08:00
|
|
|
c->dataoff = functionStartsSection->fileOff;
|
|
|
|
c->datasize = functionStartsSection->getFileSize();
|
2021-03-09 14:00:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2021-03-23 05:38:52 +08:00
|
|
|
FunctionStartsSection *functionStartsSection;
|
2021-03-09 14:00:37 +08:00
|
|
|
};
|
|
|
|
|
2021-06-15 10:21:43 +08:00
|
|
|
class LCDataInCode final : public LoadCommand {
|
|
|
|
public:
|
|
|
|
explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
|
|
|
|
: dataInCodeSection(dataInCodeSection) {}
|
|
|
|
|
|
|
|
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
|
|
|
|
c->cmd = LC_DATA_IN_CODE;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->dataoff = dataInCodeSection->fileOff;
|
|
|
|
c->datasize = dataInCodeSection->getFileSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
DataInCodeSection *dataInCodeSection;
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCDysymtab final : public LoadCommand {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
2020-12-02 06:45:09 +08:00
|
|
|
LCDysymtab(SymtabSection *symtabSection,
|
|
|
|
IndirectSymtabSection *indirectSymtabSection)
|
|
|
|
: symtabSection(symtabSection),
|
|
|
|
indirectSymtabSection(indirectSymtabSection) {}
|
2020-09-05 09:02:07 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
uint32_t getSize() const override { return sizeof(dysymtab_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<dysymtab_command *>(buf);
|
|
|
|
c->cmd = LC_DYSYMTAB;
|
|
|
|
c->cmdsize = getSize();
|
2020-12-02 06:45:09 +08:00
|
|
|
|
|
|
|
c->ilocalsym = 0;
|
|
|
|
c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
|
|
|
|
c->nextdefsym = symtabSection->getNumExternalSymbols();
|
|
|
|
c->iundefsym = c->iextdefsym + c->nextdefsym;
|
|
|
|
c->nundefsym = symtabSection->getNumUndefinedSymbols();
|
|
|
|
|
2020-09-05 09:02:07 +08:00
|
|
|
c->indirectsymoff = indirectSymtabSection->fileOff;
|
|
|
|
c->nindirectsyms = indirectSymtabSection->getNumSymbols();
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
2020-09-05 09:02:07 +08:00
|
|
|
|
2020-12-02 06:45:09 +08:00
|
|
|
SymtabSection *symtabSection;
|
|
|
|
IndirectSymtabSection *indirectSymtabSection;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
// LC_SEGMENT / LC_SEGMENT_64: one command per OutputSegment, followed
// inline by a section header for each non-hidden section it contains.
// LP supplies the 32- vs 64-bit struct layouts and command type.
template <class LP> class LCSegment final : public LoadCommand {
public:
  LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}

  uint32_t getSize() const override {
    // Fixed header plus one section header per visible section.
    return sizeof(typename LP::segment_command) +
           seg->numNonHiddenSections() * sizeof(typename LP::section);
  }

  void writeTo(uint8_t *buf) const override {
    using SegmentCommand = typename LP::segment_command;
    using SectionHeader = typename LP::section;

    auto *c = reinterpret_cast<SegmentCommand *>(buf);
    // Advance buf past the segment command; section headers follow it.
    buf += sizeof(SegmentCommand);

    c->cmd = LP::segmentLCType;
    c->cmdsize = getSize();
    // segname is a fixed-width char array; only name.size() bytes are
    // copied, the remainder stays as-is (presumably zeroed — confirm
    // against the buffer allocation).
    memcpy(c->segname, name.data(), name.size());
    c->fileoff = seg->fileOff;
    c->maxprot = seg->maxProt;
    c->initprot = seg->initProt;

    c->vmaddr = seg->addr;
    c->vmsize = seg->vmSize;
    c->filesize = seg->fileSize;
    c->nsects = seg->numNonHiddenSections();

    for (const OutputSection *osec : seg->getSections()) {
      // Hidden sections occupy file space but get no header (and were
      // excluded from nsects/getSize above).
      if (osec->isHidden())
        continue;

      auto *sectHdr = reinterpret_cast<SectionHeader *>(buf);
      buf += sizeof(SectionHeader);

      memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
      memcpy(sectHdr->segname, name.data(), name.size());

      sectHdr->addr = osec->addr;
      sectHdr->offset = osec->fileOff;
      // Mach-O stores alignment as a log2 exponent, not a byte count.
      sectHdr->align = Log2_32(osec->align);
      sectHdr->flags = osec->flags;
      sectHdr->size = osec->getSize();
      sectHdr->reserved1 = osec->reserved1;
      sectHdr->reserved2 = osec->reserved2;
    }
  }

private:
  StringRef name;
  OutputSegment *seg;
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCMain final : public LoadCommand {
|
2021-04-16 09:14:32 +08:00
|
|
|
uint32_t getSize() const override {
|
|
|
|
return sizeof(structs::entry_point_command);
|
|
|
|
}
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
2021-04-16 09:14:32 +08:00
|
|
|
auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
|
2020-04-03 02:54:05 +08:00
|
|
|
c->cmd = LC_MAIN;
|
|
|
|
c->cmdsize = getSize();
|
2020-09-18 01:20:16 +08:00
|
|
|
|
|
|
|
if (config->entry->isInStubs())
|
|
|
|
c->entryoff =
|
|
|
|
in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
|
|
|
|
else
|
2021-06-14 07:43:34 +08:00
|
|
|
c->entryoff = config->entry->getVA() - in.header->addr;
|
2020-09-18 01:20:16 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
c->stacksize = 0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCSymtab final : public LoadCommand {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
|
|
|
|
: symtabSection(symtabSection), stringTableSection(stringTableSection) {}
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
uint32_t getSize() const override { return sizeof(symtab_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<symtab_command *>(buf);
|
|
|
|
c->cmd = LC_SYMTAB;
|
|
|
|
c->cmdsize = getSize();
|
2020-05-02 07:29:06 +08:00
|
|
|
c->symoff = symtabSection->fileOff;
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
c->nsyms = symtabSection->getNumSymbols();
|
2020-05-02 07:29:06 +08:00
|
|
|
c->stroff = stringTableSection->fileOff;
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
c->strsize = stringTableSection->getFileSize();
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
[lld-macho][reland] Add basic symbol table output
This diff implements basic support for writing a symbol table.
Attributes are loosely supported for extern symbols and not at all for
other types.
Initial version by Kellie Medlin <kelliem@fb.com>
Originally committed in a3d95a50ee33 and reverted in fbae153ca583 due to
UBSAN erroring over unaligned writes. That has been fixed in the
current diff with the following changes:
```
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -133,6 +133,9 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection)
: stringTableSection(stringTableSection) {
segname = segment_names::linkEdit;
name = section_names::symbolTable;
+ // TODO: When we introduce the SyntheticSections superclass, we should make
+ // all synthetic sections aligned to WordSize by default.
+ align = WordSize;
}
size_t SymtabSection::getSize() const {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -371,6 +371,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
ArrayRef<InputSection *> sections = p.second;
for (InputSection *isec : sections) {
addr = alignTo(addr, isec->align);
+ // We must align the file offsets too to avoid misaligned writes of
+ // structs.
+ fileOff = alignTo(fileOff, isec->align);
isec->addr = addr;
addr += isec->getSize();
fileOff += isec->getFileSize();
@@ -396,6 +397,7 @@ void Writer::writeSections() {
uint64_t fileOff = seg->fileOff;
for (auto § : seg->getSections()) {
for (InputSection *isec : sect.second) {
+ fileOff = alignTo(fileOff, isec->align);
isec->writeTo(buf + fileOff);
fileOff += isec->getFileSize();
}
```
I don't think it's easy to write a test for alignment (that doesn't
involve brittly hard-coding file offsets), so there isn't one... but
UBSAN builds pass now.
Differential Revision: https://reviews.llvm.org/D79050
2020-04-29 07:58:19 +08:00
|
|
|
|
|
|
|
SymtabSection *symtabSection = nullptr;
|
|
|
|
StringTableSection *stringTableSection = nullptr;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2020-04-24 11:16:49 +08:00
|
|
|
// There are several dylib load commands that share the same structure:
|
|
|
|
// * LC_LOAD_DYLIB
|
|
|
|
// * LC_ID_DYLIB
|
|
|
|
// * LC_REEXPORT_DYLIB
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCDylib final : public LoadCommand {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
2020-12-15 07:24:50 +08:00
|
|
|
LCDylib(LoadCommandType type, StringRef path,
|
|
|
|
uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
|
|
|
|
: type(type), path(path), compatibilityVersion(compatibilityVersion),
|
|
|
|
currentVersion(currentVersion) {
|
2020-09-22 02:04:13 +08:00
|
|
|
instanceCount++;
|
|
|
|
}
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
uint32_t getSize() const override {
|
|
|
|
return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<dylib_command *>(buf);
|
|
|
|
buf += sizeof(dylib_command);
|
|
|
|
|
2020-04-24 11:16:49 +08:00
|
|
|
c->cmd = type;
|
2020-04-03 02:54:05 +08:00
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->dylib.name = sizeof(dylib_command);
|
2020-12-15 07:24:50 +08:00
|
|
|
c->dylib.timestamp = 0;
|
|
|
|
c->dylib.compatibility_version = compatibilityVersion;
|
|
|
|
c->dylib.current_version = currentVersion;
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
memcpy(buf, path.data(), path.size());
|
|
|
|
buf[path.size()] = '\0';
|
|
|
|
}
|
|
|
|
|
2020-09-22 02:04:13 +08:00
|
|
|
static uint32_t getInstanceCount() { return instanceCount; }
|
2021-10-31 07:35:30 +08:00
|
|
|
static void resetInstanceCount() { instanceCount = 0; }
|
2020-09-22 02:04:13 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
private:
|
2020-04-24 11:16:49 +08:00
|
|
|
LoadCommandType type;
|
2020-04-03 02:54:05 +08:00
|
|
|
StringRef path;
|
2020-12-15 07:24:50 +08:00
|
|
|
uint32_t compatibilityVersion;
|
|
|
|
uint32_t currentVersion;
|
2020-09-22 02:04:13 +08:00
|
|
|
static uint32_t instanceCount;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2020-09-22 02:04:13 +08:00
|
|
|
uint32_t LCDylib::instanceCount = 0;
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCLoadDylinker final : public LoadCommand {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
|
|
|
uint32_t getSize() const override {
|
|
|
|
return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<dylinker_command *>(buf);
|
|
|
|
buf += sizeof(dylinker_command);
|
|
|
|
|
|
|
|
c->cmd = LC_LOAD_DYLINKER;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->name = sizeof(dylinker_command);
|
|
|
|
|
|
|
|
memcpy(buf, path.data(), path.size());
|
|
|
|
buf[path.size()] = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
// Recent versions of Darwin won't run any binary that has dyld at a
|
|
|
|
// different location.
|
|
|
|
const StringRef path = "/usr/lib/dyld";
|
|
|
|
};
|
2020-08-13 10:50:28 +08:00
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCRPath final : public LoadCommand {
|
2020-08-13 10:50:28 +08:00
|
|
|
public:
|
2021-04-22 10:09:48 +08:00
|
|
|
explicit LCRPath(StringRef path) : path(path) {}
|
2020-08-13 10:50:28 +08:00
|
|
|
|
|
|
|
uint32_t getSize() const override {
|
2021-04-03 06:46:18 +08:00
|
|
|
return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
|
2020-08-13 10:50:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<rpath_command *>(buf);
|
|
|
|
buf += sizeof(rpath_command);
|
|
|
|
|
|
|
|
c->cmd = LC_RPATH;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->path = sizeof(rpath_command);
|
|
|
|
|
|
|
|
memcpy(buf, path.data(), path.size());
|
|
|
|
buf[path.size()] = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
StringRef path;
|
|
|
|
};
|
2020-08-15 03:35:31 +08:00
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCMinVersion final : public LoadCommand {
|
2021-04-21 20:41:14 +08:00
|
|
|
public:
|
|
|
|
explicit LCMinVersion(const PlatformInfo &platformInfo)
|
|
|
|
: platformInfo(platformInfo) {}
|
|
|
|
|
|
|
|
uint32_t getSize() const override { return sizeof(version_min_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<version_min_command *>(buf);
|
|
|
|
switch (platformInfo.target.Platform) {
|
2022-01-13 06:01:59 +08:00
|
|
|
case PLATFORM_MACOS:
|
2021-04-21 20:41:14 +08:00
|
|
|
c->cmd = LC_VERSION_MIN_MACOSX;
|
|
|
|
break;
|
2022-01-13 06:01:59 +08:00
|
|
|
case PLATFORM_IOS:
|
|
|
|
case PLATFORM_IOSSIMULATOR:
|
2021-04-21 20:41:14 +08:00
|
|
|
c->cmd = LC_VERSION_MIN_IPHONEOS;
|
|
|
|
break;
|
2022-01-13 06:01:59 +08:00
|
|
|
case PLATFORM_TVOS:
|
|
|
|
case PLATFORM_TVOSSIMULATOR:
|
2021-04-21 20:41:14 +08:00
|
|
|
c->cmd = LC_VERSION_MIN_TVOS;
|
|
|
|
break;
|
2022-01-13 06:01:59 +08:00
|
|
|
case PLATFORM_WATCHOS:
|
|
|
|
case PLATFORM_WATCHOSSIMULATOR:
|
2021-04-21 20:41:14 +08:00
|
|
|
c->cmd = LC_VERSION_MIN_WATCHOS;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("invalid platform");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->version = encodeVersion(platformInfo.minimum);
|
|
|
|
c->sdk = encodeVersion(platformInfo.sdk);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const PlatformInfo &platformInfo;
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCBuildVersion final : public LoadCommand {
|
2020-08-15 03:35:31 +08:00
|
|
|
public:
|
2021-04-21 20:41:14 +08:00
|
|
|
explicit LCBuildVersion(const PlatformInfo &platformInfo)
|
|
|
|
: platformInfo(platformInfo) {}
|
2020-08-15 03:35:31 +08:00
|
|
|
|
|
|
|
const int ntools = 1;
|
|
|
|
|
|
|
|
uint32_t getSize() const override {
|
|
|
|
return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
|
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<build_version_command *>(buf);
|
|
|
|
c->cmd = LC_BUILD_VERSION;
|
|
|
|
c->cmdsize = getSize();
|
[lld/mac] Support writing zippered dylibs and bundles
With -platform_version flags for two distinct platforms,
this writes a LC_BUILD_VERSION header for each.
The motivation is that this is needed for self-hosting with lld as linker
after D124059.
To create a zippered output at the clang driver level, pass
-target arm64-apple-macos -darwin-target-variant arm64-apple-ios-macabi
to create a zippered dylib.
(In Xcode's clang, `-darwin-target-variant` is spelled just `-target-variant`.)
(If you pass `-target arm64-apple-ios-macabi -target-variant arm64-apple-macos`
instead, ld64 crashes!)
This results in two -platform_version flags being passed to the linker.
ld64 also verifies that the iOS SDK version is at least 13.1. We don't do that
yet. But ld64 also does that for other platforms and we don't. So we need to
do that at some point, but not in this patch.
Only dylib and bundle outputs can be zippered.
I verified that a Catalyst app linked against a dylib created with
clang -shared foo.cc -o libfoo.dylib \
-target arm64-apple-macos \
-target-variant arm64-apple-ios-macabi \
-Wl,-install_name,@rpath/libfoo.dylib \
-fuse-ld=$PWD/out/gn/bin/ld64.lld
runs successfully. (The app calls a function `f()` in libfoo.dylib
that returns a const char* "foo", and NSLog(@"%s")s it.)
ld64 is a bit more permissive when writing zippered outputs,
see references to "unzippered twins". That's not implemented yet.
(If anybody wants to implement that, D124275 is a good start.)
Differential Revision: https://reviews.llvm.org/D124887
2022-04-22 23:55:50 +08:00
|
|
|
|
2021-04-21 20:41:14 +08:00
|
|
|
c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
|
|
|
|
c->minos = encodeVersion(platformInfo.minimum);
|
|
|
|
c->sdk = encodeVersion(platformInfo.sdk);
|
[lld/mac] Support writing zippered dylibs and bundles
With -platform_version flags for two distinct platforms,
this writes a LC_BUILD_VERSION header for each.
The motivation is that this is needed for self-hosting with lld as linker
after D124059.
To create a zippered output at the clang driver level, pass
-target arm64-apple-macos -darwin-target-variant arm64-apple-ios-macabi
to create a zippered dylib.
(In Xcode's clang, `-darwin-target-variant` is spelled just `-target-variant`.)
(If you pass `-target arm64-apple-ios-macabi -target-variant arm64-apple-macos`
instead, ld64 crashes!)
This results in two -platform_version flags being passed to the linker.
ld64 also verifies that the iOS SDK version is at least 13.1. We don't do that
yet. But ld64 also does that for other platforms and we don't. So we need to
do that at some point, but not in this patch.
Only dylib and bundle outputs can be zippered.
I verified that a Catalyst app linked against a dylib created with
clang -shared foo.cc -o libfoo.dylib \
-target arm64-apple-macos \
-target-variant arm64-apple-ios-macabi \
-Wl,-install_name,@rpath/libfoo.dylib \
-fuse-ld=$PWD/out/gn/bin/ld64.lld
runs successfully. (The app calls a function `f()` in libfoo.dylib
that returns a const char* "foo", and NSLog(@"%s")s it.)
ld64 is a bit more permissive when writing zippered outputs,
see references to "unzippered twins". That's not implemented yet.
(If anybody wants to implement that, D124275 is a good start.)
Differential Revision: https://reviews.llvm.org/D124887
2022-04-22 23:55:50 +08:00
|
|
|
|
2020-08-15 03:35:31 +08:00
|
|
|
c->ntools = ntools;
|
|
|
|
auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
|
|
|
|
t->tool = TOOL_LD;
|
2021-07-12 06:35:45 +08:00
|
|
|
t->version = encodeVersion(VersionTuple(
|
2021-04-21 20:41:14 +08:00
|
|
|
LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
|
2020-08-15 03:35:31 +08:00
|
|
|
}
|
|
|
|
|
2021-04-21 20:41:14 +08:00
|
|
|
private:
|
2021-03-05 03:36:47 +08:00
|
|
|
const PlatformInfo &platformInfo;
|
2020-08-15 03:35:31 +08:00
|
|
|
};
|
|
|
|
|
2020-10-15 02:03:34 +08:00
|
|
|
// Stores a unique identifier for the output file based on an MD5 hash of its
|
|
|
|
// contents. In order to hash the contents, we must first write them, but
|
|
|
|
// LC_UUID itself must be part of the written contents in order for all the
|
|
|
|
// offsets to be calculated correctly. We resolve this circular paradox by
|
|
|
|
// first writing an LC_UUID with an all-zero UUID, then updating the UUID with
|
|
|
|
// its real value later.
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCUuid final : public LoadCommand {
|
2020-10-15 02:03:34 +08:00
|
|
|
public:
|
|
|
|
uint32_t getSize() const override { return sizeof(uuid_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<uuid_command *>(buf);
|
|
|
|
c->cmd = LC_UUID;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
uuidBuf = c->uuid;
|
|
|
|
}
|
|
|
|
|
2020-12-10 10:04:22 +08:00
|
|
|
void writeUuid(uint64_t digest) const {
|
|
|
|
// xxhash only gives us 8 bytes, so put some fixed data in the other half.
|
|
|
|
static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
|
|
|
|
memcpy(uuidBuf, "LLD\xa1UU1D", 8);
|
|
|
|
memcpy(uuidBuf + 8, &digest, 8);
|
|
|
|
|
|
|
|
// RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
|
|
|
|
// byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
|
|
|
|
// want to lose bits of the digest in byte 8, so swap that with a byte of
|
|
|
|
// fixed data that happens to have the right bits set.
|
|
|
|
std::swap(uuidBuf[3], uuidBuf[8]);
|
|
|
|
|
|
|
|
// Claim that this is an MD5-based hash. It isn't, but this signals that
|
|
|
|
// this is not a time-based and not a random hash. MD5 seems like the least
|
|
|
|
// bad lie we can put here.
|
|
|
|
assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
|
|
|
|
assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
|
2020-10-15 02:03:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
mutable uint8_t *uuidBuf;
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
template <class LP> class LCEncryptionInfo final : public LoadCommand {
|
2021-04-22 01:35:12 +08:00
|
|
|
public:
|
|
|
|
uint32_t getSize() const override {
|
|
|
|
return sizeof(typename LP::encryption_info_command);
|
|
|
|
}
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
using EncryptionInfo = typename LP::encryption_info_command;
|
|
|
|
auto *c = reinterpret_cast<EncryptionInfo *>(buf);
|
|
|
|
buf += sizeof(EncryptionInfo);
|
|
|
|
c->cmd = LP::encryptionInfoLCType;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->cryptoff = in.header->getSize();
|
|
|
|
auto it = find_if(outputSegments, [](const OutputSegment *seg) {
|
|
|
|
return seg->name == segment_names::text;
|
|
|
|
});
|
|
|
|
assert(it != outputSegments.end());
|
|
|
|
c->cryptsize = (*it)->fileSize - c->cryptoff;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class LCCodeSignature final : public LoadCommand {
|
2021-01-07 10:11:44 +08:00
|
|
|
public:
|
|
|
|
LCCodeSignature(CodeSignatureSection *section) : section(section) {}
|
|
|
|
|
|
|
|
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
|
|
|
|
|
|
|
|
void writeTo(uint8_t *buf) const override {
|
|
|
|
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
|
|
|
|
c->cmd = LC_CODE_SIGNATURE;
|
|
|
|
c->cmdsize = getSize();
|
|
|
|
c->dataoff = static_cast<uint32_t>(section->fileOff);
|
|
|
|
c->datasize = section->getSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
CodeSignatureSection *section;
|
|
|
|
};
|
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
} // namespace
|
|
|
|
|
2021-07-22 22:31:39 +08:00
|
|
|
void Writer::treatSpecialUndefineds() {
|
|
|
|
if (config->entry)
|
|
|
|
if (auto *undefined = dyn_cast<Undefined>(config->entry))
|
|
|
|
treatUndefinedSymbol(*undefined, "the entry point");
|
|
|
|
|
|
|
|
// FIXME: This prints symbols that are undefined both in input files and
|
|
|
|
// via -u flag twice.
|
|
|
|
for (const Symbol *sym : config->explicitUndefineds) {
|
|
|
|
if (const auto *undefined = dyn_cast<Undefined>(sym))
|
|
|
|
treatUndefinedSymbol(*undefined, "-u");
|
|
|
|
}
|
|
|
|
// Literal exported-symbol names must be defined, but glob
|
|
|
|
// patterns need not match.
|
|
|
|
for (const CachedHashStringRef &cachedName :
|
|
|
|
config->exportedSymbols.literals) {
|
|
|
|
if (const Symbol *sym = symtab->find(cachedName))
|
|
|
|
if (const auto *undefined = dyn_cast<Undefined>(sym))
|
|
|
|
treatUndefinedSymbol(*undefined, "-exported_symbol(s_list)");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-30 08:33:48 +08:00
|
|
|
// Add stubs and bindings where necessary (e.g. if the symbol is a
|
2021-03-15 06:35:27 +08:00
|
|
|
// DylibSymbol.)
|
|
|
|
static void prepareBranchTarget(Symbol *sym) {
|
|
|
|
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
|
|
|
if (in.stubs->addEntry(dysym)) {
|
|
|
|
if (sym->isWeakDef()) {
|
|
|
|
in.binding->addEntry(dysym, in.lazyPointers->isec,
|
2021-04-03 06:46:18 +08:00
|
|
|
sym->stubsIndex * target->wordSize);
|
2021-03-15 06:35:27 +08:00
|
|
|
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
|
2021-04-03 06:46:18 +08:00
|
|
|
sym->stubsIndex * target->wordSize);
|
2021-03-15 06:35:27 +08:00
|
|
|
} else {
|
|
|
|
in.lazyBinding->addEntry(dysym);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (auto *defined = dyn_cast<Defined>(sym)) {
|
|
|
|
if (defined->isExternalWeakDef()) {
|
|
|
|
if (in.stubs->addEntry(sym)) {
|
2021-04-03 06:46:18 +08:00
|
|
|
in.rebase->addEntry(in.lazyPointers->isec,
|
|
|
|
sym->stubsIndex * target->wordSize);
|
2021-03-15 06:35:27 +08:00
|
|
|
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
|
2021-04-03 06:46:18 +08:00
|
|
|
sym->stubsIndex * target->wordSize);
|
2021-03-15 06:35:27 +08:00
|
|
|
}
|
2022-03-15 09:51:15 +08:00
|
|
|
} else if (defined->interposable) {
|
|
|
|
if (in.stubs->addEntry(sym))
|
|
|
|
in.lazyBinding->addEntry(sym);
|
2021-03-15 06:35:27 +08:00
|
|
|
}
|
2021-05-10 08:05:45 +08:00
|
|
|
} else {
|
2021-03-30 08:33:48 +08:00
|
|
|
llvm_unreachable("invalid branch target symbol type");
|
2021-03-15 06:35:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Can a symbol's address can only be resolved at runtime?
|
|
|
|
static bool needsBinding(const Symbol *sym) {
|
|
|
|
if (isa<DylibSymbol>(sym))
|
|
|
|
return true;
|
|
|
|
if (const auto *defined = dyn_cast<Defined>(sym))
|
2022-03-15 09:51:15 +08:00
|
|
|
return defined->isExternalWeakDef() || defined->interposable;
|
2021-03-15 06:35:27 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-03-30 08:19:29 +08:00
|
|
|
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
|
2022-01-21 03:53:18 +08:00
|
|
|
const lld::macho::Reloc &r) {
|
2021-07-22 23:37:29 +08:00
|
|
|
assert(sym->isLive());
|
2021-03-12 02:28:09 +08:00
|
|
|
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
|
2021-01-19 23:44:42 +08:00
|
|
|
|
|
|
|
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
|
|
|
|
prepareBranchTarget(sym);
|
|
|
|
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
|
2021-02-24 10:41:54 +08:00
|
|
|
if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
|
|
|
|
in.got->addEntry(sym);
|
|
|
|
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
|
2021-01-19 23:44:42 +08:00
|
|
|
if (needsBinding(sym))
|
|
|
|
in.tlvPointers->addEntry(sym);
|
2021-02-24 10:41:54 +08:00
|
|
|
} else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) {
|
2021-01-19 23:44:42 +08:00
|
|
|
// References from thread-local variable sections are treated as offsets
|
|
|
|
// relative to the start of the referent section, and therefore have no
|
|
|
|
// need of rebase opcodes.
|
2021-07-02 08:33:55 +08:00
|
|
|
if (!(isThreadLocalVariables(isec->getFlags()) && isa<Defined>(sym)))
|
2021-01-19 23:44:42 +08:00
|
|
|
addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-22 04:37:57 +08:00
|
|
|
void Writer::scanRelocations() {
|
2021-03-26 02:39:44 +08:00
|
|
|
TimeTraceScope timeScope("Scan relocations");
|
2021-07-16 00:54:42 +08:00
|
|
|
|
|
|
|
// This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
|
|
|
|
// add to inputSections, which invalidates inputSections's iterators.
|
|
|
|
for (size_t i = 0; i < inputSections.size(); ++i) {
|
|
|
|
ConcatInputSection *isec = inputSections[i];
|
|
|
|
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
if (isec->shouldOmitFromOutput())
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
continue;
|
2020-09-06 01:55:33 +08:00
|
|
|
|
2021-02-28 01:30:16 +08:00
|
|
|
for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
|
2022-01-21 03:53:18 +08:00
|
|
|
lld::macho::Reloc &r = *it;
|
2021-02-28 01:30:16 +08:00
|
|
|
if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
|
|
|
|
// Skip over the following UNSIGNED relocation -- it's just there as the
|
|
|
|
// minuend, and doesn't have the usual UNSIGNED semantics. We don't want
|
|
|
|
// to emit rebase opcodes for it.
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
it++;
|
2021-01-19 23:44:42 +08:00
|
|
|
continue;
|
2021-02-28 01:30:16 +08:00
|
|
|
}
|
2021-03-30 08:19:29 +08:00
|
|
|
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
|
2021-02-04 02:31:40 +08:00
|
|
|
if (auto *undefined = dyn_cast<Undefined>(sym))
|
2022-06-14 21:41:28 +08:00
|
|
|
treatUndefinedSymbol(*undefined, isec, r.offset);
|
2021-03-01 02:42:14 +08:00
|
|
|
// treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
|
2021-03-12 02:28:09 +08:00
|
|
|
if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r))
|
2021-01-19 23:44:42 +08:00
|
|
|
prepareSymbolRelocation(sym, isec, r);
|
2020-09-06 01:55:33 +08:00
|
|
|
} else {
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
// Canonicalize the referent so that later accesses in Writer won't
|
|
|
|
// have to worry about it. Perhaps we should do this for Defined::isec
|
|
|
|
// too...
|
|
|
|
auto *referentIsec = r.referent.get<InputSection *>();
|
|
|
|
r.referent = referentIsec->canonical();
|
2020-09-06 01:55:33 +08:00
|
|
|
if (!r.pcrel)
|
|
|
|
in.rebase->addEntry(isec, r.offset);
|
2020-05-19 06:46:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitted
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since we want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
|
|
|
|
in.unwindInfo->prepareRelocations();
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
2020-12-17 08:14:57 +08:00
|
|
|
void Writer::scanSymbols() {
|
2021-03-26 02:39:44 +08:00
|
|
|
TimeTraceScope timeScope("Scan symbols");
|
2021-10-29 23:00:13 +08:00
|
|
|
for (Symbol *sym : symtab->getSymbols()) {
|
|
|
|
if (auto *defined = dyn_cast<Defined>(sym)) {
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
if (!defined->isLive())
|
|
|
|
continue;
|
2021-10-29 23:00:13 +08:00
|
|
|
defined->canonicalize();
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
if (defined->overridesWeakDef)
|
2020-12-17 08:14:57 +08:00
|
|
|
in.weakBinding->addNonWeakDefinition(defined);
|
2021-10-27 04:04:06 +08:00
|
|
|
if (!defined->isAbsolute() && isCodeSection(defined->isec))
|
|
|
|
in.unwindInfo->addSymbol(defined);
|
2020-12-17 08:14:57 +08:00
|
|
|
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
// This branch intentionally doesn't check isLive().
|
2021-02-26 08:56:31 +08:00
|
|
|
if (dysym->isDynamicLookup())
|
|
|
|
continue;
|
2021-02-04 02:31:40 +08:00
|
|
|
dysym->getFile()->refState =
|
2021-06-01 10:12:35 +08:00
|
|
|
std::max(dysym->getFile()->refState, dysym->getRefState());
|
2020-12-17 08:14:57 +08:00
|
|
|
}
|
|
|
|
}
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
|
|
|
|
for (const InputFile *file : inputFiles) {
|
|
|
|
if (auto *objFile = dyn_cast<ObjFile>(file))
|
|
|
|
for (Symbol *sym : objFile->symbols) {
|
2021-10-29 23:00:13 +08:00
|
|
|
if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
|
|
|
|
if (!defined->isLive())
|
|
|
|
continue;
|
|
|
|
defined->canonicalize();
|
|
|
|
if (!defined->isExternal() && !defined->isAbsolute() &&
|
|
|
|
isCodeSection(defined->isec))
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
in.unwindInfo->addSymbol(defined);
|
2021-10-29 23:00:13 +08:00
|
|
|
}
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
}
|
|
|
|
}
|
2020-12-17 08:14:57 +08:00
|
|
|
}
|
|
|
|
|
2021-04-21 20:41:14 +08:00
|
|
|
// TODO: ld64 enforces the old load commands in a few other cases.
|
|
|
|
static bool useLCBuildVersion(const PlatformInfo &platformInfo) {
|
2022-01-13 06:01:59 +08:00
|
|
|
static const std::vector<std::pair<PlatformType, VersionTuple>> minVersion = {
|
|
|
|
{PLATFORM_MACOS, VersionTuple(10, 14)},
|
|
|
|
{PLATFORM_IOS, VersionTuple(12, 0)},
|
|
|
|
{PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)},
|
|
|
|
{PLATFORM_TVOS, VersionTuple(12, 0)},
|
|
|
|
{PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)},
|
|
|
|
{PLATFORM_WATCHOS, VersionTuple(5, 0)},
|
|
|
|
{PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}};
|
2021-07-12 06:24:53 +08:00
|
|
|
auto it = llvm::find_if(minVersion, [&](const auto &p) {
|
|
|
|
return p.first == platformInfo.target.Platform;
|
|
|
|
});
|
2021-04-21 20:41:14 +08:00
|
|
|
return it == minVersion.end() ? true : platformInfo.minimum >= it->second;
|
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
// Populates the mach header with all load commands. The order of addLoadCommand
// calls below is deliberate: it matches ld64's load command order, so do not
// reorder without checking against ld64's output.
template <class LP> void Writer::createLoadCommands() {
  // One LC_SEGMENT(_64) per output segment; also assign segment indices here.
  uint8_t segIndex = 0;
  for (OutputSegment *seg : outputSegments) {
    in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
    seg->index = segIndex++;
  }

  in.header->addLoadCommand(make<LCDyldInfo>(
      in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
  in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
  in.header->addLoadCommand(
      make<LCDysymtab>(symtabSection, indirectSymtabSection));
  if (!config->umbrella.empty())
    in.header->addLoadCommand(make<LCSubFramework>(config->umbrella));
  if (config->emitEncryptionInfo)
    in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
  for (StringRef path : config->runtimePaths)
    in.header->addLoadCommand(make<LCRPath>(path));

  // Output-type-specific commands: executables need the dynamic linker,
  // dylibs need their identifying LC_ID_DYLIB; bundles need neither.
  switch (config->outputType) {
  case MH_EXECUTE:
    in.header->addLoadCommand(make<LCLoadDylinker>());
    break;
  case MH_DYLIB:
    in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName,
                                            config->dylibCompatibilityVersion,
                                            config->dylibCurrentVersion));
    break;
  case MH_BUNDLE:
    break;
  default:
    llvm_unreachable("unhandled output file type");
  }

  // The UUID command is kept around (in uuidCommand) so its digest can be
  // filled in after the rest of the output has been written.
  uuidCommand = make<LCUuid>();
  in.header->addLoadCommand(uuidCommand);

  if (useLCBuildVersion(config->platformInfo))
    in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo));
  else
    in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo));

  // Zippered outputs carry a second LC_BUILD_VERSION for the other platform.
  if (config->secondaryPlatformInfo) {
    in.header->addLoadCommand(
        make<LCBuildVersion>(*config->secondaryPlatformInfo));
  }

  // This is down here to match ld64's load command order.
  if (config->outputType == MH_EXECUTE)
    in.header->addLoadCommand(make<LCMain>());

  // Emit one LC_LOAD_DYLIB (or weak/reexport variant) per referenced dylib,
  // assigning bind ordinals as we go. Ordinal 1 is the first real dylib.
  int64_t dylibOrdinal = 1;
  DenseMap<StringRef, int64_t> ordinalForInstallName;
  for (InputFile *file : inputFiles) {
    if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
      if (dylibFile->isBundleLoader) {
        dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
        // Shortcut since bundle-loader does not re-export the symbols.
        dylibFile->reexport = false;
        continue;
      }

      // Don't emit load commands for a dylib that is not referenced if:
      // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER --
      //   if it's on the linker command line, it's explicit)
      // - or it's marked MH_DEAD_STRIPPABLE_DYLIB
      // - or the flag -dead_strip_dylibs is used
      // FIXME: `isReferenced()` is currently computed before dead code
      // stripping, so references from dead code keep a dylib alive. This
      // matches ld64, but it's something we should do better.
      if (!dylibFile->isReferenced() && !dylibFile->forceNeeded &&
          (!dylibFile->explicitlyLinked || dylibFile->deadStrippable ||
           config->deadStripDylibs))
        continue;

      // Several DylibFiles can have the same installName. Only emit a single
      // load command for that installName and give all these DylibFiles the
      // same ordinal.
      // This can happen in several cases:
      // - a new framework could change its installName to an older
      //   framework name via an $ld$ symbol depending on platform_version
      // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd;
      //   Foo.framework/Foo.tbd is usually a symlink to
      //   Foo.framework/Versions/Current/Foo.tbd, where
      //   Foo.framework/Versions/Current is usually a symlink to
      //   Foo.framework/Versions/A)
      // - a framework can be linked both explicitly on the linker
      //   command line and implicitly as a reexport from a different
      //   framework. The re-export will usually point to the tbd file
      //   in Foo.framework/Versions/A/Foo.tbd, while the explicit link will
      //   usually find Foo.framework/Foo.tbd. These are usually symlinks,
      //   but in a --reproduce archive they will be identical but distinct
      //   files.
      // In the first case, *semantically distinct* DylibFiles will have the
      // same installName.
      int64_t &ordinal = ordinalForInstallName[dylibFile->installName];
      if (ordinal) {
        // Already emitted a load command for this installName; just reuse
        // its ordinal for this file.
        dylibFile->ordinal = ordinal;
        continue;
      }

      ordinal = dylibFile->ordinal = dylibOrdinal++;
      // Weak-import if requested on the command line or if every reference
      // to this dylib is itself weak.
      LoadCommandType lcType =
          dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
              ? LC_LOAD_WEAK_DYLIB
              : LC_LOAD_DYLIB;
      in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->installName,
                                              dylibFile->compatibilityVersion,
                                              dylibFile->currentVersion));

      if (dylibFile->reexport)
        in.header->addLoadCommand(
            make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->installName));
    }
  }

  if (functionStartsSection)
    in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
  if (dataInCodeSection)
    in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
  if (codeSignatureSection)
    in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection));

  // -headerpad_max_install_names: reserve enough header padding for every
  // dylib's install name to later be rewritten to the maximum path length.
  const uint32_t MACOS_MAXPATHLEN = 1024;
  config->headerPad = std::max(
      config->headerPad, (config->headerPadMaxInstallNames
                              ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
                              : 0));
}
|
|
|
|
|
2020-05-06 07:37:34 +08:00
|
|
|
// Sorting only can happen once all outputs have been collected. Here we sort
|
|
|
|
// segments, output sections within each segment, and input sections within each
|
|
|
|
// output segment.
|
|
|
|
static void sortSegmentsAndSections() {
|
2021-03-26 02:39:44 +08:00
|
|
|
TimeTraceScope timeScope("Sort segments and sections");
|
2021-05-26 02:57:18 +08:00
|
|
|
sortOutputSegments();
|
2020-05-06 07:37:34 +08:00
|
|
|
|
|
|
|
DenseMap<const InputSection *, size_t> isecPriorities =
|
2022-03-24 01:21:34 +08:00
|
|
|
priorityBuilder.buildInputSectionPriorities();
|
2020-05-06 07:37:34 +08:00
|
|
|
|
|
|
|
uint32_t sectionIndex = 0;
|
|
|
|
for (OutputSegment *seg : outputSegments) {
|
2021-05-26 02:57:18 +08:00
|
|
|
seg->sortOutputSections();
|
2021-12-29 08:01:01 +08:00
|
|
|
// References from thread-local variable sections are treated as offsets
|
|
|
|
// relative to the start of the thread-local data memory area, which
|
|
|
|
// is initialized via copying all the TLV data sections (which are all
|
|
|
|
// contiguous). If later data sections require a greater alignment than
|
|
|
|
// earlier ones, the offsets of data within those sections won't be
|
|
|
|
// guaranteed to aligned unless we normalize alignments. We therefore use
|
|
|
|
// the largest alignment for all TLV data sections.
|
|
|
|
uint32_t tlvAlign = 0;
|
|
|
|
for (const OutputSection *osec : seg->getSections())
|
|
|
|
if (isThreadLocalData(osec->flags) && osec->align > tlvAlign)
|
|
|
|
tlvAlign = osec->align;
|
|
|
|
|
2021-01-19 23:44:42 +08:00
|
|
|
for (OutputSection *osec : seg->getSections()) {
|
2020-05-06 07:37:34 +08:00
|
|
|
// Now that the output sections are sorted, assign the final
|
|
|
|
// output section indices.
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 15:03:24 +08:00
|
|
|
if (!osec->isHidden())
|
|
|
|
osec->index = ++sectionIndex;
|
2021-12-29 08:01:01 +08:00
|
|
|
if (isThreadLocalData(osec->flags)) {
|
|
|
|
if (!firstTLVDataSection)
|
|
|
|
firstTLVDataSection = osec;
|
|
|
|
osec->align = tlvAlign;
|
|
|
|
}
|
2021-01-09 07:47:40 +08:00
|
|
|
|
2020-05-06 07:37:34 +08:00
|
|
|
if (!isecPriorities.empty()) {
|
2021-05-26 02:57:16 +08:00
|
|
|
if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
|
2020-05-06 07:37:34 +08:00
|
|
|
llvm::stable_sort(merged->inputs,
|
|
|
|
[&](InputSection *a, InputSection *b) {
|
|
|
|
return isecPriorities[a] > isecPriorities[b];
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
template <class LP> void Writer::createOutputSections() {
|
2021-03-26 02:39:44 +08:00
|
|
|
TimeTraceScope timeScope("Create output sections");
|
2020-05-02 07:29:06 +08:00
|
|
|
// First, create hidden sections
|
|
|
|
stringTableSection = make<StringTableSection>();
|
2021-04-03 06:46:18 +08:00
|
|
|
symtabSection = makeSymtabSection<LP>(*stringTableSection);
|
2020-09-05 09:02:07 +08:00
|
|
|
indirectSymtabSection = make<IndirectSymtabSection>();
|
2021-03-05 22:07:58 +08:00
|
|
|
if (config->adhocCodesign)
|
2021-01-07 10:11:44 +08:00
|
|
|
codeSignatureSection = make<CodeSignatureSection>();
|
2021-06-16 09:38:29 +08:00
|
|
|
if (config->emitDataInCodeInfo)
|
|
|
|
dataInCodeSection = make<DataInCodeSection>();
|
2021-03-23 05:38:52 +08:00
|
|
|
if (config->emitFunctionStarts)
|
|
|
|
functionStartsSection = make<FunctionStartsSection>();
|
2021-04-17 04:46:45 +08:00
|
|
|
if (config->emitBitcodeBundle)
|
|
|
|
make<BitcodeBundleSection>();
|
2020-04-29 07:58:22 +08:00
|
|
|
|
|
|
|
switch (config->outputType) {
|
|
|
|
case MH_EXECUTE:
|
2020-05-02 07:29:06 +08:00
|
|
|
make<PageZeroSection>();
|
2020-04-29 07:58:22 +08:00
|
|
|
break;
|
|
|
|
case MH_DYLIB:
|
2020-09-01 14:23:37 +08:00
|
|
|
case MH_BUNDLE:
|
2020-04-29 07:58:22 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("unhandled output file type");
|
|
|
|
}
|
2020-04-28 03:50:59 +08:00
|
|
|
|
2021-05-26 02:57:16 +08:00
|
|
|
// Then add input sections to output sections.
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
for (ConcatInputSection *isec : inputSections) {
|
|
|
|
if (isec->shouldOmitFromOutput())
|
|
|
|
continue;
|
[lld-macho] Have ICF operate on all sections at once
ICF previously operated only within a given OutputSection. We would
merge all CFStrings first, then merge all regular code sections in a
second phase. This worked fine since CFStrings would never reference
regular `__text` sections. However, I would like to expand ICF to merge
functions that reference unwind info. Unwind info references the LSDA
section, which can in turn reference the `__text` section, so we cannot
perform ICF in phases.
In order to have ICF operate on InputSections spanning multiple
OutputSections, we need a way to distinguish InputSections that are
destined for different OutputSections, so that we don't fold across
section boundaries. We achieve this by creating OutputSections early,
and setting `InputSection::parent` to point to them. This is what
LLD-ELF does. (This change should also make it easier to implement the
`section$start$` symbols.)
This diff also folds InputSections w/o checking their flags, which I
think is the right behavior -- if they are destined for the same
OutputSection, they will have the same flags in the output (even if
their input flags differ). I.e. the `parent` pointer check subsumes the
`flags` check. In practice this has nearly no effect (ICF did not become
any more effective on chromium_framework).
I've also updated ICF.cpp's block comment to better reflect its current
status.
Reviewed By: #lld-macho, smeenai
Differential Revision: https://reviews.llvm.org/D105641
2021-07-18 01:42:26 +08:00
|
|
|
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
osec->addInput(isec);
|
|
|
|
osec->inputOrder =
|
|
|
|
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
|
2020-04-22 04:37:57 +08:00
|
|
|
}
|
2020-05-06 08:25:58 +08:00
|
|
|
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact can induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
// Once all the inputs are added, we can finalize the output section
|
|
|
|
// properties and create the corresponding output segments.
|
2021-05-26 02:57:16 +08:00
|
|
|
for (const auto &it : concatOutputSections) {
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 15:03:24 +08:00
|
|
|
StringRef segname = it.first.first;
|
2021-05-26 02:57:16 +08:00
|
|
|
ConcatOutputSection *osec = it.second;
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
assert(segname != segment_names::ld);
|
2022-06-13 09:56:45 +08:00
|
|
|
if (osec->isNeeded()) {
|
|
|
|
// See comment in ObjFile::splitEhFrames()
|
|
|
|
if (osec->name == section_names::ehFrame &&
|
|
|
|
segname == segment_names::text)
|
|
|
|
osec->align = target->wordSize;
|
|
|
|
|
[lld-macho] Have ICF operate on all sections at once
ICF previously operated only within a given OutputSection. We would
merge all CFStrings first, then merge all regular code sections in a
second phase. This worked fine since CFStrings would never reference
regular `__text` sections. However, I would like to expand ICF to merge
functions that reference unwind info. Unwind info references the LSDA
section, which can in turn reference the `__text` section, so we cannot
perform ICF in phases.
In order to have ICF operate on InputSections spanning multiple
OutputSections, we need a way to distinguish InputSections that are
destined for different OutputSections, so that we don't fold across
section boundaries. We achieve this by creating OutputSections early,
and setting `InputSection::parent` to point to them. This is what
LLD-ELF does. (This change should also make it easier to implement the
`section$start$` symbols.)
This diff also folds InputSections w/o checking their flags, which I
think is the right behavior -- if they are destined for the same
OutputSection, they will have the same flags in the output (even if
their input flags differ). I.e. the `parent` pointer check subsumes the
`flags` check. In practice this has nearly no effect (ICF did not become
any more effective on chromium_framework).
I've also updated ICF.cpp's block comment to better reflect its current
status.
Reviewed By: #lld-macho, smeenai
Differential Revision: https://reviews.llvm.org/D105641
2021-07-18 01:42:26 +08:00
|
|
|
getOrCreateOutputSegment(segname)->addOutputSection(osec);
|
2022-06-13 09:56:45 +08:00
|
|
|
}
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 15:03:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (SyntheticSection *ssec : syntheticSections) {
|
2021-05-26 02:57:16 +08:00
|
|
|
auto it = concatOutputSections.find({ssec->segname, ssec->name});
|
2021-10-28 02:58:15 +08:00
|
|
|
// We add all LinkEdit sections here because we don't know if they are
|
|
|
|
// needed until their finalizeContents() methods get called later. While
|
|
|
|
// this means that we add some redundant sections to __LINKEDIT, there is
|
|
|
|
// is no redundancy in the output, as we do not emit section headers for
|
|
|
|
// any LinkEdit sections.
|
|
|
|
if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) {
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact can induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
if (it == concatOutputSections.end()) {
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 15:03:24 +08:00
|
|
|
getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact can induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
} else {
|
2021-07-02 08:33:55 +08:00
|
|
|
fatal("section from " +
|
|
|
|
toString(it->second->firstSection()->getFile()) +
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact can induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
" conflicts with synthetic section " + ssec->segname + "," +
|
|
|
|
ssec->name);
|
|
|
|
}
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 15:03:24 +08:00
|
|
|
}
|
2020-05-06 08:25:58 +08:00
|
|
|
}
|
2021-03-15 06:35:27 +08:00
|
|
|
|
|
|
|
// dyld requires __LINKEDIT segment to always exist (even if empty).
|
|
|
|
linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
|
|
|
|
}
|
|
|
|
|
2021-04-08 07:55:45 +08:00
|
|
|
// Finalize section contents and assign virtual addresses / file offsets for
// every segment except __LINKEDIT. __LINKEDIT is deferred to
// finalizeLinkEditSegment() because its contents encode the addresses of all
// other segments and therefore cannot be built until those are fixed.
void Writer::finalizeAddresses() {
  TimeTraceScope timeScope("Finalize addresses");
  uint64_t pageSize = target->getPageSize();

  // We could parallelize this loop, but local benchmarking indicates it is
  // faster to do it all in the main thread.
  for (OutputSegment *seg : outputSegments) {
    if (seg == linkEditSegment)
      continue;
    for (OutputSection *osec : seg->getSections()) {
      if (!osec->isNeeded())
        continue;
      // Other kinds of OutputSections have already been finalized.
      if (auto concatOsec = dyn_cast<ConcatOutputSection>(osec))
        concatOsec->finalizeContents();
    }
  }

  // Ensure that segments (and the sections they contain) are allocated
  // addresses in ascending order, which dyld requires.
  //
  // Note that at this point, __LINKEDIT sections are empty, but we need to
  // determine addresses of other segments/sections before generating its
  // contents.
  for (OutputSegment *seg : outputSegments) {
    if (seg == linkEditSegment)
      continue;
    seg->addr = addr;
    assignAddresses(seg);
    // codesign / libstuff checks for segment ordering by verifying that
    // `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
    // (instead of after) computing fileSize to ensure that the segments are
    // contiguous. We handle addr / vmSize similarly for the same reason.
    fileOff = alignTo(fileOff, pageSize);
    addr = alignTo(addr, pageSize);
    seg->vmSize = addr - seg->addr;
    seg->fileSize = fileOff - seg->fileOff;
    seg->assignAddressesToStartEndSymbols();
  }
}
|
|
|
|
|
|
|
|
// Build the contents of every __LINKEDIT section (in parallel via the thread
// pool), then assign the segment's final address, vmSize, and fileSize. Must
// run after finalizeAddresses(): the __LINKEDIT payloads (rebase/bind opcodes,
// exports, symtab, ...) reference the addresses of the other segments.
void Writer::finalizeLinkEditSegment() {
  TimeTraceScope timeScope("Finalize __LINKEDIT segment");
  // Fill __LINKEDIT contents.
  std::vector<LinkEditSection *> linkEditSections{
      in.rebase,
      in.binding,
      in.weakBinding,
      in.lazyBinding,
      in.exports,
      symtabSection,
      indirectSymtabSection,
      dataInCodeSection,
      functionStartsSection,
  };
  // Finalize each section on its own worker thread; entries may be null when
  // the corresponding section was never created, hence the `if (osec)` guard.
  SmallVector<std::shared_future<void>> threadFutures;
  threadFutures.reserve(linkEditSections.size());
  for (LinkEditSection *osec : linkEditSections)
    if (osec)
      threadFutures.emplace_back(threadPool.async(
          [](LinkEditSection *osec) { osec->finalizeContents(); }, osec));
  for (std::shared_future<void> &future : threadFutures)
    future.wait();

  // Now that __LINKEDIT is filled out, do a proper calculation of its
  // addresses and offsets.
  linkEditSegment->addr = addr;
  assignAddresses(linkEditSegment);
  // No need to page-align fileOff / addr here since this is the last segment.
  linkEditSegment->vmSize = addr - linkEditSegment->addr;
  linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
}
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2020-04-28 03:50:59 +08:00
|
|
|
// Lay out the needed sections of `seg` at the current `addr` / `fileOff`
// cursors, advancing both cursors past each section. Callers are responsible
// for setting seg->addr beforehand and computing seg->vmSize / fileSize from
// the cursor positions afterwards.
void Writer::assignAddresses(OutputSegment *seg) {
  seg->fileOff = fileOff;

  for (OutputSection *osec : seg->getSections()) {
    if (!osec->isNeeded())
      continue;
    addr = alignTo(addr, osec->align);
    fileOff = alignTo(fileOff, osec->align);
    osec->addr = addr;
    // Zerofill sections occupy virtual address space but no bytes in the file.
    osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
    osec->finalize();
    osec->assignAddressesToStartEndSymbols();

    addr += osec->getSize();
    // getFileSize() is zero for zerofill sections, so fileOff only advances
    // for sections with real file-backed contents.
    fileOff += osec->getFileSize();
  }
}
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
void Writer::openFile() {
|
|
|
|
Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
|
2020-04-28 03:50:59 +08:00
|
|
|
FileOutputBuffer::create(config->outputFile, fileOff,
|
2020-04-03 02:54:05 +08:00
|
|
|
FileOutputBuffer::F_executable);
|
|
|
|
|
|
|
|
if (!bufferOrErr)
|
2022-03-01 10:56:38 +08:00
|
|
|
fatal("failed to open " + config->outputFile + ": " +
|
2020-04-03 02:54:05 +08:00
|
|
|
llvm::toString(bufferOrErr.takeError()));
|
2022-03-01 10:56:38 +08:00
|
|
|
buffer = std::move(*bufferOrErr);
|
|
|
|
in.bufferStart = buffer->getBufferStart();
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void Writer::writeSections() {
|
|
|
|
uint8_t *buf = buffer->getBufferStart();
|
2022-06-09 08:09:48 +08:00
|
|
|
std::vector<const OutputSection *> osecs;
|
2021-03-10 13:41:34 +08:00
|
|
|
for (const OutputSegment *seg : outputSegments)
|
2022-06-09 08:09:48 +08:00
|
|
|
append_range(osecs, seg->getSections());
|
|
|
|
|
|
|
|
parallelForEach(osecs.begin(), osecs.end(), [&](const OutputSection *osec) {
|
|
|
|
osec->writeTo(buf + osec->fileOff);
|
|
|
|
});
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
2021-04-01 03:48:18 +08:00
|
|
|
// In order to utilize multiple cores, we first split the buffer into chunks,
|
|
|
|
// compute a hash for each chunk, and then compute a hash value of the hash
|
|
|
|
// values.
|
2020-10-15 02:03:34 +08:00
|
|
|
// Derive the LC_UUID value from the output contents: hash fixed-size chunks of
// the buffer in parallel, append a hash of the output filename, then hash the
// array of hashes into a single 64-bit digest.
void Writer::writeUuid() {
  TimeTraceScope timeScope("Computing UUID");

  ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
  // Over-decompose (10 chunks per thread) so stragglers don't serialize us.
  unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
  // Round-up integer division
  size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
  std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
  // Leave one slot for filename
  std::vector<uint64_t> hashes(chunks.size() + 1);
  SmallVector<std::shared_future<void>> threadFutures;
  threadFutures.reserve(chunks.size());
  for (size_t i = 0; i < chunks.size(); ++i)
    threadFutures.emplace_back(threadPool.async(
        [&](size_t j) { hashes[j] = xxHash64(chunks[j]); }, i));
  for (std::shared_future<void> &future : threadFutures)
    future.wait();
  // Append the output filename so that identical binaries with different names
  // don't get the same UUID.
  hashes[chunks.size()] = xxHash64(sys::path::filename(config->finalOutput));
  // Hash the per-chunk hashes (viewed as raw bytes) into the final digest.
  uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
                              hashes.size() * sizeof(uint64_t)});
  uuidCommand->writeUuid(digest);
}
|
|
|
|
|
2021-01-07 10:11:44 +08:00
|
|
|
void Writer::writeCodeSignature() {
|
2022-06-22 22:58:33 +08:00
|
|
|
if (codeSignatureSection) {
|
|
|
|
TimeTraceScope timeScope("Write code signature");
|
2021-01-07 10:11:44 +08:00
|
|
|
codeSignatureSection->writeHashes(buffer->getBufferStart());
|
2022-06-22 22:58:33 +08:00
|
|
|
}
|
2021-01-07 10:11:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-26 02:39:44 +08:00
|
|
|
void Writer::writeOutputFile() {
|
|
|
|
TimeTraceScope timeScope("Write output file");
|
|
|
|
openFile();
|
2022-06-16 07:17:07 +08:00
|
|
|
reportPendingUndefinedSymbols();
|
2021-03-26 02:39:44 +08:00
|
|
|
if (errorCount())
|
|
|
|
return;
|
|
|
|
writeSections();
|
|
|
|
writeUuid();
|
|
|
|
writeCodeSignature();
|
|
|
|
|
|
|
|
if (auto e = buffer->commit())
|
|
|
|
error("failed to write to the output file: " + toString(std::move(e)));
|
|
|
|
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
// Drive the whole link pipeline: scan symbols/relocations, build and sort the
// output segments/sections, lay out addresses, and write the file. The phase
// ordering here is load-bearing; see the inline comments.
template <class LP> void Writer::run() {
  treatSpecialUndefineds();
  if (config->entry && !isa<Undefined>(config->entry))
    prepareBranchTarget(config->entry);

  // Canonicalization of all pointers to InputSections should be handled by
  // these two scan* methods. I.e. from this point onward, for all live
  // InputSections, we should have `isec->canonical() == isec`.
  scanSymbols();
  scanRelocations();

  // Do not proceed if there was an undefined symbol.
  reportPendingUndefinedSymbols();
  if (errorCount())
    return;

  if (in.stubHelper->isNeeded())
    in.stubHelper->setup();
  // At this point, we should know exactly which output sections are needed,
  // courtesy of scanSymbols() and scanRelocations().
  createOutputSections<LP>();

  // After this point, we create no new segments; HOWEVER, we might
  // yet create branch-range extension thunks for architectures whose
  // hardware call instructions have limited range, e.g., ARM(64).
  // The thunks are created as InputSections interspersed among
  // the ordinary __TEXT,_text InputSections.
  sortSegmentsAndSections();
  createLoadCommands<LP>();
  finalizeAddresses();
  // The map file only needs finalized addresses, so emit it on a worker thread
  // while the (serial) __LINKEDIT finalization proceeds on this one.
  threadPool.async([&] {
    if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
      timeTraceProfilerInitialize(config->timeTraceGranularity, "writeMapFile");
    writeMapFile();
    if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
      timeTraceProfilerFinishThread();
  });
  finalizeLinkEditSegment();
  writeOutputFile();
}
|
|
|
|
|
2021-04-03 06:46:18 +08:00
|
|
|
template <class LP> void macho::writeResult() { Writer().run<LP>(); }
|
2020-04-22 04:37:57 +08:00
|
|
|
|
2021-10-31 07:35:30 +08:00
|
|
|
// Reset writer-owned global state between links (for repeated in-process
// invocations). Currently only the LC_*_DYLIB instance counter needs clearing.
void macho::resetWriter() {
  LCDylib::resetInstanceCount();
}
|
|
|
|
|
2021-05-04 06:31:23 +08:00
|
|
|
void macho::createSyntheticSections() {
|
|
|
|
in.header = make<MachHeaderSection>();
|
2022-02-02 02:45:38 +08:00
|
|
|
if (config->dedupLiterals)
|
2021-06-29 10:22:21 +08:00
|
|
|
in.cStringSection = make<DeduplicatedCStringSection>();
|
2022-02-02 02:45:38 +08:00
|
|
|
else
|
2021-06-29 10:22:21 +08:00
|
|
|
in.cStringSection = make<CStringSection>();
|
[lld-macho] Deduplicate fixed-width literals
Conceptually, the implementation is pretty straightforward: we put each
literal value into a hashtable, and then write out the keys of that
hashtable at the end.
In contrast with ELF, the Mach-O format does not support variable-length
literals that aren't strings. Its literals are either 4, 8, or 16 bytes
in length. LLD-ELF dedups its literals via sorting + uniq'ing, but since
we don't need to worry about overly-long values, we should be able to do
a faster job by just hashing.
That said, the implementation right now is far from optimal, because we
add to those hashtables serially. To parallelize this, we'll need a
basic concurrent hashtable (only needs to support concurrent writes w/o
interleave reads), which shouldn't be to hard to implement, but I'd like
to punt on it for now.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 4.27 4.39 4.315 4.3225 0.033225703
+ 20 4.36 4.82 4.44 4.4845 0.13152846
Difference at 95.0% confidence
0.162 +/- 0.0613971
3.74783% +/- 1.42041%
(Student's t, pooled s = 0.0959262)
This corresponds to binary size savings of 2MB out of 335MB, or 0.6%.
It's not a great tradeoff as-is, but as mentioned our implementation can
be signficantly optimized, and literal dedup will unlock more
opportunities for ICF to identify identical structures that reference
the same literals.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D103113
2021-06-12 07:49:50 +08:00
|
|
|
in.wordLiteralSection =
|
|
|
|
config->dedupLiterals ? make<WordLiteralSection>() : nullptr;
|
2020-09-06 01:55:33 +08:00
|
|
|
in.rebase = make<RebaseSection>();
|
2020-07-03 12:19:55 +08:00
|
|
|
in.binding = make<BindingSection>();
|
2020-08-25 12:57:59 +08:00
|
|
|
in.weakBinding = make<WeakBindingSection>();
|
2020-08-28 06:54:42 +08:00
|
|
|
in.lazyBinding = make<LazyBindingSection>();
|
2020-08-28 06:59:15 +08:00
|
|
|
in.exports = make<ExportSection>();
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
in.got = make<GotSection>();
|
2020-08-13 10:50:09 +08:00
|
|
|
in.tlvPointers = make<TlvPointerSection>();
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.plt.got).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
in.lazyPointers = make<LazyPointerSection>();
|
|
|
|
in.stubs = make<StubsSection>();
|
|
|
|
in.stubHelper = make<StubHelperSection>();
|
2021-04-16 09:14:33 +08:00
|
|
|
in.unwindInfo = makeUnwindInfoSection();
|
2021-06-14 07:43:36 +08:00
|
|
|
|
|
|
|
// This section contains space for just a single word, and will be used by
|
|
|
|
// dyld to cache an address to the image loader it uses.
|
2022-01-21 03:53:18 +08:00
|
|
|
uint8_t *arr = bAlloc().Allocate<uint8_t>(target->wordSize);
|
2021-06-14 11:30:33 +08:00
|
|
|
memset(arr, 0, target->wordSize);
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
in.imageLoaderCache = makeSyntheticInputSection(
|
|
|
|
segment_names::data, section_names::data, S_REGULAR,
|
2021-06-14 11:30:33 +08:00
|
|
|
ArrayRef<uint8_t>{arr, target->wordSize},
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
/*align=*/target->wordSize);
|
2021-06-14 07:43:36 +08:00
|
|
|
// References from dyld are not visible to us, so ensure this section is
|
|
|
|
// always treated as live.
|
|
|
|
in.imageLoaderCache->live = true;
|
[lld-macho] Support calls to functions in dylibs
Summary:
This diff implements lazy symbol binding -- very similar to the PLT
mechanism in ELF.
ELF's .plt section is broken up into two sections in Mach-O:
StubsSection and StubHelperSection. Calls to functions in dylibs will
end up calling into StubsSection, which contains indirect jumps to
addresses stored in the LazyPointerSection (the counterpart to ELF's
.got.plt).
Initially, the LazyPointerSection contains addresses that point into one
of the entry points in the middle of the StubHelperSection. The code in
StubHelperSection will push on the stack an offset into the
LazyBindingSection. The push is followed by a jump to the beginning of
the StubHelperSection (similar to PLT0), which then calls into
dyld_stub_binder. dyld_stub_binder is a non-lazily bound symbol, so this
call looks it up in the GOT.
The stub binder will look up the bind opcodes in the LazyBindingSection
at the given offset. The bind opcodes will tell the binder to update the
address in the LazyPointerSection to point to the symbol, so that
subsequent calls don't have to redo the symbol resolution. The binder
will then jump to the resolved symbol.
Depends on D78269.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78270
2020-05-06 08:38:10 +08:00
|
|
|
}
|
2021-01-09 07:47:40 +08:00
|
|
|
|
|
|
|
OutputSection *macho::firstTLVDataSection = nullptr;
|
2021-04-03 06:46:18 +08:00
|
|
|
|
|
|
|
template void macho::writeResult<LP64>();
|
|
|
|
template void macho::writeResult<ILP32>();
|