2020-04-03 02:54:05 +08:00
|
|
|
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_MACHO_INPUT_FILES_H
|
|
|
|
#define LLD_MACHO_INPUT_FILES_H
|
|
|
|
|
2020-05-22 06:26:35 +08:00
|
|
|
#include "MachOStructs.h"
|
2021-05-04 06:31:23 +08:00
|
|
|
#include "Target.h"
|
2020-05-22 06:26:35 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "lld/Common/LLVM.h"
|
2020-08-19 06:46:21 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2021-11-05 00:42:57 +08:00
|
|
|
#include "llvm/ADT/CachedHashString.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "llvm/ADT/DenseSet.h"
|
2020-12-15 06:59:22 +08:00
|
|
|
#include "llvm/ADT/SetVector.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_via_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_via_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "llvm/Object/Archive.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
2021-04-06 00:59:50 +08:00
|
|
|
#include "llvm/TextAPI/TextAPIReader.h"
|
[lld-macho][re-land] Support .subsections_via_symbols
Summary:
This diff restores and builds upon @pcc and @ruiu's initial work on
subsections.
The .subsections_via_symbols directive indicates we can split each
section along symbol boundaries, unless those symbols have been marked
with `.alt_entry`.
We exercise this functionality in our tests by using order files that
rearrange those symbols.
Depends on D79668.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: thakis, llvm-commits, pcc, ruiu
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79926
2020-05-19 23:46:07 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2020-10-27 10:18:29 +08:00
|
|
|
namespace llvm {
|
|
|
|
namespace lto {
|
|
|
|
class InputFile;
|
|
|
|
} // namespace lto
|
2021-03-12 02:28:08 +08:00
|
|
|
namespace MachO {
|
|
|
|
class InterfaceFile;
|
|
|
|
} // namespace MachO
|
2020-11-29 11:38:27 +08:00
|
|
|
class TarWriter;
|
2020-10-27 10:18:29 +08:00
|
|
|
} // namespace llvm
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace macho {
|
|
|
|
|
2021-04-21 20:41:14 +08:00
|
|
|
struct PlatformInfo;
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
class ConcatInputSection;
|
2020-04-03 02:54:05 +08:00
|
|
|
class Symbol;
|
2021-11-16 02:46:59 +08:00
|
|
|
class Defined;
|
2020-04-03 02:54:05 +08:00
|
|
|
struct Reloc;
|
2020-12-17 08:14:57 +08:00
|
|
|
enum class RefState : uint8_t;
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2020-11-29 11:38:27 +08:00
|
|
|
// If --reproduce option is given, all input files are written
|
|
|
|
// to this tar archive.
|
|
|
|
extern std::unique_ptr<llvm::TarWriter> tar;
|
|
|
|
|
[lld-macho][re-land] Support .subsections_via_symbols
Summary:
This diff restores and builds upon @pcc and @ruiu's initial work on
subsections.
The .subsections_via_symbols directive indicates we can split each
section along symbol boundaries, unless those symbols have been marked
with `.alt_entry`.
We exercise this functionality in our tests by using order files that
rearrange those symbols.
Depends on D79668.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: thakis, llvm-commits, pcc, ruiu
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79926
2020-05-19 23:46:07 +08:00
|
|
|
// If .subsections_via_symbols is set, each InputSection will be split along
|
2021-04-01 06:23:19 +08:00
|
|
|
// symbol boundaries. The field offset represents the offset of the subsection
|
|
|
|
// from the start of the original pre-split InputSection.
|
2021-11-05 11:55:31 +08:00
|
|
|
struct Subsection {
|
|
|
|
// Offset of this subsection from the start of the original, pre-split
// InputSection. Defaults to 0.
uint64_t offset = 0;
|
|
|
|
// NOTE(review): presumably the InputSection that holds this subsection's
// contents -- confirm against the code that fills it in. Null by default.
InputSection *isec = nullptr;
|
|
|
|
};
|
|
|
|
|
|
|
|
using Subsections = std::vector<Subsection>;
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
class InputFile;
|
2021-11-05 11:55:31 +08:00
|
|
|
|
|
|
|
struct Section {
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
// NOTE(review): presumably the input file this section belongs to -- confirm
// against the code that constructs Sections.
InputFile *file;
|
|
|
|
// Segment name, exactly as passed to the constructor.
StringRef segname;
|
|
|
|
// Section name, exactly as passed to the constructor.
StringRef name;
|
|
|
|
// NOTE(review): presumably the flags word from the section header -- confirm.
uint32_t flags;
|
|
|
|
// NOTE(review): presumably the section's address as recorded in the input
// file -- confirm.
uint64_t addr;
|
2021-11-05 11:55:31 +08:00
|
|
|
// The subsections of this section. The constructor does not populate this,
// so it is empty right after construction.
Subsections subsections;
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
|
|
|
|
// Stores each argument in the member of the same name. `subsections` is not
// in the initializer list and is therefore default-constructed (empty).
Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
|
|
|
|
uint64_t addr)
|
|
|
|
: file(file), segname(segname), name(name), flags(flags), addr(addr) {}
|
|
|
|
// Ensure pointers to Sections are never invalidated.
// All four copy/move operations are deleted, so a Section can be neither
// copied nor moved once it has been constructed.
|
|
|
|
Section(const Section &) = delete;
|
|
|
|
Section &operator=(const Section &) = delete;
|
|
|
|
Section(Section &&) = delete;
|
|
|
|
Section &operator=(Section &&) = delete;
|
2021-04-01 06:23:19 +08:00
|
|
|
};
|
[lld-macho][re-land] Support .subsections_via_symbols
Summary:
This diff restores and builds upon @pcc and @ruiu's initial work on
subsections.
The .subsections_via_symbols directive indicates we can split each
section along symbol boundaries, unless those symbols have been marked
with `.alt_entry`.
We exercise this functionality in our tests by using order files that
rearrange those symbols.
Depends on D79668.
Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee
Reviewed By: smeenai
Subscribers: thakis, llvm-commits, pcc, ruiu
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79926
2020-05-19 23:46:07 +08:00
|
|
|
|
2022-01-12 23:47:04 +08:00
|
|
|
// Represents a call graph profile edge: `count` calls were recorded going
// from the symbol at `fromIndex` (the caller) to the symbol at `toIndex`
// (the callee).
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Stores the three values verbatim; no validation is performed here.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
class InputFile {
|
|
|
|
public:
|
|
|
|
// Tag identifying the concrete InputFile subclass. It is fixed at
// construction, returned by kind(), and compared by the subclasses'
// classof() functions.
enum Kind {
|
|
|
|
// ObjFile (.o file).
ObjKind,
|
2020-10-27 10:18:29 +08:00
|
|
|
// OpaqueFile (command-line -sectcreate file).
OpaqueKind,
|
2020-04-22 04:37:57 +08:00
|
|
|
// NOTE(review): the classes for the remaining kinds are not visible in this
// part of the header; presumably dylib/.tbd, archive and bitcode inputs
// respectively -- confirm against their classof() definitions.
DylibKind,
|
2020-05-15 03:43:51 +08:00
|
|
|
ArchiveKind,
|
2020-10-27 10:18:29 +08:00
|
|
|
BitcodeKind,
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
virtual ~InputFile() = default;
|
|
|
|
// Returns the kind tag this file was constructed with.
Kind kind() const { return fileKind; }
|
2020-08-19 06:46:21 +08:00
|
|
|
// Returns the stored name. The MemoryBufferRef constructor sets it to the
// buffer's identifier.
StringRef getName() const { return name; }
|
2021-10-31 07:35:30 +08:00
|
|
|
// Resets the shared id counter to 0, so the next file built through the
// MemoryBufferRef constructor receives id 0.
static void resetIdCount() { idCount = 0; }
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
MemoryBufferRef mb;
|
2020-12-02 08:00:48 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
std::vector<Symbol *> symbols;
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-04 08:53:29 +08:00
|
|
|
std::vector<Section *> sections;
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2020-12-02 08:00:48 +08:00
|
|
|
// If not empty, this stores the name of the archive containing this file.
|
|
|
|
// We use this string for creating error messages.
|
|
|
|
std::string archiveName;
|
|
|
|
|
2022-01-20 02:14:49 +08:00
|
|
|
// Provides an easy way to sort InputFiles deterministically.
|
|
|
|
const int id;
|
|
|
|
|
|
|
|
// True if this is a lazy ObjFile or BitcodeFile.
|
|
|
|
bool lazy = false;
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
protected:
|
2022-01-20 02:14:49 +08:00
|
|
|
// Creates an input file of the given `kind` backed by the buffer `mb`.
// `id` receives the current value of the static counter, which is then
// incremented, so files built through this constructor get increasing ids
// until resetIdCount() is called.
// The name is taken from the buffer's identifier.
InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
|
|
|
|
: mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
|
|
|
|
name(mb.getBufferIdentifier()) {}
|
2020-08-19 06:46:21 +08:00
|
|
|
|
2021-03-12 02:28:08 +08:00
|
|
|
InputFile(Kind, const llvm::MachO::InterfaceFile &);
|
2020-04-03 02:54:05 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
const Kind fileKind;
|
2020-08-19 06:46:21 +08:00
|
|
|
const StringRef name;
|
2020-12-02 06:45:12 +08:00
|
|
|
|
|
|
|
static int idCount;
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// .o file
|
2021-06-14 07:43:37 +08:00
|
|
|
class ObjFile final : public InputFile {
|
2020-04-03 02:54:05 +08:00
|
|
|
public:
|
2022-01-20 02:14:49 +08:00
|
|
|
// Defined out of line. NOTE(review): the parameter names match the members
// `modTime`, `archiveName` and `lazy`, so presumably they initialize those --
// confirm in the definition.
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
|
|
|
|
bool lazy = false);
|
2021-12-11 14:01:14 +08:00
|
|
|
// Returns a view of data_in_code_entry records. NOTE(review): the source of
// the records is not visible here -- see the definition.
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
|
2022-01-20 02:14:49 +08:00
|
|
|
// Defined out of line. `LP` is a policy type; the private helpers below use
// its nested `section` and `nlist` types.
template <class LP> void parse();
|
2021-12-11 14:01:14 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
// True iff `f` was constructed with kind ObjKind.
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_via_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_via_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
|
|
|
|
llvm::DWARFUnit *compileUnit = nullptr;
|
2020-12-02 06:45:11 +08:00
|
|
|
const uint32_t modTime;
|
[lld-macho] Implement cstring deduplication
Our implementation draws heavily from LLD-ELF's, which in turn delegates
its string deduplication to llvm-mc's StringTableBuilder. The messiness of
this diff is largely due to the fact that we've previously assumed that
all InputSections get concatenated together to form the output. This is
no longer true with CStringInputSections, which split their contents into
StringPieces. StringPieces are much more lightweight than InputSections,
which is important as we create a lot of them. They may also overlap in
the output, which makes it possible for strings to be tail-merged. In
fact, the initial version of this diff implemented tail merging, but
I've dropped it for reasons I'll explain later.
**Alignment Issues**
Mergeable cstring literals are found under the `__TEXT,__cstring`
section. In contrast to ELF, which puts strings that need different
alignments into different sections, clang's Mach-O backend puts them all
in one section. Strings that need to be aligned have the `.p2align`
directive emitted before them, which simply translates into zero padding
in the object file.
I *think* ld64 extracts the desired per-string alignment from this data
by preserving each string's offset from the last section-aligned
address. I'm not entirely certain since it doesn't seem consistent about
doing this; but perhaps this can be chalked up to cases where ld64 has
to deduplicate strings with different offset/alignment combos -- it
seems to pick one of their alignments to preserve. This doesn't seem
correct in general; we can in fact induce ld64 to produce a crashing
binary just by linking in an additional object file that only contains
cstrings and no code. See PR50563 for details.
Moreover, this scheme seems rather inefficient: since unaligned and
aligned strings are all put in the same section, which has a single
alignment value, it doesn't seem possible to tell whether a given string
doesn't have any alignment requirements. Preserving offset+alignments
for strings that don't need it is wasteful.
In practice, the crashes seen so far seem to stem from x86_64 SIMD
operations on cstrings. X86_64 requires SIMD accesses to be
16-byte-aligned. So for now, I'm thinking of just aligning all strings
to 16 bytes on x86_64. This is indeed wasteful, but implementation-wise
it's simpler than preserving per-string alignment+offsets. It also
avoids the aforementioned crash after deduplication of
differently-aligned strings. Finally, the overhead is not huge: using
16-byte alignment (vs no alignment) is only a 0.5% size overhead when
linking chromium_framework.
With these alignment requirements, it doesn't make sense to attempt tail
merging -- most strings will not be eligible since their overlaps aren't
likely to start at a 16-byte boundary. Tail-merging (with alignment) for
chromium_framework only improves size by 0.3%.
It's worth noting that LLD-ELF only does tail merging at `-O2`. By
default (at `-O1`), it just deduplicates w/o tail merging. @thakis has
also mentioned that they saw it regress compressed size in some cases
and therefore turned it off. `ld64` does not seem to do tail merging at
all.
**Performance Numbers**
CString deduplication reduces chromium_framework from 250MB to 242MB, or
about a 3.2% reduction.
Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W:
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.99 4.14 4.015 4.0365 0.0492336
Difference at 95.0% confidence
0.0865 +/- 0.027245
2.18987% +/- 0.689746%
(Student's t, pooled s = 0.0425673)
As expected, cstring merging incurs some non-trivial overhead.
When passing `--no-literal-merge`, it seems that performance is the
same, i.e. the refactoring in this diff didn't cost us.
N Min Max Median Avg Stddev
x 20 3.91 4.03 3.935 3.95 0.034641016
+ 20 3.89 4.02 3.935 3.9435 0.043197831
No difference proven at 95.0% confidence
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D102964
2021-06-08 11:47:12 +08:00
|
|
|
std::vector<ConcatInputSection *> debugSections;
|
2022-01-12 23:47:04 +08:00
|
|
|
std::vector<CallGraphEntry> callGraph;
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_via_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_via_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
|
|
|
|
private:
|
2021-11-16 02:46:59 +08:00
|
|
|
// Null until assigned. NOTE(review): presumably set while parsing sections
// and consumed by registerCompactUnwind() -- confirm in the definitions.
Section *compactUnwindSection = nullptr;
|
|
|
|
|
|
2022-01-20 02:14:49 +08:00
|
|
|
// The helpers below are only declared in this part of the header. They are
// templated on the policy type `LP` or directly on the record type they
// consume, so one declaration covers each record layout.
// NOTE(review): presumably the counterpart of parse() for files whose `lazy`
// flag is set -- confirm.
template <class LP> void parseLazy();
|
2021-11-05 11:55:31 +08:00
|
|
|
// Takes the file's section headers as an array view.
template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
|
2021-04-03 06:46:18 +08:00
|
|
|
// Takes the section headers, the nlist symbol records and a pointer to the
// string table. NOTE(review): `subsectionsViaSymbols` presumably says whether
// sections may be split along symbol boundaries -- confirm in the definition.
template <class LP>
|
|
|
|
void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
|
|
|
|
ArrayRef<typename LP::nlist> nList, const char *strtab,
|
2020-12-02 11:57:37 +08:00
|
|
|
bool subsectionsViaSymbols);
|
2021-04-03 06:46:18 +08:00
|
|
|
// Returns a Symbol for the nlist record `sym` with the given `name`.
// NOTE(review): judging by the name, used for symbols that are not defined
// within a section -- confirm.
template <class NList>
|
|
|
|
Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
|
2021-11-05 11:55:31 +08:00
|
|
|
// Takes all section headers, one specific header, and the Section to work on.
// NOTE(review): presumably reads that header's relocations into the Section --
// confirm.
template <class SectionHeader>
|
|
|
|
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
|
2022-03-17 06:05:32 +08:00
|
|
|
const SectionHeader &, Section &);
|
[lld-macho] Emit STABS symbols for debugging, and drop debug sections
Debug sections contain a large amount of data. In order not to bloat the size
of the final binary, we remove them and instead emit STABS symbols for
`dsymutil` and the debugger to locate their contents in the object files.
With this diff, `dsymutil` is able to locate the debug info. However, we need
a few more features before `lldb` is able to work well with our binaries --
e.g. having `LC_DYSYMTAB` accurately reflect the number of local symbols,
emitting `LC_UUID`, and more. Those will be handled in follow-up diffs.
Note also that the STABS we emit differ slightly from what ld64 does. First, we
emit the path to the source file as one `N_SO` symbol instead of two. (`ld64`
emits one `N_SO` for the dirname and one for the basename.) Second, we do not
emit `N_BNSYM` and `N_ENSYM` STABS to mark the start and end of functions,
because the `N_FUN` STABS already serve that purpose. @clayborg recommended
these changes based on his knowledge of what the debugging tools look for.
Additionally, this current implementation doesn't accurately reflect the size
of function symbols. It uses the size of their containing sections as a proxy,
but that is only accurate if `.subsections_via_symbols` is set, and if there
isn't an `N_ALT_ENTRY` in that particular subsection. I think we have two
options to solve this:
1. We can split up subsections by symbol even if `.subsections_via_symbols`
is not set, but include constraints to ensure those subsections retain
their order in the final output. This is `ld64`'s approach.
2. We could just add a `size` field to our `Symbol` class. This seems simpler,
and I'm more inclined toward it, but I'm not sure if there are use cases
that it doesn't handle well. As such I'm punting on the decision for now.
Reviewed By: clayborg
Differential Revision: https://reviews.llvm.org/D89257
2020-12-02 06:45:01 +08:00
|
|
|
// NOTE(review): presumably reads this file's debug info so that STABS symbols
// can be emitted for it -- confirm against the definition.
void parseDebugInfo();
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
// NOTE(review): presumably associates this file's compact unwind entries with
// their function symbols -- confirm against the definition.
void registerCompactUnwind();
|
2020-04-03 02:54:05 +08:00
|
|
|
};
|
|
|
|
|
2020-08-11 09:47:13 +08:00
|
|
|
// command-line -sectcreate file
|
2021-06-14 07:43:37 +08:00
|
|
|
class OpaqueFile final : public InputFile {
|
2020-08-11 09:47:13 +08:00
|
|
|
public:
|
2020-12-02 11:57:37 +08:00
|
|
|
OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
|
2020-08-11 09:47:13 +08:00
|
|
|
static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
|
|
|
|
};
|
|
|
|
|
2021-06-07 09:52:23 +08:00
|
|
|
// .dylib or .tbd file
|
2021-06-14 07:43:37 +08:00
|
|
|
class DylibFile final : public InputFile {
|
2020-04-22 04:37:57 +08:00
|
|
|
public:
|
2020-04-24 11:16:49 +08:00
|
|
|
// Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
|
|
|
|
// symbols in those sub-libraries will be available under the umbrella
|
|
|
|
// library's namespace. Those sub-libraries can also have their own
|
|
|
|
// re-exports. When loading a re-exported dylib, `umbrella` should be set to
|
|
|
|
// the root dylib to ensure symbols in the child library are correctly bound
|
|
|
|
// to the root. On the other hand, if a dylib is being directly loaded
|
|
|
|
// (through an -lfoo flag), then `umbrella` should be a nullptr.
|
2021-06-07 09:52:23 +08:00
|
|
|
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
|
|
|
|
bool isBundleLoader = false);
|
|
|
|
explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
|
|
|
|
DylibFile *umbrella = nullptr,
|
|
|
|
bool isBundleLoader = false);
|
|
|
|
|
2021-06-03 09:53:44 +08:00
|
|
|
void parseLoadCommands(MemoryBufferRef mb);
|
2021-06-01 02:59:48 +08:00
|
|
|
void parseReexports(const llvm::MachO::InterfaceFile &interface);
|
2022-02-02 02:45:38 +08:00
|
|
|
bool isReferenced() const { return numReferencedSymbols > 0; }
|
2020-08-19 06:46:21 +08:00
|
|
|
|
2020-04-22 04:37:57 +08:00
|
|
|
static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
|
|
|
|
|
2021-06-07 06:25:28 +08:00
|
|
|
StringRef installName;
|
2021-06-02 04:34:10 +08:00
|
|
|
DylibFile *exportingFile = nullptr;
|
2021-06-03 09:53:44 +08:00
|
|
|
DylibFile *umbrella;
|
2021-06-07 09:52:23 +08:00
|
|
|
SmallVector<StringRef, 2> rpaths;
|
2020-12-16 04:25:15 +08:00
|
|
|
uint32_t compatibilityVersion = 0;
|
|
|
|
uint32_t currentVersion = 0;
|
2021-02-23 02:03:02 +08:00
|
|
|
int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
|
2022-02-02 02:45:38 +08:00
|
|
|
unsigned numReferencedSymbols = 0;
|
2020-12-17 08:14:57 +08:00
|
|
|
RefState refState;
|
2020-04-24 11:16:49 +08:00
|
|
|
bool reexport = false;
|
2021-06-02 23:06:42 +08:00
|
|
|
bool forceNeeded = false;
|
2020-09-19 02:38:15 +08:00
|
|
|
bool forceWeakImport = false;
|
2021-06-01 10:12:35 +08:00
|
|
|
bool deadStrippable = false;
|
|
|
|
bool explicitlyLinked = false;
|
2021-02-23 02:03:02 +08:00
|
|
|
// An executable can be used as a bundle loader that will load the output
|
|
|
|
// file being linked, and that contains symbols referenced, but not
|
|
|
|
// implemented in the bundle. When used like this, it is very similar
|
2022-02-02 02:45:38 +08:00
|
|
|
// to a dylib, so we've used the same class to represent it.
|
2021-02-23 02:03:02 +08:00
|
|
|
bool isBundleLoader;
|
2021-06-05 14:31:40 +08:00
|
|
|
|
|
|
|
private:
|
2021-06-06 03:51:36 +08:00
|
|
|
bool handleLDSymbol(StringRef originalName);
|
|
|
|
void handleLDPreviousSymbol(StringRef name, StringRef originalName);
|
|
|
|
void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
|
2021-12-15 10:07:06 +08:00
|
|
|
void handleLDHideSymbol(StringRef name, StringRef originalName);
|
2021-07-12 22:26:54 +08:00
|
|
|
void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
|
2021-12-15 10:07:06 +08:00
|
|
|
|
|
|
|
llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
|
2020-04-22 04:37:57 +08:00
|
|
|
};
|
|
|
|
|
2020-05-15 03:43:51 +08:00
|
|
|
// .a file
|
2021-06-14 07:43:37 +08:00
|
|
|
class ArchiveFile final : public InputFile {
|
2020-05-15 03:43:51 +08:00
|
|
|
public:
|
|
|
|
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
|
2021-08-26 23:49:47 +08:00
|
|
|
void addLazySymbols();
|
|
|
|
void fetch(const llvm::object::Archive::Symbol &);
|
|
|
|
// LLD normally doesn't use Error for error-handling, but the underlying
|
|
|
|
// Archive library does, so this is the cleanest way to wrap it.
|
|
|
|
Error fetch(const llvm::object::Archive::Child &, StringRef reason);
|
|
|
|
const llvm::object::Archive &getArchive() const { return *file; };
|
2020-05-15 03:43:51 +08:00
|
|
|
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::unique_ptr<llvm::object::Archive> file;
|
|
|
|
// Keep track of children fetched from the archive by tracking
|
|
|
|
// which address offsets have been fetched already.
|
|
|
|
llvm::DenseSet<uint64_t> seen;
|
|
|
|
};
|
|
|
|
|
2021-06-14 07:43:37 +08:00
|
|
|
class BitcodeFile final : public InputFile {
|
2020-10-27 10:18:29 +08:00
|
|
|
public:
|
2021-07-23 10:47:22 +08:00
|
|
|
explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
|
2022-01-20 02:14:49 +08:00
|
|
|
uint64_t offsetInArchive, bool lazy = false);
|
2020-10-27 10:18:29 +08:00
|
|
|
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
|
2022-01-20 02:14:49 +08:00
|
|
|
void parse();
|
2020-10-27 10:18:29 +08:00
|
|
|
|
|
|
|
std::unique_ptr<llvm::lto::InputFile> obj;
|
2022-01-20 02:14:49 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
void parseLazy();
|
2020-10-27 10:18:29 +08:00
|
|
|
};
|
|
|
|
|
2020-12-15 06:59:22 +08:00
|
|
|
// Set of InputFile pointers, declared here and defined in another translation
// unit. NOTE(review): presumably holds every file taking part in the link --
// confirm.
extern llvm::SetVector<InputFile *> inputFiles;
|
2021-11-05 00:42:57 +08:00
|
|
|
// Map from a hash-cached string key to a MemoryBufferRef, defined in another
// translation unit. NOTE(review): presumably keyed by file path and used by
// readFile() to avoid reading a file twice -- confirm.
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2021-03-02 17:20:22 +08:00
|
|
|
// Takes a path and yields an optional MemoryBufferRef. NOTE(review):
// presumably an empty Optional means the file could not be read -- confirm.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);
|
2020-04-03 02:54:05 +08:00
|
|
|
|
2022-01-20 02:14:49 +08:00
|
|
|
// NOTE(review): presumably forces a lazily loaded `file` to be fully loaded,
// with `reason` describing why -- confirm against the definition.
void extract(InputFile &file, StringRef reason);
|
|
|
|
|
2021-05-06 23:18:19 +08:00
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
template <class CommandType, class... Types>
|
|
|
|
std::vector<const CommandType *>
|
|
|
|
findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
|
|
|
|
std::vector<const CommandType *> cmds;
|
2021-04-21 23:18:20 +08:00
|
|
|
std::initializer_list<uint32_t> typesList{types...};
|
2021-05-04 06:31:23 +08:00
|
|
|
const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
|
|
|
|
const uint8_t *p =
|
|
|
|
reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
|
2021-03-05 05:58:21 +08:00
|
|
|
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
|
|
|
|
auto *cmd = reinterpret_cast<const CommandType *>(p);
|
2021-05-06 23:18:19 +08:00
|
|
|
if (llvm::is_contained(typesList, cmd->cmd)) {
|
|
|
|
cmds.push_back(cmd);
|
|
|
|
if (cmds.size() == maxCommands)
|
|
|
|
return cmds;
|
|
|
|
}
|
2021-03-05 05:58:21 +08:00
|
|
|
p += cmd->cmdsize;
|
|
|
|
}
|
2021-05-06 23:18:19 +08:00
|
|
|
return cmds;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
|
|
|
|
// anyHdr should be a pointer to either mach_header or mach_header_64
|
|
|
|
template <class CommandType = llvm::MachO::load_command, class... Types>
|
|
|
|
const CommandType *findCommand(const void *anyHdr, Types... types) {
|
|
|
|
std::vector<const CommandType *> cmds =
|
|
|
|
detail::findCommands<CommandType>(anyHdr, 1, types...);
|
|
|
|
return cmds.size() ? cmds[0] : nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class CommandType = llvm::MachO::load_command, class... Types>
|
|
|
|
std::vector<const CommandType *> findCommands(const void *anyHdr,
|
|
|
|
Types... types) {
|
|
|
|
return detail::findCommands<CommandType>(anyHdr, 0, types...);
|
2021-03-05 05:58:21 +08:00
|
|
|
}
|
2020-08-19 05:37:04 +08:00
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
} // namespace macho
|
|
|
|
|
|
|
|
// Produces a string for `file`. NOTE(review): presumably a human-readable
// name for diagnostics; null handling is not visible here -- confirm.
std::string toString(const macho::InputFile *file);
|
2022-04-08 02:28:44 +08:00
|
|
|
// Produces a string for a section. NOTE(review): presumably a human-readable
// name for diagnostics -- confirm against the definition.
std::string toString(const macho::Section &);
|
2020-04-03 02:54:05 +08:00
|
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif
|