2020-08-21 04:05:13 +08:00
|
|
|
//===- UnwindInfoSection.cpp ----------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "UnwindInfoSection.h"
|
2021-05-26 02:57:16 +08:00
|
|
|
#include "ConcatOutputSection.h"
|
2020-08-21 04:05:13 +08:00
|
|
|
#include "Config.h"
|
|
|
|
#include "InputSection.h"
|
|
|
|
#include "OutputSection.h"
|
|
|
|
#include "OutputSegment.h"
|
2021-02-09 02:47:33 +08:00
|
|
|
#include "SymbolTable.h"
|
2020-08-21 04:05:13 +08:00
|
|
|
#include "Symbols.h"
|
|
|
|
#include "SyntheticSections.h"
|
|
|
|
#include "Target.h"
|
|
|
|
|
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2021-02-09 02:47:33 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2021-11-11 08:31:54 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2021-04-26 13:23:32 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2020-08-21 04:05:13 +08:00
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
2021-10-27 04:04:06 +08:00
|
|
|
#include "llvm/Support/Parallel.h"
|
2020-08-21 04:05:13 +08:00
|
|
|
|
2021-11-11 08:31:54 +08:00
|
|
|
#include <numeric>
|
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::MachO;
|
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
2020-12-07 14:33:38 +08:00
|
|
|
// Upper bound on the number of entries in the common-encodings table. The
// format overview in this file cites a limit of 127 unique encodings for the
// compressed format.
#define COMMON_ENCODINGS_MAX 127
// NOTE(review): presumably the total number of encoding indices that one
// compressed entry can address (common plus page-local) -- confirm against
// compact_unwind_encoding.h.
#define COMPACT_ENCODINGS_MAX 256

// Size of one second-level page (4 KiB), in bytes and in 32-bit words.
#define SECOND_LEVEL_PAGE_BYTES 4096
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
// Entry capacity of one page of each kind: the bytes remaining after the page
// header, divided by the size of a single entry.
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX                                       \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                                    \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// Width and mask of the function-offset field of a compressed entry. The mask
// is whatever UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET keeps of an all-ones
// word.
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK                                      \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
|
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
|
|
|
|
// optimizes space and exception-time lookup. Most DWARF unwind
|
|
|
|
// entries can be replaced with Compact Unwind entries, but the ones
|
|
|
|
// that cannot are retained in DWARF form.
|
|
|
|
//
|
|
|
|
// This comment will address macro-level organization of the pre-link
|
|
|
|
// and post-link compact unwind tables. For micro-level organization
|
|
|
|
// pertaining to the bitfield layout of the 32-bit compact unwind
|
|
|
|
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
|
|
|
|
//
|
|
|
|
// Important clarifying factoids:
|
|
|
|
//
|
|
|
|
// * __LD,__compact_unwind is the compact unwind format for compiler
|
|
|
|
// output and linker input. It is never a final output. It could be
|
|
|
|
// an intermediate output with the `-r` option which retains relocs.
|
|
|
|
//
|
|
|
|
// * __TEXT,__unwind_info is the compact unwind format for final
|
|
|
|
// linker output. It is never an input.
|
|
|
|
//
|
|
|
|
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
|
|
|
|
//
|
|
|
|
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
|
|
|
|
// level) by ascending address, and the pages are referenced by an
|
|
|
|
// index (1st level) in the section header.
|
|
|
|
//
|
|
|
|
// * Following the headers in __TEXT,__unwind_info, the bulk of the
|
|
|
|
// section contains a vector of compact unwind entries
|
|
|
|
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
|
|
|
|
// Adjacent entries with the same encoding can be folded to great
|
|
|
|
// advantage, achieving a 3-order-of-magnitude reduction in the
|
|
|
|
// number of entries.
|
|
|
|
//
|
|
|
|
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
|
|
|
|
// encodings for the space-efficient compressed format. In practice,
|
|
|
|
// fewer than a dozen unique encodings are used by C++ programs of
|
|
|
|
// all sizes. Therefore, we don't even bother implementing the regular
|
|
|
|
// non-compressed format. Time will tell if anyone in the field ever
|
|
|
|
// overflows the 127-encodings limit.
|
2021-02-09 02:47:33 +08:00
|
|
|
//
|
|
|
|
// Refer to the definition of unwind_info_section_header in
|
|
|
|
// compact_unwind_encoding.h for an overview of the format we are encoding
|
|
|
|
// here.
|
2020-08-21 04:05:13 +08:00
|
|
|
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
|
2020-08-21 04:05:13 +08:00
|
|
|
// TODO(gkm): how do we align the 2nd-level pages?
|
|
|
|
|
2021-10-27 04:04:06 +08:00
|
|
|
template <class Ptr> struct CompactUnwindEntry {
|
|
|
|
Ptr functionAddress;
|
|
|
|
uint32_t functionLength;
|
|
|
|
compact_unwind_encoding_t encoding;
|
|
|
|
Ptr personality;
|
|
|
|
Ptr lsda;
|
|
|
|
};
|
|
|
|
|
2021-07-12 06:35:45 +08:00
|
|
|
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
|
2021-04-16 09:14:33 +08:00
|
|
|
|
|
|
|
struct SecondLevelPage {
|
|
|
|
uint32_t kind;
|
|
|
|
size_t entryIndex;
|
|
|
|
size_t entryCount;
|
|
|
|
size_t byteCount;
|
|
|
|
std::vector<compact_unwind_encoding_t> localEncodings;
|
|
|
|
EncodingMap localEncodingIndexes;
|
|
|
|
};
|
|
|
|
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
template <class Ptr>
|
|
|
|
class UnwindInfoSectionImpl final : public UnwindInfoSection {
|
2021-04-16 09:14:33 +08:00
|
|
|
public:
|
2021-06-12 07:49:52 +08:00
|
|
|
void prepareRelocations(ConcatInputSection *) override;
|
2021-10-27 04:04:06 +08:00
|
|
|
void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
|
2021-11-11 08:31:54 +08:00
|
|
|
Reloc *findLsdaReloc(ConcatInputSection *) const;
|
|
|
|
void encodePersonalities();
|
2021-04-16 09:14:33 +08:00
|
|
|
void finalize() override;
|
|
|
|
void writeTo(uint8_t *buf) const override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
|
|
|
|
EncodingMap commonEncodingIndexes;
|
2021-11-11 08:31:54 +08:00
|
|
|
// The entries here will be in the same order as their originating symbols
|
|
|
|
// in symbolsVec.
|
|
|
|
std::vector<CompactUnwindEntry<Ptr>> cuEntries;
|
|
|
|
// Indices into the cuEntries vector.
|
|
|
|
std::vector<size_t> cuIndices;
|
2021-04-16 09:14:33 +08:00
|
|
|
// Indices of personality functions within the GOT.
|
2021-07-29 13:07:22 +08:00
|
|
|
std::vector<Ptr> personalities;
|
2021-04-16 09:14:33 +08:00
|
|
|
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
|
|
|
|
personalityTable;
|
2021-11-11 08:31:54 +08:00
|
|
|
// Indices into cuEntries for CUEs with a non-null LSDA.
|
|
|
|
std::vector<size_t> entriesWithLsda;
|
|
|
|
// Map of cuEntries index to an index within the LSDA array.
|
|
|
|
DenseMap<size_t, uint32_t> lsdaIndex;
|
2021-04-16 09:14:33 +08:00
|
|
|
std::vector<SecondLevelPage> secondLevelPages;
|
|
|
|
uint64_t level2PagesOffset = 0;
|
|
|
|
};
|
2021-02-24 10:42:02 +08:00
|
|
|
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
// Creates the synthetic section under the text segment name and the
// unwind-info section name, and sets its alignment to 4.
UnwindInfoSection::UnwindInfoSection()
    : SyntheticSection(segment_names::text, section_names::unwindInfo) {
  this->align = 4;
}
|
|
|
|
|
|
|
|
void UnwindInfoSection::prepareRelocations() {
|
2021-10-27 04:04:06 +08:00
|
|
|
// This iteration needs to be deterministic, since prepareRelocations may add
|
|
|
|
// entries to the GOT. Hence the use of a MapVector for
|
|
|
|
// UnwindInfoSection::symbols.
|
|
|
|
for (const Defined *d : make_second_range(symbols))
|
2021-11-16 02:46:59 +08:00
|
|
|
if (d->unwindEntry)
|
|
|
|
prepareRelocations(d->unwindEntry);
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
}
|
|
|
|
|
2021-10-27 04:04:06 +08:00
|
|
|
// Record function symbols that may need entries emitted in __unwind_info, which
|
|
|
|
// stores unwind data for address ranges.
|
|
|
|
//
|
|
|
|
// Note that if several adjacent functions have the same unwind encoding, LSDA,
|
|
|
|
// and personality function, they share one unwind entry. For this to work,
|
|
|
|
// functions without unwind info need explicit "no unwind info" unwind entries
|
|
|
|
// -- else the unwinder would think they have the unwind info of the closest
|
|
|
|
// function with unwind info right before in the image. Thus, we add function
|
|
|
|
// symbols for each unique address regardless of whether they have associated
|
|
|
|
// unwind info.
|
|
|
|
void UnwindInfoSection::addSymbol(const Defined *d) {
|
2021-11-16 02:46:59 +08:00
|
|
|
if (d->unwindEntry)
|
2021-10-27 04:04:06 +08:00
|
|
|
allEntriesAreOmitted = false;
|
|
|
|
// We don't yet know the final output address of this symbol, but we know that
|
|
|
|
// they are uniquely determined by a combination of the isec and value, so
|
|
|
|
// we use that as the key here.
|
|
|
|
auto p = symbols.insert({{d->isec, d->value}, d});
|
|
|
|
// If we have multiple symbols at the same address, only one of them can have
|
|
|
|
// an associated CUE.
|
2021-11-16 02:46:59 +08:00
|
|
|
if (!p.second && d->unwindEntry) {
|
|
|
|
assert(!p.first->second->unwindEntry);
|
2021-10-27 04:04:06 +08:00
|
|
|
p.first->second = d;
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
}
|
[lld-macho] Move ICF earlier to avoid emitting redundant binds
This is a pretty big refactoring diff, so here are the motivations:
Previously, ICF ran after scanRelocations(), where we emitting
bind/rebase opcodes etc. So we had a bunch of redundant leftovers after
ICF. Having ICF run before Writer seems like a better design, and is
what LLD-ELF does, so this diff refactors it accordingly.
However, ICF had two dependencies on things occurring in Writer: 1) it
needs literals to be deduplicated beforehand and 2) it needs to know
which functions have unwind info, which was being handled by
`UnwindInfoSection::prepareRelocations()`.
In order to do literal deduplication earlier, we need to add literal
input sections to their corresponding output sections. So instead of
putting all input sections into the big `inputSections` vector, and then
filtering them by type later on, I've changed things so that literal
sections get added directly to their output sections during the 'gather'
phase. Likewise for compact unwind sections -- they get added directly
to the UnwindInfoSection now. This latter change is not strictly
necessary, but makes it easier for ICF to determine which functions have
unwind info.
Adding literal sections directly to their output sections means that we
can no longer determine `inputOrder` from iterating over
`inputSections`. Instead, we store that order explicitly on
InputSection. Bloating the size of InputSection for this purpose would
be unfortunate -- but LLD-ELF has already solved this problem: it reuses
`outSecOff` to store this order value.
One downside of this refactor is that we now make an additional pass
over the unwind info relocations to figure out which functions have
unwind info, since want to know that before `processRelocations()`. I've
made sure to run that extra loop only if ICF is enabled, so there should
be no overhead in non-optimizing runs of the linker.
The upside of all this is that the `inputSections` vector now contains
only ConcatInputSections that are destined for ConcatOutputSections, so
we can clean up a bunch of code that just existed to filter out other
elements from that vector.
I will test for the lack of redundant binds/rebases in the upcoming
cfstring deduplication diff. While binds/rebases can also happen in the
regular `.text` section, they're more common in `.data` sections, so it
seems more natural to test it that way.
This change is perf-neutral when linking chromium_framework.
Reviewed By: oontvoo
Differential Revision: https://reviews.llvm.org/D105044
2021-07-02 08:33:42 +08:00
|
|
|
}
|
|
|
|
|
2021-02-09 02:47:33 +08:00
|
|
|
// Compact unwind relocations have different semantics, so we handle them in a
|
|
|
|
// separate code path from regular relocations. First, we do not wish to add
|
|
|
|
// rebase opcodes for __LD,__compact_unwind, because that section doesn't
|
|
|
|
// actually end up in the final binary. Second, personality pointers always
|
|
|
|
// reside in the GOT and must be treated specially.
|
2021-04-16 09:14:33 +08:00
|
|
|
template <class Ptr>
|
2021-06-12 07:49:52 +08:00
|
|
|
void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
assert(!isec->shouldOmitFromOutput() &&
|
|
|
|
"__compact_unwind section should not be omitted");
|
2021-02-09 02:47:33 +08:00
|
|
|
|
2021-06-14 01:30:05 +08:00
|
|
|
// FIXME: Make this skip relocations for CompactUnwindEntries that
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
// point to dead-stripped functions. That might save some amount of
|
|
|
|
// work. But since there are usually just few personality functions
|
|
|
|
// that are referenced from many places, at least some of them likely
|
|
|
|
// live, it wouldn't reduce number of got entries.
|
2021-05-20 00:58:17 +08:00
|
|
|
for (size_t i = 0; i < isec->relocs.size(); ++i) {
|
|
|
|
Reloc &r = isec->relocs[i];
|
2021-02-24 10:42:02 +08:00
|
|
|
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
|
2021-07-07 23:28:27 +08:00
|
|
|
|
2021-11-16 02:46:59 +08:00
|
|
|
// Functions and LSDA entries always reside in the same object file as the
|
|
|
|
// compact unwind entries that reference them, and thus appear as section
|
|
|
|
// relocs. There is no need to prepare them. We only prepare relocs for
|
|
|
|
// personality functions.
|
2021-04-16 09:14:33 +08:00
|
|
|
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
|
|
|
|
offsetof(CompactUnwindEntry<Ptr>, personality))
|
2021-02-09 02:47:33 +08:00
|
|
|
continue;
|
|
|
|
|
2021-03-30 08:19:29 +08:00
|
|
|
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
|
2021-11-16 02:46:59 +08:00
|
|
|
// Personality functions are nearly always system-defined (e.g.,
|
|
|
|
// ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
|
|
|
|
// application provides its own personality function, it might be
|
|
|
|
// referenced by an extern Defined symbol reloc, or a local section reloc.
|
2021-09-16 03:49:56 +08:00
|
|
|
if (auto *defined = dyn_cast<Defined>(s)) {
|
|
|
|
// XXX(vyng) This is a special case for handling duplicate personality
|
|
|
|
// symbols. Note that LD64's behavior is a bit different and it is
|
|
|
|
// inconsistent with how symbol resolution usually works.
|
|
|
|
//
|
|
|
|
// So we've decided not to follow it. Instead, simply pick the symbol
|
|
|
|
// with the same name from the symbol table to replace the local one.
|
|
|
|
//
|
|
|
|
// (See discussions/alternatives already considered on D107533)
|
|
|
|
if (!defined->isExternal())
|
2021-11-19 23:56:58 +08:00
|
|
|
if (Symbol *sym = symtab->find(defined->getName()))
|
|
|
|
if (sym->kind() != Symbol::LazyKind)
|
|
|
|
r.referent = s = sym;
|
2021-09-16 03:49:56 +08:00
|
|
|
}
|
2021-02-24 10:42:02 +08:00
|
|
|
if (auto *undefined = dyn_cast<Undefined>(s)) {
|
2021-02-09 02:47:33 +08:00
|
|
|
treatUndefinedSymbol(*undefined);
|
2021-03-01 02:42:14 +08:00
|
|
|
// treatUndefinedSymbol() can replace s with a DylibSymbol; re-check.
|
|
|
|
if (isa<Undefined>(s))
|
|
|
|
continue;
|
2021-02-24 10:42:02 +08:00
|
|
|
}
|
2021-09-16 03:49:56 +08:00
|
|
|
|
2021-02-24 10:42:02 +08:00
|
|
|
if (auto *defined = dyn_cast<Defined>(s)) {
|
|
|
|
// Check if we have created a synthetic symbol at the same address.
|
2021-03-30 08:19:29 +08:00
|
|
|
Symbol *&personality =
|
2021-02-24 10:42:02 +08:00
|
|
|
personalityTable[{defined->isec, defined->value}];
|
|
|
|
if (personality == nullptr) {
|
|
|
|
personality = defined;
|
|
|
|
in.got->addEntry(defined);
|
|
|
|
} else if (personality != defined) {
|
|
|
|
r.referent = personality;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
assert(isa<DylibSymbol>(s));
|
|
|
|
in.got->addEntry(s);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
|
2021-06-17 03:23:04 +08:00
|
|
|
assert(!isCoalescedWeak(referentIsec));
|
2021-02-09 02:47:33 +08:00
|
|
|
// Personality functions can be referenced via section relocations
|
2021-02-24 10:42:02 +08:00
|
|
|
// if they live in the same object file. Create placeholder synthetic
|
|
|
|
// symbols for them in the GOT.
|
2021-03-30 08:19:29 +08:00
|
|
|
Symbol *&s = personalityTable[{referentIsec, r.addend}];
|
2021-02-09 02:47:33 +08:00
|
|
|
if (s == nullptr) {
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
// This runs after dead stripping, so the noDeadStrip argument does not
|
|
|
|
// matter.
|
2021-04-22 22:44:56 +08:00
|
|
|
s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
|
|
|
|
r.addend, /*size=*/0, /*isWeakDef=*/false,
|
2021-05-01 04:17:26 +08:00
|
|
|
/*isExternal=*/false, /*isPrivateExtern=*/false,
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
/*isThumb=*/false, /*isReferencedDynamically=*/false,
|
|
|
|
/*noDeadStrip=*/false);
|
2021-02-09 02:47:33 +08:00
|
|
|
in.got->addEntry(s);
|
|
|
|
}
|
|
|
|
r.referent = s;
|
|
|
|
r.addend = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unwind info lives in __DATA, and finalization of __TEXT will occur before
|
|
|
|
// finalization of __DATA. Moreover, the finalization of unwind info depends on
|
|
|
|
// the exact addresses that it references. So it is safe for compact unwind to
|
|
|
|
// reference addresses in __TEXT, but not addresses in any other segment.
|
2021-06-12 07:49:52 +08:00
|
|
|
static ConcatInputSection *checkTextSegment(InputSection *isec) {
|
2021-07-02 08:33:55 +08:00
|
|
|
if (isec->getSegName() != segment_names::text)
|
2021-02-09 02:47:33 +08:00
|
|
|
error("compact unwind references address in " + toString(isec) +
|
|
|
|
" which is not in segment __TEXT");
|
2021-06-12 07:49:52 +08:00
|
|
|
// __text should always be a ConcatInputSection.
|
|
|
|
return cast<ConcatInputSection>(isec);
|
2021-02-09 02:47:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// We need to apply the relocations to the pre-link compact unwind section
|
|
|
|
// before converting it to post-link form. There should only be absolute
|
|
|
|
// relocations here: since we are not emitting the pre-link CU section, there
|
|
|
|
// is no source address to make a relative location meaningful.
|
2021-04-16 09:14:33 +08:00
|
|
|
template <class Ptr>
|
2021-10-27 04:04:06 +08:00
|
|
|
void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
|
2021-11-11 08:31:54 +08:00
|
|
|
std::vector<CompactUnwindEntry<Ptr>> &cuEntries) {
|
2021-10-27 04:04:06 +08:00
|
|
|
parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
|
2021-11-11 08:31:54 +08:00
|
|
|
uint8_t *buf = reinterpret_cast<uint8_t *>(cuEntries.data()) +
|
2021-10-27 04:04:06 +08:00
|
|
|
i * sizeof(CompactUnwindEntry<Ptr>);
|
|
|
|
const Defined *d = symbolsVec[i].second;
|
|
|
|
// Write the functionAddress.
|
|
|
|
writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
|
2021-11-16 02:46:59 +08:00
|
|
|
if (!d->unwindEntry)
|
2021-10-27 04:04:06 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
// Write the rest of the CUE.
|
2021-11-16 02:46:59 +08:00
|
|
|
memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
|
|
|
|
d->unwindEntry->data.size());
|
|
|
|
for (const Reloc &r : d->unwindEntry->relocs) {
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-27 04:04:04 +08:00
|
|
|
uint64_t referentVA = 0;
|
2021-03-30 08:19:29 +08:00
|
|
|
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
|
2021-02-09 02:47:33 +08:00
|
|
|
if (!isa<Undefined>(referentSym)) {
|
|
|
|
if (auto *defined = dyn_cast<Defined>(referentSym))
|
|
|
|
checkTextSegment(defined->isec);
|
|
|
|
// At this point in the link, we may not yet know the final address of
|
|
|
|
// the GOT, so we just encode the index. We make it a 1-based index so
|
|
|
|
// that we can distinguish the null pointer case.
|
|
|
|
referentVA = referentSym->gotIndex + 1;
|
|
|
|
}
|
2021-06-27 05:38:25 +08:00
|
|
|
} else {
|
|
|
|
auto *referentIsec = r.referent.get<InputSection *>();
|
2021-11-13 05:01:25 +08:00
|
|
|
checkTextSegment(referentIsec);
|
|
|
|
referentVA = referentIsec->getVA(r.addend);
|
2021-02-09 02:47:33 +08:00
|
|
|
}
|
2021-04-16 09:14:33 +08:00
|
|
|
writeAddress(buf + r.offset, referentVA, r.length);
|
2021-02-09 02:47:33 +08:00
|
|
|
}
|
2021-10-27 04:04:06 +08:00
|
|
|
});
|
2021-02-09 02:47:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// There should only be a handful of unique personality pointers, so we can
|
|
|
|
// encode them as 2-bit indices into a small array.
|
2021-11-11 08:31:54 +08:00
|
|
|
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
|
|
|
|
for (size_t idx : cuIndices) {
|
|
|
|
CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
|
|
|
|
if (cu.personality == 0)
|
2021-02-09 02:47:33 +08:00
|
|
|
continue;
|
|
|
|
// Linear search is fast enough for a small array.
|
2021-11-11 08:31:54 +08:00
|
|
|
auto it = find(personalities, cu.personality);
|
2021-02-09 02:47:33 +08:00
|
|
|
uint32_t personalityIndex; // 1-based index
|
|
|
|
if (it != personalities.end()) {
|
|
|
|
personalityIndex = std::distance(personalities.begin(), it) + 1;
|
|
|
|
} else {
|
2021-11-11 08:31:54 +08:00
|
|
|
personalities.push_back(cu.personality);
|
2021-02-09 02:47:33 +08:00
|
|
|
personalityIndex = personalities.size();
|
|
|
|
}
|
2021-11-11 08:31:54 +08:00
|
|
|
cu.encoding |=
|
2021-02-09 02:47:33 +08:00
|
|
|
personalityIndex << countTrailingZeros(
|
|
|
|
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
|
|
|
|
}
|
|
|
|
if (personalities.size() > 3)
|
|
|
|
error("too many personalities (" + std::to_string(personalities.size()) +
|
|
|
|
") for compact unwind to encode");
|
|
|
|
}
|
|
|
|
|
2021-06-26 10:50:46 +08:00
|
|
|
static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
|
|
|
|
// From compact_unwind_encoding.h:
|
|
|
|
// UNWIND_X86_64_MODE_STACK_IND:
|
|
|
|
// A "frameless" (RBP not used as frame pointer) function large constant
|
|
|
|
// stack size. This case is like the previous, except the stack size is too
|
|
|
|
// large to encode in the compact unwind encoding. Instead it requires that
|
|
|
|
// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact
|
|
|
|
// encoding contains the offset to the nnnnnnnn value in the function in
|
|
|
|
// UNWIND_X86_64_FRAMELESS_STACK_SIZE.
|
|
|
|
// Since this means the unwinder has to look at the `subq` in the function
|
|
|
|
// of the unwind info's unwind address, two functions that have identical
|
|
|
|
// unwind info can't be folded if it's using this encoding since both
|
|
|
|
// entries need unique addresses.
|
|
|
|
static_assert(UNWIND_X86_64_MODE_MASK == UNWIND_X86_MODE_MASK, "");
|
|
|
|
static_assert(UNWIND_X86_64_MODE_STACK_IND == UNWIND_X86_MODE_STACK_IND, "");
|
|
|
|
if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) &&
|
|
|
|
(encoding & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) {
|
|
|
|
// FIXME: Consider passing in the two function addresses and getting
|
|
|
|
// their two stack sizes off the `subq` and only returning false if they're
|
|
|
|
// actually different.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-11-11 08:31:54 +08:00
|
|
|
template <class Ptr>
|
|
|
|
Reloc *
|
|
|
|
UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const {
|
|
|
|
if (isec == nullptr)
|
|
|
|
return nullptr;
|
|
|
|
auto it = llvm::find_if(isec->relocs, [](const Reloc &r) {
|
|
|
|
return r.offset % sizeof(CompactUnwindEntry<Ptr>) ==
|
|
|
|
offsetof(CompactUnwindEntry<Ptr>, lsda);
|
|
|
|
});
|
|
|
|
if (it == isec->relocs.end())
|
|
|
|
return nullptr;
|
|
|
|
return &*it;
|
|
|
|
}
|
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// Scan the __LD,__compact_unwind entries and compute the space needs of
|
|
|
|
// __TEXT,__unwind_info and __TEXT,__eh_frame
|
2021-04-16 09:14:33 +08:00
|
|
|
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
|
2021-10-27 04:04:06 +08:00
|
|
|
if (symbols.empty())
|
2020-08-21 04:05:13 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
// At this point, the address space for __TEXT,__text has been
|
|
|
|
// assigned, so we can relocate the __LD,__compact_unwind entries
|
|
|
|
// into a temporary buffer. Relocation is necessary in order to sort
|
|
|
|
// the CU entries by function address. Sorting is necessary so that
|
|
|
|
// we can fold adjacent CU entries with identical
|
|
|
|
// encoding+personality+lsda. Folding is necessary because it reduces
|
|
|
|
// the number of CU entries by as much as 3 orders of magnitude!
|
2021-11-11 08:31:54 +08:00
|
|
|
cuEntries.resize(symbols.size());
|
|
|
|
// The "map" part of the symbols MapVector was only needed for deduplication
|
|
|
|
// in addSymbol(). Now that we are done adding, move the contents to a plain
|
|
|
|
// std::vector for indexed access.
|
|
|
|
symbolsVec = symbols.takeVector();
|
|
|
|
relocateCompactUnwind(cuEntries);
|
2021-06-22 10:29:11 +08:00
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// Rather than sort & fold the 32-byte entries directly, we create a
|
2021-11-11 08:31:54 +08:00
|
|
|
// vector of indices to entries and sort & fold that instead.
|
|
|
|
cuIndices.resize(cuEntries.size());
|
|
|
|
std::iota(cuIndices.begin(), cuIndices.end(), 0);
|
|
|
|
llvm::sort(cuIndices, [&](size_t a, size_t b) {
|
|
|
|
return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
|
2021-04-26 13:23:32 +08:00
|
|
|
});
|
2020-08-21 04:05:13 +08:00
|
|
|
|
|
|
|
// Fold adjacent entries with matching encoding+personality+lsda
|
2021-11-11 08:31:54 +08:00
|
|
|
// We use three iterators on the same cuIndices to fold in-situ:
|
2020-08-21 04:05:13 +08:00
|
|
|
// (1) `foldBegin` is the first of a potential sequence of matching entries
|
|
|
|
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
|
|
|
|
// The semi-open interval [ foldBegin .. foldEnd ) contains a range
|
|
|
|
// entries that can be folded into a single entry and written to ...
|
|
|
|
// (3) `foldWrite`
|
2021-11-11 08:31:54 +08:00
|
|
|
auto foldWrite = cuIndices.begin();
|
|
|
|
for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
|
2020-08-21 04:05:13 +08:00
|
|
|
auto foldEnd = foldBegin;
|
2021-11-11 08:31:54 +08:00
|
|
|
while (++foldEnd < cuIndices.end() &&
|
|
|
|
cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
|
|
|
|
cuEntries[*foldBegin].personality ==
|
|
|
|
cuEntries[*foldEnd].personality &&
|
|
|
|
canFoldEncoding(cuEntries[*foldEnd].encoding)) {
|
|
|
|
// In most cases, we can just compare the values of cuEntries[*].lsda.
|
|
|
|
// However, it is possible for -rename_section to cause the LSDA section
|
|
|
|
// (__gcc_except_tab) to be finalized after the unwind info section. In
|
|
|
|
// that case, we don't yet have unique addresses for the LSDA entries.
|
|
|
|
// So we check their relocations instead.
|
|
|
|
// FIXME: should we account for an LSDA at an absolute address? ld64 seems
|
|
|
|
// to support it, but it seems unlikely to be used in practice.
|
2021-11-16 02:46:59 +08:00
|
|
|
Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
|
|
|
|
Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
|
2021-11-11 08:31:54 +08:00
|
|
|
if (lsda1 == nullptr && lsda2 == nullptr)
|
|
|
|
continue;
|
|
|
|
if (lsda1 == nullptr || lsda2 == nullptr)
|
|
|
|
break;
|
2021-11-13 04:59:07 +08:00
|
|
|
if (lsda1->referent != lsda2->referent)
|
2021-11-11 08:31:54 +08:00
|
|
|
break;
|
|
|
|
if (lsda1->addend != lsda2->addend)
|
|
|
|
break;
|
|
|
|
}
|
2020-08-21 04:05:13 +08:00
|
|
|
*foldWrite++ = *foldBegin;
|
|
|
|
foldBegin = foldEnd;
|
|
|
|
}
|
2021-11-11 08:31:54 +08:00
|
|
|
cuIndices.erase(foldWrite, cuIndices.end());
|
2020-08-21 04:05:13 +08:00
|
|
|
|
2021-11-11 08:31:54 +08:00
|
|
|
encodePersonalities();
|
2021-02-09 02:47:33 +08:00
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// Count frequencies of the folded encodings
|
2020-12-07 14:33:38 +08:00
|
|
|
EncodingMap encodingFrequencies;
|
2021-11-11 08:31:54 +08:00
|
|
|
for (size_t idx : cuIndices)
|
|
|
|
encodingFrequencies[cuEntries[idx].encoding]++;
|
2020-08-21 04:05:13 +08:00
|
|
|
|
2020-12-07 14:33:38 +08:00
|
|
|
// Make a vector of encodings, sorted by descending frequency
|
2020-08-21 04:05:13 +08:00
|
|
|
for (const auto &frequency : encodingFrequencies)
|
|
|
|
commonEncodings.emplace_back(frequency);
|
2021-04-26 13:23:32 +08:00
|
|
|
llvm::sort(commonEncodings,
|
|
|
|
[](const std::pair<compact_unwind_encoding_t, size_t> &a,
|
|
|
|
const std::pair<compact_unwind_encoding_t, size_t> &b) {
|
|
|
|
if (a.second == b.second)
|
|
|
|
// When frequencies match, secondarily sort on encoding
|
|
|
|
// to maintain parity with validate-unwind-info.py
|
|
|
|
return a.first > b.first;
|
|
|
|
return a.second > b.second;
|
|
|
|
});
|
2020-08-21 04:05:13 +08:00
|
|
|
|
2020-12-07 14:33:38 +08:00
|
|
|
// Truncate the vector to 127 elements.
|
2021-01-02 11:28:11 +08:00
|
|
|
// Common encoding indexes are limited to 0..126, while encoding
|
2020-12-07 14:33:38 +08:00
|
|
|
// indexes 127..255 are local to each second-level page
|
|
|
|
if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
|
|
|
|
commonEncodings.resize(COMMON_ENCODINGS_MAX);
|
|
|
|
|
|
|
|
// Create a map from encoding to common-encoding-table index
|
|
|
|
for (size_t i = 0; i < commonEncodings.size(); i++)
|
|
|
|
commonEncodingIndexes[commonEncodings[i].first] = i;
|
|
|
|
|
|
|
|
// Split folded encodings into pages, where each page is limited by ...
|
|
|
|
// (a) 4 KiB capacity
|
|
|
|
// (b) 24-bit difference between first & final function address
|
|
|
|
// (c) 8-bit compact-encoding-table index,
|
|
|
|
// for which 0..126 references the global common-encodings table,
|
|
|
|
// and 127..255 references a local per-second-level-page table.
|
|
|
|
// First we try the compact format and determine how many entries fit.
|
|
|
|
// If more entries fit in the regular format, we use that.
|
2021-11-11 08:31:54 +08:00
|
|
|
for (size_t i = 0; i < cuIndices.size();) {
|
|
|
|
size_t idx = cuIndices[i];
|
2020-12-07 14:33:38 +08:00
|
|
|
secondLevelPages.emplace_back();
|
2021-04-16 09:14:33 +08:00
|
|
|
SecondLevelPage &page = secondLevelPages.back();
|
2020-12-07 14:33:38 +08:00
|
|
|
page.entryIndex = i;
|
|
|
|
uintptr_t functionAddressMax =
|
2021-11-11 08:31:54 +08:00
|
|
|
cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
|
2020-12-07 14:33:38 +08:00
|
|
|
size_t n = commonEncodings.size();
|
|
|
|
size_t wordsRemaining =
|
|
|
|
SECOND_LEVEL_PAGE_WORDS -
|
|
|
|
sizeof(unwind_info_compressed_second_level_page_header) /
|
|
|
|
sizeof(uint32_t);
|
2021-11-11 08:31:54 +08:00
|
|
|
while (wordsRemaining >= 1 && i < cuIndices.size()) {
|
|
|
|
idx = cuIndices[i];
|
|
|
|
const CompactUnwindEntry<Ptr> *cuPtr = &cuEntries[idx];
|
2020-12-07 14:33:38 +08:00
|
|
|
if (cuPtr->functionAddress >= functionAddressMax) {
|
|
|
|
break;
|
|
|
|
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
|
|
|
|
page.localEncodingIndexes.count(cuPtr->encoding)) {
|
|
|
|
i++;
|
|
|
|
wordsRemaining--;
|
|
|
|
} else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
|
|
|
|
page.localEncodings.emplace_back(cuPtr->encoding);
|
|
|
|
page.localEncodingIndexes[cuPtr->encoding] = n++;
|
|
|
|
i++;
|
|
|
|
wordsRemaining -= 2;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
page.entryCount = i - page.entryIndex;
|
|
|
|
|
|
|
|
// If this is not the final page, see if it's possible to fit more
|
|
|
|
// entries by using the regular format. This can happen when there
|
|
|
|
// are many unique encodings, and we we saturated the local
|
|
|
|
// encoding table early.
|
2021-11-11 08:31:54 +08:00
|
|
|
if (i < cuIndices.size() &&
|
2020-12-07 14:33:38 +08:00
|
|
|
page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
|
|
|
|
page.kind = UNWIND_SECOND_LEVEL_REGULAR;
|
|
|
|
page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
|
2021-11-11 08:31:54 +08:00
|
|
|
cuIndices.size() - page.entryIndex);
|
2020-12-07 14:33:38 +08:00
|
|
|
i = page.entryIndex + page.entryCount;
|
|
|
|
} else {
|
|
|
|
page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
|
|
|
|
}
|
2020-08-21 04:05:13 +08:00
|
|
|
}
|
|
|
|
|
2021-11-11 08:31:54 +08:00
|
|
|
for (size_t idx : cuIndices) {
|
|
|
|
lsdaIndex[idx] = entriesWithLsda.size();
|
2021-11-16 02:46:59 +08:00
|
|
|
const Defined *d = symbolsVec[idx].second;
|
|
|
|
if (findLsdaReloc(d->unwindEntry))
|
2021-11-11 08:31:54 +08:00
|
|
|
entriesWithLsda.push_back(idx);
|
2021-02-09 02:47:34 +08:00
|
|
|
}
|
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// compute size of __TEXT,__unwind_info section
|
2021-11-11 08:31:54 +08:00
|
|
|
level2PagesOffset = sizeof(unwind_info_section_header) +
|
|
|
|
commonEncodings.size() * sizeof(uint32_t) +
|
|
|
|
personalities.size() * sizeof(uint32_t) +
|
|
|
|
// The extra second-level-page entry is for the sentinel
|
|
|
|
(secondLevelPages.size() + 1) *
|
|
|
|
sizeof(unwind_info_section_header_index_entry) +
|
|
|
|
entriesWithLsda.size() *
|
|
|
|
sizeof(unwind_info_section_header_lsda_index_entry);
|
2020-12-07 14:33:38 +08:00
|
|
|
unwindInfoSize =
|
|
|
|
level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
|
2020-08-21 04:05:13 +08:00
|
|
|
}
|
|
|
|
|
2020-12-02 09:27:33 +08:00
|
|
|
// All inputs are relocated and output addresses are known, so write!
|
2020-08-21 04:05:13 +08:00
|
|
|
|
2021-04-16 09:14:33 +08:00
|
|
|
template <class Ptr>
|
|
|
|
void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
|
2021-11-11 08:31:54 +08:00
|
|
|
assert(!cuIndices.empty() && "call only if there is unwind info");
|
2021-07-07 23:28:27 +08:00
|
|
|
|
2020-08-21 04:05:13 +08:00
|
|
|
// section header
|
|
|
|
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
|
|
|
|
uip->version = 1;
|
|
|
|
uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
|
|
|
|
uip->commonEncodingsArrayCount = commonEncodings.size();
|
|
|
|
uip->personalityArraySectionOffset =
|
|
|
|
uip->commonEncodingsArraySectionOffset +
|
|
|
|
(uip->commonEncodingsArrayCount * sizeof(uint32_t));
|
|
|
|
uip->personalityArrayCount = personalities.size();
|
|
|
|
uip->indexSectionOffset = uip->personalityArraySectionOffset +
|
|
|
|
(uip->personalityArrayCount * sizeof(uint32_t));
|
2020-12-07 14:33:38 +08:00
|
|
|
uip->indexCount = secondLevelPages.size() + 1;
|
2020-08-21 04:05:13 +08:00
|
|
|
|
|
|
|
// Common encodings
|
|
|
|
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
|
|
|
|
for (const auto &encoding : commonEncodings)
|
|
|
|
*i32p++ = encoding.first;
|
|
|
|
|
|
|
|
// Personalities
|
2021-08-26 10:46:48 +08:00
|
|
|
for (Ptr personality : personalities)
|
2021-04-16 09:14:33 +08:00
|
|
|
*i32p++ =
|
|
|
|
in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
|
2020-08-21 04:05:13 +08:00
|
|
|
|
|
|
|
// Level-1 index
|
|
|
|
uint32_t lsdaOffset =
|
|
|
|
uip->indexSectionOffset +
|
|
|
|
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
|
|
|
|
uint64_t l2PagesOffset = level2PagesOffset;
|
|
|
|
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
|
2020-12-07 14:33:38 +08:00
|
|
|
for (const SecondLevelPage &page : secondLevelPages) {
|
2021-11-11 08:31:54 +08:00
|
|
|
size_t idx = cuIndices[page.entryIndex];
|
|
|
|
iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
|
2020-08-21 04:05:13 +08:00
|
|
|
iep->secondLevelPagesSectionOffset = l2PagesOffset;
|
2021-02-09 02:47:34 +08:00
|
|
|
iep->lsdaIndexArraySectionOffset =
|
2021-11-11 08:31:54 +08:00
|
|
|
lsdaOffset + lsdaIndex.lookup(idx) *
|
2021-02-09 02:47:34 +08:00
|
|
|
sizeof(unwind_info_section_header_lsda_index_entry);
|
2020-08-21 04:05:13 +08:00
|
|
|
iep++;
|
2020-12-07 14:33:38 +08:00
|
|
|
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
|
2020-08-21 04:05:13 +08:00
|
|
|
}
|
|
|
|
// Level-1 sentinel
|
2021-11-11 08:31:54 +08:00
|
|
|
const CompactUnwindEntry<Ptr> &cuEnd = cuEntries[cuIndices.back()];
|
2021-07-04 10:23:42 +08:00
|
|
|
iep->functionOffset =
|
|
|
|
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
|
2020-08-21 04:05:13 +08:00
|
|
|
iep->secondLevelPagesSectionOffset = 0;
|
2021-02-09 02:47:34 +08:00
|
|
|
iep->lsdaIndexArraySectionOffset =
|
2021-11-11 08:31:54 +08:00
|
|
|
lsdaOffset + entriesWithLsda.size() *
|
|
|
|
sizeof(unwind_info_section_header_lsda_index_entry);
|
2020-08-21 04:05:13 +08:00
|
|
|
iep++;
|
|
|
|
|
|
|
|
// LSDAs
|
2021-11-11 08:31:54 +08:00
|
|
|
auto *lep =
|
|
|
|
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
|
|
|
|
for (size_t idx : entriesWithLsda) {
|
|
|
|
const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
|
|
|
|
const Defined *d = symbolsVec[idx].second;
|
2021-11-16 02:46:59 +08:00
|
|
|
if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
|
2021-11-13 04:59:07 +08:00
|
|
|
uint64_t va;
|
|
|
|
if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
|
|
|
|
va = isec->getVA(r->addend);
|
|
|
|
} else {
|
|
|
|
auto *sym = r->referent.get<Symbol *>();
|
|
|
|
va = sym->getVA() + r->addend;
|
|
|
|
}
|
|
|
|
lep->lsdaOffset = va - in.header->addr;
|
2021-11-11 08:31:54 +08:00
|
|
|
}
|
|
|
|
lep->functionOffset = cu.functionAddress - in.header->addr;
|
|
|
|
lep++;
|
|
|
|
}
|
2020-08-21 04:05:13 +08:00
|
|
|
|
|
|
|
// Level-2 pages
|
2021-11-11 08:31:54 +08:00
|
|
|
auto *pp = reinterpret_cast<uint32_t *>(lep);
|
2020-12-07 14:33:38 +08:00
|
|
|
for (const SecondLevelPage &page : secondLevelPages) {
|
|
|
|
if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
|
|
|
|
uintptr_t functionAddressBase =
|
2021-11-11 08:31:54 +08:00
|
|
|
cuEntries[cuIndices[page.entryIndex]].functionAddress;
|
2020-12-07 14:33:38 +08:00
|
|
|
auto *p2p =
|
|
|
|
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
|
|
|
|
pp);
|
|
|
|
p2p->kind = page.kind;
|
|
|
|
p2p->entryPageOffset =
|
|
|
|
sizeof(unwind_info_compressed_second_level_page_header);
|
|
|
|
p2p->entryCount = page.entryCount;
|
|
|
|
p2p->encodingsPageOffset =
|
|
|
|
p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
|
|
|
|
p2p->encodingsCount = page.localEncodings.size();
|
|
|
|
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
|
|
|
|
for (size_t i = 0; i < page.entryCount; i++) {
|
2021-11-11 08:31:54 +08:00
|
|
|
const CompactUnwindEntry<Ptr> &cue =
|
|
|
|
cuEntries[cuIndices[page.entryIndex + i]];
|
|
|
|
auto it = commonEncodingIndexes.find(cue.encoding);
|
2020-12-07 14:33:38 +08:00
|
|
|
if (it == commonEncodingIndexes.end())
|
2021-11-11 08:31:54 +08:00
|
|
|
it = page.localEncodingIndexes.find(cue.encoding);
|
2020-12-07 14:33:38 +08:00
|
|
|
*ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
|
2021-11-11 08:31:54 +08:00
|
|
|
(cue.functionAddress - functionAddressBase);
|
2020-12-07 14:33:38 +08:00
|
|
|
}
|
2021-10-27 03:14:25 +08:00
|
|
|
if (!page.localEncodings.empty())
|
2020-12-21 12:01:20 +08:00
|
|
|
memcpy(ep, page.localEncodings.data(),
|
|
|
|
page.localEncodings.size() * sizeof(uint32_t));
|
2020-12-07 14:33:38 +08:00
|
|
|
} else {
|
|
|
|
auto *p2p =
|
|
|
|
reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
|
|
|
|
p2p->kind = page.kind;
|
|
|
|
p2p->entryPageOffset =
|
|
|
|
sizeof(unwind_info_regular_second_level_page_header);
|
|
|
|
p2p->entryCount = page.entryCount;
|
|
|
|
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
|
|
|
|
for (size_t i = 0; i < page.entryCount; i++) {
|
2021-11-11 08:31:54 +08:00
|
|
|
const CompactUnwindEntry<Ptr> &cue =
|
|
|
|
cuEntries[cuIndices[page.entryIndex + i]];
|
|
|
|
*ep++ = cue.functionAddress;
|
|
|
|
*ep++ = cue.encoding;
|
2020-12-07 14:33:38 +08:00
|
|
|
}
|
2020-08-21 04:05:13 +08:00
|
|
|
}
|
2020-12-07 14:33:38 +08:00
|
|
|
pp += SECOND_LEVEL_PAGE_WORDS;
|
2020-08-21 04:05:13 +08:00
|
|
|
}
|
|
|
|
}
|
2021-04-16 09:14:33 +08:00
|
|
|
|
|
|
|
UnwindInfoSection *macho::makeUnwindInfoSection() {
|
|
|
|
if (target->wordSize == 8)
|
|
|
|
return make<UnwindInfoSectionImpl<uint64_t>>();
|
|
|
|
else
|
|
|
|
return make<UnwindInfoSectionImpl<uint32_t>>();
|
|
|
|
}
|