2020-04-03 02:54:05 +08:00
|
|
|
//===- SymbolTable.cpp ----------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SymbolTable.h"
|
2020-12-14 11:31:33 +08:00
|
|
|
#include "Config.h"
|
2020-04-03 02:54:05 +08:00
|
|
|
#include "InputFiles.h"
|
|
|
|
#include "Symbols.h"
|
|
|
|
#include "lld/Common/ErrorHandler.h"
|
|
|
|
#include "lld/Common/Memory.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
2021-03-11 08:45:18 +08:00
|
|
|
// Looks up `cachedName` in the name -> index map and returns the symbol stored
// at that index, or nullptr when the name has never been inserted.
Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
  auto entry = symMap.find(cachedName);
  return entry != symMap.end() ? symVector[entry->second] : nullptr;
}
|
|
|
|
|
2021-04-16 09:14:29 +08:00
|
|
|
// Returns the symbol registered under `name`, creating an entry for it if the
// name is not in the table yet. The bool in the result is true only when a new
// entry was created by this call.
//
// In both cases the symbol is flagged as used in a regular object when `file`
// is null or is an ObjFile.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
                                              const InputFile *file) {
  // Tentatively map the name to the index the next pushed symbol would get;
  // the map leaves an existing entry untouched.
  const int nextIndex = static_cast<int>(symVector.size());
  auto result = symMap.insert({CachedHashStringRef(name), nextIndex});
  const bool isNewName = result.second;

  Symbol *sym = nullptr;
  if (isNewName) {
    // Name is a new symbol: allocate its storage and record it.
    sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
    symVector.push_back(sym);
  } else {
    // Name already present in the symbol table.
    sym = symVector[result.first->second];
  }

  if (!file || isa<ObjFile>(file))
    sym->isUsedInRegularObj = true;
  return {sym, isNewName};
}
|
|
|
|
|
2021-03-13 06:26:12 +08:00
|
|
|
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
|
2021-04-02 08:48:09 +08:00
|
|
|
InputSection *isec, uint64_t value,
|
|
|
|
uint64_t size, bool isWeakDef,
|
2021-05-17 21:15:39 +08:00
|
|
|
bool isPrivateExtern, bool isThumb,
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
bool isReferencedDynamically,
|
|
|
|
bool noDeadStrip) {
|
2020-04-03 02:54:05 +08:00
|
|
|
Symbol *s;
|
|
|
|
bool wasInserted;
|
2020-08-28 06:59:30 +08:00
|
|
|
bool overridesWeakDef = false;
|
2021-04-16 09:14:29 +08:00
|
|
|
std::tie(s, wasInserted) = insert(name, file);
|
2020-04-03 02:54:05 +08:00
|
|
|
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
assert(!isWeakDef || (isa<BitcodeFile>(file) && !isec) ||
|
2021-07-02 08:33:55 +08:00
|
|
|
(isa<ObjFile>(file) && file == isec->getFile()));
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
|
2020-07-25 06:55:25 +08:00
|
|
|
if (!wasInserted) {
|
|
|
|
if (auto *defined = dyn_cast<Defined>(s)) {
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
if (isWeakDef) {
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
if (defined->isWeakDef()) {
|
2021-05-17 21:15:39 +08:00
|
|
|
// Both old and new symbol weak (e.g. inline function in two TUs):
|
|
|
|
// If one of them isn't private extern, the merged symbol isn't.
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
defined->privateExtern &= isPrivateExtern;
|
2021-05-17 21:15:39 +08:00
|
|
|
defined->referencedDynamically |= isReferencedDynamically;
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
defined->noDeadStrip |= noDeadStrip;
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
|
|
|
|
// FIXME: Handle this for bitcode files.
|
|
|
|
// FIXME: We currently only do this if both symbols are weak.
|
|
|
|
// We could do this if either is weak (but getting the
|
|
|
|
// case where !isWeakDef && defined->isWeakDef() right
|
|
|
|
// requires some care and testing).
|
2021-06-12 07:49:52 +08:00
|
|
|
if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
|
|
|
|
concatIsec->wasCoalesced = true;
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-07 02:47:57 +08:00
|
|
|
}
|
|
|
|
|
2021-03-13 06:26:12 +08:00
|
|
|
return defined;
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
}
|
2021-03-17 12:34:28 +08:00
|
|
|
if (!defined->isWeakDef())
|
2021-02-04 02:31:40 +08:00
|
|
|
error("duplicate symbol: " + name + "\n>>> defined in " +
|
|
|
|
toString(defined->getFile()) + "\n>>> defined in " +
|
|
|
|
toString(file));
|
2020-08-28 06:59:30 +08:00
|
|
|
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
|
|
|
|
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
|
2021-06-01 10:12:35 +08:00
|
|
|
dysym->unreference();
|
2020-07-25 06:55:25 +08:00
|
|
|
}
|
|
|
|
// Defined symbols take priority over other types of symbols, so in case
|
|
|
|
// of a name conflict, we fall through to the replaceSymbol() call below.
|
|
|
|
}
|
|
|
|
|
2021-05-17 21:15:39 +08:00
|
|
|
Defined *defined = replaceSymbol<Defined>(
|
|
|
|
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-08 05:10:05 +08:00
|
|
|
isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip);
|
2020-08-28 06:59:30 +08:00
|
|
|
defined->overridesWeakDef = overridesWeakDef;
|
2021-03-13 06:26:12 +08:00
|
|
|
return defined;
|
2020-04-03 02:54:05 +08:00
|
|
|
}
|
|
|
|
|
2021-02-04 02:31:40 +08:00
|
|
|
// Records a reference to `name` from `file` and returns the table entry for
// that name. `isWeakRef` selects between RefState::Weak and RefState::Strong.
//
// What happens depends on what the table currently holds for `name`:
//  - nothing yet: a new Undefined carrying the reference state is created;
//  - LazySymbol:  its archive member is fetched;
//  - DylibSymbol: the reference is registered on it;
//  - Undefined:   its reference state is raised to the incoming one if that
//                 compares greater;
//  - any other kind: the entry is left as is.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
                                  bool isWeakRef) {
  const std::pair<Symbol *, bool> slot = insert(name, file);
  Symbol *const sym = slot.first;
  const RefState incoming = isWeakRef ? RefState::Weak : RefState::Strong;

  if (slot.second) {
    replaceSymbol<Undefined>(sym, name, file, incoming);
    return sym;
  }

  if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
    lazySym->fetchArchiveMember();
    return sym;
  }

  if (auto *dylibSym = dyn_cast<DylibSymbol>(sym)) {
    dylibSym->reference(incoming);
    return sym;
  }

  if (auto *undef = dyn_cast<Undefined>(sym)) {
    // Keep whichever reference state compares greater.
    if (undef->refState < incoming)
      undef->refState = incoming;
  }
  return sym;
}
|
|
|
|
|
2020-09-25 05:44:14 +08:00
|
|
|
Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
uint32_t align, bool isPrivateExtern) {
|
2020-09-25 05:44:14 +08:00
|
|
|
Symbol *s;
|
|
|
|
bool wasInserted;
|
2021-04-16 09:14:29 +08:00
|
|
|
std::tie(s, wasInserted) = insert(name, file);
|
2020-09-25 05:44:14 +08:00
|
|
|
|
|
|
|
if (!wasInserted) {
|
|
|
|
if (auto *common = dyn_cast<CommonSymbol>(s)) {
|
|
|
|
if (size < common->size)
|
|
|
|
return s;
|
2020-09-25 06:00:56 +08:00
|
|
|
} else if (isa<Defined>(s)) {
|
2020-09-25 05:44:14 +08:00
|
|
|
return s;
|
|
|
|
}
|
2020-09-25 06:00:56 +08:00
|
|
|
// Common symbols take priority over all non-Defined symbols, so in case of
|
|
|
|
// a name conflict, we fall through to the replaceSymbol() call below.
|
2020-09-25 05:44:14 +08:00
|
|
|
}
|
|
|
|
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-18 02:30:18 +08:00
|
|
|
replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern);
|
2020-09-25 05:44:14 +08:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2020-08-13 10:50:09 +08:00
|
|
|
// Adds a symbol `name` provided by the dylib `file` and returns the table
// entry for that name. A null `file` marks the symbol as a dynamic lookup.
//
// For an already-present name:
//   - a non-weak Defined is flagged with overridesWeakDef when the incoming
//     dylib definition is weak;
//   - the ref state of an existing Undefined or DylibSymbol is carried over
//     to the replacement (if one is made).
//
// The entry is (re)written as a DylibSymbol when the name is new, when the
// existing symbol is Undefined, or when the existing symbol is a DylibSymbol
// and either the new definition is non-weak while the old one is weak, or the
// new one has a file while the old one is a dynamic lookup.
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
                              bool isTlv) {
  auto entry = insert(name, file);
  Symbol *sym = entry.first;
  const bool isNewEntry = entry.second;

  RefState refState = RefState::Unreferenced;
  if (!isNewEntry) {
    if (auto *prevDefined = dyn_cast<Defined>(sym)) {
      if (isWeakDef && !prevDefined->isWeakDef())
        prevDefined->overridesWeakDef = true;
    } else if (auto *prevUndef = dyn_cast<Undefined>(sym)) {
      refState = prevUndef->refState;
    } else if (auto *prevDylib = dyn_cast<DylibSymbol>(sym)) {
      refState = prevDylib->getRefState();
    }
  }

  const bool isDynamicLookup = file == nullptr;

  bool shouldReplace = isNewEntry || isa<Undefined>(sym);
  if (!shouldReplace && isa<DylibSymbol>(sym)) {
    const bool strongOverWeak = !isWeakDef && sym->isWeakDef();
    shouldReplace =
        strongOverWeak ||
        (!isDynamicLookup && cast<DylibSymbol>(sym)->isDynamicLookup());
  }

  if (shouldReplace) {
    // Drop the old dylib symbol's reference before overwriting it.
    if (auto *oldDylib = dyn_cast<DylibSymbol>(sym))
      oldDylib->unreference();
    replaceSymbol<DylibSymbol>(sym, file, name, isWeakDef, refState, isTlv);
  }

  return sym;
}
|
|
|
|
|
2021-02-26 08:56:31 +08:00
|
|
|
// Adds `name` as a dynamic-lookup symbol: a dylib symbol with no file that is
// neither a weak definition nor thread-local. Returns the table entry.
Symbol *SymbolTable::addDynamicLookup(StringRef name) {
  DylibFile *const noFile = nullptr;
  return addDylib(name, noFile, /*isWeakDef=*/false, /*isTlv=*/false);
}
|
|
|
|
|
2020-05-15 03:43:51 +08:00
|
|
|
// Adds a lazy symbol `name` backed by archive member `sym` of `file` and
// returns the table entry for that name.
//
// A new name is recorded as a LazySymbol. If the name already exists as an
// Undefined, or as a DylibSymbol that is a weak definition, the archive
// member is fetched instead. Any other existing symbol is left untouched.
Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
                             const object::Archive::Symbol &sym) {
  auto entry = insert(name, file);
  Symbol *existing = entry.first;

  if (entry.second) {
    replaceSymbol<LazySymbol>(existing, file, sym);
    return existing;
  }

  const bool needsMember =
      isa<Undefined>(existing) ||
      (isa<DylibSymbol>(existing) && existing->isWeakDef());
  if (needsMember)
    file->fetch(sym);

  return existing;
}
|
|
|
|
|
2021-03-13 06:26:12 +08:00
|
|
|
// Adds a linker-synthesized definition of `name` at offset `value` in `isec`.
//
// The symbol is created through addDefined() with no input file, size 0, and
// the weak-def, thumb and no-dead-strip flags all cleared; `isPrivateExtern`
// and `referencedDynamically` are forwarded. `includeInSymtab` is then stored
// on the resulting symbol, which is returned.
Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
                                   uint64_t value, bool isPrivateExtern,
                                   bool includeInSymtab,
                                   bool referencedDynamically) {
  InputFile *const noFile = nullptr;
  Defined *synthetic =
      addDefined(name, noFile, isec, value, /*size=*/0, /*isWeakDef=*/false,
                 isPrivateExtern, /*isThumb=*/false, referencedDynamically,
                 /*noDeadStrip=*/false);
  synthetic->includeInSymtab = includeInSymtab;
  return synthetic;
}
|
|
|
|
|
2021-05-11 03:45:18 +08:00
|
|
|
void lld::macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) {
|
2021-07-22 23:20:36 +08:00
|
|
|
// Handle -U.
|
|
|
|
if (config->explicitDynamicLookups.count(sym.getName())) {
|
|
|
|
symtab->addDynamicLookup(sym.getName());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle -undefined.
|
2021-05-11 03:45:18 +08:00
|
|
|
auto message = [source, &sym]() {
|
2021-07-06 08:52:09 +08:00
|
|
|
std::string message = "undefined symbol";
|
|
|
|
if (config->archMultiple)
|
|
|
|
message += (" for arch " + getArchitectureName(config->arch())).str();
|
|
|
|
message += ": " + toString(sym);
|
2021-05-11 03:45:18 +08:00
|
|
|
if (!source.empty())
|
|
|
|
message += "\n>>> referenced by " + source.str();
|
|
|
|
else
|
|
|
|
message += "\n>>> referenced by " + toString(sym.getFile());
|
2021-03-01 02:42:14 +08:00
|
|
|
return message;
|
|
|
|
};
|
2020-12-14 11:31:33 +08:00
|
|
|
switch (config->undefinedSymbolTreatment) {
|
|
|
|
case UndefinedSymbolTreatment::error:
|
2021-05-11 03:45:18 +08:00
|
|
|
error(message());
|
2020-12-14 11:31:33 +08:00
|
|
|
break;
|
|
|
|
case UndefinedSymbolTreatment::warning:
|
2021-05-11 03:45:18 +08:00
|
|
|
warn(message());
|
2021-03-01 02:42:14 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2020-12-14 11:31:33 +08:00
|
|
|
case UndefinedSymbolTreatment::dynamic_lookup:
|
2021-03-01 02:42:14 +08:00
|
|
|
case UndefinedSymbolTreatment::suppress:
|
|
|
|
symtab->addDynamicLookup(sym.getName());
|
2020-12-14 11:31:33 +08:00
|
|
|
break;
|
|
|
|
case UndefinedSymbolTreatment::unknown:
|
|
|
|
llvm_unreachable("unknown -undefined TREATMENT");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-03 02:54:05 +08:00
|
|
|
// Out-of-line definition of the `symtab` pointer in namespace lld::macho.
// No initializer is given here.
SymbolTable *macho::symtab;
|