2015-08-14 22:12:54 +08:00
|
|
|
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
#define LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
|
2019-05-28 14:33:06 +08:00
|
|
|
#include "Symbols.h"
|
2016-10-19 01:50:36 +08:00
|
|
|
#include "llvm/ADT/CachedHashString.h"
|
2016-04-15 04:42:43 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2015-07-25 05:03:07 +08:00
|
|
|
|
|
|
|
namespace lld {
|
2016-02-28 08:25:54 +08:00
|
|
|
namespace elf {
|
2019-05-16 10:14:00 +08:00
|
|
|
|
2022-02-08 13:53:34 +08:00
|
|
|
// Forward declaration: this header only refers to InputFile through a pointer
// (see comdatGroups), so the full definition is not needed here.
class InputFile;
|
2022-02-24 13:36:45 +08:00
|
|
|
// Forward declaration: this header only refers to SharedFile through a pointer
// (see soNames), so the full definition is not needed here.
class SharedFile;
|
2022-02-08 13:53:34 +08:00
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
// SymbolTable is a bucket of all known symbols, including defined,
|
|
|
|
// undefined, or lazy symbols (the last one is symbols in archive
|
|
|
|
// files whose archive members are not yet loaded).
|
|
|
|
//
|
|
|
|
// We put all symbols of all files to a SymbolTable, and the
|
|
|
|
// SymbolTable selects the "best" symbols if there are name
|
|
|
|
// conflicts. For example, obviously, a defined symbol is better than
|
|
|
|
// an undefined symbol. Or, if there's a conflict between a lazy and an
|
|
|
|
// undefined, it'll read an archive member to read a real definition
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 12:55:03 +08:00
|
|
|
// to replace the lazy symbol. The logic is implemented in the
|
|
|
|
// add*() functions, which are called by input files as they are parsed. There
|
|
|
|
// is one add* function per symbol type.
|
2017-07-27 02:42:48 +08:00
|
|
|
class SymbolTable {
|
2019-11-21 03:16:15 +08:00
|
|
|
public:
|
2021-12-27 10:11:45 +08:00
|
|
|
// Read-only accessor: hands out an ArrayRef over the private symVector
// without copying it.
ArrayRef<Symbol *> symbols() const {
  return symVector;
}
|
2015-09-09 03:43:27 +08:00
|
|
|
|
2019-11-21 03:16:15 +08:00
|
|
|
// Operates on three symbols: the original (sym), its "real" alias and its
// "wrap" replacement.
// NOTE(review): the parameter names suggest this implements the linker's
// --wrap option; the definition is not in this header -- confirm there.
void wrap(Symbol *sym, Symbol *real, Symbol *wrap);
|
|
|
|
|
2019-05-17 09:55:20 +08:00
|
|
|
// Returns the Symbol associated with `name`.
// NOTE(review): presumably returns the existing entry or creates a new one
// when the name is unknown; defined outside this header -- confirm.
Symbol *insert(StringRef name);
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 12:55:03 +08:00
|
|
|
|
2019-08-13 14:19:39 +08:00
|
|
|
// Adds `newSym` to the table; accepts any Symbol by const reference and
// returns a Symbol pointer.
// NOTE(review): presumably the returned pointer is the table's entry for that
// name after conflict resolution -- confirm in the implementation.
Symbol *addSymbol(const Symbol &newSym);
|
2022-02-23 02:07:58 +08:00
|
|
|
// Like addSymbol(), but only accepts a Defined symbol.
// NOTE(review): the name implies a duplicate-definition check is performed
// before/while adding -- confirm in the implementation.
Symbol *addAndCheckDuplicate(const Defined &newSym);
|
2017-02-22 06:32:51 +08:00
|
|
|
|
2016-04-23 04:21:26 +08:00
|
|
|
// NOTE(review): presumably applies the version script's patterns to the
// symbols in this table (the private assign*Version/find*ByVersion helpers
// look like its building blocks) -- confirm in the implementation.
void scanVersionScript();
|
2016-06-23 15:00:17 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Looks up a symbol by name and returns a pointer to it.
// NOTE(review): the pointer return suggests nullptr signals "not found" --
// confirm in the implementation.
Symbol *find(StringRef name);
|
2016-07-18 01:50:09 +08:00
|
|
|
|
2017-09-09 02:16:59 +08:00
|
|
|
// NOTE(review): presumably processes the entries of a --dynamic-list file
// against the symbols in this table -- confirm in the implementation.
void handleDynamicList();
|
|
|
|
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 10:25:05 +08:00
|
|
|
// Shared object files seen so far, keyed by name, used to avoid linking the
// same shared object file more than once.
// NOTE(review): the key is presumably the DT_SONAME -- confirm against the
// code that populates this map.
|
2022-01-17 13:19:01 +08:00
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, SharedFile *> soNames;
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 10:25:05 +08:00
|
|
|
|
2019-05-22 17:06:42 +08:00
|
|
|
// Comdat groups define "link once" sections. If two comdat groups have the
|
|
|
|
// same name, only one of them is linked, and the other is ignored. This map
|
|
|
|
// is used to uniquify them. It is keyed by group name and maps to an
// InputFile (NOTE(review): presumably the file whose copy was kept -- confirm).
|
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, const InputFile *> comdatGroups;
|
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
private:
|
2021-12-24 08:49:38 +08:00
|
|
|
// Returns the symbols selected by the version pattern `ver`.
// NOTE(review): the exact matching rules live in the implementation --
// confirm there.
SmallVector<Symbol *, 0> findByVersion(SymbolVersion ver);
|
|
|
|
// Returns the symbols selected by the version pattern `ver`;
// `includeNonDefault` widens or narrows the result set.
// NOTE(review): presumably the wildcard counterpart of findByVersion(), with
// the flag controlling non-default-versioned symbols -- confirm.
SmallVector<Symbol *, 0> findAllByVersion(SymbolVersion ver,
|
|
|
|
bool includeNonDefault);
|
2016-11-16 02:41:52 +08:00
|
|
|
|
2021-12-24 08:49:38 +08:00
|
|
|
// Returns a mutable reference to a map from name to a list of symbols.
// NOTE(review): given the Optional member demangledSyms, this presumably
// builds that map on first use and returns it -- confirm.
llvm::StringMap<SmallVector<Symbol *, 0>> &getDemangledSyms();
|
2021-08-05 14:52:55 +08:00
|
|
|
// Assigns version `versionId` (named `versionName`) for the pattern `ver`.
// NOTE(review): the bool result presumably reports whether any symbol
// matched; `includeNonDefault` presumably controls whether
// non-default-versioned symbols are considered -- confirm.
bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
|
|
|
|
StringRef versionName, bool includeNonDefault);
|
|
|
|
// Wildcard counterpart of assignExactVersion(): takes the pattern `ver` and
// the target `versionId`, and returns nothing.
// NOTE(review): `includeNonDefault` presumably controls whether
// non-default-versioned symbols are considered -- confirm.
void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
|
|
|
|
bool includeNonDefault);
|
2016-07-16 20:26:39 +08:00
|
|
|
|
2015-09-18 02:26:25 +08:00
|
|
|
// The order the global symbols are in is not defined. We can use an arbitrary
|
|
|
|
// order, but it has to be reproducible. That is true even when cross linking.
|
|
|
|
// The default hashing of StringRef produces different results on 32 and 64
|
2016-04-15 04:42:43 +08:00
|
|
|
// bit systems so we use a map to a vector. That is arbitrary, deterministic
|
|
|
|
// but a bit inefficient.
|
2015-09-18 02:26:25 +08:00
|
|
|
// FIXME: Experiment with passing in a custom hashing or sorting the symbols
|
|
|
|
// once symbol resolution is finished.
|
2017-11-28 07:16:06 +08:00
|
|
|
// Name -> int lookup table.
// NOTE(review): the int is presumably the symbol's index in symVector --
// confirm in the implementation.
llvm::DenseMap<llvm::CachedHashStringRef, int> symMap;
|
2021-12-24 08:49:38 +08:00
|
|
|
// Storage for the symbol pointers; exposed read-only through symbols().
SmallVector<Symbol *, 0> symVector;
|
2015-09-04 02:56:20 +08:00
|
|
|
|
2016-11-16 02:41:52 +08:00
|
|
|
// A map from demangled symbol names to their symbol objects.
|
|
|
|
// This mapping is 1:N because two symbols with different versions
|
|
|
|
// can have the same name. We use this map to handle "extern C++ {}"
|
|
|
|
// directive in version scripts.
|
2021-12-24 08:49:38 +08:00
|
|
|
// Wrapped in Optional, so the map may be absent.
// NOTE(review): presumably left empty until getDemangledSyms() is first
// called -- confirm.
llvm::Optional<llvm::StringMap<SmallVector<Symbol *, 0>>> demangledSyms;
|
2015-07-25 05:03:07 +08:00
|
|
|
};
|
|
|
|
|
2021-12-23 06:36:14 +08:00
|
|
|
// Declaration of the shared SymbolTable instance, held through a
// std::unique_ptr; the definition lives in another translation unit.
// NOTE(review): the pointer may be null until the table is created -- confirm
// where it is initialized.
extern std::unique_ptr<SymbolTable> symtab;
|
2019-05-17 09:55:20 +08:00
|
|
|
|
2016-02-28 08:25:54 +08:00
|
|
|
} // namespace elf
|
2015-07-25 05:03:07 +08:00
|
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif
|