2015-08-14 22:12:54 +08:00
|
|
|
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
#define LLD_ELF_SYMBOL_TABLE_H
|
|
|
|
|
|
|
|
#include "InputFiles.h"
|
2016-03-23 04:52:10 +08:00
|
|
|
#include "LTO.h"
|
2018-03-01 01:38:19 +08:00
|
|
|
#include "lld/Common/Strings.h"
|
2016-10-19 01:50:36 +08:00
|
|
|
#include "llvm/ADT/CachedHashString.h"
|
2016-04-15 04:42:43 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2015-07-25 05:03:07 +08:00
|
|
|
|
|
|
|
namespace lld {
|
2016-02-28 08:25:54 +08:00
|
|
|
namespace elf {
|
2019-05-16 10:14:00 +08:00
|
|
|
|
2019-05-16 11:29:03 +08:00
|
|
|
class CommonSymbol;
|
2017-12-10 00:56:18 +08:00
|
|
|
class Defined;
|
2019-05-16 10:14:00 +08:00
|
|
|
class LazyArchive;
|
|
|
|
class LazyObject;
|
2017-12-10 00:56:18 +08:00
|
|
|
class SectionBase;
|
2019-05-16 10:14:00 +08:00
|
|
|
class SharedSymbol;
|
|
|
|
class Undefined;
|
2017-06-30 08:34:35 +08:00
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
// SymbolTable is a bucket of all known symbols, including defined,
|
|
|
|
// undefined, or lazy symbols (the last being symbols in archive
|
|
|
|
// files whose archive members are not yet loaded).
|
|
|
|
//
|
|
|
|
// We put all symbols of all files into a SymbolTable, and the
|
|
|
|
// SymbolTable selects the "best" symbols if there are name
|
|
|
|
// conflicts. For example, obviously, a defined symbol is better than
|
|
|
|
// an undefined symbol. Or, if there's a conflict between a lazy and an
|
|
|
|
// undefined, it'll read an archive member to read a real definition
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 12:55:03 +08:00
|
|
|
// to replace the lazy symbol. The logic is implemented in the
|
|
|
|
// add*() functions, which are called by input files as they are parsed. There
|
|
|
|
// is one add* function per symbol type.
|
2017-07-27 02:42:48 +08:00
|
|
|
class SymbolTable {
|
2015-07-25 05:03:07 +08:00
|
|
|
public:
|
2017-07-27 02:42:48 +08:00
|
|
|
// LTO entry point, templated on ELFT.
// NOTE(review): presumably drives the LTO member of this class and adds the
// resulting native object(s) to the link; the definition is not in this
// header, so confirm there.
template <class ELFT> void addCombinedLTOObject();
|
Change how we handle -wrap.
We have an issue with -wrap that the option doesn't work well when
renamed symbols get PLT entries. I'll explain what is the issue and
how this patch solves it.
For one -wrap option, we have three symbols: foo, wrap_foo and real_foo.
Currently, we use memcpy to overwrite wrapped symbols so that they get
the same contents. This works in most cases but doesn't when the relocation
processor sets some flags in the symbol. memcpy'ed symbols are just
aliases, so they always have to have the same contents, but the
relocation processor breaks that assumption.
r336609 is an attempt to fix the issue by memcpy'ing again after
processing relocations, so that symbols that are out of sync get the
same contents again. That works in most cases as well, but it breaks
ASan build in a mysterious way.
We could probably fix the issue by choosing symbol attributes that need
to be copied after they are updated. But it feels too complicated to me.
So, in this patch, I fixed it once and for all. With this patch, we no
longer memcpy symbols. All references to renamed symbols point to new
symbols after wrapSymbols() is done.
Differential Revision: https://reviews.llvm.org/D50569
llvm-svn: 340387
2018-08-22 15:02:26 +08:00
|
|
|
// Applies one -wrap option to the three symbols involved.
// NOTE(review): going by the parameter names, Sym is the wrapped symbol, Real
// its "real" alias and Wrap its wrapper; confirm the exact roles against the
// definition.
void wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap);
|
2015-07-25 05:03:07 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns an ArrayRef view over SymVector; the vector itself is not copied.
ArrayRef<Symbol *> getSymbols() const { return SymVector; }
|
2015-09-09 03:43:27 +08:00
|
|
|
|
2019-05-16 11:45:13 +08:00
|
|
|
// Adds the undefined symbol New, resolving it against any same-named symbol
// as described in the class comment.
// NOTE(review): the returned Symbol is presumably the table's entry for that
// name; confirm.
Symbol *addUndefined(const Undefined &New);
|
2019-05-16 11:29:03 +08:00
|
|
|
// Adds the defined symbol New, resolving it against any same-named symbol as
// described in the class comment.
// NOTE(review): the returned Symbol is presumably the table's entry for that
// name; confirm.
Symbol *addDefined(const Defined &New);
|
2019-05-16 10:14:00 +08:00
|
|
|
// Adds the shared symbol New (the add* function for SharedSymbol); nothing is
// returned to the caller.
void addShared(const SharedSymbol &New);
|
2019-05-16 11:45:13 +08:00
|
|
|
// Adds the lazy archive symbol New (the add* function for LazyArchive).
// NOTE(review): presumably forwards to the private addLazy<> template;
// confirm.
void addLazyArchive(const LazyArchive &New);
|
|
|
|
// Adds the lazy object symbol New (the add* function for LazyObject).
// NOTE(review): presumably forwards to the private addLazy<> template;
// confirm.
void addLazyObject(const LazyObject &New);
|
2019-05-16 10:14:00 +08:00
|
|
|
// Adds a symbol described by the Defined object New.
// NOTE(review): judging by the name this is the variant used for symbols
// coming from bitcode (LTO) inputs; how it differs from addDefined() is not
// visible in this header.
Symbol *addBitcode(const Defined &New);
|
2019-05-16 11:29:03 +08:00
|
|
|
// Adds the common symbol New (the add* function for CommonSymbol).
// NOTE(review): the returned Symbol is presumably the table's entry for that
// name; confirm.
Symbol *addCommon(const CommonSymbol &New);
|
ELF: New symbol table design.
This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.
Performance numbers:
old(s) new(s)
Without debug info:
chrome 7.178 6.432 (-11.5%)
LLVMgold.so 0.505 0.502 (-0.5%)
clang 0.954 0.827 (-15.4%)
llvm-as 0.052 0.045 (-15.5%)
With debug info:
scylla 5.695 5.613 (-1.5%)
clang 14.396 14.143 (-1.8%)
Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.
The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.
In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.
I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.
This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.
[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html
Differential Revision: http://reviews.llvm.org/D19752
llvm-svn: 268178
2016-05-01 12:55:03 +08:00
|
|
|
|
2019-05-16 11:29:03 +08:00
|
|
|
// NOTE(review): presumably finds or creates the table entry matching New and
// returns it, using SymMap/SymVector for the bookkeeping; confirm in the
// definition.
Symbol *insert(const Symbol &New);
|
|
|
|
// Takes the existing symbol Old (mutable) and the incoming symbol New
// (read-only).
// NOTE(review): presumably folds attributes of New into Old; which properties
// are merged is decided in the definition.
void mergeProperties(Symbol *Old, const Symbol &New);
|
2017-02-22 06:32:51 +08:00
|
|
|
|
2019-05-16 11:45:13 +08:00
|
|
|
// NOTE(review): presumably loads the archive member or object file behind the
// lazy symbol Sym so that a real definition can replace it (see the class
// comment on lazy symbols); confirm.
void fetchLazy(Symbol *Sym);
|
2018-04-04 01:16:52 +08:00
|
|
|
|
2016-04-23 04:21:26 +08:00
|
|
|
// Processes the version script.
// NOTE(review): presumably drives the private handleAnonymousVersion() and
// assign*Version() helpers declared further down; confirm.
void scanVersionScript();
|
2016-06-23 15:00:17 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Looks up a symbol by Name.
// NOTE(review): the result for an unknown name is not visible in this header
// (likely nullptr); confirm before dereferencing.
Symbol *find(StringRef Name);
|
2016-07-18 01:50:09 +08:00
|
|
|
|
|
|
|
// NOTE(review): presumably marks the symbol called Name for tracing; what
// "tracing" reports and when is defined outside this header.
void trace(StringRef Name);
|
2015-10-02 05:22:26 +08:00
|
|
|
|
2017-09-09 02:16:59 +08:00
|
|
|
// NOTE(review): presumably applies the dynamic list to the symbols in this
// table; confirm in the definition.
void handleDynamicList();
|
|
|
|
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 10:25:05 +08:00
|
|
|
// Maps a .so name to its SharedFile so that the same shared object file is
// not linked more than once.
|
2019-04-09 01:35:55 +08:00
|
|
|
llvm::DenseMap<StringRef, SharedFile *> SoNames;
|
[ELF] Support --{,no-}allow-shlib-undefined
Summary:
In ld.bfd/gold, --no-allow-shlib-undefined is the default when linking
an executable. This patch implements a check to error on undefined
symbols in a shared object, if all of its DT_NEEDED entries are seen.
Our approach resembles the one used in gold, achieves a good balance to
be useful but not too smart (ld.bfd traces all DSOs and emulates the
behavior of a dynamic linker to catch more cases).
The error is issued based on the symbol table, different from undefined
reference errors issued for relocations. It is most effective when there
are DSOs that were not linked with -z defs (e.g. when static sanitizers
runtime is used).
gold has a comment that some system libraries on GNU/Linux may have
spurious undefined references and thus system libraries should be
excluded (https://sourceware.org/bugzilla/show_bug.cgi?id=6811). The
story may have changed now but we make --allow-shlib-undefined the
default for now. Its interaction with -shared can be discussed in the
future.
Reviewers: ruiu, grimar, pcc, espindola
Reviewed By: ruiu
Subscribers: joerg, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D57385
llvm-svn: 352826
2019-02-01 10:25:05 +08:00
|
|
|
|
2015-07-25 05:03:07 +08:00
|
|
|
private:
|
2019-05-16 11:45:13 +08:00
|
|
|
// Generic helper over the lazy symbol type LazyT.
// NOTE(review): presumably the shared implementation behind addLazyArchive()
// and addLazyObject(); confirm.
template <class LazyT> void addLazy(const LazyT &New);
|
2018-10-11 06:49:29 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns, by value, the symbols selected by the version pattern Ver.
// NOTE(review): how this differs from findAllByVersion() (likely exact name
// versus wildcard matching) is not visible in this header.
std::vector<Symbol *> findByVersion(SymbolVersion Ver);
|
|
|
|
// Returns, by value, the symbols selected by the version pattern Ver.
// NOTE(review): how this differs from findByVersion() (likely wildcard versus
// exact name matching) is not visible in this header.
std::vector<Symbol *> findAllByVersion(SymbolVersion Ver);
|
2016-11-16 02:41:52 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns the demangled-name map by reference.
// NOTE(review): DemangledSyms is an Optional, so this presumably builds the
// map on first use; confirm.
llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
|
2016-09-14 04:51:30 +08:00
|
|
|
// NOTE(review): presumably handles the anonymous (unnamed) version of a
// version script; confirm in the definition.
void handleAnonymousVersion();
|
2016-11-17 11:39:21 +08:00
|
|
|
// Version-script helper taking the pattern Ver, the numeric VersionId and the
// version's name.
// NOTE(review): presumably assigns VersionId to the symbols that match Ver
// exactly, with VersionName used for diagnostics; confirm.
void assignExactVersion(SymbolVersion Ver, uint16_t VersionId,
|
2016-11-17 10:09:42 +08:00
|
|
|
StringRef VersionName);
|
2016-11-17 11:39:21 +08:00
|
|
|
// Version-script helper taking the pattern Ver and the numeric VersionId.
// NOTE(review): presumably assigns VersionId to the symbols matched by a
// wildcard pattern; confirm.
void assignWildcardVersion(SymbolVersion Ver, uint16_t VersionId);
|
2016-07-16 20:26:39 +08:00
|
|
|
|
2015-09-18 02:26:25 +08:00
|
|
|
// The order the global symbols are in is not defined. We can use an arbitrary
|
|
|
|
// order, but it has to be reproducible. That is true even when cross linking.
|
|
|
|
// The default hashing of StringRef produces different results on 32 and 64
|
2016-04-15 04:42:43 +08:00
|
|
|
// bit systems so we use a map to a vector. That is arbitrary, deterministic
|
|
|
|
// but a bit inefficient.
|
2015-09-18 02:26:25 +08:00
|
|
|
// FIXME: Experiment with passing in a custom hashing or sorting the symbols
|
|
|
|
// once symbol resolution is finished.
|
2017-11-28 07:16:06 +08:00
|
|
|
// Keyed by a CachedHashStringRef.
// NOTE(review): the key is presumably the symbol name and the int value the
// symbol's index in SymVector; confirm.
llvm::DenseMap<llvm::CachedHashStringRef, int> SymMap;
|
2017-11-04 05:21:47 +08:00
|
|
|
// The symbol pointers exposed through getSymbols().
// NOTE(review): presumably kept in insertion order, which would be what makes
// the symbol order reproducible; confirm.
std::vector<Symbol *> SymVector;
|
2015-09-04 02:56:20 +08:00
|
|
|
|
2016-01-09 06:14:15 +08:00
|
|
|
// Comdat groups define "link once" sections. If two comdat groups have the
|
|
|
|
// same name, only one of them is linked, and the other is ignored. This set
|
|
|
|
// is used to uniquify them.
|
2017-05-26 05:53:02 +08:00
|
|
|
// Keys are comdat group names.
llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;
|
2015-10-10 03:25:07 +08:00
|
|
|
|
2016-11-16 02:41:52 +08:00
|
|
|
// A map from demangled symbol names to their symbol objects.
|
|
|
|
// This mapping is 1:N because two symbols with different versions
|
|
|
|
// can have the same name. We use this map to handle "extern C++ {}"
|
|
|
|
// directive in version scripts.
|
2017-11-04 05:21:47 +08:00
|
|
|
// Wrapped in Optional, so the map may be absent.
// NOTE(review): presumably left empty until getDemangledSyms() first needs
// it; confirm.
llvm::Optional<llvm::StringMap<std::vector<Symbol *>>> DemangledSyms;
|
2016-11-16 02:41:52 +08:00
|
|
|
|
|
|
|
// For LTO.
|
2016-11-26 13:37:04 +08:00
|
|
|
// Owning pointer to the BitcodeCompiler.
// NOTE(review): presumably created by addCombinedLTOObject(); confirm.
std::unique_ptr<BitcodeCompiler> LTO;
|
2015-07-25 05:03:07 +08:00
|
|
|
};
|
|
|
|
|
2017-07-27 02:42:48 +08:00
|
|
|
// Global pointer to the symbol table. This is only a declaration (extern);
// the definition and its initialization live outside this header.
extern SymbolTable *Symtab;
|
2016-02-28 08:25:54 +08:00
|
|
|
} // namespace elf
|
2015-07-25 05:03:07 +08:00
|
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif
|