2015-08-14 22:12:54 +08:00
|
|
|
//===- SymbolTable.h --------------------------------------------*- C++ -*-===//
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLD_COFF_SYMBOL_TABLE_H
|
|
|
|
#define LLD_COFF_SYMBOL_TABLE_H
|
|
|
|
|
|
|
|
#include "InputFiles.h"
|
2017-02-03 07:58:14 +08:00
|
|
|
#include "LTO.h"
|
2016-12-12 06:15:30 +08:00
|
|
|
#include "llvm/ADT/CachedHashString.h"
|
2015-06-27 10:05:40 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/DenseMapInfo.h"
|
2015-06-27 02:58:24 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2015-06-10 01:52:17 +08:00
|
|
|
namespace llvm {
|
|
|
|
struct LTOCodeGenerator;
|
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
|
|
|
|
2015-06-30 02:50:11 +08:00
|
|
|
class Chunk;
|
2016-12-10 05:55:24 +08:00
|
|
|
class CommonChunk;
|
2015-06-30 02:50:11 +08:00
|
|
|
class Defined;
|
2016-12-09 04:20:22 +08:00
|
|
|
class DefinedAbsolute;
|
2019-01-30 10:17:27 +08:00
|
|
|
class DefinedRegular;
|
2016-12-09 04:20:22 +08:00
|
|
|
class DefinedRelative;
|
2015-06-30 02:50:11 +08:00
|
|
|
class Lazy;
|
2016-11-22 01:22:35 +08:00
|
|
|
class SectionChunk;
|
2017-11-04 05:21:47 +08:00
|
|
|
class Symbol;
|
2015-06-30 02:50:11 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// SymbolTable is a bucket of all known symbols, including defined,
|
|
|
|
// undefined, or lazy symbols (the last one is symbols in archive
|
|
|
|
// files whose archive members are not yet loaded).
|
|
|
|
//
|
|
|
|
// We put all symbols of all files to a SymbolTable, and the
|
|
|
|
// SymbolTable selects the "best" symbols if there are name
|
|
|
|
// conflicts. For example, obviously, a defined symbol is better than
|
|
|
|
// an undefined symbol. Or, if there's a conflict between a lazy and an
|
|
|
|
// undefined, it'll read an archive member to read a real definition
|
2016-12-10 05:55:24 +08:00
|
|
|
// to replace the lazy symbol. The logic is implemented in the
|
|
|
|
// add*() functions, which are called by input files as they are parsed.
|
|
|
|
// There is one add* function per symbol type.
|
2015-05-29 03:09:30 +08:00
|
|
|
class SymbolTable {
|
|
|
|
public:
|
2019-07-11 13:40:30 +08:00
|
|
|
void addFile(InputFile *file);
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2019-07-27 01:56:45 +08:00
|
|
|
// Emit errors for symbols that cannot be resolved.
|
|
|
|
void reportUnresolvable();
|
|
|
|
|
2016-12-10 05:55:24 +08:00
|
|
|
// Try to resolve any undefined symbols and update the symbol table
|
|
|
|
// accordingly, then print an error message for any remaining undefined
|
2019-07-27 01:56:45 +08:00
|
|
|
// symbols and warn about imported local symbols.
|
|
|
|
void resolveRemainingUndefines();
|
2015-05-29 03:09:30 +08:00
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
void loadMinGWAutomaticImports();
|
2019-07-11 13:40:30 +08:00
|
|
|
bool handleMinGWAutomaticImport(Symbol *sym, StringRef name);
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
// Returns a list of chunks of selected symbols.
|
|
|
|
std::vector<Chunk *> getChunks();
|
|
|
|
|
2015-07-02 11:59:04 +08:00
|
|
|
// Returns a symbol for a given name. Returns a nullptr if not found.
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *find(StringRef name);
|
|
|
|
Symbol *findUnderscore(StringRef name);
|
2015-05-31 11:34:08 +08:00
|
|
|
|
2015-07-02 08:04:14 +08:00
|
|
|
// Occasionally we have to resolve an undefined symbol to its
|
|
|
|
// mangled symbol. This function tries to find a mangled name
|
|
|
|
// for U from the symbol table, and if found, set the symbol as
|
|
|
|
// a weak alias for U.
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *findMangle(StringRef name);
|
2015-06-29 06:16:41 +08:00
|
|
|
|
2015-08-29 06:16:09 +08:00
|
|
|
// Build a set of COFF objects representing the combined contents of
|
|
|
|
// BitcodeFiles and add them to the symbol table. Called after all files are
|
|
|
|
// added and before the writer writes results to a file.
|
|
|
|
void addCombinedLTOObjects();
|
2017-02-07 04:47:55 +08:00
|
|
|
std::vector<StringRef> compileBitcodeFiles();
|
2015-06-02 04:10:10 +08:00
|
|
|
|
2015-06-01 03:55:40 +08:00
|
|
|
// Creates an Undefined symbol for a given name.
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *addUndefined(StringRef name);
|
2016-12-10 05:55:24 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *addSynthetic(StringRef n, Chunk *c);
|
|
|
|
Symbol *addAbsolute(StringRef n, uint64_t va);
|
2016-12-10 05:55:24 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias);
|
2019-07-19 21:29:10 +08:00
|
|
|
void addLazy(ArchiveFile *f, const Archive::Symbol &sym);
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *addAbsolute(StringRef n, COFFSymbolRef s);
|
|
|
|
Symbol *addRegular(InputFile *f, StringRef n,
|
|
|
|
const llvm::object::coff_symbol_generic *s = nullptr,
|
|
|
|
SectionChunk *c = nullptr);
|
2019-01-30 10:17:27 +08:00
|
|
|
std::pair<DefinedRegular *, bool>
|
2019-07-11 13:40:30 +08:00
|
|
|
addComdat(InputFile *f, StringRef n,
|
|
|
|
const llvm::object::coff_symbol_generic *s = nullptr);
|
|
|
|
Symbol *addCommon(InputFile *f, StringRef n, uint64_t size,
|
|
|
|
const llvm::object::coff_symbol_generic *s = nullptr,
|
|
|
|
CommonChunk *c = nullptr);
|
|
|
|
Symbol *addImportData(StringRef n, ImportFile *f);
|
|
|
|
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
|
|
|
|
uint16_t machine);
|
2019-08-23 03:40:07 +08:00
|
|
|
void addLibcall(StringRef name);
|
2016-12-10 05:55:24 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
void reportDuplicate(Symbol *existing, InputFile *newFile);
|
2015-06-01 03:55:40 +08:00
|
|
|
|
2015-06-25 11:31:47 +08:00
|
|
|
// A list of chunks to be added to .rdata.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Chunk *> localImportChunks;
|
2015-06-25 11:31:47 +08:00
|
|
|
|
2017-07-28 02:25:59 +08:00
|
|
|
// Iterates symbols in non-deterministic hash table order.
//
// Invokes `callback` once for every entry currently stored in symMap,
// passing the entry's mapped Symbol * as the only argument. `callback` is
// taken by value and may be any callable accepting a Symbol *.
|
2019-07-11 13:40:30 +08:00
|
|
|
template <typename T> void forEachSymbol(T callback) {
|
|
|
|
for (auto &pair : symMap)
|
|
|
|
// Only the mapped value is forwarded; the name key (pair.first) is not.
callback(pair.second);
|
2017-07-28 02:25:59 +08:00
|
|
|
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
private:
|
2019-07-27 01:56:45 +08:00
|
|
|
/// Given a name without "__imp_" prefix, returns a defined symbol
|
|
|
|
/// with the "__imp_" prefix, if it exists.
|
|
|
|
Defined *impSymbol(StringRef name);
|
2018-09-07 04:23:56 +08:00
|
|
|
/// Inserts symbol if not already present.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::pair<Symbol *, bool> insert(StringRef name);
|
2019-07-16 16:26:38 +08:00
|
|
|
/// Same as insert(name), but also sets isUsedInRegularObj.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::pair<Symbol *, bool> insert(StringRef name, InputFile *f);
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Symbol *> getSymsWithPrefix(StringRef prefix);
|
2015-07-01 03:35:21 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
|
|
|
|
std::unique_ptr<BitcodeCompiler> lto;
|
2015-05-29 03:09:30 +08:00
|
|
|
};
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
extern SymbolTable *symtab;
|
2016-12-10 05:55:24 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex);
|
2018-11-09 02:38:17 +08:00
|
|
|
|
2015-06-13 05:37:55 +08:00
|
|
|
} // namespace coff
|
2015-05-29 03:09:30 +08:00
|
|
|
} // namespace lld
|
|
|
|
|
|
|
|
#endif
|