2015-05-29 03:09:30 +08:00
|
|
|
//===- Symbols.cpp --------------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-12-08 07:17:02 +08:00
|
|
|
#include "Symbols.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "InputFiles.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-29 04:39:17 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2018-03-01 01:38:19 +08:00
|
|
|
#include "lld/Common/Strings.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2019-09-27 20:23:45 +08:00
|
|
|
#include "llvm/Demangle/Demangle.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
2016-12-08 07:17:02 +08:00
|
|
|
using namespace llvm;
|
2015-05-29 03:09:30 +08:00
|
|
|
using namespace llvm::object;
|
|
|
|
|
2019-03-12 07:02:18 +08:00
|
|
|
using namespace lld::coff;
|
|
|
|
|
2019-07-19 21:29:10 +08:00
|
|
|
namespace lld {
|
|
|
|
|
2019-04-20 06:51:49 +08:00
|
|
|
// Compile-time size guard: the build fails if SymbolUnion grows beyond 48
// bytes.
static_assert(sizeof(SymbolUnion) <= 48,
|
|
|
|
"symbols should be optimized for memory usage");
|
|
|
|
|
2017-01-06 18:15:47 +08:00
|
|
|
// Returns a symbol name for an error message.
|
2019-09-27 20:23:45 +08:00
|
|
|
static std::string maybeDemangleSymbol(StringRef symName) {
|
2019-09-02 21:25:46 +08:00
|
|
|
if (config->demangle) {
|
2019-09-27 20:23:45 +08:00
|
|
|
std::string prefix;
|
2019-10-05 03:47:59 +08:00
|
|
|
StringRef prefixless = symName;
|
|
|
|
if (prefixless.consume_front("__imp_"))
|
2019-09-27 20:23:45 +08:00
|
|
|
prefix = "__declspec(dllimport) ";
|
2019-10-05 03:47:59 +08:00
|
|
|
StringRef demangleInput = prefixless;
|
2019-09-27 20:23:45 +08:00
|
|
|
if (config->machine == I386)
|
|
|
|
demangleInput.consume_front("_");
|
2020-01-29 03:23:46 +08:00
|
|
|
std::string demangled = demangle(std::string(demangleInput));
|
2019-09-27 20:23:45 +08:00
|
|
|
if (demangled != demangleInput)
|
2020-01-29 03:23:46 +08:00
|
|
|
return prefix + demangle(std::string(demangleInput));
|
2019-10-05 03:47:59 +08:00
|
|
|
return (prefix + prefixless).str();
|
2019-09-02 21:25:46 +08:00
|
|
|
}
|
2020-01-29 03:23:46 +08:00
|
|
|
return std::string(symName);
|
2017-01-06 18:15:47 +08:00
|
|
|
}
|
2019-09-27 20:23:45 +08:00
|
|
|
// Produces the printable form of a COFF symbol: its name, run through
// maybeDemangleSymbol().
std::string toString(coff::Symbol &b) {
  StringRef rawName = b.getName();
  return maybeDemangleSymbol(rawName);
}
|
2019-07-24 03:00:01 +08:00
|
|
|
// Produces the printable form of an archive symbol: its name, run through
// maybeDemangleSymbol().
std::string toCOFFString(const Archive::Symbol &b) {
  StringRef rawName = b.getName();
  return maybeDemangleSymbol(rawName);
}
|
2017-01-06 18:15:47 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
namespace coff {
|
|
|
|
|
2020-05-03 10:53:49 +08:00
|
|
|
void Symbol::computeName() {
|
|
|
|
assert(nameData == nullptr &&
|
|
|
|
"should only compute the name once for DefinedCOFF symbols");
|
|
|
|
auto *d = cast<DefinedCOFF>(this);
|
2020-05-09 01:41:05 +08:00
|
|
|
StringRef nameStr =
|
|
|
|
check(cast<ObjFile>(d->file)->getCOFFObj()->getSymbolName(d->sym));
|
2020-05-03 10:53:49 +08:00
|
|
|
nameData = nameStr.data();
|
|
|
|
nameSize = nameStr.size();
|
|
|
|
assert(nameSize == nameStr.size() && "name length truncated");
|
[opt] Devirtualize the SymbolBody type hierarchy and start compacting
its members into the base class.
First, to help motivate this kind of change, understand that in
a self-link, LLD creates 5.5 million defined regular symbol bodies (and
6 million symbol bodies total). A significant portion of its time is
spent allocating the memory for these symbols, and befor ethis patch
the defined regular symbol body objects alone consumed some 420mb of
memory during the self link.
As a consequence, I think it is worth expending considerable effort to
make these objects as memory efficient as possible. This is the first of
several components of that. This change starts with the goal of removing
the virtual functins from SymbolBody so that it can avoid having a vptr
embedded in it when it already contains a "kind" member, and that member
can be much more compact than a vptr.
The primary way of doing this is to sink as much of the logic that we
would have to dispatch for into data in the base class. As part of this,
I made the various flags bits that will pack into a bitfield with the
kind tag. I also sank the Name down to eliminate the dispatch for that,
and used LLVM's RTTI-style dispatch for everything else (most of which
is cold and so doesn't matter terribly if we get minutely worse lowering
than a vtable dispatch).
As I was doing this, I wanted to make the RTTI-dispatch (which would
become much hotter than before) as efficient as possible, so I've
re-organized the tags somewhat. Notably, the common case (regular
defined symbols) is now zero which we can test for faster.
I also needed to rewrite the comparison routine used during resolving
symbols. This proved to be quite complex as the semantics of the
existing one were very subtle due to the back-and-forth virtual dispatch
caused by re-dispatching with reversed operands. I've consolidated it to
a single function and tried to comment it quite a bit more to help
explain what is going on. However, this may need more comments or other
explanations. It at least passes all the regression tests. I'm not
working on Windows, so I can't fully test it.
With all of these changes, the size of a DefinedRegular symbol on
a 64-bit build goes from 80 bytes to 64 bytes, and we save approximately
84mb or 20% of the memory consumed by these symbol bodies during the
link.
The link time appears marginally faster as well, and the profile hotness
of the memory allocation subsystem got a bit better, but there is still
a lot of allocation traffic.
Differential Revision: http://reviews.llvm.org/D10792
llvm-svn: 241001
2015-06-30 05:35:48 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns the `file` member for DefinedCOFF, LazyArchive and LazyObject
// symbols; every other symbol kind yields nullptr.
InputFile *Symbol::getFile() {
  InputFile *owner = nullptr;
  if (auto *coffSym = dyn_cast<DefinedCOFF>(this))
    owner = coffSym->file;
  else if (auto *archiveSym = dyn_cast<LazyArchive>(this))
    owner = archiveSym->file;
  else if (auto *objSym = dyn_cast<LazyObject>(this))
    owner = objSym->file;
  return owner;
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
bool Symbol::isLive() const {
|
2017-07-28 02:25:59 +08:00
|
|
|
if (auto *r = dyn_cast<DefinedRegular>(this))
|
2018-08-31 15:45:20 +08:00
|
|
|
return r->getChunk()->live;
|
2017-07-28 02:25:59 +08:00
|
|
|
if (auto *imp = dyn_cast<DefinedImportData>(this))
|
|
|
|
return imp->file->live;
|
|
|
|
if (auto *imp = dyn_cast<DefinedImportThunk>(this))
|
2018-05-11 03:01:28 +08:00
|
|
|
return imp->wrappedSym->file->thunkLive;
|
2017-07-28 02:25:59 +08:00
|
|
|
// Assume any other kind of symbol is live.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
// MinGW specific.
|
|
|
|
void Symbol::replaceKeepingName(Symbol *other, size_t size) {
|
2019-04-20 06:51:49 +08:00
|
|
|
StringRef origName = getName();
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
memcpy(this, other, size);
|
2019-04-20 06:51:49 +08:00
|
|
|
nameData = origName.data();
|
|
|
|
nameSize = origName.size();
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
}
|
|
|
|
|
2015-07-10 01:43:50 +08:00
|
|
|
// Wraps the raw `sym` pointer in a COFFSymbolRef, picking the 16- or 32-bit
// symbol record layout based on the object file's symbol table entry size.
COFFSymbolRef DefinedCOFF::getCOFFSymbol() {
  const size_t entrySize =
      cast<ObjFile>(file)->getCOFFObj()->getSymbolTableEntrySize();
  if (entrySize != sizeof(coff_symbol16)) {
    assert(entrySize == sizeof(coff_symbol32));
    return COFFSymbolRef(reinterpret_cast<const coff_symbol32 *>(sym));
  }
  return COFFSymbolRef(reinterpret_cast<const coff_symbol16 *>(sym));
}
|
|
|
|
|
2018-02-18 04:41:38 +08:00
|
|
|
// Out-of-class definition of the DefinedAbsolute::numOutputSections static
// member; with no initializer it is zero-initialized.
uint16_t DefinedAbsolute::numOutputSections;
|
2017-06-23 07:33:04 +08:00
|
|
|
|
2017-05-25 01:12:53 +08:00
|
|
|
// Creates the import thunk chunk for `s` that matches `machine`: AMD64, I386
// and ARM64 each get their own chunk type, and the remaining case is asserted
// to be ARMNT.
static Chunk *makeImportThunk(DefinedImportData *s, uint16_t machine) {
  Chunk *thunk;
  if (machine == AMD64) {
    thunk = make<ImportThunkChunkX64>(s);
  } else if (machine == I386) {
    thunk = make<ImportThunkChunkX86>(s);
  } else if (machine == ARM64) {
    thunk = make<ImportThunkChunkARM64>(s);
  } else {
    assert(machine == ARMNT);
    thunk = make<ImportThunkChunkARM>(s);
  }
  return thunk;
}
|
|
|
|
|
2015-07-25 09:16:06 +08:00
|
|
|
// Constructs an import thunk symbol called `name` that wraps the import data
// symbol `s`. The thunk's chunk is created by makeImportThunk() for the given
// `machine`.
DefinedImportThunk::DefinedImportThunk(StringRef name, DefinedImportData *s,
                                       uint16_t machine)
    : Defined(DefinedImportThunkKind, name),
      wrappedSym(s),
      data(makeImportThunk(s, machine)) {
}
|
2015-07-25 09:16:06 +08:00
|
|
|
|
2015-07-04 13:28:41 +08:00
|
|
|
// Follows the weakAlias chain starting at this symbol and returns the first
// Defined symbol found on it.
//
// Returns nullptr when the chain ends without reaching a Defined symbol, or
// when it reaches a symbol that is neither Defined nor Undefined (such a
// symbol has no weakAlias to follow).
Defined *Undefined::getWeakAlias() {
  // A weak alias may be a weak alias to another symbol, so check recursively.
  for (Symbol *a = weakAlias; a;) {
    if (auto *d = dyn_cast<Defined>(a))
      return d;
    // Only Undefined symbols carry a further alias link. Use a checked cast
    // so that any other symbol kind ends the walk instead of tripping the
    // assertion inside cast<>.
    auto *u = dyn_cast<Undefined>(a);
    if (!u)
      return nullptr;
    a = u->weakAlias;
  }
  return nullptr;
}
|
2019-08-23 03:40:07 +08:00
|
|
|
|
2019-09-04 04:32:16 +08:00
|
|
|
// Looks up the archive member that `sym` refers to and returns that member's
// memory buffer. Both steps go through CHECK with a message naming the symbol.
MemoryBufferRef LazyArchive::getMemberBuffer() {
  Archive::Child member = CHECK(
      sym.getMember(),
      "could not get the member for symbol " + toCOFFString(sym));
  return CHECK(member.getMemoryBufferRef(),
               "could not get the buffer for the member defining symbol " +
                   toCOFFString(sym));
}
|
2017-01-06 18:04:08 +08:00
|
|
|
} // namespace coff
|
2015-05-29 03:09:30 +08:00
|
|
|
} // namespace lld
|