2015-05-29 03:09:30 +08:00
|
|
|
//===- SymbolTable.cpp ----------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-12-09 04:20:22 +08:00
|
|
|
#include "SymbolTable.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "Config.h"
|
|
|
|
#include "Driver.h"
|
2017-02-03 07:58:14 +08:00
|
|
|
#include "LTO.h"
|
2018-04-18 07:32:33 +08:00
|
|
|
#include "PDB.h"
|
2015-06-30 02:50:11 +08:00
|
|
|
#include "Symbols.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-29 04:39:17 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2018-01-18 03:16:26 +08:00
|
|
|
#include "lld/Common/Timer.h"
|
2015-12-04 10:42:47 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2019-06-12 19:32:43 +08:00
|
|
|
#include "llvm/Object/WindowsMachineFlag.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-06-29 06:16:41 +08:00
|
|
|
#include <utility>
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2015-05-31 11:57:30 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// File-local timer labelled "LTO", created as a child of Timer::root().
static Timer ltoTimer("LTO", Timer::root());
|
2018-01-18 03:16:26 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Namespace-scope pointer to a SymbolTable. It is only declared here; nothing
// in the visible part of this file assigns it.
SymbolTable *symtab;
|
2015-09-22 03:12:36 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Parses `file` and registers it with the link.
//
// Steps, in order:
//   1. Log the file name and parse the file.
//   2. Reconcile the file's machine type with config->machine: if the target
//      machine is still unknown it is taken from this file; if both are known
//      and differ, an error is reported and the file is not registered.
//   3. Append the file to the instance list matching its concrete kind
//      (ObjFile, BitcodeFile or ImportFile); other kinds are not listed.
//   4. Hand the file to the driver so its directives get processed.
void SymbolTable::addFile(InputFile *file) {
  log("Reading " + toString(file));
  file->parse();

  const MachineTypes fileMachine = file->getMachineType();
  const bool targetUnknown = config->machine == IMAGE_FILE_MACHINE_UNKNOWN;
  if (targetUnknown) {
    // The first file seen decides the target machine (which may itself still
    // be "unknown").
    config->machine = fileMachine;
  } else if (fileMachine != IMAGE_FILE_MACHINE_UNKNOWN &&
             fileMachine != config->machine) {
    error(toString(file) + ": machine type " + machineToStr(fileMachine) +
          " conflicts with " + machineToStr(config->machine));
    return;
  }

  if (auto *obj = dyn_cast<ObjFile>(file))
    ObjFile::instances.push_back(obj);
  else if (auto *bitcode = dyn_cast<BitcodeFile>(file))
    BitcodeFile::instances.push_back(bitcode);
  else if (auto *import = dyn_cast<ImportFile>(file))
    ImportFile::instances.push_back(import);

  driver->parseDirectives(file);
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
static void errorOrWarn(const Twine &s) {
|
|
|
|
if (config->forceUnresolved)
|
|
|
|
warn(s);
|
2017-10-07 07:43:54 +08:00
|
|
|
else
|
2019-07-11 13:40:30 +08:00
|
|
|
error(s);
|
2017-10-07 07:43:54 +08:00
|
|
|
}
|
|
|
|
|
2018-09-16 02:27:09 +08:00
|
|
|
// Returns the symbol in SC whose value is <= Addr that is closest to Addr.
|
|
|
|
// This is generally the global variable or function whose definition contains
|
|
|
|
// Addr.
|
2019-07-11 13:40:30 +08:00
|
|
|
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
|
|
|
|
DefinedRegular *candidate = nullptr;
|
2018-04-18 07:32:33 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
for (Symbol *s : sc->file->getSymbols()) {
|
|
|
|
auto *d = dyn_cast_or_null<DefinedRegular>(s);
|
2019-07-16 01:51:02 +08:00
|
|
|
if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
|
2019-07-11 13:40:30 +08:00
|
|
|
(candidate && d->getValue() < candidate->getValue()))
|
2018-04-18 07:32:33 +08:00
|
|
|
continue;
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
candidate = d;
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
return candidate;
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
|
2019-06-25 17:55:55 +08:00
|
|
|
// Given a file and the index of a symbol in that file, returns a description
|
|
|
|
// of all references to that symbol from that file. If no debug information is
|
|
|
|
// available, returns just the name of the file, else one string per actual
|
|
|
|
// reference as described in the debug info.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
|
2018-04-18 07:32:33 +08:00
|
|
|
struct Location {
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *sym;
|
|
|
|
std::pair<StringRef, uint32_t> fileLine;
|
2018-04-18 07:32:33 +08:00
|
|
|
};
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Location> locations;
|
2018-04-18 07:32:33 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
for (Chunk *c : file->getChunks()) {
|
|
|
|
auto *sc = dyn_cast<SectionChunk>(c);
|
|
|
|
if (!sc)
|
2018-04-18 07:32:33 +08:00
|
|
|
continue;
|
2019-07-11 13:40:30 +08:00
|
|
|
for (const coff_relocation &r : sc->getRelocs()) {
|
|
|
|
if (r.SymbolTableIndex != symIndex)
|
2018-04-18 07:32:33 +08:00
|
|
|
continue;
|
2019-07-11 13:40:30 +08:00
|
|
|
std::pair<StringRef, uint32_t> fileLine =
|
|
|
|
getFileLine(sc, r.VirtualAddress);
|
|
|
|
Symbol *sym = getSymbol(sc, r.VirtualAddress);
|
|
|
|
if (!fileLine.first.empty() || sym)
|
|
|
|
locations.push_back({sym, fileLine});
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
if (locations.empty())
|
|
|
|
return std::vector<std::string>({"\n>>> referenced by " + toString(file)});
|
2018-04-18 07:32:33 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<std::string> symbolLocations(locations.size());
|
|
|
|
size_t i = 0;
|
|
|
|
for (Location loc : locations) {
|
|
|
|
llvm::raw_string_ostream os(symbolLocations[i++]);
|
|
|
|
os << "\n>>> referenced by ";
|
|
|
|
if (!loc.fileLine.first.empty())
|
|
|
|
os << loc.fileLine.first << ":" << loc.fileLine.second
|
2018-04-18 07:32:33 +08:00
|
|
|
<< "\n>>> ";
|
2019-07-11 13:40:30 +08:00
|
|
|
os << toString(file);
|
|
|
|
if (loc.sym)
|
|
|
|
os << ":(" << toString(*loc.sym) << ')';
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
return symbolLocations;
|
2019-06-25 17:55:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// For an undefined symbol, stores all files referencing it and the index of
|
|
|
|
// the undefined symbol in each file.
|
|
|
|
struct UndefinedDiag {
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *sym;
|
2019-06-25 17:55:55 +08:00
|
|
|
struct File {
|
2019-07-11 13:40:30 +08:00
|
|
|
ObjFile *oFile;
|
|
|
|
uint64_t symIndex;
|
2019-06-25 17:55:55 +08:00
|
|
|
};
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<File> files;
|
2019-06-25 17:55:55 +08:00
|
|
|
};
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
|
|
|
|
std::string out;
|
|
|
|
llvm::raw_string_ostream os(out);
|
|
|
|
os << "undefined symbol: " << toString(*undefDiag.sym);
|
|
|
|
|
|
|
|
const size_t maxUndefReferences = 10;
|
|
|
|
size_t i = 0, numRefs = 0;
|
|
|
|
for (const UndefinedDiag::File &ref : undefDiag.files) {
|
|
|
|
std::vector<std::string> symbolLocations =
|
|
|
|
getSymbolLocations(ref.oFile, ref.symIndex);
|
|
|
|
numRefs += symbolLocations.size();
|
|
|
|
for (const std::string &s : symbolLocations) {
|
|
|
|
if (i >= maxUndefReferences)
|
2019-06-25 17:55:55 +08:00
|
|
|
break;
|
2019-07-11 13:40:30 +08:00
|
|
|
os << s;
|
|
|
|
i++;
|
2019-06-25 17:55:55 +08:00
|
|
|
}
|
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
if (i < numRefs)
|
|
|
|
os << "\n>>> referenced " << numRefs - i << " more times";
|
|
|
|
errorOrWarn(os.str());
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
void SymbolTable::loadMinGWAutomaticImports() {
|
2019-07-11 13:40:30 +08:00
|
|
|
for (auto &i : symMap) {
|
|
|
|
Symbol *sym = i.second;
|
|
|
|
auto *undef = dyn_cast<Undefined>(sym);
|
|
|
|
if (!undef)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
2019-07-11 13:40:30 +08:00
|
|
|
if (!sym->isUsedInRegularObj)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
StringRef name = undef->getName();
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
if (name.startswith("__imp_"))
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
// If we have an undefined symbol, but we have a Lazy representing a
|
|
|
|
// symbol we could load from file, make sure to load that.
|
2019-07-11 13:40:30 +08:00
|
|
|
Lazy *l = dyn_cast_or_null<Lazy>(find(("__imp_" + name).str()));
|
|
|
|
if (!l || l->pendingArchiveLoad)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
log("Loading lazy " + l->getName() + " from " + l->file->getName() +
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
" for automatic import");
|
2019-07-11 13:40:30 +08:00
|
|
|
l->pendingArchiveLoad = true;
|
2019-07-19 21:29:10 +08:00
|
|
|
l->file->addMember(l->sym);
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
|
|
|
|
if (name.startswith("__imp_"))
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
return false;
|
2019-07-11 13:40:30 +08:00
|
|
|
Defined *imp = dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
|
|
|
|
if (!imp)
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Replace the reference directly to a variable with a reference
|
|
|
|
// to the import address table instead. This obviously isn't right,
|
2019-07-16 16:26:38 +08:00
|
|
|
// but we mark the symbol as isRuntimePseudoReloc, and a later pass
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
// will add runtime pseudo relocations for every relocation against
|
|
|
|
// this Symbol. The runtime pseudo relocation framework expects the
|
|
|
|
// reference itself to point at the IAT entry.
|
2019-07-11 13:40:30 +08:00
|
|
|
size_t impSize = 0;
|
|
|
|
if (isa<DefinedImportData>(imp)) {
|
|
|
|
log("Automatically importing " + name + " from " +
|
|
|
|
cast<DefinedImportData>(imp)->getDLLName());
|
|
|
|
impSize = sizeof(DefinedImportData);
|
|
|
|
} else if (isa<DefinedRegular>(imp)) {
|
|
|
|
log("Automatically importing " + name + " from " +
|
|
|
|
toString(cast<DefinedRegular>(imp)->file));
|
|
|
|
impSize = sizeof(DefinedRegular);
|
2018-09-26 14:13:39 +08:00
|
|
|
} else {
|
2019-07-11 13:40:30 +08:00
|
|
|
warn("unable to automatically import " + name + " from " + imp->getName() +
|
|
|
|
" from " + toString(cast<DefinedRegular>(imp)->file) +
|
2018-09-26 14:13:39 +08:00
|
|
|
"; unexpected symbol type");
|
|
|
|
return false;
|
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
sym->replaceKeepingName(imp, impSize);
|
|
|
|
sym->isRuntimePseudoReloc = true;
|
2018-08-31 15:45:20 +08:00
|
|
|
|
|
|
|
// There may exist symbols named .refptr.<name> which only consist
|
|
|
|
// of a single pointer to <name>. If it turns out <name> is
|
|
|
|
// automatically imported, we don't need to keep the .refptr.<name>
|
|
|
|
// pointer at all, but redirect all accesses to it to the IAT entry
|
|
|
|
// for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
|
2019-07-11 13:40:30 +08:00
|
|
|
DefinedRegular *refptr =
|
|
|
|
dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
|
|
|
|
if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
|
|
|
|
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
|
|
|
|
if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
|
|
|
|
log("Replacing .refptr." + name + " with " + imp->getName());
|
|
|
|
refptr->getChunk()->live = false;
|
|
|
|
refptr->replaceKeepingName(imp, impSize);
|
2018-08-31 15:45:20 +08:00
|
|
|
}
|
|
|
|
}
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-12-10 05:55:24 +08:00
|
|
|
void SymbolTable::reportRemainingUndefines() {
|
2019-07-11 13:40:30 +08:00
|
|
|
SmallPtrSet<Symbol *, 8> undefs;
|
|
|
|
DenseMap<Symbol *, Symbol *> localImports;
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
for (auto &i : symMap) {
|
|
|
|
Symbol *sym = i.second;
|
|
|
|
auto *undef = dyn_cast<Undefined>(sym);
|
|
|
|
if (!undef)
|
2015-05-29 03:09:30 +08:00
|
|
|
continue;
|
2019-07-11 13:40:30 +08:00
|
|
|
if (!sym->isUsedInRegularObj)
|
2016-12-10 05:55:24 +08:00
|
|
|
continue;
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
StringRef name = undef->getName();
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2015-07-04 13:28:41 +08:00
|
|
|
// A weak alias may have been resolved, so check for that.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (Defined *d = undef->getWeakAlias()) {
|
2017-11-01 05:26:42 +08:00
|
|
|
// We want to replace Sym with D. However, we can't just blindly
|
|
|
|
// copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
|
|
|
|
// internal symbol, and internal symbols are stored as "unparented"
|
|
|
|
// Symbols. For that reason we need to check which type of symbol we
|
|
|
|
// are dealing with and copy the correct number of bytes.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (isa<DefinedRegular>(d))
|
|
|
|
memcpy(sym, d, sizeof(DefinedRegular));
|
|
|
|
else if (isa<DefinedAbsolute>(d))
|
|
|
|
memcpy(sym, d, sizeof(DefinedAbsolute));
|
2017-11-01 05:26:42 +08:00
|
|
|
else
|
2019-07-11 13:40:30 +08:00
|
|
|
memcpy(sym, d, sizeof(SymbolUnion));
|
2015-07-04 13:28:41 +08:00
|
|
|
continue;
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2015-06-25 10:21:44 +08:00
|
|
|
// If we can resolve a symbol by removing __imp_ prefix, do that.
|
|
|
|
// This odd rule is for compatibility with MSVC linker.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (name.startswith("__imp_")) {
|
|
|
|
Symbol *imp = find(name.substr(strlen("__imp_")));
|
|
|
|
if (imp && isa<Defined>(imp)) {
|
|
|
|
auto *d = cast<Defined>(imp);
|
|
|
|
replaceSymbol<DefinedLocalImport>(sym, name, d);
|
|
|
|
localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
|
|
|
|
localImports[sym] = d;
|
2015-06-25 10:21:44 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2018-11-08 22:42:37 +08:00
|
|
|
// We don't want to report missing Microsoft precompiled headers symbols.
|
|
|
|
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj
|
2019-07-11 13:40:30 +08:00
|
|
|
if (name.contains("_PchSym_"))
|
2018-11-08 22:42:37 +08:00
|
|
|
continue;
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
if (config->mingw && handleMinGWAutomaticImport(sym, name))
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has a custom framework of their own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
|
2015-06-29 03:35:15 +08:00
|
|
|
// Remaining undefined symbols are not fatal if /force is specified.
|
|
|
|
// They are replaced with dummy defined symbols.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (config->forceUnresolved)
|
|
|
|
replaceSymbol<DefinedAbsolute>(sym, name, 0);
|
|
|
|
undefs.insert(sym);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
if (undefs.empty() && localImports.empty())
|
2015-08-06 22:58:50 +08:00
|
|
|
return;
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-12 14:12:27 +08:00
|
|
|
for (Symbol *b : config->gcroot) {
|
2019-07-11 13:40:30 +08:00
|
|
|
if (undefs.count(b))
|
|
|
|
errorOrWarn("<root>: undefined symbol: " + toString(*b));
|
|
|
|
if (config->warnLocallyDefinedImported)
|
|
|
|
if (Symbol *imp = localImports.lookup(b))
|
|
|
|
warn("<root>: locally defined symbol imported: " + toString(*imp) +
|
|
|
|
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
|
2017-12-15 15:49:21 +08:00
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<UndefinedDiag> undefDiags;
|
|
|
|
DenseMap<Symbol *, int> firstDiag;
|
2019-06-25 17:55:55 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
|
|
|
size_t symIndex = (size_t)-1;
|
|
|
|
for (Symbol *sym : file->getSymbols()) {
|
|
|
|
++symIndex;
|
|
|
|
if (!sym)
|
2017-12-15 15:49:21 +08:00
|
|
|
continue;
|
2019-07-11 13:40:30 +08:00
|
|
|
if (undefs.count(sym)) {
|
|
|
|
auto it = firstDiag.find(sym);
|
|
|
|
if (it == firstDiag.end()) {
|
|
|
|
firstDiag[sym] = undefDiags.size();
|
|
|
|
undefDiags.push_back({sym, {{file, symIndex}}});
|
2019-06-25 17:55:55 +08:00
|
|
|
} else {
|
2019-07-11 13:40:30 +08:00
|
|
|
undefDiags[it->second].files.push_back({file, symIndex});
|
2019-06-25 17:55:55 +08:00
|
|
|
}
|
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
if (config->warnLocallyDefinedImported)
|
|
|
|
if (Symbol *imp = localImports.lookup(sym))
|
|
|
|
warn(toString(file) +
|
|
|
|
": locally defined symbol imported: " + toString(*imp) +
|
|
|
|
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
|
2017-12-15 15:49:21 +08:00
|
|
|
}
|
|
|
|
}
|
2019-06-25 17:55:55 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
for (const UndefinedDiag& undefDiag : undefDiags)
|
|
|
|
reportUndefinedSymbol(undefDiag);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Returns the symbol-table slot for `name`, creating it if necessary.
//
// The bool in the returned pair is true when the slot was created by this
// call. A newly created slot is backed by a SymbolUnion obtained from make<>
// and has its isUsedInRegularObj and pendingArchiveLoad flags cleared; no
// other field is written here.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
  Symbol *&slot = symMap[CachedHashStringRef(name)];
  if (slot)
    return {slot, false};

  slot = reinterpret_cast<Symbol *>(make<SymbolUnion>());
  slot->isUsedInRegularObj = false;
  slot->pendingArchiveLoad = false;
  return {slot, true};
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Same as insert(name), but additionally marks the symbol as used in a
// regular object unless `file` is a BitcodeFile. A null `file` counts as
// "not bitcode", so the flag is set in that case as well.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
  std::pair<Symbol *, bool> entry = insert(name);
  const bool fromBitcode = file && isa<BitcodeFile>(file);
  if (!fromBitcode)
    entry.first->isUsedInRegularObj = true;
  return entry;
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Records an undefined reference to `name` coming from `f`.
//
// - If the name is new, or it is currently Lazy and this reference is a weak
//   alias, the slot becomes an Undefined symbol.
// - Otherwise, if the existing symbol is Lazy and no archive load is pending
//   for it yet, the pending flag is set and addMember is called on the Lazy
//   symbol's file.
// - Any other existing symbol is returned untouched.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
                                  bool isWeakAlias) {
  std::pair<Symbol *, bool> entry = insert(name, f);
  Symbol *sym = entry.first;

  // `entry.second` must be tested first: a slot that was just created has not
  // been given a concrete symbol kind yet, so it must not be classified.
  const bool makeUndefined =
      entry.second || (isWeakAlias && isa<Lazy>(sym));
  if (makeUndefined) {
    replaceSymbol<Undefined>(sym, name);
    return sym;
  }

  auto *lazy = dyn_cast<Lazy>(sym);
  if (lazy && !sym->pendingArchiveLoad) {
    sym->pendingArchiveLoad = true;
    lazy->file->addMember(lazy->sym);
  }
  return sym;
}
|
|
|
|
|
2019-07-19 21:29:10 +08:00
|
|
|
// Registers an archive symbol `sym` provided by archive `f`.
//
// A previously unseen name becomes a Lazy symbol. If the name already exists
// as an Undefined symbol that has no weak alias and no pending archive load,
// the pending flag is set and f->addMember(sym) is called. In every other case
// the existing symbol is left alone.
void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) {
  std::pair<Symbol *, bool> entry = insert(sym.getName());
  Symbol *existing = entry.first;
  if (entry.second) {
    replaceSymbol<Lazy>(existing, f, sym);
    return;
  }

  auto *undef = dyn_cast<Undefined>(existing);
  const bool needsMember =
      undef && !undef->weakAlias && !existing->pendingArchiveLoad;
  if (needsMember) {
    existing->pendingArchiveLoad = true;
    f->addMember(sym);
  }
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Emits a "duplicate symbol" diagnostic naming `existing`, the file it came
// from, and `newFile`. The diagnostic is a warning when config->forceMultiple
// is set and an error otherwise.
void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) {
  std::string msg = "duplicate symbol: ";
  msg += toString(*existing);
  msg += " in ";
  msg += toString(existing->getFile());
  msg += " and in ";
  msg += toString(newFile);

  if (!config->forceMultiple) {
    error(msg);
    return;
  }
  warn(msg);
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `n` as an absolute symbol described by the COFF symbol `sym`.
//
// The definition is installed when the name is new or currently Undefined or
// Lazy. If the name is already bound to something that is not a DefinedCOFF,
// a duplicate is reported; an existing DefinedCOFF is kept silently. The
// table entry is returned in all cases.
Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
  std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  // Test `entry.second` first so a freshly created slot is never classified.
  const bool replaceable =
      entry.second || isa<Undefined>(target) || isa<Lazy>(target);
  if (replaceable) {
    replaceSymbol<DefinedAbsolute>(target, n, sym);
    return target;
  }

  if (!isa<DefinedCOFF>(target))
    reportDuplicate(target, nullptr);
  return target;
}
|
2015-09-20 08:00:05 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `n` as an absolute symbol with value `va`.
//
// Resolution rules match the COFFSymbolRef overload: a new, Undefined or Lazy
// entry is replaced by the absolute definition; an existing entry that is not
// a DefinedCOFF triggers a duplicate report; an existing DefinedCOFF is left
// as is. The table entry is always returned.
Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
  std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  // A freshly created slot has no kind yet, so check `entry.second` first.
  if (entry.second || isa<Undefined>(target) || isa<Lazy>(target)) {
    replaceSymbol<DefinedAbsolute>(target, n, va);
  } else if (!isa<DefinedCOFF>(target)) {
    reportDuplicate(target, nullptr);
  }
  return target;
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `n` as a DefinedSynthetic symbol associated with chunk `c`.
//
// The definition replaces a new, Undefined or Lazy entry. If the name is
// already bound to something that is not a DefinedCOFF, a duplicate is
// reported; an existing DefinedCOFF is kept. Returns the table entry.
Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
  std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  // `entry.second` is evaluated first so a brand-new slot is not inspected.
  const bool replaceable =
      entry.second || isa<Undefined>(target) || isa<Lazy>(target);
  if (!replaceable) {
    if (!isa<DefinedCOFF>(target))
      reportDuplicate(target, nullptr);
    return target;
  }

  replaceSymbol<DefinedSynthetic>(target, n, c);
  return target;
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `n` as a non-COMDAT, external DefinedRegular symbol from file `f`,
// backed by COFF symbol `sym` and section chunk `c`.
//
// If the name is already bound to a DefinedRegular, the existing symbol is
// kept and a duplicate is reported; otherwise the new definition is installed.
// Returns the table entry.
Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
                                const coff_symbol_generic *sym,
                                SectionChunk *c) {
  std::pair<Symbol *, bool> entry = insert(n, f);
  Symbol *target = entry.first;

  // Only classify the slot when it existed before this call.
  const bool alreadyRegular = !entry.second && isa<DefinedRegular>(target);
  if (alreadyRegular) {
    reportDuplicate(target, f);
    return target;
  }

  replaceSymbol<DefinedRegular>(target, f, n, /*IsCOMDAT*/ false,
                                /*IsExternal*/ true, sym, c);
  return target;
}
|
|
|
|
|
2019-01-30 10:17:27 +08:00
|
|
|
std::pair<DefinedRegular *, bool>
|
2019-07-11 13:40:30 +08:00
|
|
|
SymbolTable::addComdat(InputFile *f, StringRef n,
|
|
|
|
const coff_symbol_generic *sym) {
|
|
|
|
Symbol *s;
|
|
|
|
bool wasInserted;
|
|
|
|
std::tie(s, wasInserted) = insert(n, f);
|
|
|
|
if (wasInserted || !isa<DefinedRegular>(s)) {
|
|
|
|
replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
|
|
|
|
/*IsExternal*/ true, sym, nullptr);
|
|
|
|
return {cast<DefinedRegular>(s), true};
|
2017-11-28 09:30:07 +08:00
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
auto *existingSymbol = cast<DefinedRegular>(s);
|
|
|
|
if (!existingSymbol->isCOMDAT)
|
|
|
|
reportDuplicate(s, f);
|
|
|
|
return {existingSymbol, false};
|
2017-11-28 09:30:07 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Adds a common symbol `n` of `size` bytes from file `f`.
//
// The new DefinedCommon is installed when the name is new, when the existing
// symbol is not a DefinedCOFF, or when the existing symbol is a DefinedCommon
// whose size is smaller than `size`. Any other existing DefinedCOFF is kept.
// Returns the table entry.
Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
                               const coff_symbol_generic *sym, CommonChunk *c) {
  std::pair<Symbol *, bool> entry = insert(n, f);
  Symbol *target = entry.first;

  // Only classify the slot when it existed before this call.
  bool takeNew = entry.second || !isa<DefinedCOFF>(target);
  if (!takeNew)
    if (auto *common = dyn_cast<DefinedCommon>(target))
      takeNew = size > common->getSize();

  if (takeNew)
    replaceSymbol<DefinedCommon>(target, f, n, size, sym, c);
  return target;
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `n` as a DefinedImportData symbol belonging to import file `f`.
//
// Succeeds (returning the symbol) when the name is new or currently Undefined
// or Lazy. For any other existing symbol a duplicate is reported and nullptr
// is returned.
Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
  std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  // `!entry.second` guards the isa<> checks against a brand-new slot.
  const bool occupied =
      !entry.second && !isa<Undefined>(target) && !isa<Lazy>(target);
  if (occupied) {
    reportDuplicate(target, f);
    return nullptr;
  }

  replaceSymbol<DefinedImportData>(target, n, f);
  return target;
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Defines `name` as a DefinedImportThunk for import data `id` on `machine`.
//
// Succeeds (returning the symbol) when the name is new or currently Undefined
// or Lazy. For any other existing symbol a duplicate is reported against
// id->file and nullptr is returned.
Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
                                    uint16_t machine) {
  std::pair<Symbol *, bool> entry = insert(name, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  // `!entry.second` guards the isa<> checks against a brand-new slot.
  const bool occupied =
      !entry.second && !isa<Undefined>(target) && !isa<Lazy>(target);
  if (occupied) {
    reportDuplicate(target, id->file);
    return nullptr;
  }

  replaceSymbol<DefinedImportThunk>(target, name, id, machine);
  return target;
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
std::vector<Chunk *> SymbolTable::getChunks() {
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Chunk *> res;
|
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
|
|
|
ArrayRef<Chunk *> v = file->getChunks();
|
|
|
|
res.insert(res.end(), v.begin(), v.end());
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
return res;
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Looks up `name` in the symbol map without inserting anything and returns
// whatever symMap.lookup yields for it.
Symbol *SymbolTable::find(StringRef name) {
  CachedHashStringRef key(name);
  return symMap.lookup(key);
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Looks up `name`, prepending "_" first when the configured machine is I386.
// On every other machine the name is looked up unchanged.
Symbol *SymbolTable::findUnderscore(StringRef name) {
  if (config->machine != I386)
    return find(name);

  std::string decorated = ("_" + name).str();
  return find(decorated);
}
|
|
|
|
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
// Return all symbols that start with Prefix, possibly ignoring the first
|
|
|
|
// character of Prefix or the first character symbol.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
|
|
|
|
std::vector<Symbol *> syms;
|
|
|
|
for (auto pair : symMap) {
|
|
|
|
StringRef name = pair.first.val();
|
|
|
|
if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
|
|
|
|
name.drop_front().startswith(prefix) ||
|
|
|
|
name.drop_front().startswith(prefix.drop_front())) {
|
|
|
|
syms.push_back(pair.second);
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
}
|
2015-07-14 10:58:13 +08:00
|
|
|
}
|
2019-07-11 13:40:30 +08:00
|
|
|
return syms;
|
2015-07-14 10:58:13 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
Symbol *SymbolTable::findMangle(StringRef name) {
|
|
|
|
if (Symbol *sym = find(name))
|
|
|
|
if (!isa<Undefined>(sym))
|
|
|
|
return sym;
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
|
|
|
|
// Efficient fuzzy string lookup is impossible with a hash table, so iterate
|
|
|
|
// the symbol table once and collect all possibly matching symbols into this
|
|
|
|
// vector. Then compare each possibly matching symbol with each possible
|
|
|
|
// mangling.
|
2019-07-11 13:40:30 +08:00
|
|
|
std::vector<Symbol *> syms = getSymsWithPrefix(name);
|
|
|
|
auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
|
|
|
|
std::string prefix = t.str();
|
|
|
|
for (auto *s : syms)
|
|
|
|
if (s->getName().startswith(prefix))
|
|
|
|
return s;
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return nullptr;
|
|
|
|
};
|
|
|
|
|
|
|
|
// For non-x86, just look for C++ functions.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (config->machine != I386)
|
|
|
|
return findByPrefix("?" + name + "@@Y");
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
if (!name.startswith("_"))
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return nullptr;
|
2017-10-23 17:08:24 +08:00
|
|
|
// Search for x86 stdcall function.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (Symbol *s = findByPrefix(name + "@"))
|
|
|
|
return s;
|
2017-10-23 17:08:24 +08:00
|
|
|
// Search for x86 fastcall function.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
|
|
|
|
return s;
|
2017-10-23 17:08:24 +08:00
|
|
|
// Search for x86 vectorcall function.
|
2019-07-11 13:40:30 +08:00
|
|
|
if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
|
|
|
|
return s;
|
2015-07-14 10:58:13 +08:00
|
|
|
// Search for x86 C++ non-member function.
|
2019-07-11 13:40:30 +08:00
|
|
|
return findByPrefix("?" + name.substr(1) + "@@Y");
|
2015-06-29 06:16:41 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
// Convenience overload: adds an undefined reference to `name` with no
// originating file and without treating it as a weak alias.
Symbol *SymbolTable::addUndefined(StringRef name) {
  Symbol *sym = addUndefined(name, nullptr, false);
  return sym;
}
|
|
|
|
|
2017-02-07 04:47:55 +08:00
|
|
|
std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
|
2019-07-11 13:40:30 +08:00
|
|
|
lto.reset(new BitcodeCompiler);
|
|
|
|
for (BitcodeFile *f : BitcodeFile::instances)
|
|
|
|
lto->add(*f);
|
|
|
|
return lto->compile();
|
2017-02-07 04:47:55 +08:00
|
|
|
}
|
2015-06-02 04:10:10 +08:00
|
|
|
|
2017-02-07 04:47:55 +08:00
|
|
|
void SymbolTable::addCombinedLTOObjects() {
|
2019-07-11 13:40:30 +08:00
|
|
|
if (BitcodeFile::instances.empty())
|
2017-02-07 04:47:55 +08:00
|
|
|
return;
|
2018-01-18 03:16:26 +08:00
|
|
|
|
2019-07-11 13:40:30 +08:00
|
|
|
ScopedTimer t(ltoTimer);
|
|
|
|
for (StringRef object : compileBitcodeFiles()) {
|
|
|
|
auto *obj = make<ObjFile>(MemoryBufferRef(object, "lto.tmp"));
|
|
|
|
obj->parse();
|
|
|
|
ObjFile::instances.push_back(obj);
|
2015-08-29 06:16:09 +08:00
|
|
|
}
|
2015-06-10 01:52:17 +08:00
|
|
|
}
|
2017-02-07 04:47:55 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
} // namespace coff
|
|
|
|
} // namespace lld
|