2015-05-29 03:09:30 +08:00
|
|
|
//===- SymbolTable.cpp ----------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-05-29 03:09:30 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-12-09 04:20:22 +08:00
|
|
|
#include "SymbolTable.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "Config.h"
|
|
|
|
#include "Driver.h"
|
2017-02-03 07:58:14 +08:00
|
|
|
#include "LTO.h"
|
2018-04-18 07:32:33 +08:00
|
|
|
#include "PDB.h"
|
2015-06-30 02:50:11 +08:00
|
|
|
#include "Symbols.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-29 04:39:17 +08:00
|
|
|
#include "lld/Common/Memory.h"
|
2018-01-18 03:16:26 +08:00
|
|
|
#include "lld/Common/Timer.h"
|
2019-09-25 19:03:48 +08:00
|
|
|
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
|
2015-12-04 10:42:47 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
2019-11-15 05:46:00 +08:00
|
|
|
#include "llvm/LTO/LTO.h"
|
2019-06-12 19:32:43 +08:00
|
|
|
#include "llvm/Object/WindowsMachineFlag.h"
|
2015-05-29 03:09:30 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-06-29 06:16:41 +08:00
|
|
|
#include <utility>
|
2015-05-29 03:09:30 +08:00
|
|
|
|
2015-05-31 11:57:30 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
namespace lld {
|
|
|
|
namespace coff {
|
|
|
|
|
[LLD] [COFF] Fix up missing stdcall decorations in MinGW mode
If linking directly against a DLL without an import library, the
DLL export symbols might not contain stdcall decorations.
If we have an undefined symbol with decoration, and we happen to have
a matching undecorated symbol (which either is lazy and can be loaded,
or already defined), then alias it against that instead.
This matches what's done in reverse, when we have a def file
declaring to export a symbol without decoration, but we only have
a defined decorated symbol. In that case we do a fuzzy match
(SymbolTable::findMangle). This case is more straightforward; if we
have a decorated undefined symbol, just strip the decoration and look
for the corresponding undecorated symbol name.
Add warnings and options for either silencing the warning or disabling
the whole feature, corresponding to how ld.bfd does it.
(This feature works for any symbol decoration mismatch, not only when
linking against a DLL directly; ld.bfd also tolerates it anywhere,
and also fixes up mismatches in the other direction, like
SymbolTable::findMangle, for any symbol, not only exports. But in
practice, at least for lld, it would primarily end up used for linking
against DLLs.)
Differential Revision: https://reviews.llvm.org/D104532
2021-06-18 02:51:37 +08:00
|
|
|
// Strips at most one leading character from s, and only if that character is
// one of the characters listed in the NUL-terminated string chars.
// Returns s unchanged when it is empty or starts with any other character.
StringRef ltrim1(StringRef s, const char *chars) {
  if (s.empty())
    return s;
  // strchr() treats the terminating NUL of chars as part of the string, so a
  // NUL byte would always "match". Rule it out explicitly so that only the
  // characters actually listed in chars are ever trimmed.
  const char first = s[0];
  if (first != '\0' && strchr(chars, first))
    return s.substr(1);
  return s;
}
|
|
|
|
|
2018-01-18 03:16:26 +08:00
|
|
|
// File-local timer named "LTO", constructed with Timer::root() (presumably its
// parent in the timing report -- confirm in lld/Common/Timer.h).
static Timer ltoTimer("LTO", Timer::root());
|
|
|
|
|
2016-12-10 05:55:24 +08:00
|
|
|
// Namespace-scope pointer to a SymbolTable. It is only declared here; where it
// gets assigned is not visible in this part of the file.
SymbolTable *symtab;
|
2015-09-22 03:12:36 +08:00
|
|
|
|
2016-12-10 05:55:24 +08:00
|
|
|
// Parses the given input file, checks that its machine type is compatible with
// the one recorded in config, appends it to the instance list matching its
// kind, and finally hands it to the driver for directive processing.
void SymbolTable::addFile(InputFile *file) {
  log("Reading " + toString(file));
  file->parse();

  // While config->machine is still unknown it simply takes this file's type.
  // After that, a file whose (known) type differs is reported and not added.
  const MachineTypes fileMachine = file->getMachineType();
  if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) {
    config->machine = fileMachine;
  } else {
    const bool conflicting = fileMachine != IMAGE_FILE_MACHINE_UNKNOWN &&
                             config->machine != fileMachine;
    if (conflicting) {
      error(toString(file) + ": machine type " + machineToStr(fileMachine) +
            " conflicts with " + machineToStr(config->machine));
      return;
    }
  }

  // Record the file in the list for its concrete kind; other kinds are not
  // tracked here.
  if (auto *obj = dyn_cast<ObjFile>(file))
    ObjFile::instances.push_back(obj);
  else if (auto *bitcode = dyn_cast<BitcodeFile>(file))
    BitcodeFile::instances.push_back(bitcode);
  else if (auto *imp = dyn_cast<ImportFile>(file))
    ImportFile::instances.push_back(imp);

  driver->parseDirectives(file);
}
|
|
|
|
|
2017-10-07 07:43:54 +08:00
|
|
|
static void errorOrWarn(const Twine &s) {
|
2018-09-14 06:05:10 +08:00
|
|
|
if (config->forceUnresolved)
|
2017-10-07 07:43:54 +08:00
|
|
|
warn(s);
|
|
|
|
else
|
|
|
|
error(s);
|
|
|
|
}
|
|
|
|
|
2019-09-04 04:32:16 +08:00
|
|
|
// Causes the file associated with a lazy symbol to be linked in.
|
|
|
|
static void forceLazy(Symbol *s) {
|
|
|
|
s->pendingArchiveLoad = true;
|
|
|
|
switch (s->kind()) {
|
|
|
|
case Symbol::Kind::LazyArchiveKind: {
|
|
|
|
auto *l = cast<LazyArchive>(s);
|
|
|
|
l->file->addMember(l->sym);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Symbol::Kind::LazyObjectKind:
|
|
|
|
cast<LazyObject>(s)->file->fetch();
|
|
|
|
break;
|
[LLD] [COFF] Support linking directly against DLLs in MinGW mode
GNU ld.bfd supports linking directly against DLLs without using an
import library, and some projects have picked up on this habit.
(There's no one single unsurmountable issue with using import
libraries, but this is a regularly surfacing missing feature.)
As long as one is linking by name (instead of by ordinal), the DLL
export table contains most of the information needed. (One can
inspect what section a symbol points at, to see if it's a function
or data symbol. The practical implementation of this loops over all
sections for each symbol, but as long as they're not very many, that
should hopefully be tolerable performance wise.)
One exception where the information in the DLL isn't entirely enough
is on i386 with stdcall functions; depending on how they're done,
the exported function name can be a plain undecorated name, while
the import library would contain the full decorated symbol name. This
issue is addressed separately in a different patch.
This is implemented mimicing the structure of a regular import library,
with one InputFile corresponding to the static archive that just adds
lazy symbols, which then are fetched when they are needed. When such
a symbol is fetched, we synthesize a coff_import_header structure
in memory and create a regular ImportFile out of it.
The implementation could be even smaller by just creating ImportFiles
for every symbol available immediately, but that would have the
drawback of actually ending up importing all symbols unless running
with GC enabled (and mingw mode defaults to having it disabled for
historical reasons).
Differential Revision: https://reviews.llvm.org/D104530
2021-06-16 21:59:46 +08:00
|
|
|
case Symbol::Kind::LazyDLLSymbolKind: {
|
|
|
|
auto *l = cast<LazyDLLSymbol>(s);
|
|
|
|
l->file->makeImport(l->sym);
|
|
|
|
break;
|
|
|
|
}
|
2019-09-04 04:32:16 +08:00
|
|
|
default:
|
|
|
|
llvm_unreachable(
|
|
|
|
"symbol passed to forceLazy is not a LazyArchive or LazyObject");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-16 02:27:09 +08:00
|
|
|
// Returns the symbol in SC whose value is <= Addr that is closest to Addr.
|
|
|
|
// This is generally the global variable or function whose definition contains
|
|
|
|
// Addr.
|
|
|
|
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
|
2018-04-18 07:32:33 +08:00
|
|
|
DefinedRegular *candidate = nullptr;
|
|
|
|
|
|
|
|
for (Symbol *s : sc->file->getSymbols()) {
|
|
|
|
auto *d = dyn_cast_or_null<DefinedRegular>(s);
|
2019-07-27 01:56:45 +08:00
|
|
|
if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
|
|
|
|
d->getValue() > addr ||
|
2018-04-18 07:32:33 +08:00
|
|
|
(candidate && d->getValue() < candidate->getValue()))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
candidate = d;
|
|
|
|
}
|
|
|
|
|
2018-09-16 02:27:09 +08:00
|
|
|
return candidate;
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
|
2019-07-27 01:56:45 +08:00
|
|
|
// Builds the single "referenced by" description for a bitcode file: the
// module's source file name (when it has one) followed by the file itself.
static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
  std::string description = "\n>>> referenced by ";

  StringRef sourceName = file->obj->getSourceFileName();
  if (!sourceName.empty()) {
    description += sourceName.str();
    description += "\n>>> ";
  }
  description += toString(file);

  std::vector<std::string> result;
  result.push_back(std::move(description));
  return result;
}
|
|
|
|
|
2019-10-15 17:18:18 +08:00
|
|
|
// Asks the chunk's file for the line info of addr (passing the section number
// minus one) and returns the file name and line from it. Yields None when no
// line info is returned or its file name is DILineInfo::BadString.
static Optional<std::pair<StringRef, uint32_t>>
getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
  Optional<DILineInfo> info =
      c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
  if (!info || info->FileName == DILineInfo::BadString)
    return None;

  // The file name is passed through saver.save() before being returned.
  return std::make_pair(saver.save(info->FileName), info->Line);
}
|
|
|
|
|
2019-10-15 17:18:18 +08:00
|
|
|
// Looks up the source file and line for addr within chunk c. CodeView is
// always tried first, because MinGW can optionally use it even though its
// default is dwarf. Dwarf is consulted only in MinGW mode and only when
// CodeView produced nothing.
static Optional<std::pair<StringRef, uint32_t>>
getFileLine(const SectionChunk *c, uint32_t addr) {
  Optional<std::pair<StringRef, uint32_t>> fromCodeView =
      getFileLineCodeView(c, addr);
  if (fromCodeView || !config->mingw)
    return fromCodeView;
  return getFileLineDwarf(c, addr);
}
|
|
|
|
|
2019-06-25 17:55:55 +08:00
|
|
|
// Given a file and the index of a symbol in that file, returns a description
|
|
|
|
// of all references to that symbol from that file. If no debug information is
|
|
|
|
// available, returns just the name of the file, else one string per actual
|
|
|
|
// reference as described in the debug info.
|
2020-07-10 05:12:02 +08:00
|
|
|
// Returns up to maxStrings string descriptions, along with the total number of
|
|
|
|
// locations found.
|
|
|
|
static std::pair<std::vector<std::string>, size_t>
|
|
|
|
getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
|
2018-04-18 07:32:33 +08:00
|
|
|
struct Location {
|
2018-09-16 02:27:09 +08:00
|
|
|
Symbol *sym;
|
2018-04-18 07:32:33 +08:00
|
|
|
std::pair<StringRef, uint32_t> fileLine;
|
|
|
|
};
|
|
|
|
std::vector<Location> locations;
|
2020-07-10 05:12:02 +08:00
|
|
|
size_t numLocations = 0;
|
2018-04-18 07:32:33 +08:00
|
|
|
|
|
|
|
for (Chunk *c : file->getChunks()) {
|
|
|
|
auto *sc = dyn_cast<SectionChunk>(c);
|
|
|
|
if (!sc)
|
|
|
|
continue;
|
2019-05-04 04:17:14 +08:00
|
|
|
for (const coff_relocation &r : sc->getRelocs()) {
|
2018-04-18 07:32:33 +08:00
|
|
|
if (r.SymbolTableIndex != symIndex)
|
|
|
|
continue;
|
2020-07-10 05:12:02 +08:00
|
|
|
numLocations++;
|
|
|
|
if (locations.size() >= maxStrings)
|
|
|
|
continue;
|
|
|
|
|
2019-10-15 17:18:18 +08:00
|
|
|
Optional<std::pair<StringRef, uint32_t>> fileLine =
|
2018-04-18 07:32:33 +08:00
|
|
|
getFileLine(sc, r.VirtualAddress);
|
2018-09-16 02:27:09 +08:00
|
|
|
Symbol *sym = getSymbol(sc, r.VirtualAddress);
|
2019-10-15 17:18:18 +08:00
|
|
|
if (fileLine)
|
|
|
|
locations.push_back({sym, *fileLine});
|
|
|
|
else if (sym)
|
2019-10-15 17:33:14 +08:00
|
|
|
locations.push_back({sym, {"", 0}});
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-10 05:12:02 +08:00
|
|
|
if (maxStrings == 0)
|
|
|
|
return std::make_pair(std::vector<std::string>(), numLocations);
|
|
|
|
|
|
|
|
if (numLocations == 0)
|
|
|
|
return std::make_pair(
|
|
|
|
std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
|
2018-04-18 07:32:33 +08:00
|
|
|
|
2019-06-25 17:55:55 +08:00
|
|
|
std::vector<std::string> symbolLocations(locations.size());
|
|
|
|
size_t i = 0;
|
2018-04-18 07:32:33 +08:00
|
|
|
for (Location loc : locations) {
|
2019-06-25 17:55:55 +08:00
|
|
|
llvm::raw_string_ostream os(symbolLocations[i++]);
|
2018-04-18 07:32:33 +08:00
|
|
|
os << "\n>>> referenced by ";
|
|
|
|
if (!loc.fileLine.first.empty())
|
|
|
|
os << loc.fileLine.first << ":" << loc.fileLine.second
|
|
|
|
<< "\n>>> ";
|
|
|
|
os << toString(file);
|
2018-09-16 02:27:09 +08:00
|
|
|
if (loc.sym)
|
|
|
|
os << ":(" << toString(*loc.sym) << ')';
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
2020-07-10 05:12:02 +08:00
|
|
|
return std::make_pair(symbolLocations, numLocations);
|
2019-06-25 17:55:55 +08:00
|
|
|
}
|
|
|
|
|
2020-07-10 05:12:02 +08:00
|
|
|
// Convenience overload: returns the reference descriptions for symIndex in
// file, passing SIZE_MAX as the limit and discarding the reference count.
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
  std::pair<std::vector<std::string>, size_t> result =
      getSymbolLocations(file, symIndex, SIZE_MAX);
  return std::move(result.first);
}
|
|
|
|
|
|
|
|
static std::pair<std::vector<std::string>, size_t>
|
|
|
|
getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
|
2019-07-27 01:56:45 +08:00
|
|
|
if (auto *o = dyn_cast<ObjFile>(file))
|
2020-07-10 05:12:02 +08:00
|
|
|
return getSymbolLocations(o, symIndex, maxStrings);
|
|
|
|
if (auto *b = dyn_cast<BitcodeFile>(file)) {
|
|
|
|
std::vector<std::string> symbolLocations = getSymbolLocations(b);
|
|
|
|
size_t numLocations = symbolLocations.size();
|
|
|
|
if (symbolLocations.size() > maxStrings)
|
|
|
|
symbolLocations.resize(maxStrings);
|
|
|
|
return std::make_pair(symbolLocations, numLocations);
|
|
|
|
}
|
2019-07-27 01:56:45 +08:00
|
|
|
llvm_unreachable("unsupported file type passed to getSymbolLocations");
|
2020-07-10 05:12:02 +08:00
|
|
|
return std::make_pair(std::vector<std::string>(), (size_t)0);
|
2019-07-27 01:56:45 +08:00
|
|
|
}
|
|
|
|
|
2019-06-25 17:55:55 +08:00
|
|
|
// For an undefined symbol, stores all files referencing it and the index of
|
|
|
|
// the undefined symbol in each file.
|
|
|
|
struct UndefinedDiag {
|
|
|
|
Symbol *sym;
|
|
|
|
struct File {
|
2019-07-27 01:56:45 +08:00
|
|
|
InputFile *file;
|
|
|
|
uint32_t symIndex;
|
2019-06-25 17:55:55 +08:00
|
|
|
};
|
|
|
|
std::vector<File> files;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
|
|
|
|
std::string out;
|
|
|
|
llvm::raw_string_ostream os(out);
|
|
|
|
os << "undefined symbol: " << toString(*undefDiag.sym);
|
2019-07-11 13:40:30 +08:00
|
|
|
|
2020-03-30 01:02:21 +08:00
|
|
|
const size_t maxUndefReferences = 3;
|
2020-07-10 05:12:02 +08:00
|
|
|
size_t numDisplayedRefs = 0, numRefs = 0;
|
2019-06-25 17:55:55 +08:00
|
|
|
for (const UndefinedDiag::File &ref : undefDiag.files) {
|
2020-07-10 05:12:02 +08:00
|
|
|
std::vector<std::string> symbolLocations;
|
|
|
|
size_t totalLocations = 0;
|
|
|
|
std::tie(symbolLocations, totalLocations) = getSymbolLocations(
|
|
|
|
ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
|
|
|
|
|
|
|
|
numRefs += totalLocations;
|
|
|
|
numDisplayedRefs += symbolLocations.size();
|
2019-06-25 17:55:55 +08:00
|
|
|
for (const std::string &s : symbolLocations) {
|
|
|
|
os << s;
|
|
|
|
}
|
|
|
|
}
|
2020-07-10 05:12:02 +08:00
|
|
|
if (numDisplayedRefs < numRefs)
|
|
|
|
os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
|
2019-06-25 17:55:55 +08:00
|
|
|
errorOrWarn(os.str());
|
2018-04-18 07:32:33 +08:00
|
|
|
}
|
|
|
|
|
[LLD] [COFF] Fix up missing stdcall decorations in MinGW mode
If linking directly against a DLL without an import library, the
DLL export symbols might not contain stdcall decorations.
If we have an undefined symbol with decoration, and we happen to have
a matching undecorated symbol (which either is lazy and can be loaded,
or already defined), then alias it against that instead.
This matches what's done in reverse, when we have a def file
declaring to export a symbol without decoration, but we only have
a defined decorated symbol. In that case we do a fuzzy match
(SymbolTable::findMangle). This case is more straightforward; if we
have a decorated undefined symbol, just strip the decoration and look
for the corresponding undecorated symbol name.
Add warnings and options for either silencing the warning or disabling
the whole feature, corresponding to how ld.bfd does it.
(This feature works for any symbol decoration mismatch, not only when
linking against a DLL directly; ld.bfd also tolerates it anywhere,
and also fixes up mismatches in the other direction, like
SymbolTable::findMangle, for any symbol, not only exports. But in
practice, at least for lld, it would primarily end up used for linking
against DLLs.)
Differential Revision: https://reviews.llvm.org/D104532
2021-06-18 02:51:37 +08:00
|
|
|
void SymbolTable::loadMinGWSymbols() {
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
for (auto &i : symMap) {
|
|
|
|
Symbol *sym = i.second;
|
|
|
|
auto *undef = dyn_cast<Undefined>(sym);
|
|
|
|
if (!undef)
|
|
|
|
continue;
|
2019-08-02 19:02:34 +08:00
|
|
|
if (undef->getWeakAlias())
|
|
|
|
continue;
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
|
|
|
|
StringRef name = undef->getName();
|
|
|
|
|
[LLD] [COFF] Fix up missing stdcall decorations in MinGW mode
If linking directly against a DLL without an import library, the
DLL export symbols might not contain stdcall decorations.
If we have an undefined symbol with decoration, and we happen to have
a matching undecorated symbol (which either is lazy and can be loaded,
or already defined), then alias it against that instead.
This matches what's done in reverse, when we have a def file
declaring to export a symbol without decoration, but we only have
a defined decorated symbol. In that case we do a fuzzy match
(SymbolTable::findMangle). This case is more straightforward; if we
have a decorated undefined symbol, just strip the decoration and look
for the corresponding undecorated symbol name.
Add warnings and options for either silencing the warning or disabling
the whole feature, corresponding to how ld.bfd does it.
(This feature works for any symbol decoration mismatch, not only when
linking against a DLL directly; ld.bfd also tolerates it anywhere,
and also fixes up mismatches in the other direction, like
SymbolTable::findMangle, for any symbol, not only exports. But in
practice, at least for lld, it would primarily end up used for linking
against DLLs.)
Differential Revision: https://reviews.llvm.org/D104532
2021-06-18 02:51:37 +08:00
|
|
|
if (config->machine == I386 && config->stdcallFixup) {
|
|
|
|
// Check if we can resolve an undefined decorated symbol by finding
|
|
|
|
// the intended target as an undecorated symbol (only with a leading
|
|
|
|
// underscore).
|
|
|
|
StringRef origName = name;
|
|
|
|
StringRef baseName = name;
|
|
|
|
// Trim down stdcall/fastcall/vectorcall symbols to the base name.
|
|
|
|
baseName = ltrim1(baseName, "_@");
|
|
|
|
baseName = baseName.substr(0, baseName.find('@'));
|
|
|
|
// Add a leading underscore, as it would be in cdecl form.
|
|
|
|
std::string newName = ("_" + baseName).str();
|
|
|
|
Symbol *l;
|
|
|
|
if (newName != origName && (l = find(newName)) != nullptr) {
|
|
|
|
// If we found a symbol and it is lazy; load it.
|
|
|
|
if (l->isLazy() && !l->pendingArchiveLoad) {
|
|
|
|
log("Loading lazy " + l->getName() + " from " +
|
|
|
|
l->getFile()->getName() + " for stdcall fixup");
|
|
|
|
forceLazy(l);
|
|
|
|
}
|
|
|
|
// If it's lazy or already defined, hook it up as weak alias.
|
|
|
|
if (l->isLazy() || isa<Defined>(l)) {
|
|
|
|
if (config->warnStdcallFixup)
|
|
|
|
warn("Resolving " + origName + " by linking to " + newName);
|
|
|
|
else
|
|
|
|
log("Resolving " + origName + " by linking to " + newName);
|
|
|
|
undef->weakAlias = l;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
|
[LLD] [COFF] Fix up missing stdcall decorations in MinGW mode
If linking directly against a DLL without an import library, the
DLL export symbols might not contain stdcall decorations.
If we have an undefined symbol with decoration, and we happen to have
a matching undecorated symbol (which either is lazy and can be loaded,
or already defined), then alias it against that instead.
This matches what's done in reverse, when we have a def file
declaring to export a symbol without decoration, but we only have
a defined decorated symbol. In that case we do a fuzzy match
(SymbolTable::findMangle). This case is more straightforward; if we
have a decorated undefined symbol, just strip the decoration and look
for the corresponding undecorated symbol name.
Add warnings and options for either silencing the warning or disabling
the whole feature, corresponding to how ld.bfd does it.
(This feature works for any symbol decoration mismatch, not only when
linking against a DLL directly; ld.bfd also tolerates it anywhere,
and also fixes up mismatches in the other direction, like
SymbolTable::findMangle, for any symbol, not only exports. But in
practice, at least for lld, it would primarily end up used for linking
against DLLs.)
Differential Revision: https://reviews.llvm.org/D104532
2021-06-18 02:51:37 +08:00
|
|
|
if (config->autoImport) {
|
|
|
|
if (name.startswith("__imp_"))
|
|
|
|
continue;
|
|
|
|
// If we have an undefined symbol, but we have a lazy symbol we could
|
|
|
|
// load, load it.
|
|
|
|
Symbol *l = find(("__imp_" + name).str());
|
|
|
|
if (!l || l->pendingArchiveLoad || !l->isLazy())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
|
|
|
|
" for automatic import");
|
|
|
|
forceLazy(l);
|
|
|
|
}
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW CRT startup code
has a custom framework of its own for manually fixing the missing
relocations once the module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originally in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR64, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-27 01:56:45 +08:00
|
|
|
/// Looks up the "__imp_<name>" counterpart of \p name in the symbol table.
///
/// Returns nullptr when \p name already starts with "__imp_", when no
/// "__imp_<name>" entry is found, or when the entry found is not a Defined
/// symbol.
Defined *SymbolTable::impSymbol(StringRef name) {
  const bool isImportName = name.startswith("__imp_");
  if (isImportName)
    return nullptr;

  Symbol *candidate = find(("__imp_" + name).str());
  return dyn_cast_or_null<Defined>(candidate);
}
|
|
|
|
|
|
|
|
bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
|
|
|
|
Defined *imp = impSymbol(name);
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
if (!imp)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Replace the reference directly to a variable with a reference
|
|
|
|
// to the import address table instead. This obviously isn't right,
|
2019-07-16 16:26:38 +08:00
|
|
|
// but we mark the symbol as isRuntimePseudoReloc, and a later pass
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
// will add runtime pseudo relocations for every relocation against
|
|
|
|
// this Symbol. The runtime pseudo relocation framework expects the
|
|
|
|
// reference itself to point at the IAT entry.
|
2018-09-26 14:13:39 +08:00
|
|
|
size_t impSize = 0;
|
|
|
|
if (isa<DefinedImportData>(imp)) {
|
|
|
|
log("Automatically importing " + name + " from " +
|
|
|
|
cast<DefinedImportData>(imp)->getDLLName());
|
|
|
|
impSize = sizeof(DefinedImportData);
|
|
|
|
} else if (isa<DefinedRegular>(imp)) {
|
|
|
|
log("Automatically importing " + name + " from " +
|
|
|
|
toString(cast<DefinedRegular>(imp)->file));
|
|
|
|
impSize = sizeof(DefinedRegular);
|
|
|
|
} else {
|
|
|
|
warn("unable to automatically import " + name + " from " + imp->getName() +
|
|
|
|
" from " + toString(cast<DefinedRegular>(imp)->file) +
|
|
|
|
"; unexpected symbol type");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
sym->replaceKeepingName(imp, impSize);
|
|
|
|
sym->isRuntimePseudoReloc = true;
|
2018-08-31 15:45:20 +08:00
|
|
|
|
|
|
|
// There may exist symbols named .refptr.<name> which only consist
|
|
|
|
// of a single pointer to <name>. If it turns out <name> is
|
|
|
|
// automatically imported, we don't need to keep the .refptr.<name>
|
|
|
|
// pointer at all, but redirect all accesses to it to the IAT entry
|
|
|
|
// for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
|
|
|
|
DefinedRegular *refptr =
|
|
|
|
dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
|
2018-10-12 01:45:58 +08:00
|
|
|
if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
|
2018-08-31 15:45:20 +08:00
|
|
|
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
|
2019-05-04 04:17:14 +08:00
|
|
|
if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
|
2018-09-18 15:21:55 +08:00
|
|
|
log("Replacing .refptr." + name + " with " + imp->getName());
|
|
|
|
refptr->getChunk()->live = false;
|
2018-09-26 14:13:39 +08:00
|
|
|
refptr->replaceKeepingName(imp, impSize);
|
2018-08-31 15:45:20 +08:00
|
|
|
}
|
|
|
|
}
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-07-27 01:56:45 +08:00
|
|
|
/// Helper function for reportUnresolvable and resolveRemainingUndefines.
|
|
|
|
/// This function emits an "undefined symbol" diagnostic for each symbol in
|
|
|
|
/// undefs. If localImports is not nullptr, it also emits a "locally
|
|
|
|
/// defined symbol imported" diagnostic for symbols in localImports.
|
|
|
|
/// objFiles and bitcodeFiles (if not nullptr) are used to report where
|
|
|
|
/// undefined symbols are referenced.
|
|
|
|
static void
|
|
|
|
reportProblemSymbols(const SmallPtrSetImpl<Symbol *> &undefs,
|
|
|
|
const DenseMap<Symbol *, Symbol *> *localImports,
|
|
|
|
const std::vector<ObjFile *> objFiles,
|
|
|
|
const std::vector<BitcodeFile *> *bitcodeFiles) {
|
|
|
|
|
|
|
|
// Return early if there is nothing to report (which should be
|
|
|
|
// the common case).
|
|
|
|
if (undefs.empty() && (!localImports || localImports->empty()))
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (Symbol *b : config->gcroot) {
|
|
|
|
if (undefs.count(b))
|
|
|
|
errorOrWarn("<root>: undefined symbol: " + toString(*b));
|
|
|
|
if (localImports)
|
|
|
|
if (Symbol *imp = localImports->lookup(b))
|
|
|
|
warn("<root>: locally defined symbol imported: " + toString(*imp) +
|
|
|
|
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<UndefinedDiag> undefDiags;
|
|
|
|
DenseMap<Symbol *, int> firstDiag;
|
|
|
|
|
|
|
|
auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
|
|
|
|
uint32_t symIndex = (uint32_t)-1;
|
|
|
|
for (Symbol *sym : symbols) {
|
|
|
|
++symIndex;
|
|
|
|
if (!sym)
|
|
|
|
continue;
|
|
|
|
if (undefs.count(sym)) {
|
|
|
|
auto it = firstDiag.find(sym);
|
|
|
|
if (it == firstDiag.end()) {
|
|
|
|
firstDiag[sym] = undefDiags.size();
|
|
|
|
undefDiags.push_back({sym, {{file, symIndex}}});
|
|
|
|
} else {
|
|
|
|
undefDiags[it->second].files.push_back({file, symIndex});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (localImports)
|
|
|
|
if (Symbol *imp = localImports->lookup(sym))
|
|
|
|
warn(toString(file) +
|
|
|
|
": locally defined symbol imported: " + toString(*imp) +
|
|
|
|
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
for (ObjFile *file : objFiles)
|
|
|
|
processFile(file, file->getSymbols());
|
|
|
|
|
|
|
|
if (bitcodeFiles)
|
|
|
|
for (BitcodeFile *file : *bitcodeFiles)
|
|
|
|
processFile(file, file->getSymbols());
|
|
|
|
|
|
|
|
for (const UndefinedDiag &undefDiag : undefDiags)
|
|
|
|
reportUndefinedSymbol(undefDiag);
|
|
|
|
}
|
|
|
|
|
|
|
|
void SymbolTable::reportUnresolvable() {
|
|
|
|
SmallPtrSet<Symbol *, 8> undefs;
|
|
|
|
for (auto &i : symMap) {
|
|
|
|
Symbol *sym = i.second;
|
|
|
|
auto *undef = dyn_cast<Undefined>(sym);
|
2020-10-06 18:54:49 +08:00
|
|
|
if (!undef || sym->deferUndefined)
|
2019-07-27 01:56:45 +08:00
|
|
|
continue;
|
2019-08-02 10:51:20 +08:00
|
|
|
if (undef->getWeakAlias())
|
2019-07-27 01:56:45 +08:00
|
|
|
continue;
|
|
|
|
StringRef name = undef->getName();
|
|
|
|
if (name.startswith("__imp_")) {
|
|
|
|
Symbol *imp = find(name.substr(strlen("__imp_")));
|
|
|
|
if (imp && isa<Defined>(imp))
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (name.contains("_PchSym_"))
|
|
|
|
continue;
|
2020-10-07 15:46:29 +08:00
|
|
|
if (config->autoImport && impSymbol(name))
|
2019-07-27 01:56:45 +08:00
|
|
|
continue;
|
|
|
|
undefs.insert(sym);
|
|
|
|
}
|
|
|
|
|
|
|
|
reportProblemSymbols(undefs,
|
|
|
|
/* localImports */ nullptr, ObjFile::instances,
|
|
|
|
&BitcodeFile::instances);
|
|
|
|
}
|
|
|
|
|
|
|
|
void SymbolTable::resolveRemainingUndefines() {
|
2017-11-04 05:21:47 +08:00
|
|
|
SmallPtrSet<Symbol *, 8> undefs;
|
2017-12-15 15:49:21 +08:00
|
|
|
DenseMap<Symbol *, Symbol *> localImports;
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2017-11-28 07:16:06 +08:00
|
|
|
for (auto &i : symMap) {
|
2017-11-04 05:21:47 +08:00
|
|
|
Symbol *sym = i.second;
|
2017-11-01 00:10:24 +08:00
|
|
|
auto *undef = dyn_cast<Undefined>(sym);
|
2015-05-29 03:09:30 +08:00
|
|
|
if (!undef)
|
|
|
|
continue;
|
2016-12-10 05:55:24 +08:00
|
|
|
if (!sym->isUsedInRegularObj)
|
|
|
|
continue;
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2015-06-25 10:21:44 +08:00
|
|
|
StringRef name = undef->getName();
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2015-07-04 13:28:41 +08:00
|
|
|
// A weak alias may have been resolved, so check for that.
|
|
|
|
if (Defined *d = undef->getWeakAlias()) {
|
2017-11-01 05:26:42 +08:00
|
|
|
// We want to replace Sym with D. However, we can't just blindly
|
|
|
|
// copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
|
|
|
|
// internal symbol, and internal symbols are stored as "unparented"
|
|
|
|
// Symbols. For that reason we need to check which type of symbol we
|
|
|
|
// are dealing with and copy the correct number of bytes.
|
|
|
|
if (isa<DefinedRegular>(d))
|
|
|
|
memcpy(sym, d, sizeof(DefinedRegular));
|
|
|
|
else if (isa<DefinedAbsolute>(d))
|
|
|
|
memcpy(sym, d, sizeof(DefinedAbsolute));
|
|
|
|
else
|
|
|
|
memcpy(sym, d, sizeof(SymbolUnion));
|
2015-07-04 13:28:41 +08:00
|
|
|
continue;
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2015-06-25 10:21:44 +08:00
|
|
|
// If we can resolve a symbol by removing __imp_ prefix, do that.
|
|
|
|
// This odd rule is for compatibility with MSVC linker.
|
|
|
|
if (name.startswith("__imp_")) {
|
2017-11-04 05:21:47 +08:00
|
|
|
Symbol *imp = find(name.substr(strlen("__imp_")));
|
2017-11-01 00:10:24 +08:00
|
|
|
if (imp && isa<Defined>(imp)) {
|
|
|
|
auto *d = cast<Defined>(imp);
|
2017-11-04 06:48:47 +08:00
|
|
|
replaceSymbol<DefinedLocalImport>(sym, name, d);
|
2017-11-01 00:10:24 +08:00
|
|
|
localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
|
2017-12-15 15:49:21 +08:00
|
|
|
localImports[sym] = d;
|
2015-06-25 10:21:44 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2018-11-08 22:42:37 +08:00
|
|
|
// We don't want to report missing Microsoft precompiled headers symbols.
|
|
|
|
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj
|
|
|
|
if (name.contains("_PchSym_"))
|
|
|
|
continue;
|
|
|
|
|
2020-04-26 05:49:44 +08:00
|
|
|
if (config->autoImport && handleMinGWAutomaticImport(sym, name))
|
[COFF] Support MinGW automatic dllimport of data
Normally, in order to reference exported data symbols from a different
DLL, the declarations need to have the dllimport attribute, in order to
use the __imp_<var> symbol (which contains an address to the actual
variable) instead of the variable itself directly. This isn't an issue
in the same way for functions, since any reference to the function without
the dllimport attribute will end up as a reference to a thunk which loads
the actual target function from the import address table (IAT).
GNU ld, in MinGW environments, supports automatically importing data
symbols from DLLs, even if the references didn't have the appropriate
dllimport attribute. Since the PE/COFF format doesn't support the kind
of relocations that this would require, the MinGW's CRT startup code
has an custom framework of their own for manually fixing the missing
relocations once module is loaded and the target addresses in the IAT
are known.
For this to work, the linker (originall in GNU ld) creates a list of
remaining references needing fixup, which the runtime processes on
startup before handing over control to user code.
While this feature is rather controversial, it's one of the main features
allowing unix style libraries to be used on windows without any extra
porting effort.
Some sort of automatic fixing of data imports is also necessary for the
itanium C++ ABI on windows (as clang implements it right now) for importing
vtable pointers in certain cases, see D43184 for some discussion on that.
The runtime pseudo relocation handler supports 8/16/32/64 bit addresses,
either PC relative references (like IMAGE_REL_*_REL32*) or absolute
references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32,
IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a
relocation against the corresponding IAT slot. For the absolute references,
a normal base relocation is created, to update the embedded address
in case the image is loaded at a different address.
The list of runtime pseudo relocations contains the RVA of the
imported symbol (the IAT slot), the RVA of the location the relocation
should be applied to, and a size of the memory location. When the
relocations are fixed at runtime, the difference between the actual
IAT slot value and the IAT slot address is added to the reference,
doing the right thing for both absolute and relative references.
With this patch alone, things work fine for i386 binaries, and mostly
for x86_64 binaries, with feature parity with GNU ld. Despite this,
there are a few gotchas:
- References to data from within code works fine on both x86 architectures,
since their relocations consist of plain 32 or 64 bit absolute/relative
references. On ARM and AArch64, references to data doesn't consist of
a plain 32 or 64 bit embedded address or offset in the code. On ARMNT,
it's usually a MOVW+MOVT instruction pair represented by a
IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of
the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR
instruction pair with an even more complex encoding, storing a PC
relative address (with a range of +/- 4 GB). This could theoretically
be remedied by extending the runtime pseudo relocation handler with new
relocation types, to support these instruction encodings. This isn't an
issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64.
- For x86_64, if references in code are encoded as 32 bit PC relative
offsets, the runtime relocation will fail if the target turns out to be
out of range for a 32 bit offset.
- Fixing up the relocations at runtime requires making sections writable
if necessary, with the VirtualProtect function. In Windows Store/UWP apps,
this function is forbidden.
These limitations are addressed by a few later patches in lld and
llvm.
Differential Revision: https://reviews.llvm.org/D50917
llvm-svn: 340726
2018-08-27 16:43:31 +08:00
|
|
|
continue;
|
|
|
|
|
2015-06-29 03:35:15 +08:00
|
|
|
// Remaining undefined symbols are not fatal if /force is specified.
|
|
|
|
// They are replaced with dummy defined symbols.
|
2018-09-14 06:05:10 +08:00
|
|
|
if (config->forceUnresolved)
|
2017-11-04 06:48:47 +08:00
|
|
|
replaceSymbol<DefinedAbsolute>(sym, name, 0);
|
2017-11-01 00:10:24 +08:00
|
|
|
undefs.insert(sym);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
2017-10-07 07:43:54 +08:00
|
|
|
|
2019-07-27 01:56:45 +08:00
|
|
|
reportProblemSymbols(
|
|
|
|
undefs, config->warnLocallyDefinedImported ? &localImports : nullptr,
|
|
|
|
ObjFile::instances, /* bitcode files no longer needed */ nullptr);
|
2015-05-29 03:09:30 +08:00
|
|
|
}
|
|
|
|
|
2018-09-07 04:23:56 +08:00
|
|
|
// Looks up `name` in the symbol map and creates an entry for it if there is
// none yet. Returns the symbol together with a flag that is true only when
// the entry was created by this call.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
  Symbol *&slot = symMap[CachedHashStringRef(name)];
  if (slot)
    return {slot, false};

  // New entry: allocate a SymbolUnion, view it as a Symbol, and set the flags
  // to their initial values.
  slot = reinterpret_cast<Symbol *>(make<SymbolUnion>());
  slot->isUsedInRegularObj = false;
  slot->pendingArchiveLoad = false;
  slot->canInline = true;
  return {slot, true};
}
|
|
|
|
|
2018-09-07 04:23:56 +08:00
|
|
|
// Same as insert(name), but additionally flags the symbol as used by a regular
// object unless `file` is a bitcode file. A null `file` is treated like a
// non-bitcode file.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
  std::pair<Symbol *, bool> entry = insert(name);
  const bool fromBitcode = file && isa<BitcodeFile>(file);
  if (!fromBitcode)
    entry.first->isUsedInRegularObj = true;
  return entry;
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Records a reference to `name` coming from `f`.
//  - A name seen for the first time becomes an Undefined symbol.
//  - An existing lazy symbol is turned into Undefined when `isWeakAlias` is
//    set; otherwise it is handed to forceLazy().
//  - Any other existing symbol is returned unchanged.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
                                  bool isWeakAlias) {
  const std::pair<Symbol *, bool> entry = insert(name, f);
  Symbol *result = entry.first;

  if (entry.second) {
    replaceSymbol<Undefined>(result, name);
    return result;
  }

  if (!result->isLazy())
    return result;

  if (isWeakAlias)
    replaceSymbol<Undefined>(result, name);
  else
    forceLazy(result);
  return result;
}
|
|
|
|
|
2019-09-04 04:32:16 +08:00
|
|
|
// Registers an archive symbol. A name seen for the first time is recorded as
// a LazyArchive symbol. If the name already exists as an Undefined symbol
// with no weak alias and no load pending, the load is marked as pending and
// the archive is asked to add the member for `sym`.
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
  const std::pair<Symbol *, bool> entry = insert(sym.getName());
  Symbol *existing = entry.first;

  if (entry.second) {
    replaceSymbol<LazyArchive>(existing, f, sym);
    return;
  }

  auto *undef = dyn_cast<Undefined>(existing);
  const bool needsMember =
      undef && !undef->weakAlias && !existing->pendingArchiveLoad;
  if (needsMember) {
    existing->pendingArchiveLoad = true;
    f->addMember(sym);
  }
}
|
|
|
|
|
2019-09-04 04:32:16 +08:00
|
|
|
// Registers symbol `n` provided by the lazy object file `f`. A name seen for
// the first time is recorded as a LazyObject symbol. If the name already
// exists as an Undefined symbol with no weak alias and no load pending, the
// load is marked as pending and the file is fetched.
void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) {
  const std::pair<Symbol *, bool> entry = insert(n, f);
  Symbol *existing = entry.first;

  if (entry.second) {
    replaceSymbol<LazyObject>(existing, f, n);
    return;
  }

  auto *undef = dyn_cast<Undefined>(existing);
  const bool needsFetch =
      undef && !undef->weakAlias && !existing->pendingArchiveLoad;
  if (needsFetch) {
    existing->pendingArchiveLoad = true;
    f->fetch();
  }
}
|
|
|
|
|
[LLD] [COFF] Support linking directly against DLLs in MinGW mode
GNU ld.bfd supports linking directly against DLLs without using an
import library, and some projects have picked up on this habit.
(There's no one single unsurmountable issue with using import
libraries, but this is a regularly surfacing missing feature.)
As long as one is linking by name (instead of by ordinal), the DLL
export table contains most of the information needed. (One can
inspect what section a symbol points at, to see if it's a function
or data symbol. The practical implementation of this loops over all
sections for each symbol, but as long as they're not very many, that
should hopefully be tolerable performance wise.)
One exception where the information in the DLL isn't entirely enough
is on i386 with stdcall functions; depending on how they're done,
the exported function name can be a plain undecorated name, while
the import library would contain the full decorated symbol name. This
issue is addressed separately in a different patch.
This is implemented mimicking the structure of a regular import library,
with one InputFile corresponding to the static archive that just adds
lazy symbols, which then are fetched when they are needed. When such
a symbol is fetched, we synthesize a coff_import_header structure
in memory and create a regular ImportFile out of it.
The implementation could be even smaller by just creating ImportFiles
for every symbol available immediately, but that would have the
drawback of actually ending up importing all symbols unless running
with GC enabled (and mingw mode defaults to having it disabled for
historical reasons).
Differential Revision: https://reviews.llvm.org/D104530
2021-06-16 21:59:46 +08:00
|
|
|
// Registers symbol `n` exported by the DLL file `f`. A name seen for the first
// time is recorded as a LazyDLLSymbol. If the name already exists as an
// Undefined symbol with no weak alias and no load pending, the load is marked
// as pending and the DLL file is asked to make the import for `sym`.
void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
                                   StringRef n) {
  const std::pair<Symbol *, bool> entry = insert(n);
  Symbol *existing = entry.first;

  if (entry.second) {
    replaceSymbol<LazyDLLSymbol>(existing, f, sym, n);
    return;
  }

  auto *undef = dyn_cast<Undefined>(existing);
  const bool needsImport =
      undef && !undef->weakAlias && !existing->pendingArchiveLoad;
  if (needsImport) {
    existing->pendingArchiveLoad = true;
    f->makeImport(sym);
  }
}
|
|
|
|
|
2019-10-18 18:43:15 +08:00
|
|
|
// Builds the "defined at" trailer for a symbol that comes from a bitcode
// file: the recorded source file name (when non-empty) followed by the
// printable name of the input file.
static std::string getSourceLocationBitcode(BitcodeFile *file) {
  std::string text;
  llvm::raw_string_ostream os(text);
  os << "\n>>> defined at ";

  const StringRef srcName = file->obj->getSourceFileName();
  if (!srcName.empty())
    os << srcName << "\n>>> ";

  os << toString(file);
  return os.str();
}
|
|
|
|
|
|
|
|
// Builds the "defined at" trailer for a symbol that comes from an object
// file. The file:line pair is taken from getFileLine() when a section chunk
// is given; if that yields nothing, the file's variable location for `name`
// is used instead. The printable name of the input file is always appended.
static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
                                        uint32_t offset, StringRef name) {
  Optional<std::pair<StringRef, uint32_t>> where;
  if (sc)
    where = getFileLine(sc, offset);
  if (!where)
    where = file->getVariableLocation(name);

  std::string text = "\n>>> defined at ";
  if (where)
    text += (where->first + ":" + Twine(where->second) + "\n>>> ").str();
  text += toString(file);
  return text;
}
|
|
|
|
|
|
|
|
// Dispatches to the file-kind specific "defined at" formatter. Returns an
// empty string when there is no file, and a plain file-name trailer for file
// kinds other than object and bitcode files.
static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
                                     uint32_t offset, StringRef name) {
  if (file == nullptr)
    return std::string();

  if (isa<ObjFile>(file))
    return getSourceLocationObj(cast<ObjFile>(file), sc, offset, name);

  if (isa<BitcodeFile>(file))
    return getSourceLocationBitcode(cast<BitcodeFile>(file));

  std::string text = "\n>>> defined at ";
  text += toString(file);
  return text;
}
|
|
|
|
|
|
|
|
// Construct and print an error message in the form of:
|
|
|
|
//
|
|
|
|
// lld-link: error: duplicate symbol: foo
|
|
|
|
// >>> defined at bar.c:30
|
|
|
|
// >>> bar.o
|
|
|
|
// >>> defined at baz.c:563
|
|
|
|
// >>> baz.o
|
|
|
|
void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
|
|
|
|
SectionChunk *newSc,
|
|
|
|
uint32_t newSectionOffset) {
|
|
|
|
std::string msg;
|
|
|
|
llvm::raw_string_ostream os(msg);
|
|
|
|
os << "duplicate symbol: " << toString(*existing);
|
|
|
|
|
2019-12-19 05:58:51 +08:00
|
|
|
DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
|
2019-10-18 18:43:15 +08:00
|
|
|
if (d && isa<ObjFile>(d->getFile())) {
|
|
|
|
os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
|
|
|
|
existing->getName());
|
|
|
|
} else {
|
|
|
|
os << getSourceLocation(existing->getFile(), nullptr, 0, "");
|
|
|
|
}
|
|
|
|
os << getSourceLocation(newFile, newSc, newSectionOffset,
|
|
|
|
existing->getName());
|
2018-09-14 06:05:10 +08:00
|
|
|
|
|
|
|
if (config->forceMultiple)
|
2019-10-18 18:43:15 +08:00
|
|
|
warn(os.str());
|
2018-09-14 06:05:10 +08:00
|
|
|
else
|
2019-10-18 18:43:15 +08:00
|
|
|
error(os.str());
|
2016-12-10 05:55:24 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Defines `n` as an absolute symbol described by the COFF symbol `sym`.
// A new, undefined or lazy entry is replaced by the absolute definition. An
// existing absolute definition is reported as a duplicate only if its VA
// differs from the value of `sym`; any other existing symbol is reported
// unless it is a DefinedCOFF.
Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
  const std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  const bool replaceable =
      entry.second || isa<Undefined>(target) || target->isLazy();
  if (replaceable) {
    replaceSymbol<DefinedAbsolute>(target, n, sym);
    return target;
  }

  if (auto *prior = dyn_cast<DefinedAbsolute>(target)) {
    if (prior->getVA() != sym.getValue())
      reportDuplicate(target, nullptr);
    return target;
  }

  if (!isa<DefinedCOFF>(target))
    reportDuplicate(target, nullptr);
  return target;
}
|
2015-09-20 08:00:05 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Defines `n` as an absolute symbol located at `va`.
// A new, undefined or lazy entry is replaced by the absolute definition. An
// existing absolute definition is reported as a duplicate only if its VA
// differs from `va`; any other existing symbol is reported unless it is a
// DefinedCOFF.
Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
  const std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  const bool replaceable =
      entry.second || isa<Undefined>(target) || target->isLazy();
  if (replaceable) {
    replaceSymbol<DefinedAbsolute>(target, n, va);
    return target;
  }

  if (auto *prior = dyn_cast<DefinedAbsolute>(target)) {
    if (prior->getVA() != va)
      reportDuplicate(target, nullptr);
    return target;
  }

  if (!isa<DefinedCOFF>(target))
    reportDuplicate(target, nullptr);
  return target;
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Defines `n` as a synthetic symbol backed by chunk `c`. A new, undefined or
// lazy entry is replaced by the synthetic definition; any other existing
// symbol is reported as a duplicate unless it is a DefinedCOFF.
Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
  const std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  const bool replaceable =
      entry.second || isa<Undefined>(target) || target->isLazy();
  if (replaceable) {
    replaceSymbol<DefinedSynthetic>(target, n, c);
    return target;
  }

  if (!isa<DefinedCOFF>(target))
    reportDuplicate(target, nullptr);
  return target;
}
|
|
|
|
|
2017-11-28 09:30:07 +08:00
|
|
|
// Defines `n` as a regular (non-COMDAT, external) symbol from file `f`.
// If the name already resolves to a DefinedRegular symbol, the clash is
// reported as a duplicate using the new chunk and section offset; in every
// other case the entry is replaced by the new definition.
Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
                                const coff_symbol_generic *sym, SectionChunk *c,
                                uint32_t sectionOffset) {
  const std::pair<Symbol *, bool> entry = insert(n, f);
  Symbol *target = entry.first;

  const bool clashes = !entry.second && isa<DefinedRegular>(target);
  if (clashes) {
    reportDuplicate(target, f, c, sectionOffset);
    return target;
  }

  replaceSymbol<DefinedRegular>(target, f, n, /*IsCOMDAT*/ false,
                                /*IsExternal*/ true, sym, c);
  return target;
}
|
|
|
|
|
2019-01-30 10:17:27 +08:00
|
|
|
std::pair<DefinedRegular *, bool>
|
2017-11-28 09:30:07 +08:00
|
|
|
SymbolTable::addComdat(InputFile *f, StringRef n,
|
|
|
|
const coff_symbol_generic *sym) {
|
|
|
|
Symbol *s;
|
|
|
|
bool wasInserted;
|
2018-08-03 04:39:19 +08:00
|
|
|
std::tie(s, wasInserted) = insert(n, f);
|
2017-11-28 09:30:07 +08:00
|
|
|
if (wasInserted || !isa<DefinedRegular>(s)) {
|
|
|
|
replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
|
|
|
|
/*IsExternal*/ true, sym, nullptr);
|
2019-01-30 10:17:27 +08:00
|
|
|
return {cast<DefinedRegular>(s), true};
|
2017-11-28 09:30:07 +08:00
|
|
|
}
|
2019-01-30 10:17:27 +08:00
|
|
|
auto *existingSymbol = cast<DefinedRegular>(s);
|
2019-07-10 17:10:01 +08:00
|
|
|
if (!existingSymbol->isCOMDAT)
|
2017-11-28 09:30:07 +08:00
|
|
|
reportDuplicate(s, f);
|
2019-01-30 10:17:27 +08:00
|
|
|
return {existingSymbol, false};
|
2017-11-28 09:30:07 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Defines `n` as a common symbol of `size` bytes from file `f`.
// The new definition is installed when the entry is new, when the existing
// symbol is not a DefinedCOFF, or when the existing symbol is a common symbol
// smaller than `size`. Otherwise the existing symbol is kept.
Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
                               const coff_symbol_generic *sym, CommonChunk *c) {
  const std::pair<Symbol *, bool> entry = insert(n, f);
  Symbol *target = entry.first;

  bool takeNew = entry.second || !isa<DefinedCOFF>(target);
  if (!takeNew) {
    // Among common symbols the larger one wins.
    if (auto *prior = dyn_cast<DefinedCommon>(target))
      takeNew = size > prior->getSize();
  }

  if (takeNew)
    replaceSymbol<DefinedCommon>(target, f, n, size, sym, c);
  return target;
}
|
|
|
|
|
2018-07-10 18:40:11 +08:00
|
|
|
// Defines `n` as import data provided by import file `f`. Returns the symbol
// when the entry was new, undefined or lazy; otherwise reports a duplicate
// and returns nullptr.
Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
  const std::pair<Symbol *, bool> entry = insert(n, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  const bool replaceable =
      entry.second || isa<Undefined>(target) || target->isLazy();
  if (!replaceable) {
    reportDuplicate(target, f);
    return nullptr;
  }

  replaceSymbol<DefinedImportData>(target, n, f);
  return target;
}
|
|
|
|
|
2018-07-10 18:40:11 +08:00
|
|
|
// Defines `name` as an import thunk for the import data symbol `id` on the
// given machine type. Returns the symbol when the entry was new, undefined or
// lazy; otherwise reports a duplicate against id's file and returns nullptr.
Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
                                    uint16_t machine) {
  const std::pair<Symbol *, bool> entry = insert(name, nullptr);
  Symbol *target = entry.first;
  target->isUsedInRegularObj = true;

  const bool replaceable =
      entry.second || isa<Undefined>(target) || target->isLazy();
  if (!replaceable) {
    reportDuplicate(target, id->file);
    return nullptr;
  }

  replaceSymbol<DefinedImportThunk>(target, name, id, machine);
  return target;
}
|
|
|
|
|
2019-08-23 03:40:07 +08:00
|
|
|
// Looks up `name` via findUnderscore() and, if it resolves to a lazy symbol
// (archive member or lazy object) whose buffer is bitcode, adds an undefined
// reference to that symbol's name. Does nothing in any other case.
void SymbolTable::addLibcall(StringRef name) {
  Symbol *sym = findUnderscore(name);
  if (!sym)
    return;

  bool lazyBitcode = false;
  if (auto *archiveSym = dyn_cast<LazyArchive>(sym)) {
    MemoryBufferRef member = archiveSym->getMemberBuffer();
    lazyBitcode = isBitcode(member);
  } else if (LazyObject *objSym = dyn_cast<LazyObject>(sym)) {
    lazyBitcode = isBitcode(objSym->file->mb);
  }

  if (lazyBitcode)
    addUndefined(sym->getName());
}
|
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
std::vector<Chunk *> SymbolTable::getChunks() {
|
|
|
|
std::vector<Chunk *> res;
|
2017-07-27 08:45:26 +08:00
|
|
|
for (ObjFile *file : ObjFile::instances) {
|
2017-12-08 09:09:21 +08:00
|
|
|
ArrayRef<Chunk *> v = file->getChunks();
|
2015-05-29 03:09:30 +08:00
|
|
|
res.insert(res.end(), v.begin(), v.end());
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns the symbol registered under exactly `name`, or nullptr if there is
// no such entry. Never creates an entry.
Symbol *SymbolTable::find(StringRef name) {
  auto it = symMap.find(CachedHashStringRef(name));
  if (it == symMap.end())
    return nullptr;
  return it->second;
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Looks up `name`, prepending an underscore first when the configured machine
// is I386. Returns nullptr when nothing is found.
Symbol *SymbolTable::findUnderscore(StringRef name) {
  if (config->machine != I386)
    return find(name);

  const std::string decorated = ("_" + name).str();
  return find(decorated);
}
|
|
|
|
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
// Return all symbols that start with Prefix, possibly ignoring the first
|
|
|
|
// character of Prefix or the first character symbol.
|
|
|
|
std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
|
|
|
|
std::vector<Symbol *> syms;
|
2017-11-28 07:16:06 +08:00
|
|
|
for (auto pair : symMap) {
|
2016-12-12 06:15:30 +08:00
|
|
|
StringRef name = pair.first.val();
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
|
|
|
|
name.drop_front().startswith(prefix) ||
|
|
|
|
name.drop_front().startswith(prefix.drop_front())) {
|
|
|
|
syms.push_back(pair.second);
|
|
|
|
}
|
2015-07-14 10:58:13 +08:00
|
|
|
}
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return syms;
|
2015-07-14 10:58:13 +08:00
|
|
|
}
|
|
|
|
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
Symbol *SymbolTable::findMangle(StringRef name) {
|
2017-11-04 05:21:47 +08:00
|
|
|
if (Symbol *sym = find(name))
|
2017-11-01 00:10:24 +08:00
|
|
|
if (!isa<Undefined>(sym))
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return sym;
|
|
|
|
|
|
|
|
// Efficient fuzzy string lookup is impossible with a hash table, so iterate
|
|
|
|
// the symbol table once and collect all possibly matching symbols into this
|
|
|
|
// vector. Then compare each possibly matching symbol with each possible
|
|
|
|
// mangling.
|
|
|
|
std::vector<Symbol *> syms = getSymsWithPrefix(name);
|
|
|
|
auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
|
|
|
|
std::string prefix = t.str();
|
|
|
|
for (auto *s : syms)
|
|
|
|
if (s->getName().startswith(prefix))
|
|
|
|
return s;
|
|
|
|
return nullptr;
|
|
|
|
};
|
|
|
|
|
|
|
|
// For non-x86, just look for C++ functions.
|
2015-07-26 05:54:50 +08:00
|
|
|
if (config->machine != I386)
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return findByPrefix("?" + name + "@@Y");
|
|
|
|
|
2015-07-14 10:58:13 +08:00
|
|
|
if (!name.startswith("_"))
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return nullptr;
|
2017-10-23 17:08:24 +08:00
|
|
|
// Search for x86 stdcall function.
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
if (Symbol *s = findByPrefix(name + "@"))
|
2017-10-23 17:08:24 +08:00
|
|
|
return s;
|
|
|
|
// Search for x86 fastcall function.
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
|
2017-10-23 17:08:24 +08:00
|
|
|
return s;
|
|
|
|
// Search for x86 vectorcall function.
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
|
2015-07-14 10:58:13 +08:00
|
|
|
return s;
|
|
|
|
// Search for x86 C++ non-member function.
|
[COFF] Fix /export:foo=bar when bar is a weak alias
Summary:
When handling exports from the command line or from .def files, the
linker does a "fuzzy" string lookup to allow finding mangled symbols.
However, when the symbol is re-exported under a new name, the linker has
to transfer the decorations from the exported symbol over to the new
name. This is implemented by taking the mangled symbol that was found in
the object and replacing the original symbol name with the export name.
Before this patch, LLD implemented the fuzzy search by adding an
undefined symbol with the unmangled name, and then during symbol
resolution, checking if similar mangled symbols had been added after the
last round of symbol resolution. If so, LLD makes the original symbol a
weak alias of the mangled symbol. Later, to get the original symbol
name, LLD would look through the weak alias and forward it on to the
import library writer, which copies the symbol decorations. This
approach doesn't work when bar is itself a weak alias, as is the case in
asan. It's especially bad when the aliasee of bar contains the string
"bar", consider "bar_default". In this case, we would end up exporting
the symbol "foo_default" when we should've exported just "foo".
To fix this, don't look through weak aliases to find the mangled name.
Save the mangled name earlier during fuzzy symbol lookup.
Fixes PR42074
Reviewers: mstorsjo, ruiu
Subscribers: thakis, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62984
llvm-svn: 362849
2019-06-08 06:05:12 +08:00
|
|
|
return findByPrefix("?" + name.substr(1) + "@@Y");
|
2015-06-29 06:16:41 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Convenience overload: registers `name` as an undefined symbol by forwarding
// to the three-argument addUndefined(), passing nullptr and false for the
// remaining two arguments.
Symbol *SymbolTable::addUndefined(StringRef name) {
  Symbol *sym = addUndefined(name, nullptr, false);
  return sym;
}
|
|
|
|
|
2017-02-07 04:47:55 +08:00
|
|
|
void SymbolTable::addCombinedLTOObjects() {
|
2017-07-27 08:45:26 +08:00
|
|
|
if (BitcodeFile::instances.empty())
|
2017-02-07 04:47:55 +08:00
|
|
|
return;
|
2018-01-18 03:16:26 +08:00
|
|
|
|
|
|
|
ScopedTimer t(ltoTimer);
|
2020-04-16 00:54:22 +08:00
|
|
|
lto.reset(new BitcodeCompiler);
|
|
|
|
for (BitcodeFile *f : BitcodeFile::instances)
|
|
|
|
lto->add(*f);
|
|
|
|
for (InputFile *newObj : lto->compile()) {
|
|
|
|
ObjFile *obj = cast<ObjFile>(newObj);
|
2017-02-03 07:58:14 +08:00
|
|
|
obj->parse();
|
2017-07-27 08:45:26 +08:00
|
|
|
ObjFile::instances.push_back(obj);
|
2015-08-29 06:16:09 +08:00
|
|
|
}
|
2015-06-10 01:52:17 +08:00
|
|
|
}
|
2017-02-07 04:47:55 +08:00
|
|
|
|
2015-05-29 03:09:30 +08:00
|
|
|
} // namespace coff
|
|
|
|
} // namespace lld
|