2015-07-25 05:03:07 +08:00
|
|
|
//===- Symbols.cpp --------------------------------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-07-25 05:03:07 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "Symbols.h"
|
2015-07-29 06:58:25 +08:00
|
|
|
#include "InputFiles.h"
|
2016-02-02 05:00:35 +08:00
|
|
|
#include "InputSection.h"
|
|
|
|
#include "OutputSections.h"
|
2016-11-06 07:05:47 +08:00
|
|
|
#include "SyntheticSections.h"
|
2016-02-02 05:00:35 +08:00
|
|
|
#include "Target.h"
|
2016-12-20 01:01:01 +08:00
|
|
|
#include "Writer.h"
|
[lld] unified COFF and ELF error handling on new Common/ErrorHandler
Summary:
The COFF linker and the ELF linker have long had similar but separate
Error.h and Error.cpp files to implement error handling. This change
introduces new error handling code in Common/ErrorHandler.h, changes the
COFF and ELF linkers to use it, and removes the old, separate
implementations.
Reviewers: ruiu
Reviewed By: ruiu
Subscribers: smeenai, jyknight, emaste, sdardis, nemanjai, nhaehnle, mgorny, javed.absar, kbarton, fedor.sergeev, llvm-commits
Differential Revision: https://reviews.llvm.org/D39259
llvm-svn: 316624
2017-10-26 06:28:38 +08:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
2017-11-28 10:15:26 +08:00
|
|
|
#include "lld/Common/Strings.h"
|
2015-09-05 06:28:10 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
[ELF] Add --warn-backrefs-exclude=<glob>
D77522 changed --warn-backrefs to not warn for linking sandwich
problems (-ldef1 -lref -ldef2). This removed lots of false positives.
However, glibc still has some problems. libc.a defines some symbols
which are normally in libm.a and libpthread.a, e.g. __isnanl/raise.
For a linking order `-lm -lpthread -lc`, I have seen:
```
// different resolutions: GNU ld/gold select libc.a(s_isnan.o) as the definition
backward reference detected: __isnanl in libc.a(printf_fp.o) refers to libm.a(m_isnanl.o)
// different resolutions: GNU ld/gold select libc.a(raise.o) as the definition
backward reference detected: raise in libc.a(abort.o) refers to libpthread.a(pt-raise.o)
```
To facilitate deployment of --warn-backrefs, add --warn-backrefs-exclude= so that
certain known issues (which may be impractical to fix) can be whitelisted.
Deliberate choices:
* Not a comma-separated list (`--warn-backrefs-exclude=liba.a,libb.a`).
-Wl, splits the argument at commas, so we cannot use commas.
--export-dynamic-symbol is similar.
* Not in the style of `--warn-backrefs='*' --warn-backrefs=-liba.a`.
We just need exclusion, not inclusion. For easier build system
integration, we should avoid order dependency. With the current
scheme, we enable --warn-backrefs, and individual libraries can add
--warn-backrefs-exclude=<glob> to their LDFLAGS.
Reviewed By: psmith
Differential Revision: https://reviews.llvm.org/D77512
2020-04-05 12:31:36 +08:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2016-10-12 16:19:30 +08:00
|
|
|
#include "llvm/Support/Path.h"
|
2016-11-23 12:57:25 +08:00
|
|
|
#include <cstring>
|
2015-09-05 06:28:10 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
2015-07-25 05:03:07 +08:00
|
|
|
using namespace llvm::object;
|
2015-09-02 07:12:52 +08:00
|
|
|
using namespace llvm::ELF;
|
2020-05-15 13:18:58 +08:00
|
|
|
using namespace lld;
|
|
|
|
using namespace lld::elf;
|
2015-07-25 05:03:07 +08:00
|
|
|
|
2019-10-07 16:31:18 +08:00
|
|
|
// Returns a symbol for an error message.
|
|
|
|
static std::string demangle(StringRef symName) {
|
|
|
|
if (elf::config->demangle)
|
|
|
|
return demangleItanium(symName);
|
2020-01-29 03:23:46 +08:00
|
|
|
return std::string(symName);
|
2019-10-07 16:31:18 +08:00
|
|
|
}
|
2015-07-25 05:03:07 +08:00
|
|
|
|
2020-05-15 13:18:58 +08:00
|
|
|
// Renders a symbol for messages: the (possibly demangled) name, followed by
// the symbol's version suffix when that suffix begins with '@'.
std::string lld::toString(const elf::Symbol &sym) {
  std::string text = demangle(sym.getName());

  const char *versionSuffix = sym.getVersionSuffix();
  if (versionSuffix[0] != '@')
    return text;

  text += versionSuffix;
  return text;
}
|
|
|
|
|
2020-05-15 13:18:58 +08:00
|
|
|
// Renders an archive symbol for messages, demangling its name when
// demangling is enabled.
std::string lld::toELFString(const Archive::Symbol &b) {
  StringRef rawName = b.getName();
  return demangle(rawName);
}
|
|
|
|
|
2017-11-06 12:35:31 +08:00
|
|
|
Defined *ElfSym::bss;
|
|
|
|
Defined *ElfSym::etext1;
|
|
|
|
Defined *ElfSym::etext2;
|
|
|
|
Defined *ElfSym::edata1;
|
|
|
|
Defined *ElfSym::edata2;
|
|
|
|
Defined *ElfSym::end1;
|
|
|
|
Defined *ElfSym::end2;
|
|
|
|
Defined *ElfSym::globalOffsetTable;
|
|
|
|
Defined *ElfSym::mipsGp;
|
|
|
|
Defined *ElfSym::mipsGpDisp;
|
|
|
|
Defined *ElfSym::mipsLocalGp;
|
2019-01-16 02:30:23 +08:00
|
|
|
Defined *ElfSym::relaIpltStart;
|
2018-04-20 00:54:30 +08:00
|
|
|
Defined *ElfSym::relaIpltEnd;
|
2019-06-14 10:14:53 +08:00
|
|
|
Defined *ElfSym::riscvGlobalPointer;
|
2019-05-30 18:00:20 +08:00
|
|
|
Defined *ElfSym::tlsModuleBase;
|
2020-10-23 06:26:52 +08:00
|
|
|
DenseMap<const Symbol *, std::pair<const InputFile *, const InputFile *>>
|
|
|
|
elf::backwardReferences;
|
2021-09-21 00:52:30 +08:00
|
|
|
SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
|
|
|
|
elf::whyExtract;
|
[Coding style change] Rename variables so that they start with a lowercase letter
This patch is mechanically generated by clang-llvm-rename tool that I wrote
using Clang Refactoring Engine just for creating this patch. You can see the
source code of the tool at https://reviews.llvm.org/D64123. There's no manual
post-processing; you can generate the same patch by re-running the tool against
lld's code base.
Here is the main discussion thread to change the LLVM coding style:
https://lists.llvm.org/pipermail/llvm-dev/2019-February/130083.html
In the discussion thread, I proposed we use lld as a testbed for variable
naming scheme change, and this patch does that.
I chose to rename variables so that they are in camelCase, just because that
is a minimal change to make variables to start with a lowercase letter.
Note to downstream patch maintainers: if you are maintaining a downstream lld
repo, just rebasing ahead of this commit would cause massive merge conflicts
because this patch essentially changes every line in the lld subdirectory. But
there's a remedy.
clang-llvm-rename tool is a batch tool, so you can rename variables in your
downstream repo with the tool. Given that, here is how to rebase your repo to
a commit after the mass renaming:
1. rebase to the commit just before the mass variable renaming,
2. apply the tool to your downstream repo to mass-rename variables locally, and
3. rebase again to the head.
Most changes made by the tool should be identical for a downstream repo and
for the head, so at the step 3, almost all changes should be merged and
disappear. I'd expect that there would be some lines that you need to merge by
hand, but that shouldn't be too many.
Differential Revision: https://reviews.llvm.org/D64121
llvm-svn: 365595
2019-07-10 13:00:37 +08:00
|
|
|
|
2021-12-13 11:12:01 +08:00
|
|
|
static uint64_t getSymVA(const Symbol &sym, int64_t addend) {
|
2017-11-04 08:31:04 +08:00
|
|
|
switch (sym.kind()) {
|
2017-11-06 12:35:31 +08:00
|
|
|
case Symbol::DefinedKind: {
|
|
|
|
auto &d = cast<Defined>(sym);
|
2017-03-09 06:36:28 +08:00
|
|
|
SectionBase *isec = d.section;
|
2016-02-02 05:00:35 +08:00
|
|
|
|
|
|
|
// This is an absolute symbol.
|
2016-12-15 08:57:53 +08:00
|
|
|
if (!isec)
|
2016-04-04 22:04:16 +08:00
|
|
|
return d.value;
|
2016-02-02 05:00:35 +08:00
|
|
|
|
2019-06-26 16:09:08 +08:00
|
|
|
assert(isec != &InputSection::discarded);
|
2018-04-20 01:26:50 +08:00
|
|
|
isec = isec->repl;
|
|
|
|
|
2017-03-08 23:21:32 +08:00
|
|
|
uint64_t offset = d.value;
|
2017-02-28 16:32:56 +08:00
|
|
|
|
|
|
|
// An object in an SHF_MERGE section might be referenced via a
|
|
|
|
// section symbol (as a hack for reducing the number of local
|
|
|
|
// symbols).
|
2017-03-01 12:44:04 +08:00
|
|
|
// Depending on the addend, the reference via a section symbol
|
|
|
|
// refers to a different object in the merge section.
|
|
|
|
// Since the objects in the merge section are not necessarily
|
|
|
|
// contiguous in the output, the addend can thus affect the final
|
|
|
|
// VA in a non-linear way.
|
|
|
|
// To make this work, we incorporate the addend into the section
|
|
|
|
// offset (and zero out the addend for later processing) so that
|
|
|
|
// we find the right object in the section.
|
2021-12-13 11:12:01 +08:00
|
|
|
if (d.isSection())
|
2016-03-11 22:21:37 +08:00
|
|
|
offset += addend;
|
2017-02-28 16:32:56 +08:00
|
|
|
|
2017-02-28 17:01:58 +08:00
|
|
|
// In the typical case, this is actually very simple and boils
|
|
|
|
// down to adding together 3 numbers:
|
|
|
|
// 1. The address of the output section.
|
|
|
|
// 2. The offset of the input section within the output section.
|
|
|
|
// 3. The offset within the input section (this addition happens
|
|
|
|
// inside InputSection::getOffset).
|
|
|
|
//
|
|
|
|
// If you understand the data structures involved with this next
|
|
|
|
// line (and how they get built), then you have a pretty good
|
|
|
|
// understanding of the linker.
|
2018-03-24 08:35:11 +08:00
|
|
|
uint64_t va = isec->getVA(offset);
|
2021-12-13 11:12:01 +08:00
|
|
|
if (d.isSection())
|
|
|
|
va -= addend;
|
2017-02-28 17:01:58 +08:00
|
|
|
|
2019-02-19 18:36:58 +08:00
|
|
|
// MIPS relocatable files can mix regular and microMIPS code.
|
|
|
|
// Linker needs to distinguish such code. To do so microMIPS
|
|
|
|
// symbols has the `STO_MIPS_MICROMIPS` flag in the `st_other`
|
2020-01-23 13:39:16 +08:00
|
|
|
// field. Unfortunately, the `MIPS::relocate()` method has
|
2019-02-19 18:36:58 +08:00
|
|
|
// a symbol value only. To pass type of the symbol (regular/microMIPS)
|
|
|
|
// to that routine as well as other places where we write
|
|
|
|
// a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry`
|
|
|
|
// field etc) do the same trick as compiler uses to mark microMIPS
|
|
|
|
// for CPU - set the less-significant bit.
|
|
|
|
if (config->emachine == EM_MIPS && isMicroMips() &&
|
2021-12-15 08:28:41 +08:00
|
|
|
((sym.stOther & STO_MIPS_MICROMIPS) || sym.needsCopy))
|
2019-02-19 18:36:58 +08:00
|
|
|
va |= 1;
|
|
|
|
|
2016-10-04 16:52:51 +08:00
|
|
|
if (d.isTls() && !config->relocatable) {
|
[ELF] Set Out::TlsPhdr earlier for encoding packed reloc tables
Summary:
For --pack-dyn-relocs=android, finalizeSections calls
LinkerScript::assignAddresses and
AndroidPackedRelocationSection::updateAllocSize in a loop,
where assignAddresses lays out the ELF image, then updateAllocSize
determines the size of the Android packed relocation table by encoding it.
Encoding the table requires knowing the values of relocation addends.
To get the addend of a TLS relocation, updateAllocSize can call getSymVA
on a TLS symbol before setPhdrs has initialized Out::TlsPhdr, producing an
error:
<file> has an STT_TLS symbol but doesn't have an SHF_TLS section
Fix the problem by initializing Out::TlsPhdr immediately after the program
headers are created. The segment's p_vaddr field isn't initialized until
setPhdrs, so use FirstSec->Addr, which is what setPhdrs would use.
FirstSec will typically refer to the .tdata or .tbss output section, whose
(tentative) address was computed by assignAddresses.
Android currently avoids this problem because it uses emutls and doesn't
support ELF TLS. This problem doesn't apply to --pack-dyn-relocs=relr
because SHR_RELR only handles relative relocations without explicit addends
or info.
Fixes https://bugs.llvm.org/show_bug.cgi?id=37841.
Reviewers: ruiu, pcc, chh, javed.absar, espindola
Subscribers: emaste, arichardson, llvm-commits, srhines
Differential Revision: https://reviews.llvm.org/D51671
llvm-svn: 342432
2018-09-18 08:24:48 +08:00
|
|
|
// Use the address of the TLS segment's first section rather than the
|
|
|
|
// segment's address, because segment addresses aren't initialized until
|
|
|
|
// after sections are finalized. (e.g. Measuring the size of .rela.dyn
|
|
|
|
// for Android relocation packing requires knowing TLS symbol addresses
|
|
|
|
// during section finalization.)
|
|
|
|
if (!Out::tlsPhdr || !Out::tlsPhdr->firstSec)
|
2017-11-30 06:47:35 +08:00
|
|
|
fatal(toString(d.file) +
|
2017-07-14 08:22:46 +08:00
|
|
|
" has an STT_TLS symbol but doesn't have an SHF_TLS section");
|
[ELF] Set Out::TlsPhdr earlier for encoding packed reloc tables
Summary:
For --pack-dyn-relocs=android, finalizeSections calls
LinkerScript::assignAddresses and
AndroidPackedRelocationSection::updateAllocSize in a loop,
where assignAddresses lays out the ELF image, then updateAllocSize
determines the size of the Android packed relocation table by encoding it.
Encoding the table requires knowing the values of relocation addends.
To get the addend of a TLS relocation, updateAllocSize can call getSymVA
on a TLS symbol before setPhdrs has initialized Out::TlsPhdr, producing an
error:
<file> has an STT_TLS symbol but doesn't have an SHF_TLS section
Fix the problem by initializing Out::TlsPhdr immediately after the program
headers are created. The segment's p_vaddr field isn't initialized until
setPhdrs, so use FirstSec->Addr, which is what setPhdrs would use.
FirstSec will typically refer to the .tdata or .tbss output section, whose
(tentative) address was computed by assignAddresses.
Android currently avoids this problem because it uses emutls and doesn't
support ELF TLS. This problem doesn't apply to --pack-dyn-relocs=relr
because SHR_RELR only handles relative relocations without explicit addends
or info.
Fixes https://bugs.llvm.org/show_bug.cgi?id=37841.
Reviewers: ruiu, pcc, chh, javed.absar, espindola
Subscribers: emaste, arichardson, llvm-commits, srhines
Differential Revision: https://reviews.llvm.org/D51671
llvm-svn: 342432
2018-09-18 08:24:48 +08:00
|
|
|
return va - Out::tlsPhdr->firstSec->addr;
|
2016-10-04 16:52:51 +08:00
|
|
|
}
|
2016-03-11 22:21:37 +08:00
|
|
|
return va;
|
2016-02-02 05:00:35 +08:00
|
|
|
}
|
2018-04-27 01:58:58 +08:00
|
|
|
case Symbol::SharedKind:
|
2017-11-04 05:21:47 +08:00
|
|
|
case Symbol::UndefinedKind:
|
2016-02-02 05:00:35 +08:00
|
|
|
return 0;
|
2017-11-04 05:21:47 +08:00
|
|
|
case Symbol::LazyArchiveKind:
|
|
|
|
case Symbol::LazyObjectKind:
|
2018-09-12 07:00:36 +08:00
|
|
|
assert(sym.isUsedInRegularObj && "lazy symbol reached writer");
|
|
|
|
return 0;
|
2019-05-16 11:29:03 +08:00
|
|
|
case Symbol::CommonKind:
|
|
|
|
llvm_unreachable("common symbol reached writer");
|
2018-10-13 02:29:18 +08:00
|
|
|
case Symbol::PlaceholderKind:
|
|
|
|
llvm_unreachable("placeholder symbol reached writer");
|
2016-02-02 05:00:35 +08:00
|
|
|
}
|
2016-03-12 16:31:34 +08:00
|
|
|
llvm_unreachable("invalid symbol kind");
|
2016-02-02 05:00:35 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint64_t Symbol::getVA(int64_t addend) const {
|
2021-12-13 11:12:01 +08:00
|
|
|
return getSymVA(*this, addend) + addend;
|
2016-03-11 20:19:05 +08:00
|
|
|
}
|
|
|
|
|
2019-02-14 05:49:55 +08:00
|
|
|
uint64_t Symbol::getGotVA() const {
|
|
|
|
if (gotInIgot)
|
|
|
|
return in.igotPlt->getVA() + getGotPltOffset();
|
|
|
|
return in.got->getVA() + getGotOffset();
|
|
|
|
}
|
2016-04-07 23:20:56 +08:00
|
|
|
|
2021-05-17 07:13:00 +08:00
|
|
|
uint64_t Symbol::getGotOffset() const {
|
|
|
|
return gotIndex * target->gotEntrySize;
|
|
|
|
}
|
2016-02-02 05:00:35 +08:00
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint64_t Symbol::getGotPltVA() const {
|
2019-02-14 05:49:55 +08:00
|
|
|
if (isInIplt)
|
2018-09-26 03:26:58 +08:00
|
|
|
return in.igotPlt->getVA() + getGotPltOffset();
|
|
|
|
return in.gotPlt->getVA() + getGotPltOffset();
|
2016-04-07 23:20:56 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint64_t Symbol::getGotPltOffset() const {
|
2019-02-14 05:49:55 +08:00
|
|
|
if (isInIplt)
|
2021-05-17 07:13:00 +08:00
|
|
|
return pltIndex * target->gotEntrySize;
|
|
|
|
return (pltIndex + target->gotPltHeaderEntriesNum) * target->gotEntrySize;
|
2016-02-02 05:00:35 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint64_t Symbol::getPltVA() const {
|
2019-12-15 06:17:35 +08:00
|
|
|
uint64_t outVA = isInIplt
|
|
|
|
? in.iplt->getVA() + pltIndex * target->ipltEntrySize
|
|
|
|
: in.plt->getVA() + in.plt->headerSize +
|
|
|
|
pltIndex * target->pltEntrySize;
|
|
|
|
|
2019-02-19 18:36:58 +08:00
|
|
|
// While linking microMIPS code PLT code are always microMIPS
|
|
|
|
// code. Set the less-significant bit to track that fact.
|
|
|
|
// See detailed comment in the `getSymVA` function.
|
|
|
|
if (config->emachine == EM_MIPS && isMicroMips())
|
|
|
|
outVA |= 1;
|
|
|
|
return outVA;
|
2018-04-27 01:58:58 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint64_t Symbol::getSize() const {
|
2017-11-06 12:35:31 +08:00
|
|
|
if (const auto *dr = dyn_cast<Defined>(this))
|
2016-04-04 22:04:16 +08:00
|
|
|
return dr->size;
|
2018-07-17 19:35:28 +08:00
|
|
|
return cast<SharedSymbol>(this)->size;
|
2016-02-03 08:12:24 +08:00
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
// Returns the output section that holds this symbol, or nullptr when the
// symbol is not a Defined symbol or has no section.
OutputSection *Symbol::getOutputSection() const {
  const auto *def = dyn_cast<Defined>(this);
  if (!def || !def->section)
    return nullptr;
  return def->section->getOutputSection();
}
|
|
|
|
|
2016-11-23 13:48:40 +08:00
|
|
|
// If a symbol name contains '@', the characters after that is
|
|
|
|
// a symbol version name. This function parses that.
|
2017-11-04 05:21:47 +08:00
|
|
|
void Symbol::parseSymbolVersion() {
|
2021-08-05 14:52:55 +08:00
|
|
|
// Return if localized by a local: pattern in a version script.
|
|
|
|
if (versionId == VER_NDX_LOCAL)
|
|
|
|
return;
|
2016-11-23 13:48:40 +08:00
|
|
|
StringRef s = getName();
|
|
|
|
size_t pos = s.find('@');
|
2021-12-16 15:59:55 +08:00
|
|
|
if (pos == StringRef::npos)
|
2016-11-23 13:48:40 +08:00
|
|
|
return;
|
|
|
|
StringRef verstr = s.substr(pos + 1);
|
|
|
|
if (verstr.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Truncate the symbol name so that it doesn't include the version string.
|
2018-04-26 05:44:37 +08:00
|
|
|
nameSize = pos;
|
2016-11-23 13:48:40 +08:00
|
|
|
|
2017-01-18 00:08:06 +08:00
|
|
|
// If this is not in this DSO, it is not a definition.
|
2017-11-06 12:39:07 +08:00
|
|
|
if (!isDefined())
|
2017-01-07 06:30:35 +08:00
|
|
|
return;
|
|
|
|
|
2016-11-23 13:48:40 +08:00
|
|
|
// '@@' in a symbol name means the default version.
|
|
|
|
// It is usually the most recent one.
|
|
|
|
bool isDefault = (verstr[0] == '@');
|
|
|
|
if (isDefault)
|
|
|
|
verstr = verstr.substr(1);
|
|
|
|
|
2019-08-05 22:31:39 +08:00
|
|
|
for (const VersionDefinition &ver : namedVersionDefs()) {
|
2016-11-23 13:48:40 +08:00
|
|
|
if (ver.name != verstr)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (isDefault)
|
2017-11-01 00:07:41 +08:00
|
|
|
versionId = ver.id;
|
2016-11-23 13:48:40 +08:00
|
|
|
else
|
2017-11-01 00:07:41 +08:00
|
|
|
versionId = ver.id | VERSYM_HIDDEN;
|
2016-11-23 13:48:40 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// It is an error if the specified version is not defined.
|
2017-07-04 21:19:13 +08:00
|
|
|
// Usually version script is not provided when linking executable,
|
|
|
|
// but we may still want to override a versioned symbol from DSO,
|
2018-05-14 18:13:56 +08:00
|
|
|
// so we do not report error in this case. We also do not error
|
|
|
|
// if the symbol has a local version as it won't be in the dynamic
|
|
|
|
// symbol table.
|
|
|
|
if (config->shared && versionId != VER_NDX_LOCAL)
|
2017-11-30 06:47:35 +08:00
|
|
|
error(toString(file) + ": symbol " + s + " has undefined version " +
|
2017-07-04 21:19:13 +08:00
|
|
|
verstr);
|
2016-11-23 13:48:40 +08:00
|
|
|
}
|
|
|
|
|
2021-11-27 02:58:50 +08:00
|
|
|
void Symbol::extract() const {
|
2021-11-27 06:10:55 +08:00
|
|
|
if (auto *sym = dyn_cast<LazyArchive>(this))
|
2021-11-27 02:58:50 +08:00
|
|
|
cast<ArchiveFile>(sym->file)->extract(sym->sym);
|
2021-11-27 06:10:55 +08:00
|
|
|
else
|
|
|
|
cast<LazyObjFile>(this->file)->extract();
|
2019-05-16 10:14:00 +08:00
|
|
|
}
|
2016-04-08 03:24:51 +08:00
|
|
|
|
2018-08-09 07:48:12 +08:00
|
|
|
// Returns the memory buffer of the archive member that defines this symbol.
// Both lookups go through CHECK with a message naming the symbol.
MemoryBufferRef LazyArchive::getMemberBuffer() {
  Archive::Child member = CHECK(
      sym.getMember(),
      "could not get the member for symbol " + toELFString(sym));

  return CHECK(member.getMemoryBufferRef(),
               "could not get the buffer for the member defining symbol " +
                   toELFString(sym));
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
uint8_t Symbol::computeBinding() const {
|
2017-01-11 01:08:13 +08:00
|
|
|
if (config->relocatable)
|
|
|
|
return binding;
|
2019-08-12 01:03:00 +08:00
|
|
|
if ((visibility != STV_DEFAULT && visibility != STV_PROTECTED) ||
|
[lld:elf] Weaken the requirement for a computed binding to be STB_LOCAL
Given the following scenario:
```
// Cat.cpp
struct Animal { virtual void makeNoise() const = 0; };
struct Cat : Animal { void makeNoise() const override; };
extern "C" int puts(char const *);
void Cat::makeNoise() const { puts("Meow"); }
void doThingWithCat(Animal *a) { static_cast<Cat *>(a)->makeNoise(); }
// CatUser.cpp
struct Animal { virtual void makeNoise() const = 0; };
struct Cat : Animal { void makeNoise() const override; };
void doThingWithCat(Animal *a);
void useDoThingWithCat() {
Cat *d = new Cat;
doThingWithCat(d);
}
// cat.ver
{
global: _Z17useDoThingWithCatv;
local: *;
};
$ clang++ Cat.cpp CatUser.cpp -fpic -flto=thin -fwhole-program-vtables
-shared -O3 -fuse-ld=lld -Wl,--lto-whole-program-visibility
-Wl,--version-script,cat.ver
```
We cannot devirtualize `Cat::makeNoise`. The issue is complex:
Due to `-fsplit-lto-unit` and usage of type metadata, we place the Cat
vtable declaration into module 0 and the Cat vtable definition with type
metadata into module 1, causing duplicate entries (Undefined followed by
Defined) in the `lto::InputFile::symbols()` output.
In `BitcodeFile::parse`, after processing the `Undefined` then the
`Defined`, the final state is `Defined`.
In `BitcodeCompiler::add`, for the first symbol, `computeBinding`
returns `STB_LOCAL`, then we reset it to `Undefined` because it is
prevailing (`versionId` is `preserved`). For the second symbol, because
the state is now `Undefined`, `computeBinding` returns `STB_GLOBAL`,
causing `ExportDynamic` to be true and suppressing devirtualization.
In D77280, the `computeBinding` change used a stricter `isDefined()`
condition to make weak``Lazy` symbol work.
This patch relaxes the condition to weaker `!isLazy()` to keep it
working while making the devirtualization work as well.
Differential Revision: https://reviews.llvm.org/D98686
2021-03-16 16:33:50 +08:00
|
|
|
(versionId == VER_NDX_LOCAL && !isLazy()))
|
2017-01-11 01:08:13 +08:00
|
|
|
return STB_LOCAL;
|
2018-02-03 05:44:06 +08:00
|
|
|
if (!config->gnuUnique && binding == STB_GNU_UNIQUE)
|
2017-01-11 01:08:13 +08:00
|
|
|
return STB_GLOBAL;
|
|
|
|
return binding;
|
|
|
|
}
|
|
|
|
|
2017-11-04 05:21:47 +08:00
|
|
|
bool Symbol::includeInDynsym() const {
|
2017-09-16 02:05:02 +08:00
|
|
|
if (!config->hasDynSymTab)
|
|
|
|
return false;
|
2017-01-11 01:08:13 +08:00
|
|
|
if (computeBinding() == STB_LOCAL)
|
2016-04-22 05:44:25 +08:00
|
|
|
return false;
|
2020-01-10 07:53:52 +08:00
|
|
|
if (!isDefined() && !isCommon())
|
2020-01-24 03:52:03 +08:00
|
|
|
// This should unconditionally return true, unfortunately glibc -static-pie
|
|
|
|
// expects undefined weak symbols not to exist in .dynsym, e.g.
|
|
|
|
// __pthread_mutex_lock reference in _dl_add_to_namespace_list,
|
|
|
|
// __pthread_initialize_minimal reference in csu/libc-start.c.
|
|
|
|
return !(config->noDynamicLinker && isUndefWeak());
|
2019-06-25 14:58:07 +08:00
|
|
|
|
2020-01-10 07:53:52 +08:00
|
|
|
return exportDynamic || inDynamicList;
|
2016-04-22 04:35:25 +08:00
|
|
|
}
|
2016-07-18 01:50:09 +08:00
|
|
|
|
|
|
|
// Print out a log message for --trace-symbol.
|
2020-05-15 13:18:58 +08:00
|
|
|
void elf::printTraceSymbol(const Symbol *sym) {
|
2017-02-22 07:22:56 +08:00
|
|
|
std::string s;
|
2017-11-01 00:07:41 +08:00
|
|
|
if (sym->isUndefined())
|
2017-02-22 07:22:56 +08:00
|
|
|
s = ": reference to ";
|
2017-11-01 00:07:41 +08:00
|
|
|
else if (sym->isLazy())
|
2017-10-28 02:30:11 +08:00
|
|
|
s = ": lazy definition of ";
|
2017-11-01 00:07:41 +08:00
|
|
|
else if (sym->isShared())
|
2017-10-28 02:30:11 +08:00
|
|
|
s = ": shared definition of ";
|
2019-05-16 11:29:03 +08:00
|
|
|
else if (sym->isCommon())
|
2017-11-06 12:33:58 +08:00
|
|
|
s = ": common definition of ";
|
2016-07-18 01:50:09 +08:00
|
|
|
else
|
2017-02-22 07:22:56 +08:00
|
|
|
s = ": definition of ";
|
|
|
|
|
2017-11-01 00:07:41 +08:00
|
|
|
message(toString(sym->file) + s + sym->getName());
|
2016-07-18 01:50:09 +08:00
|
|
|
}
|
|
|
|
|
2021-09-21 00:52:30 +08:00
|
|
|
static void recordWhyExtract(const InputFile *reference,
|
|
|
|
const InputFile &extracted, const Symbol &sym) {
|
|
|
|
whyExtract.emplace_back(toString(reference), &extracted, sym);
|
|
|
|
}
|
|
|
|
|
2020-05-15 13:18:58 +08:00
|
|
|
void elf::maybeWarnUnorderableSymbol(const Symbol *sym) {
|
2018-04-18 07:30:05 +08:00
|
|
|
if (!config->warnSymbolOrdering)
|
|
|
|
return;
|
2018-04-26 09:38:29 +08:00
|
|
|
|
[ELF] Don't warn on undefined symbols if UnresolvedPolicy::Ignore is used
Summary:
Add a condition UnresolvedPolicy::Ignore to elf::warnUnorderedSymbol to suppress Sym->isUndefined() warnings from both
1) --symbol-ordering-file=
2) .llvm.call-graph-profile
If --unresolved-symbols=ignore-all is used,
no "undefined symbol" error/warning is emitted. It makes sense to not warn unorderable symbols.
Otherwise,
If an executable is linked, the default policy UnresolvedPolicy::ErrorOrWarn will issue a "undefined symbol" error. The unorderable symbol warning is redundant.
If a shared object is linked, it is possible that only part of object files are used and some symbols are left undefined. The warning is not very necessary.
In particular for .llvm.call-graph-profile, when linking a shared object, a call graph profile may contain undefined symbols. This case generated a warning before but it will be suppressed by this patch.
Reviewers: ruiu, davidxl, espindola
Reviewed By: ruiu
Subscribers: grimar, emaste, arichardson, llvm-commits
Differential Revision: https://reviews.llvm.org/D53044
llvm-svn: 344195
2018-10-11 06:48:57 +08:00
|
|
|
// If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning
|
|
|
|
// is emitted. It makes sense to not warn on undefined symbols.
|
|
|
|
//
|
|
|
|
// Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,
|
|
|
|
// but we don't have to be compatible here.
|
|
|
|
if (sym->isUndefined() &&
|
|
|
|
config->unresolvedSymbols == UnresolvedPolicy::Ignore)
|
|
|
|
return;
|
|
|
|
|
2018-04-18 07:30:05 +08:00
|
|
|
const InputFile *file = sym->file;
|
|
|
|
auto *d = dyn_cast<Defined>(sym);
|
[Coding style change] Rename variables so that they start with a lowercase letter
This patch is mechanically generated by clang-llvm-rename tool that I wrote
using Clang Refactoring Engine just for creating this patch. You can see the
source code of the tool at https://reviews.llvm.org/D64123. There's no manual
post-processing; you can generate the same patch by re-running the tool against
lld's code base.
Here is the main discussion thread to change the LLVM coding style:
https://lists.llvm.org/pipermail/llvm-dev/2019-February/130083.html
In the discussion thread, I proposed we use lld as a testbed for variable
naming scheme change, and this patch does that.
I chose to rename variables so that they are in camelCase, just because that
is a minimal change to make variables to start with a lowercase letter.
Note to downstream patch maintainers: if you are maintaining a downstream lld
repo, just rebasing ahead of this commit would cause massive merge conflicts
because this patch essentially changes every line in the lld subdirectory. But
there's a remedy.
clang-llvm-rename tool is a batch tool, so you can rename variables in your
downstream repo with the tool. Given that, here is how to rebase your repo to
a commit after the mass renaming:
1. rebase to the commit just before the mass variable renaming,
2. apply the tool to your downstream repo to mass-rename variables locally, and
3. rebase again to the head.
Most changes made by the tool should be identical for a downstream repo and
for the head, so at the step 3, almost all changes should be merged and
disappear. I'd expect that there would be some lines that you need to merge by
hand, but that shouldn't be too many.
Differential Revision: https://reviews.llvm.org/D64121
llvm-svn: 365595
2019-07-10 13:00:37 +08:00
|
|
|
|
2019-07-03 14:11:50 +08:00
|
|
|
auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); };
|
[Coding style change] Rename variables so that they start with a lowercase letter
This patch is mechanically generated by clang-llvm-rename tool that I wrote
using Clang Refactoring Engine just for creating this patch. You can see the
source code of the tool at https://reviews.llvm.org/D64123. There's no manual
post-processing; you can generate the same patch by re-running the tool against
lld's code base.
Here is the main discussion thread to change the LLVM coding style:
https://lists.llvm.org/pipermail/llvm-dev/2019-February/130083.html
In the discussion thread, I proposed we use lld as a testbed for variable
naming scheme change, and this patch does that.
I chose to rename variables so that they are in camelCase, just because that
is a minimal change to make variables to start with a lowercase letter.
Note to downstream patch maintainers: if you are maintaining a downstream lld
repo, just rebasing ahead of this commit would cause massive merge conflicts
because this patch essentially changes every line in the lld subdirectory. But
there's a remedy.
clang-llvm-rename tool is a batch tool, so you can rename variables in your
downstream repo with the tool. Given that, here is how to rebase your repo to
a commit after the mass renaming:
1. rebase to the commit just before the mass variable renaming,
2. apply the tool to your downstream repo to mass-rename variables locally, and
3. rebase again to the head.
Most changes made by the tool should be identical for a downstream repo and
for the head, so at the step 3, almost all changes should be merged and
disappear. I'd expect that there would be some lines that you need to merge by
hand, but that shouldn't be too many.
Differential Revision: https://reviews.llvm.org/D64121
llvm-svn: 365595
2019-07-10 13:00:37 +08:00
|
|
|
|
2018-04-18 07:30:05 +08:00
|
|
|
if (sym->isUndefined())
|
2019-07-03 14:11:50 +08:00
|
|
|
report(": unable to order undefined symbol: ");
|
2018-04-18 07:30:05 +08:00
|
|
|
else if (sym->isShared())
|
2019-07-03 14:11:50 +08:00
|
|
|
report(": unable to order shared symbol: ");
|
2018-04-18 07:30:05 +08:00
|
|
|
else if (d && !d->section)
|
2019-07-03 14:11:50 +08:00
|
|
|
report(": unable to order absolute symbol: ");
|
2018-04-18 07:30:05 +08:00
|
|
|
else if (d && isa<OutputSection>(d->section))
|
2019-07-03 14:11:50 +08:00
|
|
|
report(": unable to order synthetic symbol: ");
|
2019-05-29 11:55:20 +08:00
|
|
|
else if (d && !d->section->repl->isLive())
|
2019-07-03 14:11:50 +08:00
|
|
|
report(": unable to order discarded symbol: ");
|
2018-04-18 07:30:05 +08:00
|
|
|
}
|
|
|
|
|
2019-11-30 13:58:36 +08:00
|
|
|
// Returns true if a symbol can be replaced at load-time by a symbol
|
|
|
|
// with the same name defined in other ELF executable or DSO.
|
2020-05-15 13:18:58 +08:00
|
|
|
bool elf::computeIsPreemptible(const Symbol &sym) {
|
2019-11-30 13:58:36 +08:00
|
|
|
assert(!sym.isLocal());
|
|
|
|
|
|
|
|
// Only symbols with default visibility that appear in dynsym can be
|
|
|
|
// preempted. Symbols with protected visibility cannot be preempted.
|
|
|
|
if (!sym.includeInDynsym() || sym.visibility != STV_DEFAULT)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// At this point copy relocations have not been created yet, so any
|
|
|
|
// symbol that is not defined locally is preemptible.
|
|
|
|
if (!sym.isDefined())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (!config->shared)
|
|
|
|
return false;
|
|
|
|
|
2020-06-02 02:27:53 +08:00
|
|
|
// If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is
|
|
|
|
// specified and the symbol is STT_FUNC, the symbol is preemptible iff it is
|
2021-07-30 05:46:53 +08:00
|
|
|
// in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of
|
|
|
|
// -Bsymbolic-functions.
|
|
|
|
if (config->symbolic ||
|
|
|
|
(config->bsymbolic == BsymbolicKind::Functions && sym.isFunc()) ||
|
|
|
|
(config->bsymbolic == BsymbolicKind::NonWeakFunctions && sym.isFunc() &&
|
|
|
|
sym.binding != STB_WEAK))
|
2019-11-30 13:58:36 +08:00
|
|
|
return sym.inDynamicList;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-05-15 13:18:58 +08:00
|
|
|
void elf::reportBackrefs() {
|
2020-04-06 13:27:46 +08:00
|
|
|
for (auto &it : backwardReferences) {
|
|
|
|
const Symbol &sym = *it.first;
|
2020-11-08 12:17:41 +08:00
|
|
|
std::string to = toString(it.second.second);
|
|
|
|
// Some libraries have known problems and can cause noise. Filter them out
|
|
|
|
// with --warn-backrefs-exclude=. to may look like *.o or *.a(*.o).
|
|
|
|
bool exclude = false;
|
|
|
|
for (const llvm::GlobPattern &pat : config->warnBackrefsExclude)
|
|
|
|
if (pat.match(to)) {
|
|
|
|
exclude = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!exclude)
|
|
|
|
warn("backward reference detected: " + sym.getName() + " in " +
|
|
|
|
toString(it.second.first) + " refers to " + to);
|
2020-04-06 13:27:46 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-23 17:58:08 +08:00
|
|
|
static uint8_t getMinVisibility(uint8_t va, uint8_t vb) {
|
|
|
|
if (va == STV_DEFAULT)
|
|
|
|
return vb;
|
|
|
|
if (vb == STV_DEFAULT)
|
|
|
|
return va;
|
|
|
|
return std::min(va, vb);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge symbol properties.
|
|
|
|
//
|
|
|
|
// When we have many symbols of the same name, we choose one of them,
|
|
|
|
// and that's the result of symbol resolution. However, symbols that
|
|
|
|
// were not chosen still affect some symbol properties.
|
|
|
|
void Symbol::mergeProperties(const Symbol &other) {
|
|
|
|
if (other.exportDynamic)
|
|
|
|
exportDynamic = true;
|
|
|
|
if (other.isUsedInRegularObj)
|
|
|
|
isUsedInRegularObj = true;
|
|
|
|
|
|
|
|
// DSO symbols do not affect visibility in the output.
|
|
|
|
if (!other.isShared())
|
|
|
|
visibility = getMinVisibility(visibility, other.visibility);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Symbol::resolve(const Symbol &other) {
|
|
|
|
mergeProperties(other);
|
|
|
|
|
|
|
|
if (isPlaceholder()) {
|
|
|
|
replace(other);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (other.kind()) {
|
|
|
|
case Symbol::UndefinedKind:
|
|
|
|
resolveUndefined(cast<Undefined>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::CommonKind:
|
|
|
|
resolveCommon(cast<CommonSymbol>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::DefinedKind:
|
|
|
|
resolveDefined(cast<Defined>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::LazyArchiveKind:
|
|
|
|
resolveLazy(cast<LazyArchive>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::LazyObjectKind:
|
|
|
|
resolveLazy(cast<LazyObject>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::SharedKind:
|
|
|
|
resolveShared(cast<SharedSymbol>(other));
|
|
|
|
break;
|
|
|
|
case Symbol::PlaceholderKind:
|
|
|
|
llvm_unreachable("bad symbol kind");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Symbol::resolveUndefined(const Undefined &other) {
|
|
|
|
// An undefined symbol with non default visibility must be satisfied
|
|
|
|
// in the same DSO.
|
|
|
|
//
|
|
|
|
// If this is a non-weak defined symbol in a discarded section, override the
|
|
|
|
// existing undefined symbol for better error message later.
|
|
|
|
if ((isShared() && other.visibility != STV_DEFAULT) ||
|
|
|
|
(isUndefined() && other.binding != STB_WEAK && other.discardedSecIdx)) {
|
|
|
|
replace(other);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-05-24 21:29:17 +08:00
|
|
|
if (traced)
|
|
|
|
printTraceSymbol(&other);
|
|
|
|
|
2019-07-04 18:38:04 +08:00
|
|
|
if (isLazy()) {
|
2021-11-27 02:58:50 +08:00
|
|
|
// An undefined weak will not extract archive members. See comment on Lazy
|
|
|
|
// in Symbols.h for the details.
|
2019-05-23 17:58:08 +08:00
|
|
|
if (other.binding == STB_WEAK) {
|
2019-07-02 19:37:21 +08:00
|
|
|
binding = STB_WEAK;
|
2019-05-23 17:58:08 +08:00
|
|
|
type = other.type;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do extra check for --warn-backrefs.
|
|
|
|
//
|
|
|
|
// --warn-backrefs is an option to prevent an undefined reference from
|
2021-11-27 02:58:50 +08:00
|
|
|
// extracting an archive member written earlier in the command line. It can
|
|
|
|
// be used to keep compatibility with GNU linkers to some degree. I'll
|
|
|
|
// explain the feature and why you may find it useful in this comment.
|
2019-05-23 17:58:08 +08:00
|
|
|
//
|
|
|
|
// lld's symbol resolution semantics is more relaxed than traditional Unix
|
|
|
|
// linkers. For example,
|
|
|
|
//
|
|
|
|
// ld.lld foo.a bar.o
|
|
|
|
//
|
|
|
|
// succeeds even if bar.o contains an undefined symbol that has to be
|
|
|
|
// resolved by some object file in foo.a. Traditional Unix linkers don't
|
|
|
|
// allow this kind of backward reference, as they visit each file only once
|
|
|
|
// from left to right in the command line while resolving all undefined
|
|
|
|
// symbols at the moment of visiting.
|
|
|
|
//
|
|
|
|
// In the above case, since there's no undefined symbol when a linker visits
|
|
|
|
// foo.a, no files are pulled out from foo.a, and because the linker forgets
|
|
|
|
// about foo.a after visiting, it can't resolve undefined symbols in bar.o
|
|
|
|
// that could have been resolved otherwise.
|
|
|
|
//
|
|
|
|
// That lld accepts more relaxed form means that (besides it'd make more
|
|
|
|
// sense) you can accidentally write a command line or a build file that
|
|
|
|
// works only with lld, even if you have a plan to distribute it to wider
|
|
|
|
// users who may be using GNU linkers. With --warn-backrefs, you can detect
|
|
|
|
// a library order that doesn't work with other Unix linkers.
|
|
|
|
//
|
|
|
|
// The option is also useful to detect cyclic dependencies between static
|
|
|
|
// archives. Again, lld accepts
|
|
|
|
//
|
|
|
|
// ld.lld foo.a bar.a
|
|
|
|
//
|
|
|
|
// even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
|
|
|
|
// handled as an error.
|
|
|
|
//
|
|
|
|
// Here is how the option works. We assign a group ID to each file. A file
|
|
|
|
// with a smaller group ID can pull out object files from an archive file
|
|
|
|
// with an equal or greater group ID. Otherwise, it is a reverse dependency
|
|
|
|
// and an error.
|
|
|
|
//
|
|
|
|
// A file outside --{start,end}-group gets a fresh ID when instantiated. All
|
|
|
|
// files within the same --{start,end}-group get the same group ID. E.g.
|
|
|
|
//
|
|
|
|
// ld.lld A B --start-group C D --end-group E
|
|
|
|
//
|
|
|
|
// A forms group 0. B form group 1. C and D (including their member object
|
|
|
|
// files) form group 2. E forms group 3. I think that you can see how this
|
|
|
|
// group assignment rule simulates the traditional linker's semantics.
|
|
|
|
bool backref = config->warnBackrefs && other.file &&
|
|
|
|
file->groupId < other.file->groupId;
|
2021-11-27 02:58:50 +08:00
|
|
|
extract();
|
2019-05-23 17:58:08 +08:00
|
|
|
|
2021-09-21 00:52:30 +08:00
|
|
|
if (!config->whyExtract.empty())
|
|
|
|
recordWhyExtract(other.file, *file, *this);
|
|
|
|
|
2019-05-23 17:58:08 +08:00
|
|
|
// We don't report backward references to weak symbols as they can be
|
|
|
|
// overridden later.
|
2020-04-06 13:27:46 +08:00
|
|
|
//
|
|
|
|
// A traditional linker does not error for -ldef1 -lref -ldef2 (linking
|
|
|
|
// sandwich), where def2 may or may not be the same as def1. We don't want
|
|
|
|
// to warn for this case, so dismiss the warning if we see a subsequent lazy
|
2020-10-23 06:26:52 +08:00
|
|
|
// definition. this->file needs to be saved because in the case of LTO it
|
|
|
|
// may be reset to nullptr or be replaced with a file named lto.tmp.
|
2019-05-23 17:58:08 +08:00
|
|
|
if (backref && !isWeak())
|
2020-10-23 06:26:52 +08:00
|
|
|
backwardReferences.try_emplace(this, std::make_pair(other.file, file));
|
2019-07-04 18:38:04 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Undefined symbols in a SharedFile do not change the binding.
|
|
|
|
if (dyn_cast_or_null<SharedFile>(other.file))
|
|
|
|
return;
|
|
|
|
|
[ELF] Make binding (weak or non-weak) logic consistent for Undefined and SharedSymbol
This is a case missed by D64136. If %t1.o has a weak reference on foo,
and %t2.so has a non-weak reference on foo:
```
0. ld.lld %t1.o %t2.so # ok; STB_WEAK; accepted since D64136
1. ld.lld %t2.so %t1.o # undefined symbol: foo; STB_GLOBAL
2. gold %t1.o %t2.so # ok; STB_WEAK
3. gold %t2.so %t1.o # undefined reference to 'foo'; STB_GLOBAL
4. ld.bfd %t1.o %t2.so # undefined reference to `foo'; STB_WEAK
5. ld.bfd %t2.so %t1.o # undefined reference to `foo'; STB_WEAK
```
It can be argued that in both cases, the binding of the undefined foo
should be set to STB_WEAK, because the binding should not be affected by
referenced from shared objects.
--allow-shlib-undefined doesn't suppress errors (3,4,5), but -shared or
--noinhibit-exec allows ld.bfd/gold to produce a binary:
```
3. gold -shared %t2.so %t1.o # ok; STB_GLOBAL
4. ld.bfd -shared %t2.so %t1.o # ok; STB_WEAK
5. ld.bfd -shared %t1.o %t1.o # ok; STB_WEAK
```
If %t2.so has DT_NEEDED entries, ld.bfd will load them (lld/gold don't
have the behavior). If one of the DSO defines foo and it is in the
link-time search path (e.g. DT_NEEDED entry is an absolute path, via
-rpath=, via -rpath-link=, etc),
`ld.bfd %t1.o %t2.so` and `ld.bfd %t1.o %t2.so` will not error.
In this patch, we make Undefined and SharedSymbol share the same binding
computing logic. Case 1 will be allowed:
```
0. ld.lld %t1.o %t2.so # ok; STB_WEAK; accepted since D64136
1. ld.lld %t2.so %t1.o # ok; STB_WEAK; changed by this patch
```
In the future, we can explore the option that turns both (0,1) into
errors if --no-allow-shlib-undefined (default when linking an
executable) is in action.
Reviewed By: ruiu
Differential Revision: https://reviews.llvm.org/D65584
llvm-svn: 368038
2019-08-06 22:03:45 +08:00
|
|
|
if (isUndefined() || isShared()) {
|
|
|
|
// The binding will be weak if there is at least one reference and all are
|
|
|
|
// weak. The binding has one opportunity to change to weak: if the first
|
|
|
|
// reference is weak.
|
|
|
|
if (other.binding != STB_WEAK || !referenced)
|
2019-07-04 18:38:04 +08:00
|
|
|
binding = other.binding;
|
2019-05-23 17:58:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compare two symbols. Return 1 if the new symbol should win, -1 if
|
|
|
|
// the new symbol should lose, or 0 if there is a conflict.
|
|
|
|
int Symbol::compare(const Symbol *other) const {
|
|
|
|
assert(other->isDefined() || other->isCommon());
|
|
|
|
|
|
|
|
if (!isDefined() && !isCommon())
|
|
|
|
return 1;
|
|
|
|
|
2021-12-16 07:19:35 +08:00
|
|
|
// .symver foo,foo@@VER unfortunately creates two defined symbols: foo and
|
|
|
|
// foo@@VER. In GNU ld, if foo and foo@@VER are in the same file, foo is
|
|
|
|
// ignored. In our implementation, when this is foo, this->getName() may still
|
|
|
|
// contain @@, return 1 in this case as well.
|
|
|
|
if (file == other->file) {
|
|
|
|
if (other->getName().contains("@@"))
|
|
|
|
return 1;
|
|
|
|
if (getName().contains("@@"))
|
|
|
|
return -1;
|
|
|
|
}
|
2019-05-23 17:58:08 +08:00
|
|
|
|
|
|
|
if (other->isWeak())
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (isWeak())
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (isCommon() && other->isCommon()) {
|
|
|
|
if (config->warnCommon)
|
|
|
|
warn("multiple common of " + getName());
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isCommon()) {
|
|
|
|
if (config->warnCommon)
|
|
|
|
warn("common " + getName() + " is overridden");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (other->isCommon()) {
|
|
|
|
if (config->warnCommon)
|
|
|
|
warn("common " + getName() + " is overridden");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto *oldSym = cast<Defined>(this);
|
|
|
|
auto *newSym = cast<Defined>(other);
|
|
|
|
|
2019-07-27 00:29:15 +08:00
|
|
|
if (dyn_cast_or_null<BitcodeFile>(other->file))
|
2019-05-23 17:58:08 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!oldSym->section && !newSym->section && oldSym->value == newSym->value &&
|
|
|
|
newSym->binding == STB_GLOBAL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reports a duplicate definition of `sym`, unless multiple definitions are
// allowed by the configuration. `newFile`, `errSec` and `errOffset` describe
// the second definition; `errSec` may be null.
static void reportDuplicate(Symbol *sym, InputFile *newFile,
                            InputSectionBase *errSec, uint64_t errOffset) {
  if (config->allowMultipleDefinition)
    return;

  Defined *d = cast<Defined>(sym);

  // Without a section on both sides only the defining files can be named.
  const bool haveBothSections = d->section && errSec;
  if (!haveBothSections) {
    error("duplicate symbol: " + toString(*sym) + "\n>>> defined in " +
          toString(sym->file) + "\n>>> defined in " + toString(newFile));
    return;
  }

  // Construct and print an error message in the form of:
  //
  //   ld.lld: error: duplicate symbol: foo
  //   >>> defined at bar.c:30
  //   >>>            bar.o (/home/alice/src/bar.o)
  //   >>> defined at baz.c:563
  //   >>>            baz.o in archive libbaz.a
  auto *firstSec = cast<InputSectionBase>(d->section);
  std::string firstSrc = firstSec->getSrcMsg(*sym, d->value);
  std::string firstObj = firstSec->getObjMsg(d->value);
  std::string secondSrc = errSec->getSrcMsg(*sym, errOffset);
  std::string secondObj = errSec->getObjMsg(errOffset);

  std::string msg = "duplicate symbol: " + toString(*sym);

  // Appends one "defined at" entry; the source line is left out when no
  // source location is available.
  auto appendLocation = [&msg](const std::string &src,
                               const std::string &obj) {
    msg += "\n>>> defined at ";
    if (!src.empty())
      msg += src + "\n>>>            ";
    msg += obj;
  };
  appendLocation(firstSrc, firstObj);
  appendLocation(secondSrc, secondObj);

  error(msg);
}
|
|
|
|
|
|
|
|
void Symbol::resolveCommon(const CommonSymbol &other) {
|
|
|
|
int cmp = compare(&other);
|
|
|
|
if (cmp < 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (cmp > 0) {
|
2019-12-07 13:18:31 +08:00
|
|
|
if (auto *s = dyn_cast<SharedSymbol>(this)) {
|
|
|
|
// Increase st_size if the shared symbol has a larger st_size. The shared
|
|
|
|
// symbol may be created from common symbols. The fact that some object
|
|
|
|
// files were linked into a shared object first should not change the
|
|
|
|
// regular rule that picks the largest st_size.
|
|
|
|
uint64_t size = s->size;
|
|
|
|
replace(other);
|
|
|
|
if (size > cast<CommonSymbol>(this)->size)
|
|
|
|
cast<CommonSymbol>(this)->size = size;
|
|
|
|
} else {
|
|
|
|
replace(other);
|
|
|
|
}
|
2019-05-23 17:58:08 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
CommonSymbol *oldSym = cast<CommonSymbol>(this);
|
|
|
|
|
|
|
|
oldSym->alignment = std::max(oldSym->alignment, other.alignment);
|
|
|
|
if (oldSym->size < other.size) {
|
|
|
|
oldSym->file = other.file;
|
|
|
|
oldSym->size = other.size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Symbol::resolveDefined(const Defined &other) {
|
|
|
|
int cmp = compare(&other);
|
|
|
|
if (cmp > 0)
|
|
|
|
replace(other);
|
|
|
|
else if (cmp == 0)
|
|
|
|
reportDuplicate(this, other.file,
|
|
|
|
dyn_cast_or_null<InputSectionBase>(other.section),
|
|
|
|
other.value);
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:28:17 +08:00
|
|
|
template <class LazyT>
|
|
|
|
static void replaceCommon(Symbol &oldSym, const LazyT &newSym) {
|
|
|
|
backwardReferences.erase(&oldSym);
|
|
|
|
oldSym.replace(newSym);
|
2021-11-27 02:58:50 +08:00
|
|
|
newSym.extract();
|
2020-12-07 22:28:17 +08:00
|
|
|
}
|
|
|
|
|
2019-05-23 17:58:08 +08:00
|
|
|
template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
|
2020-12-07 22:28:17 +08:00
|
|
|
// For common objects, we want to look for global or weak definitions that
|
2021-11-27 02:58:50 +08:00
|
|
|
// should be extracted as the canonical definition instead.
|
2020-12-07 22:28:17 +08:00
|
|
|
if (isCommon() && elf::config->fortranCommon) {
|
|
|
|
if (auto *laSym = dyn_cast<LazyArchive>(&other)) {
|
|
|
|
ArchiveFile *archive = cast<ArchiveFile>(laSym->file);
|
|
|
|
const Archive::Symbol &archiveSym = laSym->sym;
|
2021-11-27 02:58:50 +08:00
|
|
|
if (archive->shouldExtractForCommon(archiveSym)) {
|
2020-12-07 22:28:17 +08:00
|
|
|
replaceCommon(*this, other);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
|
|
|
|
LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
|
2021-11-27 02:58:50 +08:00
|
|
|
if (obj->shouldExtractForCommon(loSym->getName())) {
|
2020-12-07 22:28:17 +08:00
|
|
|
replaceCommon(*this, other);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-06 13:27:46 +08:00
|
|
|
if (!isUndefined()) {
|
|
|
|
// See the comment in resolveUndefined().
|
|
|
|
if (isDefined())
|
|
|
|
backwardReferences.erase(this);
|
2019-05-23 17:58:08 +08:00
|
|
|
return;
|
2020-04-06 13:27:46 +08:00
|
|
|
}
|
2019-05-23 17:58:08 +08:00
|
|
|
|
2021-11-27 02:58:50 +08:00
|
|
|
// An undefined weak will not extract archive members. See comment on Lazy in
|
2019-05-23 17:58:08 +08:00
|
|
|
// Symbols.h for the details.
|
|
|
|
if (isWeak()) {
|
|
|
|
uint8_t ty = type;
|
|
|
|
replace(other);
|
|
|
|
type = ty;
|
|
|
|
binding = STB_WEAK;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-09-21 00:52:30 +08:00
|
|
|
const InputFile *oldFile = file;
|
2021-11-27 02:58:50 +08:00
|
|
|
other.extract();
|
2021-09-21 00:52:30 +08:00
|
|
|
if (!config->whyExtract.empty())
|
|
|
|
recordWhyExtract(oldFile, *file, *this);
|
2019-05-23 17:58:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void Symbol::resolveShared(const SharedSymbol &other) {
|
2019-12-07 13:18:31 +08:00
|
|
|
if (isCommon()) {
|
|
|
|
// See the comment in resolveCommon() above.
|
|
|
|
if (other.size > cast<CommonSymbol>(this)->size)
|
|
|
|
cast<CommonSymbol>(this)->size = other.size;
|
|
|
|
return;
|
|
|
|
}
|
2019-05-23 17:58:08 +08:00
|
|
|
if (visibility == STV_DEFAULT && (isUndefined() || isLazy())) {
|
|
|
|
// An undefined symbol with non default visibility must be satisfied
|
|
|
|
// in the same DSO.
|
|
|
|
uint8_t bind = binding;
|
|
|
|
replace(other);
|
|
|
|
binding = bind;
|
2020-05-19 01:15:59 +08:00
|
|
|
} else if (traced)
|
|
|
|
printTraceSymbol(&other);
|
2019-05-23 17:58:08 +08:00
|
|
|
}
|