[lld-macho][reland] Add support for N_INDR symbols

This is similar to the `-alias` CLI option, but it gives finer-grained
control in that it allows the aliased symbols to be treated as private
externs.

While working on this, I realized that our `-alias` handling did not
cover the cases where the aliased symbol is a common or dylib symbol,
nor the case where we have an undefined that gets treated specially and
converted to a defined later on. My N_INDR handling neglects this too
for now; I've added checks and TODO messages for these.

`N_INDR` symbols cropped up as part of our attempt to link swift-stdlib.

Reviewed By: #lld-macho, thakis, thevinster

Differential Revision: https://reviews.llvm.org/D133825
This commit is contained in:
Jez Ng 2022-09-15 22:55:41 -04:00
parent 83a0ac12f5
commit d515575714
9 changed files with 228 additions and 21 deletions

View File

@ -1200,13 +1200,40 @@ static void createAliases() {
for (const auto &pair : config->aliasedSymbols) {
if (const auto &sym = symtab->find(pair.first)) {
if (const auto &defined = dyn_cast<Defined>(sym)) {
symtab->aliasDefined(defined, pair.second);
continue;
symtab->aliasDefined(defined, pair.second, defined->getFile());
} else {
error("TODO: support aliasing to symbols of kind " +
Twine(sym->kind()));
}
} else {
warn("undefined base symbol '" + pair.first + "' for alias '" +
pair.second + "'\n");
}
}
for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file)) {
for (const AliasSymbol *alias : objFile->aliases) {
if (const auto &aliased = symtab->find(alias->getAliasedName())) {
if (const auto &defined = dyn_cast<Defined>(aliased)) {
symtab->aliasDefined(defined, alias->getName(), alias->getFile(),
alias->privateExtern);
} else {
// Common, dylib, and undefined symbols are all valid alias
// referents (undefineds can become valid Defined symbols later on
// in the link.)
error("TODO: support aliasing to symbols of kind " +
Twine(aliased->kind()));
}
} else {
// This shouldn't happen since MC generates undefined symbols to
// represent the alias referents. Thus we fatal() instead of just
// warning here.
fatal("unable to find alias referent " + alias->getAliasedName() +
" for " + alias->getName());
}
}
}
warn("undefined base symbol '" + pair.first + "' for alias '" +
pair.second + "'\n");
}
}

View File

@ -872,7 +872,8 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
template <class NList>
macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
StringRef name) {
const char *strtab) {
StringRef name = StringRef(strtab + sym.n_strx);
uint8_t type = sym.n_type & N_TYPE;
bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden;
switch (type) {
@ -884,9 +885,20 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
isPrivateExtern);
case N_ABS:
return createAbsolute(sym, this, name, forceHidden);
case N_INDR: {
// Not much point in making local aliases -- relocs in the current file can
// just refer to the actual symbol itself. ld64 ignores these symbols too.
if (!(sym.n_type & N_EXT))
return nullptr;
StringRef aliasedName = StringRef(strtab + sym.n_value);
// isPrivateExtern is the only symbol flag that has an impact on the final
// aliased symbol.
auto alias = make<AliasSymbol>(this, name, aliasedName, isPrivateExtern);
aliases.push_back(alias);
return alias;
}
case N_PBUD:
case N_INDR:
error("TODO: support symbols of type " + std::to_string(type));
error("TODO: support symbols of type N_PBUD");
return nullptr;
case N_SECT:
llvm_unreachable(
@ -927,7 +939,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
} else if (isUndef(sym)) {
undefineds.push_back(i);
} else {
symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx));
symbols[i] = parseNonSectionSymbol(sym, strtab);
}
}
@ -1026,11 +1038,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// symbol resolution behavior. In addition, a set of interconnected symbols
// will all be resolved to the same file, instead of being resolved to
// different files.
for (unsigned i : undefineds) {
const NList &sym = nList[i];
StringRef name = strtab + sym.n_strx;
symbols[i] = parseNonSectionSymbol(sym, name);
}
for (unsigned i : undefineds)
symbols[i] = parseNonSectionSymbol(nList[i], strtab);
}
OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,

View File

@ -44,6 +44,7 @@ struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
class AliasSymbol;
struct Reloc;
enum class RefState : uint8_t;
@ -176,6 +177,7 @@ public:
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
std::vector<OptimizationHint> optimizationHints;
std::vector<AliasSymbol *> aliases;
private:
llvm::once_flag initDwarf;
@ -186,7 +188,7 @@ private:
ArrayRef<typename LP::nlist> nList, const char *strtab,
bool subsectionsViaSymbols);
template <class NList>
Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
template <class SectionHeader>
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);

View File

@ -54,8 +54,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
bool overridesWeakDef = false;
auto [s, wasInserted] = insert(name, file);
assert(!isWeakDef || (isa<BitcodeFile>(file) && !isec) ||
(isa<ObjFile>(file) && file == isec->getFile()));
assert(!file || !isa<BitcodeFile>(file) || !isec);
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
@ -115,9 +114,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
return defined;
}
Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) {
return addDefined(target, src->getFile(), src->isec, src->value, src->size,
src->isWeakDef(), src->privateExtern, src->thumb,
Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
InputFile *newFile, bool makePrivateExtern) {
bool isPrivateExtern = makePrivateExtern || src->privateExtern;
return addDefined(target, newFile, src->isec, src->value, src->size,
src->isWeakDef(), isPrivateExtern, src->thumb,
src->referencedDynamically, src->noDeadStrip,
src->weakDefCanBeHidden);
}

View File

@ -42,7 +42,8 @@ public:
bool isReferencedDynamically, bool noDeadStrip,
bool isWeakDefCanBeHidden);
Defined *aliasDefined(Defined *src, StringRef target);
Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile,
bool makePrivateExtern = false);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);

View File

@ -39,6 +39,7 @@ public:
DylibKind,
LazyArchiveKind,
LazyObjectKind,
AliasKind,
};
virtual ~Symbol() {}
@ -321,6 +322,26 @@ public:
static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
};
// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
// then all AliasSymbol instances will be converted into one of the other Symbol
// types after `createAliases()` runs.
class AliasSymbol final : public Symbol {
public:
AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
bool isPrivateExtern)
: Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
aliasedName(aliasedName) {}
StringRef getAliasedName() const { return aliasedName; }
static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
const bool privateExtern;
private:
StringRef aliasedName;
};
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
@ -328,6 +349,7 @@ union SymbolUnion {
alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
alignas(LazyArchive) char e[sizeof(LazyArchive)];
alignas(LazyObject) char f[sizeof(LazyObject)];
alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
};
template <typename T, typename... ArgT>

View File

@ -37,3 +37,12 @@ archives.
symbol" error.
- LLD: Duplicate symbols, regardless of which archives they are from, will
raise errors.
Aliases
=======
ld64 treats all aliases as strong extern definitions. Having two aliases of the
same name, or an alias plus a regular extern symbol of the same name, both
result in duplicate symbol errors. LLD does not check for duplicate aliases;
instead we perform alias resolution first, and only then do we check for
duplicate symbols. In particular, we will not report a duplicate symbol error if
the aliased symbols turn out to be weak definitions, but ld64 will.

View File

@ -0,0 +1,136 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/aliases.s -o %t/aliases.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/definitions.s -o %t/definitions.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-weak.s -o %t/weak-extern-alias-to-weak.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-strong.s -o %t/weak-extern-alias-to-strong.o
# RUN: %lld -lSystem %t/aliases.o %t/definitions.o -o %t/out
# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s
## local aliases should be dropped entirely. --implicit-check-not doesn't seem
## to work well with -DAG matches, so we check for _local_alias' absence in a
## separate step.
# RUN: llvm-objdump --macho --syms %t/out | FileCheck /dev/null --implicit-check-not _local_alias
# CHECK-DAG: [[#%.16x,STRONG:]] g F __TEXT,__text _strong
# CHECK-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1
# CHECK-DAG: [[#%.16x,PEXT:]] l F __TEXT,__text _pext
# CHECK-DAG: [[#%.16x,DEAD:]] g F __TEXT,__text _dead
# CHECK-DAG: [[#STRONG]] l F __TEXT,__text _pext_alias
# CHECK-DAG: [[#PEXT]] l F __TEXT,__text _alias_to_pext
# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _extern_alias_to_strong
# CHECK-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak
# CHECK-DAG: [[#DEAD]] g F __TEXT,__text _no_dead_strip_alias
# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _weak_extern_alias_to_strong
# RUN: %lld -lSystem -dead_strip %t/aliases.o %t/definitions.o -o %t/dead-stripped
# RUN: llvm-objdump --macho --syms %t/dead-stripped | FileCheck %s --check-prefix=STRIPPED
# STRIPPED: SYMBOL TABLE:
# STRIPPED-NEXT: g F __TEXT,__text _main
# STRIPPED-NEXT: g F __TEXT,__text __mh_execute_header
# STRIPPED-NEXT: *UND* dyld_stub_binder
# STRIPPED-EMPTY:
# RUN: not %lld -lSystem %t/aliases.o %t/definitions.o \
# RUN: %t/weak-extern-alias-to-strong.o -o /dev/null 2>&1
## Verify that we preserve the file names of the aliases, rather than using the
## filename of the aliased symbols.
# DUP: error: duplicate symbol: _weak_extern_alias_to_weak
# DUP-NEXT: >>> defined in {{.*}}aliases.o
# DUP-NEXT: >>> defined in {{.*}}weak-extern-alias-to-weak.o
## The following cases are actually all dup symbol errors under ld64. Alias
## symbols are treated like strong extern symbols by ld64 even if the symbol they alias
## is actually weak. LLD OTOH does not check for dup symbols until after
## resolving the aliases; this makes for a simpler implementation.
## The following test cases are meant to elucidate what LLD's behavior is, but
## we should feel free to change it in the future should it be helpful for the
## implementation.
# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
# RUN: %t/weak-extern-alias-to-weak.o -o %t/alias-clash-1
# RUN: llvm-objdump --macho --syms %t/alias-clash-1 | FileCheck %s --check-prefix WEAK-1
# RUN: %lld -lSystem %t/weak-extern-alias-to-weak.o %t/aliases.o \
# RUN: %t/definitions.o -o %t/alias-clash-2
# RUN: llvm-objdump --macho --syms %t/alias-clash-2 | FileCheck %s --check-prefix WEAK-2
# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \
# RUN: -alias _weak_2 _weak_extern_alias_to_weak -o %t/opt-vs-symbol
# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
# RUN: %lld -lSystem -alias _weak_2 _weak_extern_alias_to_weak %t/aliases.o \
# RUN: %t/definitions.o -o %t/opt-vs-symbol
# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2
# WEAK-1-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1
# WEAK-1-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak
# WEAK-2-DAG: [[#%.16x,WEAK_2:]] w F __TEXT,__text _weak_2
# WEAK-2-DAG: [[#WEAK_2]] w F __TEXT,__text _weak_extern_alias_to_weak
#--- aliases.s
.globl _extern_alias_to_strong, _weak_extern_alias_to_weak
.weak_definition _weak_extern_alias_to_weak
## Private extern aliases result in local symbols in the output (i.e. it is as
## if the aliased symbol is also private extern.)
.private_extern _pext_alias
## This test case demonstrates that it doesn't matter whether the alias itself
## is strong or weak. Rather, what matters is whether the aliased symbol is
## strong or weak.
.globl _weak_extern_alias_to_strong
.weak_definition _weak_extern_alias_to_strong
## no_dead_strip doesn't retain the aliased symbol if it is dead
.globl _no_dead_strip_alias
.no_dead_strip _no_dead_strip_alias
.globl _alias_to_pext
_alias_to_pext = _pext
_extern_alias_to_strong = _strong
_weak_extern_alias_to_weak = _weak_1
_weak_extern_alias_to_strong = _strong
_pext_alias = _strong
_local_alias = _strong
_no_dead_strip_alias = _dead
.subsections_via_symbols
#--- weak-extern-alias-to-weak.s
.globl _weak_extern_alias_to_weak
.weak_definition _weak_extern_alias_to_weak
_weak_extern_alias_to_weak = _weak_2
#--- weak-extern-alias-to-strong.s
.globl _weak_extern_alias_to_strong
.weak_definition _weak_extern_alias_to_strong
_weak_extern_alias_to_strong = _strong
#--- definitions.s
.globl _strong, _weak_1, _weak_2, _dead
.private_extern _pext
.weak_definition _weak_1
.weak_definition _weak_2
_strong:
.space 1
_weak_1:
.space 1
_weak_2:
.space 1
_dead:
.space 1
_pext:
.space 1
.globl _main
_main:
.subsections_via_symbols