From d515575714b60b2f2b01fcf3cfc6a380b44a6cee Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Thu, 15 Sep 2022 22:55:41 -0400 Subject: [PATCH] [lld-macho][reland] Add support for N_INDR symbols This is similar to the `-alias` CLI option, but it gives finer-grained control in that it allows the aliased symbols to be treated as private externs. While working on this, I realized that our `-alias` handling did not cover the cases where the aliased symbol is a common or dylib symbol, nor the case where we have an undefined that gets treated specially and converted to a defined later on. My N_INDR handling neglects this too for now; I've added checks and TODO messages for these. `N_INDR` symbols cropped up as part of our attempt to link swift-stdlib. Reviewed By: #lld-macho, thakis, thevinster Differential Revision: https://reviews.llvm.org/D133825 --- lld/MachO/Driver.cpp | 37 ++++- lld/MachO/InputFiles.cpp | 27 ++-- lld/MachO/InputFiles.h | 4 +- lld/MachO/SymbolTable.cpp | 11 +- lld/MachO/SymbolTable.h | 3 +- lld/MachO/Symbols.h | 22 +++ lld/docs/MachO/ld64-vs-lld.rst | 9 ++ lld/test/MachO/{aliases.s => alias-option.s} | 0 lld/test/MachO/alias-symbols.s | 136 +++++++++++++++++++ 9 files changed, 228 insertions(+), 21 deletions(-) rename lld/test/MachO/{aliases.s => alias-option.s} (100%) create mode 100644 lld/test/MachO/alias-symbols.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 533273e73bb9..6bec1086f915 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1200,13 +1200,40 @@ static void createAliases() { for (const auto &pair : config->aliasedSymbols) { if (const auto &sym = symtab->find(pair.first)) { if (const auto &defined = dyn_cast(sym)) { - symtab->aliasDefined(defined, pair.second); - continue; + symtab->aliasDefined(defined, pair.second, defined->getFile()); + } else { + error("TODO: support aliasing to symbols of kind " + + Twine(sym->kind())); + } + } else { + warn("undefined base symbol '" + pair.first + "' for alias '" + + pair.second + "'\n"); + } + } + + for (const InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast(file)) { + for (const AliasSymbol *alias : objFile->aliases) { + if (const auto &aliased = symtab->find(alias->getAliasedName())) { + if (const auto &defined = dyn_cast(aliased)) { + symtab->aliasDefined(defined, alias->getName(), alias->getFile(), + alias->privateExtern); + } else { + // Common, dylib, and undefined symbols are all valid alias + // referents (undefineds can become valid Defined symbols later on + // in the link.) + error("TODO: support aliasing to symbols of kind " + + Twine(aliased->kind())); + } + } else { + // This shouldn't happen since MC generates undefined symbols to + // represent the alias referents. Thus we fatal() instead of just + // warning here. + fatal("unable to find alias referent " + alias->getAliasedName() + + " for " + alias->getName()); + } } } - - warn("undefined base symbol '" + pair.first + "' for alias '" + - pair.second + "'\n"); } } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index f68985f99b47..588b87a2927f 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -872,7 +872,8 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file, template macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym, - StringRef name) { + const char *strtab) { + StringRef name = StringRef(strtab + sym.n_strx); uint8_t type = sym.n_type & N_TYPE; bool isPrivateExtern = sym.n_type & N_PEXT || forceHidden; switch (type) { @@ -884,9 +885,20 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym, isPrivateExtern); case N_ABS: return createAbsolute(sym, this, name, forceHidden); + case N_INDR: { + // Not much point in making local aliases -- relocs in the current file can + // just refer to the actual symbol itself. ld64 ignores these symbols too. + if (!(sym.n_type & N_EXT)) + return nullptr; + StringRef aliasedName = StringRef(strtab + sym.n_value); + // isPrivateExtern is the only symbol flag that has an impact on the final + // aliased symbol. + auto alias = make(this, name, aliasedName, isPrivateExtern); + aliases.push_back(alias); + return alias; + } case N_PBUD: - case N_INDR: - error("TODO: support symbols of type " + std::to_string(type)); + error("TODO: support symbols of type N_PBUD"); return nullptr; case N_SECT: llvm_unreachable( @@ -927,7 +939,7 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, } else if (isUndef(sym)) { undefineds.push_back(i); } else { - symbols[i] = parseNonSectionSymbol(sym, StringRef(strtab + sym.n_strx)); + symbols[i] = parseNonSectionSymbol(sym, strtab); } } @@ -1026,11 +1038,8 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, // symbol resolution behavior. In addition, a set of interconnected symbols // will all be resolved to the same file, instead of being resolved to // different files. - for (unsigned i : undefineds) { - const NList &sym = nList[i]; - StringRef name = strtab + sym.n_strx; - symbols[i] = parseNonSectionSymbol(sym, name); - } + for (unsigned i : undefineds) + symbols[i] = parseNonSectionSymbol(nList[i], strtab); } OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 89172922a0a7..1b454f98932a 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -44,6 +44,7 @@ struct PlatformInfo; class ConcatInputSection; class Symbol; class Defined; +class AliasSymbol; struct Reloc; enum class RefState : uint8_t; @@ -176,6 +177,7 @@ public: std::vector callGraph; llvm::DenseMap fdes; std::vector optimizationHints; + std::vector aliases; private: llvm::once_flag initDwarf; @@ -186,7 +188,7 @@ private: ArrayRef nList, const char *strtab, bool subsectionsViaSymbols); template - Symbol *parseNonSectionSymbol(const NList &sym, StringRef name); + Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab); template void parseRelocations(ArrayRef sectionHeaders, const SectionHeader &, Section &); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index de4af3c8a945..6b0a8f4442b3 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -54,8 +54,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, bool overridesWeakDef = false; auto [s, wasInserted] = insert(name, file); - assert(!isWeakDef || (isa(file) && !isec) || - (isa(file) && file == isec->getFile())); + assert(!file || !isa(file) || !isec); if (!wasInserted) { if (auto *defined = dyn_cast(s)) { @@ -115,9 +114,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, return defined; } -Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) { - return addDefined(target, src->getFile(), src->isec, src->value, src->size, - src->isWeakDef(), src->privateExtern, src->thumb, +Defined *SymbolTable::aliasDefined(Defined *src, StringRef target, + InputFile *newFile, bool makePrivateExtern) { + bool isPrivateExtern = makePrivateExtern || src->privateExtern; + return addDefined(target, newFile, src->isec, src->value, src->size, + src->isWeakDef(), isPrivateExtern, src->thumb, src->referencedDynamically, src->noDeadStrip, src->weakDefCanBeHidden); } diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h index a393f9c91808..d90245c7aaa0 100644 --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -42,7 +42,8 @@ public: bool isReferencedDynamically, bool noDeadStrip, bool isWeakDefCanBeHidden); - Defined *aliasDefined(Defined *src, StringRef target); + Defined *aliasDefined(Defined *src, StringRef target, InputFile *newFile, + bool makePrivateExtern = false); Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef); diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 9d3b56a7ae26..6773170e028a 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -39,6 +39,7 @@ public: DylibKind, LazyArchiveKind, LazyObjectKind, + AliasKind, }; virtual ~Symbol() {} @@ -321,6 +322,26 @@ public: static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } }; +// Represents N_INDR symbols. Note that if we are given valid, linkable inputs, +// then all AliasSymbol instances will be converted into one of the other Symbol +// types after `createAliases()` runs. +class AliasSymbol final : public Symbol { +public: + AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName, + bool isPrivateExtern) + : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern), + aliasedName(aliasedName) {} + + StringRef getAliasedName() const { return aliasedName; } + + static bool classof(const Symbol *s) { return s->kind() == AliasKind; } + + const bool privateExtern; + +private: + StringRef aliasedName; +}; + union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(Undefined) char b[sizeof(Undefined)]; @@ -328,6 +349,7 @@ union SymbolUnion { alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; alignas(LazyArchive) char e[sizeof(LazyArchive)]; alignas(LazyObject) char f[sizeof(LazyObject)]; + alignas(AliasSymbol) char g[sizeof(AliasSymbol)]; }; template diff --git a/lld/docs/MachO/ld64-vs-lld.rst b/lld/docs/MachO/ld64-vs-lld.rst index 4f3ae696bb96..f571e6a62b36 100644 --- a/lld/docs/MachO/ld64-vs-lld.rst +++ b/lld/docs/MachO/ld64-vs-lld.rst @@ -37,3 +37,12 @@ archives. symbol" error. - LLD: Duplicate symbols, regardless of which archives they are from, will raise errors. + +Aliases +======= +ld64 treats all aliases as strong extern definitions. Having two aliases of the +same name, or an alias plus a regular extern symbol of the same name, both +result in duplicate symbol errors. LLD does not check for duplicate aliases; +instead we perform alias resolution first, and only then do we check for +duplicate symbols. In particular, we will not report a duplicate symbol error if +the aliased symbols turn out to be weak definitions, but ld64 will. diff --git a/lld/test/MachO/aliases.s b/lld/test/MachO/alias-option.s similarity index 100% rename from lld/test/MachO/aliases.s rename to lld/test/MachO/alias-option.s diff --git a/lld/test/MachO/alias-symbols.s b/lld/test/MachO/alias-symbols.s new file mode 100644 index 000000000000..ef22c87a647f --- /dev/null +++ b/lld/test/MachO/alias-symbols.s @@ -0,0 +1,136 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/aliases.s -o %t/aliases.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/definitions.s -o %t/definitions.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-weak.s -o %t/weak-extern-alias-to-weak.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-extern-alias-to-strong.s -o %t/weak-extern-alias-to-strong.o + +# RUN: %lld -lSystem %t/aliases.o %t/definitions.o -o %t/out +# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s + +## local aliases should be dropped entirely. --implicit-check-not doesn't seem +## to work well with -DAG matches, so we check for _local_alias' absence in a +## separate step. +# RUN: llvm-objdump --macho --syms %t/out | FileCheck /dev/null --implicit-check-not _local_alias + +# CHECK-DAG: [[#%.16x,STRONG:]] g F __TEXT,__text _strong +# CHECK-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1 +# CHECK-DAG: [[#%.16x,PEXT:]] l F __TEXT,__text _pext +# CHECK-DAG: [[#%.16x,DEAD:]] g F __TEXT,__text _dead +# CHECK-DAG: [[#STRONG]] l F __TEXT,__text _pext_alias +# CHECK-DAG: [[#PEXT]] l F __TEXT,__text _alias_to_pext +# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _extern_alias_to_strong +# CHECK-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak +# CHECK-DAG: [[#DEAD]] g F __TEXT,__text _no_dead_strip_alias +# CHECK-DAG: [[#STRONG]] g F __TEXT,__text _weak_extern_alias_to_strong + +# RUN: %lld -lSystem -dead_strip %t/aliases.o %t/definitions.o -o %t/dead-stripped +# RUN: llvm-objdump --macho --syms %t/dead-stripped | FileCheck %s --check-prefix=STRIPPED + +# STRIPPED: SYMBOL TABLE: +# STRIPPED-NEXT: g F __TEXT,__text _main +# STRIPPED-NEXT: g F __TEXT,__text __mh_execute_header +# STRIPPED-NEXT: *UND* dyld_stub_binder +# STRIPPED-EMPTY: + +# RUN: not %lld -lSystem %t/aliases.o %t/definitions.o \ +# RUN: %t/weak-extern-alias-to-strong.o -o /dev/null 2>&1 + +## Verify that we preserve the file names of the aliases, rather than using the +## filename of the aliased symbols. +# DUP: error: duplicate symbol: _weak_extern_alias_to_weak +# DUP-NEXT: >>> defined in {{.*}}aliases.o +# DUP-NEXT: >>> defined in {{.*}}weak-extern-alias-to-weak.o + +## The following cases are actually all dup symbol errors under ld64. Alias +## symbols are treated like strong extern symbols by ld64 even if the symbol they alias +## is actually weak. LLD OTOH does not check for dup symbols until after +## resolving the aliases; this makes for a simpler implementation. +## The following test cases are meant to elucidate what LLD's behavior is, but +## we should feel free to change it in the future should it be helpful for the +## implementation. + +# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \ +# RUN: %t/weak-extern-alias-to-weak.o -o %t/alias-clash-1 +# RUN: llvm-objdump --macho --syms %t/alias-clash-1 | FileCheck %s --check-prefix WEAK-1 + +# RUN: %lld -lSystem %t/weak-extern-alias-to-weak.o %t/aliases.o \ +# RUN: %t/definitions.o -o %t/alias-clash-2 +# RUN: llvm-objdump --macho --syms %t/alias-clash-2 | FileCheck %s --check-prefix WEAK-2 + +# RUN: %lld -lSystem %t/aliases.o %t/definitions.o \ +# RUN: -alias _weak_2 _weak_extern_alias_to_weak -o %t/opt-vs-symbol +# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2 + +# RUN: %lld -lSystem -alias _weak_2 _weak_extern_alias_to_weak %t/aliases.o \ +# RUN: %t/definitions.o -o %t/opt-vs-symbol +# RUN: llvm-objdump --macho --syms %t/opt-vs-symbol | FileCheck %s --check-prefix WEAK-2 + +# WEAK-1-DAG: [[#%.16x,WEAK_1:]] w F __TEXT,__text _weak_1 +# WEAK-1-DAG: [[#WEAK_1]] w F __TEXT,__text _weak_extern_alias_to_weak + +# WEAK-2-DAG: [[#%.16x,WEAK_2:]] w F __TEXT,__text _weak_2 +# WEAK-2-DAG: [[#WEAK_2]] w F __TEXT,__text _weak_extern_alias_to_weak + +#--- aliases.s +.globl _extern_alias_to_strong, _weak_extern_alias_to_weak +.weak_definition _weak_extern_alias_to_weak + +## Private extern aliases result in local symbols in the output (i.e. it is as +## if the aliased symbol is also private extern.) +.private_extern _pext_alias + +## This test case demonstrates that it doesn't matter whether the alias itself +## is strong or weak. Rather, what matters is whether the aliased symbol is +## strong or weak. +.globl _weak_extern_alias_to_strong +.weak_definition _weak_extern_alias_to_strong + +## no_dead_strip doesn't retain the aliased symbol if it is dead +.globl _no_dead_strip_alias +.no_dead_strip _no_dead_strip_alias + +.globl _alias_to_pext +_alias_to_pext = _pext + +_extern_alias_to_strong = _strong +_weak_extern_alias_to_weak = _weak_1 +_weak_extern_alias_to_strong = _strong + +_pext_alias = _strong +_local_alias = _strong +_no_dead_strip_alias = _dead + +.subsections_via_symbols + +#--- weak-extern-alias-to-weak.s +.globl _weak_extern_alias_to_weak +.weak_definition _weak_extern_alias_to_weak +_weak_extern_alias_to_weak = _weak_2 + +#--- weak-extern-alias-to-strong.s +.globl _weak_extern_alias_to_strong +.weak_definition _weak_extern_alias_to_strong +_weak_extern_alias_to_strong = _strong + +#--- definitions.s +.globl _strong, _weak_1, _weak_2, _dead +.private_extern _pext +.weak_definition _weak_1 +.weak_definition _weak_2 + +_strong: + .space 1 +_weak_1: + .space 1 +_weak_2: + .space 1 +_dead: + .space 1 +_pext: + .space 1 + +.globl _main +_main: + +.subsections_via_symbols