[lld-macho] Do not error out on dead stripped duplicate symbols

Builds that error out on duplicate symbols can still succeed if the symbols
will be dead stripped. Currently, this is the current behavior in ld64.
https://github.com/apple-oss-distributions/ld64/blob/main/src/ld/Resolver.cpp#L2018.
In order to provide an easier to path for adoption, introduce a new flag that will
retain compatibility with ld64's behavior (similar to `--deduplicate-literals`). This is
turned off by default since we do not encourage this behavior in the linker.

Reviewed By: #lld-macho, thakis, int3

Differential Revision: https://reviews.llvm.org/D134794
This commit is contained in:
Vincent Lee 2022-09-27 23:42:47 -07:00
parent e898be2f6e
commit 58edaef3fe
10 changed files with 133 additions and 37 deletions

View File

@ -135,6 +135,7 @@ struct Configuration {
bool timeTraceEnabled = false;
bool dataConst = false;
bool dedupLiterals = true;
bool deadStripDuplicates = false;
bool omitDebugInfo = false;
bool warnDylibInstallName = false;
bool ignoreOptimizationHints = false;

View File

@ -1480,6 +1480,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->dedupLiterals =
args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) ||
config->icfLevel != ICFLevel::none;
config->deadStripDuplicates = args.hasArg(OPT_dead_strip_duplicates);
config->warnDylibInstallName = args.hasFlag(
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
config->ignoreOptimizationHints = args.hasArg(OPT_ignore_optimization_hints);

View File

@ -57,6 +57,9 @@ def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">,
def deduplicate_literals: Flag<["--"], "deduplicate-literals">,
HelpText<"Enable literal deduplication. This is implied by --icf={safe,all}">,
Group<grp_lld>;
def dead_strip_duplicates: Flag<["--"], "dead-strip-duplicates">,
HelpText<"Do not error on duplicate symbols that will be dead stripped.">,
Group<grp_lld>;
def print_dylib_search: Flag<["--"], "print-dylib-search">,
HelpText<"Print which paths lld searched when trying to find dylibs">,
Group<grp_lld>;

View File

@ -45,6 +45,21 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
return {sym, p.second};
}
namespace {
struct DuplicateSymbolDiag {
// Pair containing source location and source file
const std::pair<std::string, std::string> src1;
const std::pair<std::string, std::string> src2;
const Symbol *sym;
DuplicateSymbolDiag(const std::pair<std::string, std::string> src1,
const std::pair<std::string, std::string> src2,
const Symbol *sym)
: src1(src1), src2(src2), sym(sym) {}
};
SmallVector<DuplicateSymbolDiag> dupSymDiags;
} // namespace
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef,
@ -80,17 +95,13 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined));
}
} else {
std::string src1 = defined->getSourceLocation();
std::string src2 = isec ? isec->getSourceLocation(value) : "";
std::string srcLoc1 = defined->getSourceLocation();
std::string srcLoc2 = isec ? isec->getSourceLocation(value) : "";
std::string srcFile1 = toString(defined->getFile());
std::string srcFile2 = toString(file);
std::string message =
"duplicate symbol: " + toString(*defined) + "\n>>> defined in ";
if (!src1.empty())
message += src1 + "\n>>> ";
message += toString(defined->getFile()) + "\n>>> defined in ";
if (!src2.empty())
message += src2 + "\n>>> ";
error(message + toString(file));
dupSymDiags.push_back({make_pair(srcLoc1, srcFile1),
make_pair(srcLoc2, srcFile2), defined});
}
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
@ -366,6 +377,21 @@ struct UndefinedDiag {
MapVector<const Undefined *, UndefinedDiag> undefs;
}
void macho::reportPendingDuplicateSymbols() {
for (const auto &duplicate : dupSymDiags) {
if (!config->deadStripDuplicates || duplicate.sym->isLive()) {
std::string message =
"duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in ";
if (!duplicate.src1.first.empty())
message += duplicate.src1.first + "\n>>> ";
message += duplicate.src1.second + "\n>>> defined in ";
if (!duplicate.src2.first.empty())
message += duplicate.src2.first + "\n>>> ";
error(message + duplicate.src2.second);
}
}
}
void macho::reportPendingUndefinedSymbols() {
for (const auto &undef : undefs) {
const UndefinedDiag &locations = undef.second;

View File

@ -72,6 +72,7 @@ private:
};
void reportPendingUndefinedSymbols();
void reportPendingDuplicateSymbols();
// Call reportPendingUndefinedSymbols() to emit diagnostics.
void treatUndefinedSymbol(const Undefined &, StringRef source);

View File

@ -1185,8 +1185,9 @@ template <class LP> void Writer::run() {
if (in.initOffsets->isNeeded())
in.initOffsets->setUp();
// Do not proceed if there was an undefined symbol.
// Do not proceed if there were undefined or duplicate symbols.
reportPendingUndefinedSymbols();
reportPendingDuplicateSymbols();
if (errorCount())
return;

View File

@ -14,6 +14,15 @@ some programs which have (incorrectly) relied on string deduplication always
occurring. In particular, programs which compare string literals via pointer
equality must be fixed to use value equality instead.
Dead Stripping Duplicate Symbols
********************************
ld64 strips dead code before reporting duplicate symbols. By default, LLD does
the opposite. ld64's behavior hides ODR violations, so we have chosen not
to follow it. But, to make adoption easy, LLD can mimic this behavior via
the ``--dead-strip-duplicates`` flag. Usage of this flag is discouraged, and
this behavior should be fixed in the source. However, for sources that are not
within the user's control, this will mitigate users for adoption.
``-no_deduplicate`` Flag
************************
- ld64: This turns off ICF (deduplication pass) in the linker.

View File

@ -0,0 +1,41 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
# RUN: not %lld -lSystem %t/test.o %t/weakfoo.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: duplicate symbol: _weakfoo
# CHECK-NEXT: >>> defined in {{.*}}/test.o
# CHECK-NEXT: >>> defined in {{.*}}/weakfoo.o
## Duplicate absolute symbols that will be dead stripped later should not fail.
# RUN: %lld -lSystem -dead_strip --dead-strip-duplicates -map %t/stripped-duplicate-map \
# RUN: %t/test.o %t/weakfoo.o -o %t/test
# RUN: llvm-objdump --syms %t/test | FileCheck %s --check-prefix=DUP
# DUP-LABEL: SYMBOL TABLE:
# DUP-NEXT: g F __TEXT,__text _main
# DUP-NEXT: g F __TEXT,__text __mh_execute_header
# DUP-NEXT: *UND* dyld_stub_binder
## Dead stripped non-section symbols don't show up in map files because there's no input section.
## Check that _weakfoo doesn't show up. This matches ld64.
# RUN: FileCheck --check-prefix=DUPMAP %s < %t/stripped-duplicate-map
# DUPMAP: _main
# DUPMAP-LABEL: Dead Stripped Symbols
# DUPMAP-NOT: _weakfoo
#--- weakfoo.s
.globl _weakfoo
## The weak attribute is ignored for absolute symbols, so we will have a
## duplicate symbol error for _weakfoo.
.weak_definition _weakfoo
_weakfoo = 0x1234
#--- test.s
.globl _main, _weakfoo
.weak_definition _weakfoo
_weakfoo = 0x5678
.text
_main:
ret

View File

@ -321,7 +321,6 @@
# RUN: %lld -dylib -dead_strip --deduplicate-literals %t/literals.o -o %t/literals
# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --section="__DATA,str_ptrs" \
# RUN: --section="__TEXT,__literals" %t/literals | FileCheck %s --check-prefix=LIT
# LIT: Contents of (__TEXT,__cstring) section
# LIT-NEXT: foobar
# LIT-NEXT: Contents of (__DATA,str_ptrs) section
@ -330,6 +329,45 @@
# LIT-NEXT: Contents of (__TEXT,__literals) section
# LIT-NEXT: ef be ad de {{$}}
## Duplicate symbols that will be dead stripped later should not fail when using
## the --dead-stripped-duplicates flag
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/duplicate1.s -o %t/duplicate1.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \
# RUN: %t/duplicate2.s -o %t/duplicate2.o
# RUN: %lld -lSystem -dead_strip --dead-strip-duplicates -map %t/stripped-duplicate-map \
# RUN: %t/duplicate1.o %t/duplicate2.o -o %t/duplicate
# RUN: llvm-objdump --syms %t/duplicate | FileCheck %s --check-prefix=DUP
# DUP-LABEL: SYMBOL TABLE:
# DUP-NEXT: g F __TEXT,__text _main
# DUP-NEXT: g F __TEXT,__text __mh_execute_header
# DUP-NEXT: *UND* dyld_stub_binder
## Check that the duplicate dead stripped symbols get listed properly.
# RUN: FileCheck --check-prefix=DUPMAP %s < %t/stripped-duplicate-map
# DUPMAP: _main
# DUPMAP-LABEL: Dead Stripped Symbols
# DUPMAP: <<dead>> [ 2] _foo
#--- duplicate1.s
.text
.globl _main, _foo
_foo:
retq
_main:
retq
.subsections_via_symbols
#--- duplicate2.s
.text
.globl _foo
_foo:
retq
.subsections_via_symbols
#--- basics.s
.comm _ref_com, 1
.comm _unref_com, 1

View File

@ -1,25 +0,0 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
# RUN: not %lld -lSystem %t/test.o %t/weakfoo.o -o %t/test 2>&1 | FileCheck %s
# CHECK: error: duplicate symbol: _weakfoo
# CHECK-NEXT: >>> defined in {{.*}}/test.o
# CHECK-NEXT: >>> defined in {{.*}}/weakfoo.o
#--- weakfoo.s
.globl _weakfoo
## The weak attribute is ignored for absolute symbols, so we will have a
## duplicate symbol error for _weakfoo.
.weak_definition _weakfoo
_weakfoo = 0x1234
#--- test.s
.globl _main, _weakfoo
.weak_definition _weakfoo
_weakfoo = 0x5678
.text
_main:
ret