[ELF] Apply version script patterns to non-default version symbols

Currently, version script patterns are ignored for non-default version
(single `@`) symbols produced by `.symver`. As a result, such symbols
cannot be localized by `local:`, e.g.

```
.symver foo3_v1,foo3@v1
.globl foo3_v1
foo3_v1:

ld.lld --version-script=a.ver -shared a.o
# In a.out, foo3@v1 is incorrectly exported.
```

This patch adds the support:

* Move `config->versionDefinitions[VER_NDX_LOCAL].patterns` to `config->versionDefinitions[versionId].localPatterns`
* Rename `config->versionDefinitions[versionId].patterns` to `config->versionDefinitions[versionId].nonLocalPatterns`
* Allow `findAllByVersion` to find non-default version symbols when `includeNonDefault` is true. (Note: `symtab` keys do not have `@@`)
* Make each pattern check both the unversioned `pat.name` and the versioned `${pat.name}@${v.name}`
* `localPatterns` can localize `${pat.name}@${v.name}`. `nonLocalPatterns` can prevent localization by assigning `verdefIndex` (before `parseSymbolVersion`).

---

If a user notices new `undefined symbol` errors with a version script containing
`local: *;`, the issue is likely due to a missing `global:` pattern.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D107234
This commit is contained in:
Fangrui Song 2021-08-04 09:02:11 -07:00
parent 3bc8ce5dd7
commit 7ed22a6fa9
12 changed files with 121 additions and 224 deletions

View File

@ -86,7 +86,8 @@ struct SymbolVersion {
struct VersionDefinition {
llvm::StringRef name;
uint16_t id;
std::vector<SymbolVersion> patterns;
std::vector<SymbolVersion> nonLocalPatterns;
std::vector<SymbolVersion> localPatterns;
};
// This struct contains the global configuration for the linker.

View File

@ -1351,18 +1351,19 @@ static void readConfigs(opt::InputArgList &args) {
}
assert(config->versionDefinitions.empty());
config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}});
config->versionDefinitions.push_back(
{"global", (uint16_t)VER_NDX_GLOBAL, {}});
{"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
config->versionDefinitions.push_back(
{"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(
config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(
config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}

View File

@ -1496,9 +1496,9 @@ void ScriptParser::readAnonymousDeclaration() {
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
for (const SymbolVersion &pat : locals)
config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat);
for (const SymbolVersion &pat : globals)
config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat);
config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat);
expect(";");
}
@ -1510,13 +1510,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) {
std::vector<SymbolVersion> locals;
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
for (const SymbolVersion &pat : locals)
config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
// Create a new version definition and add that to the global symbols.
VersionDefinition ver;
ver.name = verStr;
ver.patterns = globals;
ver.nonLocalPatterns = std::move(globals);
ver.localPatterns = std::move(locals);
ver.id = config->versionDefinitions.size();
config->versionDefinitions.push_back(ver);

View File

@ -150,19 +150,24 @@ std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) {
return {};
}
std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) {
std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver,
bool includeNonDefault) {
std::vector<Symbol *> res;
SingleStringMatcher m(ver.name);
if (ver.isExternCpp) {
for (auto &p : getDemangledSyms())
if (m.match(p.first()))
res.insert(res.end(), p.second.begin(), p.second.end());
for (Symbol *sym : p.second)
if (includeNonDefault || !sym->getName().contains('@'))
res.push_back(sym);
return res;
}
for (Symbol *sym : symVector)
if (canBeVersioned(*sym) && m.match(sym->getName()))
if (canBeVersioned(*sym) &&
(includeNonDefault || !sym->getName().contains('@')) &&
m.match(sym->getName()))
res.push_back(sym);
return res;
}
@ -172,7 +177,7 @@ void SymbolTable::handleDynamicList() {
for (SymbolVersion &ver : config->dynamicList) {
std::vector<Symbol *> syms;
if (ver.hasWildcard)
syms = findAllByVersion(ver);
syms = findAllByVersion(ver, /*includeNonDefault=*/true);
else
syms = findByVersion(ver);
@ -181,21 +186,12 @@ void SymbolTable::handleDynamicList() {
}
}
// Set symbol versions to symbols. This function handles patterns
// containing no wildcard characters.
void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
// Set symbol versions to symbols. This function handles patterns containing no
// wildcard characters. Return false if no symbol definition matches ver.
bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
StringRef versionName) {
if (ver.hasWildcard)
return;
// Get a list of symbols which we need to assign the version to.
std::vector<Symbol *> syms = findByVersion(ver);
if (syms.empty()) {
if (!config->undefinedVersion)
error("version script assignment of '" + versionName + "' to symbol '" +
ver.name + "' failed: symbol not defined");
return;
}
auto getName = [](uint16_t ver) -> std::string {
if (ver == VER_NDX_LOCAL)
@ -207,10 +203,10 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
// Assign the version.
for (Symbol *sym : syms) {
// Skip symbols containing version info because symbol versions
// specified by symbol names take precedence over version scripts.
// See parseSymbolVersion().
if (sym->getName().contains('@'))
// For a non-local versionId, skip symbols containing version info because
// symbol versions specified by symbol names take precedence over version
// scripts. See parseSymbolVersion().
if (versionId != VER_NDX_LOCAL && sym->getName().contains('@'))
continue;
// If the version has not been assigned, verdefIndex is -1. Use an arbitrary
@ -225,13 +221,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
warn("attempt to reassign symbol '" + ver.name + "' of " +
getName(sym->versionId) + " to " + getName(versionId));
}
return !syms.empty();
}
void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
bool includeNonDefault) {
// Exact matching takes precedence over fuzzy matching,
// so we set a version to a symbol only if no version has been assigned
// to the symbol. This behavior is compatible with GNU.
for (Symbol *sym : findAllByVersion(ver))
for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
if (sym->verdefIndex == UINT32_C(-1)) {
sym->verdefIndex = 0;
sym->versionId = versionId;
@ -244,26 +242,57 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
// script file, the script does not actually define any symbol version,
// but just specifies symbols visibilities.
void SymbolTable::scanVersionScript() {
SmallString<128> buf;
// First, we assign versions to exact matching symbols,
// i.e. version definitions not containing any glob meta-characters.
for (VersionDefinition &v : config->versionDefinitions)
for (SymbolVersion &pat : v.patterns)
assignExactVersion(pat, v.id, v.name);
std::vector<Symbol *> syms;
for (VersionDefinition &v : config->versionDefinitions) {
auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
bool found = assignExactVersion(pat, id, ver);
found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
pat.isExternCpp, /*hasWildCard=*/false},
id, ver);
if (!found && !config->undefinedVersion)
errorOrWarn("version script assignment of '" + ver + "' to symbol '" +
pat.name + "' failed: symbol not defined");
};
for (SymbolVersion &pat : v.nonLocalPatterns)
if (!pat.hasWildcard)
assignExact(pat, v.id, v.name);
for (SymbolVersion pat : v.localPatterns)
if (!pat.hasWildcard)
assignExact(pat, VER_NDX_LOCAL, "local");
}
// Next, assign versions to wildcards that are not "*". Note that because the
// last match takes precedence over previous matches, we iterate over the
// definitions in the reverse order.
for (VersionDefinition &v : llvm::reverse(config->versionDefinitions))
for (SymbolVersion &pat : v.patterns)
auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
pat.isExternCpp, /*hasWildCard=*/true},
id,
/*includeNonDefault=*/true);
};
for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) {
for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name != "*")
assignWildcardVersion(pat, v.id);
assignWildcard(pat, v.id, v.name);
for (SymbolVersion &pat : v.localPatterns)
if (pat.hasWildcard && pat.name != "*")
assignWildcard(pat, VER_NDX_LOCAL, v.name);
}
// Then, assign versions to "*". In GNU linkers they have lower priority than
// other wildcards.
for (VersionDefinition &v : config->versionDefinitions)
for (SymbolVersion &pat : v.patterns)
for (VersionDefinition &v : config->versionDefinitions) {
for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name == "*")
assignWildcardVersion(pat, v.id);
assignWildcard(pat, v.id, v.name);
for (SymbolVersion &pat : v.localPatterns)
if (pat.hasWildcard && pat.name == "*")
assignWildcard(pat, VER_NDX_LOCAL, v.name);
}
// Symbol themselves might know their versions because symbols
// can contain versions in the form of <name>@<version>.

View File

@ -65,12 +65,14 @@ public:
private:
std::vector<Symbol *> findByVersion(SymbolVersion ver);
std::vector<Symbol *> findAllByVersion(SymbolVersion ver);
std::vector<Symbol *> findAllByVersion(SymbolVersion ver,
bool includeNonDefault);
llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
void assignExactVersion(SymbolVersion ver, uint16_t versionId,
bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
StringRef versionName);
void assignWildcardVersion(SymbolVersion ver, uint16_t versionId);
void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
bool includeNonDefault);
// The order the global symbols are in is not defined. We can use an arbitrary
// order, but it has to be reproducible. That is true even when cross linking.

View File

@ -208,6 +208,9 @@ OutputSection *Symbol::getOutputSection() const {
// If a symbol name contains '@', the characters after it are the
// symbol version name. This function parses that.
void Symbol::parseSymbolVersion() {
// Return if localized by a local: pattern in a version script.
if (versionId == VER_NDX_LOCAL)
return;
StringRef s = getName();
size_t pos = s.find('@');
if (pos == 0 || pos == StringRef::npos)

View File

@ -1,30 +0,0 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
# RUN: echo "FOO { global: extern \"C++\" { \"aaa*\"; }; };" > %t.script
# RUN: ld.lld --version-script %t.script -shared %t.o -o %t.so
# RUN: llvm-readobj --dyn-syms %t.so | FileCheck %s --check-prefix=NOMATCH
# NOMATCH: DynamicSymbols [
# NOMATCH-NOT: _Z3aaaPi@@FOO
# NOMATCH-NOT: _Z3aaaPf@@FOO
# NOMATCH: ]
# RUN: echo "FOO { global: extern \"C++\" { \"aaa*\"; aaa*; }; };" > %t2.script
# RUN: ld.lld --version-script %t2.script -shared %t.o -o %t2.so
# RUN: llvm-readobj --dyn-syms %t2.so | FileCheck %s --check-prefix=MATCH
# MATCH: DynamicSymbols [
# MATCH: _Z3aaaPi@@FOO
# MATCH: _Z3aaaPf@@FOO
# MATCH: ]
.text
.globl _Z3aaaPi
.type _Z3aaaPi,@function
_Z3aaaPi:
retq
.globl _Z3aaaPf
.type _Z3aaaPf,@function
_Z3aaaPf:
retq

View File

@ -1,28 +0,0 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
# RUN: echo "FOO { global: extern \"C++\" { foo*; }; };" > %t.script
# RUN: echo "BAR { global: extern \"C++\" { zed*; bar; }; };" >> %t.script
# RUN: ld.lld --version-script %t.script -shared %t.o -o %t.so
# RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck %s
# CHECK: VersionSymbols [
# CHECK: Name: _Z3fooi@@FOO
# CHECK: Name: _Z3bari
# CHECK: Name: _Z3zedi@@BAR
.text
.globl _Z3fooi
.type _Z3fooi,@function
_Z3fooi:
retq
.globl _Z3bari
.type _Z3bari,@function
_Z3bari:
retq
.globl _Z3zedi
.type _Z3zedi,@function
_Z3zedi:
retq

View File

@ -1,120 +0,0 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
# RUN: echo "LIBSAMPLE_1.0 { global:" > %t.script
# RUN: echo ' extern "C++" { "foo(int)"; "zed(int)"; "abc::abc()"; };' >> %t.script
# RUN: echo "};" >> %t.script
# RUN: echo "LIBSAMPLE_2.0 { global:" >> %t.script
# RUN: echo ' extern "C" { _Z3bari; };' >> %t.script
# RUN: echo "};" >> %t.script
# RUN: ld.lld --hash-style=sysv --version-script %t.script -soname fixed-length-string -shared %t.o -o %t.so
# RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck --check-prefix=DSO %s
# DSO: DynamicSymbols [
# DSO-NEXT: Symbol {
# DSO-NEXT: Name:
# DSO-NEXT: Value: 0x0
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Local
# DSO-NEXT: Type: None
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: Undefined
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Name: _Z3fooi@@LIBSAMPLE_1.0
# DSO-NEXT: Value:
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Global
# DSO-NEXT: Type: Function
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: .text
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Name: _Z3bari@@LIBSAMPLE_2.0
# DSO-NEXT: Value:
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Global
# DSO-NEXT: Type: Function
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: .text
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Name: _Z3zedi@@LIBSAMPLE_1.0
# DSO-NEXT: Value:
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Global (0x1)
# DSO-NEXT: Type: Function (0x2)
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: .text (0x6)
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Name: _ZN3abcC1Ev@@LIBSAMPLE_1.0
# DSO-NEXT: Value:
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Global (0x1)
# DSO-NEXT: Type: Function (0x2)
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: .text (0x6)
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Name: _ZN3abcC2Ev@@LIBSAMPLE_1.0
# DSO-NEXT: Value:
# DSO-NEXT: Size: 0
# DSO-NEXT: Binding: Global (0x1)
# DSO-NEXT: Type: Function (0x2)
# DSO-NEXT: Other: 0
# DSO-NEXT: Section: .text (0x6)
# DSO-NEXT: }
# DSO-NEXT: ]
# DSO-NEXT: VersionSymbols [
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 0
# DSO-NEXT: Name:
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 2
# DSO-NEXT: Name: _Z3fooi@@LIBSAMPLE_1.0
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 3
# DSO-NEXT: Name: _Z3bari@@LIBSAMPLE_2.0
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 2
# DSO-NEXT: Name: _Z3zedi@@LIBSAMPLE_1.0
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 2
# DSO-NEXT: Name: _ZN3abcC1Ev@@LIBSAMPLE_1.0
# DSO-NEXT: }
# DSO-NEXT: Symbol {
# DSO-NEXT: Version: 2
# DSO-NEXT: Name: _ZN3abcC2Ev@@LIBSAMPLE_1.0
# DSO-NEXT: }
# DSO-NEXT: ]
.text
.globl _Z3fooi
.type _Z3fooi,@function
_Z3fooi:
retq
.globl _Z3bari
.type _Z3bari,@function
_Z3bari:
retq
.globl _Z3zedi
.type _Z3zedi,@function
_Z3zedi:
retq
.globl _ZN3abcC1Ev
.type _ZN3abcC1Ev,@function
_ZN3abcC1Ev:
retq
.globl _ZN3abcC2Ev
.type _ZN3abcC2Ev,@function
_ZN3abcC2Ev:
retq

View File

@ -18,6 +18,10 @@
# RUN: %t.o -o %t.so 2>&1 | FileCheck -check-prefix=ERR3 %s
# ERR3: version script assignment of 'local' to symbol 'und' failed: symbol not defined
## Wildcard patterns do not error.
# RUN: echo "VERSION_1.0 { global: b*; local: u*; };" > %t4.script
# RUN: ld.lld --version-script %t4.script -shared --no-undefined-version --fatal-warnings %t.o -o /dev/null
.text
.globl foo
.type foo,@function

View File

@ -0,0 +1,38 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: echo 'v1 { local: extern "C++" { "foo1()"; }; }; v2 { local: extern "C++" { "foo2()"; }; };' > %t1.script
# RUN: ld.lld --version-script %t1.script -shared %t.o -o %t1.so
# RUN: llvm-readelf --dyn-syms %t1.so | FileCheck --check-prefix=EXACT %s
# EXACT: UND
# EXACT-NEXT: [[#]] _start{{$}}
# EXACT-NEXT: [[#]] _Z4foo3i@v1
# EXACT-NEXT: [[#]] _Z4foo4i@@v2
# EXACT-NOT: {{.}}
# RUN: echo 'v1 { global: *; local: extern "C++" {foo*;}; }; v2 {};' > %t2.script
# RUN: ld.lld --version-script %t2.script -shared %t.o -o %t2.so
# RUN: llvm-readelf --dyn-syms %t2.so | FileCheck --check-prefix=MIX1 %s
# MIX1: UND
# MIX1-NEXT: [[#]] _start@@v1
# MIX1-NEXT: [[#]] _Z4foo3i@v1
# MIX1-NEXT: [[#]] _Z4foo4i@@v2
# MIX1-NOT: {{.}}
# RUN: echo 'v1 { global: extern "C++" {foo*;}; local: *; }; v2 { global: extern "C++" {"foo4(int)";}; local: *; };' > %t3.script
# RUN: ld.lld --version-script %t3.script -shared %t.o -o %t3.so
# RUN: llvm-readelf --dyn-syms %t3.so | FileCheck --check-prefix=MIX2 %s
# MIX2: UND
# MIX2-NEXT: [[#]] _Z4foo1v@@v1
# MIX2-NEXT: [[#]] _Z4foo2v@@v1
# MIX2-NEXT: [[#]] _Z4foo3i@v1
# MIX2-NEXT: [[#]] _Z4foo4i@@v2
# MIX2-NOT: {{.}}
.globl _Z4foo1v; _Z4foo1v: ret
.globl _Z4foo2v; _Z4foo2v: ret
.globl _Z4foo3i; .symver _Z4foo3i,_Z4foo3i@v1,remove; _Z4foo3i: ret
.globl _Z4foo4i; .symver _Z4foo4i,_Z4foo4i@@@v2; _Z4foo4i: ret
.globl _start; _start: ret

View File

@ -18,7 +18,6 @@
# WC: UND
# WC-NEXT: [[#]] foo4@@v2
# WC-NEXT: [[#]] _start{{$}}
# WC-NEXT: [[#]] foo3@v1
# WC-NOT: {{.}}
# RUN: echo 'v1 { global: *; local: foo*; }; v2 {};' > %t3.script
@ -27,7 +26,6 @@
# MIX1: UND
# MIX1-NEXT: [[#]] foo4@@v2
# MIX1-NEXT: [[#]] _start@@v1
# MIX1-NEXT: [[#]] foo3@v1
# MIX1-NOT: {{.}}
# RUN: echo 'v1 { global: foo*; local: *; }; v2 { global: foo4; local: *; };' > %t4.script