From 06c4aadeb654f2430f4d8a7cf789330e0d583360 Mon Sep 17 00:00:00 2001 From: Greg McGary Date: Wed, 3 Mar 2021 12:15:09 -0800 Subject: [PATCH] [lld-macho] implement options -(un)exported_symbol(s_list) Implement command-line options to alter a dylib's exported-symbols list: * `-exported_symbol*` options override the default export list. The export list is compiled according to the command-line option(s) only. * `-unexported_symbol*` options hide otherwise public symbols. * `-*exported_symbol PATTERN` options specify a single literal or glob pattern. * `-*exported_symbols_list FILE` options specify a file containing a series of lines containing symbol literals or glob patterns. Whitespace and `#`-prefix comments are stripped. Note: This is a simple implementation of the primary use case. ld64 has much more complexity surrounding interactions with other options, many of which are obscure and undocumented. We will start simple and add complexity as necessary. Differential Revision: https://reviews.llvm.org/D98223 --- lld/MachO/Config.h | 22 ++++++ lld/MachO/Driver.cpp | 61 ++++++++++++++++ lld/MachO/Options.td | 16 ++--- lld/MachO/SyntheticSections.cpp | 10 ++- lld/test/MachO/export-options.s | 119 ++++++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 12 deletions(-) create mode 100644 lld/test/MachO/export-options.s diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 31195457eeef..810b719cd09d 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -9,9 +9,12 @@ #ifndef LLD_MACHO_CONFIG_H #define LLD_MACHO_CONFIG_H +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TextAPI/MachO/Architecture.h" #include "llvm/TextAPI/MachO/Platform.h" @@ -47,6 +50,21 @@ enum class UndefinedSymbolTreatment { dynamic_lookup, }; +class SymbolPatterns { + 
// GlobPattern can also match literals, + // but we prefer the O(1) lookup of DenseSet. + llvm::DenseSet literals; + std::vector globs; + +public: + bool empty() const { return literals.empty() && globs.empty(); } + void clear(); + void insert(llvm::StringRef symbolName); + bool matchLiteral(llvm::StringRef symbolName) const; + bool matchGlob(llvm::StringRef symbolName) const; + bool match(llvm::StringRef symbolName) const; +}; + struct Configuration { Symbol *entry; bool hasReexports = false; @@ -80,9 +98,13 @@ struct Configuration { std::vector frameworkSearchPaths; std::vector runtimePaths; std::vector explicitUndefineds; + llvm::DenseMap priorities; SectionRenameMap sectionRenameMap; SegmentRenameMap segmentRenameMap; + + SymbolPatterns exportedSymbols; + SymbolPatterns unexportedSymbols; }; // The symbol with the highest priority should be ordered first in the output diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index ca01908d40ca..8543efa6cd7b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -719,6 +719,57 @@ static uint32_t parseDylibVersion(const opt::ArgList& args, unsigned id) { return version.rawValue(); } +void SymbolPatterns::clear() { + literals.clear(); + globs.clear(); +} + +void SymbolPatterns::insert(StringRef symbolName) { + if (symbolName.find_first_of("*?[]") == StringRef::npos) + literals.insert(CachedHashStringRef(symbolName)); + else if (Expected pattern = GlobPattern::create(symbolName)) + globs.emplace_back(*pattern); + else + error("invalid symbol-name pattern: " + symbolName + ": " + toString(pattern.takeError())); +} + +bool SymbolPatterns::matchLiteral(StringRef symbolName) const { + return literals.contains(CachedHashStringRef(symbolName)); +} + +bool SymbolPatterns::matchGlob(StringRef symbolName) const { + for (const llvm::GlobPattern &glob : globs) + if (glob.match(symbolName)) + return true; + return false; +} + +bool SymbolPatterns::match(StringRef symbolName) const { + return matchLiteral(symbolName) || matchGlob(symbolName); +} + +static 
void handleSymbolPatterns(opt::InputArgList &args, + SymbolPatterns &symbolPatterns, + unsigned singleOptionCode, + unsigned listFileOptionCode) { + for (opt::Arg *arg : args.filtered(singleOptionCode)) + symbolPatterns.insert(arg->getValue()); + for (opt::Arg *arg : args.filtered(listFileOptionCode)) { + StringRef path = arg->getValue(); + Optional buffer = readFile(path); + if (!buffer) { + error("Could not read symbol file: " + path); + continue; + } + MemoryBufferRef mbref = *buffer; + for (StringRef line : args::getLines(mbref)) { + line = line.take_until([](char c) { return c == '#'; }).trim(); + if (!line.empty()) + symbolPatterns.insert(line); + } + } +} + bool macho::link(ArrayRef argsArr, bool canExitEarly, raw_ostream &stdoutOS, raw_ostream &stderrOS) { lld::stdoutOS = &stdoutOS; @@ -843,6 +894,16 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, validName(arg->getValue(1)); } + handleSymbolPatterns(args, config->exportedSymbols, OPT_exported_symbol, + OPT_exported_symbols_list); + handleSymbolPatterns(args, config->unexportedSymbols, OPT_unexported_symbol, + OPT_unexported_symbols_list); + if (!config->exportedSymbols.empty() && !config->unexportedSymbols.empty()) { + error("cannot use both -exported_symbol* and -unexported_symbol* options\n" + ">>> ignoring unexports"); + config->unexportedSymbols.clear(); + } + config->saveTemps = args.hasArg(OPT_save_temps); config->adhocCodesign = args.hasFlag( diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 457b95491b5f..c83ed3e95ecb 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -406,25 +406,21 @@ def d : Flag<["-"], "d">, def grp_resolve : OptionGroup<"resolve">, HelpText<"SYMBOL RESOLUTION">; -def exported_symbols_list : Separate<["-"], "exported_symbols_list">, - MetaVarName<"">, - HelpText<"Symbols specified in remain global, while others become private externs">, - Flags<[HelpHidden]>, - Group; def exported_symbol : Separate<["-"], "exported_symbol">, MetaVarName<"">, 
HelpText<" remains global, while others become private externs">, - Flags<[HelpHidden]>, Group; -def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">, +def exported_symbols_list : Separate<["-"], "exported_symbols_list">, MetaVarName<"">, - HelpText<"Global symbols specified in become private externs">, - Flags<[HelpHidden]>, + HelpText<"Symbols specified in remain global, while others become private externs">, Group; def unexported_symbol : Separate<["-"], "unexported_symbol">, MetaVarName<"">, HelpText<"Global becomes private extern">, - Flags<[HelpHidden]>, + Group; +def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">, + MetaVarName<"">, + HelpText<"Global symbols specified in become private externs">, Group; def reexported_symbols_list : Separate<["-"], "reexported_symbols_list">, MetaVarName<"">, diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index dcfdac643370..2cc1e8da3348 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -601,8 +601,14 @@ void ExportSection::finalizeContents() { trieBuilder.setImageBase(in.header->addr); for (const Symbol *sym : symtab->getSymbols()) { if (const auto *defined = dyn_cast(sym)) { - if (defined->privateExtern) - continue; + if (config->exportedSymbols.empty()) { + if (defined->privateExtern || + config->unexportedSymbols.match(defined->getName())) + continue; + } else { + if (!config->exportedSymbols.match(defined->getName())) + continue; + } trieBuilder.addSymbol(*defined); hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); } diff --git a/lld/test/MachO/export-options.s b/lld/test/MachO/export-options.s new file mode 100644 index 000000000000..51fcdb12f60f --- /dev/null +++ b/lld/test/MachO/export-options.s @@ -0,0 +1,119 @@ +# REQUIRES: x86 + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/default.s -o %t/default.o + +## Check that mixing exported and unexported symbol options 
yields an error +# RUN: not %lld -dylib %t/default.o -o /dev/null \ +# RUN: -exported_symbol a -unexported_symbol b 2>&1 | \ +# RUN: FileCheck --check-prefix=CONFLICT %s + +# CONFLICT: error: cannot use both -exported_symbol* and -unexported_symbol* options +# CONFLICT-NEXT: >>> ignoring unexports + +#--- default.s + +.macro DEFSYM, type, sym +\type \sym +\sym: + retq +.endm + +DEFSYM .globl, _keep_globl +DEFSYM .globl, _hide_globl +DEFSYM .private_extern, _keep_private +DEFSYM .private_extern, _show_private + +## Check that the export trie is unaltered +# RUN: %lld -dylib %t/default.o -o %t/default +# RUN: llvm-objdump --macho --exports-trie %t/default | \ +# RUN: FileCheck --check-prefix=DEFAULT %s + +# DEFAULT-LABEL: Exports trie: +# DEFAULT-DAG: _hide_globl +# DEFAULT-DAG: _keep_globl +# DEFAULT-NOT: _hide_private +# DEFAULT-NOT: _show_private + +## Check that the export trie is properly augmented +## Check that non-matching literal pattern has no effect +# RUN: %lld -dylib %t/default.o -o %t/export \ +# RUN: -exported_symbol _show_private \ +# RUN: -exported_symbol _extra_cruft -exported_symbol '*xtra_cr?ft' +# RUN: llvm-objdump --macho --exports-trie %t/export | \ +# RUN: FileCheck --check-prefix=EXPORTED %s + +# EXPORTED-LABEL: Exports trie: +# EXPORTED-DAG: _show_private +# EXPORTED-NOT: _hide_globl +# EXPORTED-NOT: _keep_globl +# EXPORTED-NOT: _hide_private +# EXPORTED-NOT: {{.*}}xtra_cr{{.}}ft + +## Check that the export trie is properly diminished +## Check that non-matching glob pattern has no effect +# RUN: %lld -dylib %t/default.o -o %t/unexport \ +# RUN: -unexported_symbol _hide_global +# RUN: llvm-objdump --macho --exports-trie %t/unexport | \ +# RUN: FileCheck --check-prefix=UNEXPORTED %s + +# UNEXPORTED-LABEL: Exports trie: +# UNEXPORTED-DAG: _keep_globl +# UNEXPORTED-NOT: _hide_globl +# UNEXPORTED-NOT: _show_private +# UNEXPORTED-NOT: _hide_private + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/symdefs.s -o %t/symdefs.o + 
+#--- symdefs.s + +.macro DEFSYM, sym +.private_extern \sym +\sym: + retq +.endm + +DEFSYM literal_only +DEFSYM literal_also +DEFSYM globby_only +DEFSYM globby_also + +#--- literals + + literal_only # comment + literal_also + +# globby_only + globby_also + +## Check that only string-literal patterns match +## Check that comments and blank lines are stripped from symbol list +# RUN: %lld -dylib %t/symdefs.o -o %t/literal \ +# RUN: -exported_symbols_list %t/literals +# RUN: llvm-objdump --macho --exports-trie %t/literal | \ +# RUN: FileCheck --check-prefix=LITERAL %s + +# LITERAL-DAG: literal_only +# LITERAL-DAG: literal_also +# LITERAL-DAG: globby_also +# LITERAL-NOT: globby_only + +#--- globbys + +# literal_only + l?ter[aeiou]l_*[^y] # comment + + *gl?bby_* + +## Check that only glob patterns match +## Check that comments and blank lines are stripped from symbol list +# RUN: %lld -dylib %t/symdefs.o -o %t/globby \ +# RUN: -exported_symbols_list %t/globbys +# RUN: llvm-objdump --macho --exports-trie %t/globby | \ +# RUN: FileCheck --check-prefix=GLOBBY %s + +# GLOBBY-DAG: literal_also +# GLOBBY-DAG: globby_only +# GLOBBY-DAG: globby_also +# GLOBBY-NOT: literal_only