From c91930a17f2c32fefe5eb5caeb767462655a12d3 Mon Sep 17 00:00:00 2001 From: George Rimar Date: Fri, 2 Sep 2016 21:17:20 +0000 Subject: [PATCH] [ELF] - Use std::regex instead of hand written logic in elf::globMatch() Use std::regex instead of hand written matcher. Patch based on code and ideas of Rui Ueyama. Differential revision: https://reviews.llvm.org/D23829 llvm-svn: 280544 --- lld/ELF/LinkerScript.cpp | 45 +++++++++++++---------------------- lld/ELF/LinkerScript.h | 14 +++++++---- lld/ELF/Strings.cpp | 49 +++++++++++++++++++++++---------------- lld/ELF/Strings.h | 4 +++- lld/ELF/SymbolTable.cpp | 20 +++++++--------- lld/ELF/SymbolTable.h | 3 ++- lld/test/ELF/wildcards2.s | 25 ++++++++++++++++++++ 7 files changed, 93 insertions(+), 67 deletions(-) create mode 100644 lld/test/ELF/wildcards2.s diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 3872d30d2488..6085b19b6789 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -92,44 +92,35 @@ template LinkerScript::~LinkerScript() {} template bool LinkerScript::shouldKeep(InputSectionBase *S) { - for (StringRef Pat : Opt.KeptSections) - if (globMatch(Pat, S->getSectionName())) - return true; - return false; -} - -static bool match(ArrayRef Patterns, StringRef S) { - for (StringRef Pat : Patterns) - if (globMatch(Pat, S)) + for (Regex *Re : Opt.KeptSections) + if (Re->match(S->getSectionName())) return true; return false; } static bool fileMatches(const InputSectionDescription *Desc, StringRef Filename) { - if (!globMatch(Desc->FilePattern, Filename)) - return false; - return Desc->ExcludedFiles.empty() || !match(Desc->ExcludedFiles, Filename); + return const_cast(Desc->FileRe).match(Filename) && + !const_cast(Desc->ExcludedFileRe).match(Filename); } // Returns input sections filtered by given glob patterns. template std::vector *> LinkerScript::getInputSections(const InputSectionDescription *I) { - ArrayRef Patterns = I->SectionPatterns; + const Regex &Re = I->SectionRe; std::vector *> Ret; for (const std::unique_ptr> &F : Symtab::X->getObjectFiles()) { if (fileMatches(I, sys::path::filename(F->getName()))) for (InputSectionBase *S : F->getSections()) if (!isDiscarded(S) && !S->OutSec && - match(Patterns, S->getSectionName())) + const_cast(Re).match(S->getSectionName())) Ret.push_back(S); } - if (llvm::find(Patterns, "COMMON") != Patterns.end()) + if (const_cast(Re).match("COMMON")) Ret.push_back(CommonInputSection::X); - return Ret; } @@ -634,7 +625,7 @@ private: std::vector readOutputSectionFiller(); std::vector readOutputSectionPhdrs(); InputSectionDescription *readInputSectionDescription(StringRef Tok); - std::vector readInputFilePatterns(); + Regex readFilePatterns(); InputSectionDescription *readInputSectionRules(StringRef FilePattern); unsigned readPhdrType(); SortKind readSortKind(); @@ -908,11 +899,11 @@ static int precedence(StringRef Op) { .Default(-1); } -std::vector ScriptParser::readInputFilePatterns() { +Regex ScriptParser::readFilePatterns() { std::vector V; while (!Error && !skip(")")) V.push_back(next()); - return V; + return compileGlobPatterns(V); } SortKind ScriptParser::readSortKind() { @@ -925,15 +916,13 @@ SortKind ScriptParser::readSortKind() { InputSectionDescription * ScriptParser::readInputSectionRules(StringRef FilePattern) { - auto *Cmd = new InputSectionDescription; - Cmd->FilePattern = FilePattern; + auto *Cmd = new InputSectionDescription(FilePattern); expect("("); // Read EXCLUDE_FILE(). if (skip("EXCLUDE_FILE")) { expect("("); - while (!Error && !skip(")")) - Cmd->ExcludedFiles.push_back(next()); + Cmd->ExcludedFileRe = readFilePatterns(); } // Read SORT(). @@ -943,16 +932,16 @@ ScriptParser::readInputSectionRules(StringRef FilePattern) { if (SortKind K2 = readSortKind()) { Cmd->SortInner = K2; expect("("); - Cmd->SectionPatterns = readInputFilePatterns(); + Cmd->SectionRe = readFilePatterns(); expect(")"); } else { - Cmd->SectionPatterns = readInputFilePatterns(); + Cmd->SectionRe = readFilePatterns(); } expect(")"); return Cmd; } - Cmd->SectionPatterns = readInputFilePatterns(); + Cmd->SectionRe = readFilePatterns(); return Cmd; } @@ -965,9 +954,7 @@ ScriptParser::readInputSectionDescription(StringRef Tok) { StringRef FilePattern = next(); InputSectionDescription *Cmd = readInputSectionRules(FilePattern); expect(")"); - Opt.KeptSections.insert(Opt.KeptSections.end(), - Cmd->SectionPatterns.begin(), - Cmd->SectionPatterns.end()); + Opt.KeptSections.push_back(&Cmd->SectionRe); return Cmd; } return readInputSectionRules(Tok); diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index a5e3d9ec52bb..bb1f092bb0c4 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -10,12 +10,14 @@ #ifndef LLD_ELF_LINKER_SCRIPT_H #define LLD_ELF_LINKER_SCRIPT_H +#include "Strings.h" #include "Writer.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" #include namespace lld { @@ -94,13 +96,15 @@ struct OutputSectionCommand : BaseCommand { enum SortKind { SortNone, SortByName, SortByAlignment }; struct InputSectionDescription : BaseCommand { - InputSectionDescription() : BaseCommand(InputSectionKind) {} + InputSectionDescription(StringRef FilePattern) + : BaseCommand(InputSectionKind), + FileRe(compileGlobPatterns({FilePattern})) {} static bool classof(const BaseCommand *C); - StringRef FilePattern; + llvm::Regex FileRe; SortKind SortOuter = SortNone; SortKind SortInner = SortNone; - std::vector ExcludedFiles; - std::vector SectionPatterns; + llvm::Regex ExcludedFileRe; + llvm::Regex SectionRe; }; struct AssertCommand : BaseCommand { @@ -133,7 +137,7 @@ struct ScriptConfiguration { // List of section patterns specified with KEEP commands. They will // be kept even if they are unused and --gc-sections is specified. - std::vector KeptSections; + std::vector KeptSections; }; extern ScriptConfiguration *ScriptConfig; diff --git a/lld/ELF/Strings.cpp b/lld/ELF/Strings.cpp index 0c21e8819d6c..6fecc75f7353 100644 --- a/lld/ELF/Strings.cpp +++ b/lld/ELF/Strings.cpp @@ -9,6 +9,7 @@ #include "Strings.h" #include "Error.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -22,28 +23,36 @@ using namespace llvm; using namespace lld; using namespace lld::elf; -// Returns true if S matches T. S can contain glob meta-characters. -// The asterisk ('*') matches zero or more characters, and the question -// mark ('?') matches one character. -bool elf::globMatch(StringRef S, StringRef T) { - for (;;) { - if (S.empty()) - return T.empty(); - if (S[0] == '*') { - S = S.substr(1); - if (S.empty()) - // Fast path. If a pattern is '*', it matches anything. - return true; - for (size_t I = 0, E = T.size(); I < E; ++I) - if (globMatch(S, T.substr(I))) - return true; - return false; - } - if (T.empty() || (S[0] != T[0] && S[0] != '?')) - return false; +bool elf::hasWildcard(StringRef S) { + return S.find_first_of("?*") != StringRef::npos; +} + +static std::string toRegex(StringRef S) { + if (S.find_first_of("[]") != StringRef::npos) + warning("unsupported wildcard: " + S); + + std::string T; + while (!S.empty()) { + char C = S.front(); + if (C == '*') + T += ".*"; + else if (C == '?') + T += '.'; + else if (StringRef(".+^${}()|/\\[]").find_first_of(C) != StringRef::npos) + T += std::string("\\") + C; + else + T += C; S = S.substr(1); - T = T.substr(1); } + return T; +} + +// Takes multiple glob patterns and converts them into regex object. +Regex elf::compileGlobPatterns(ArrayRef V) { + std::string T = "^(" + toRegex(V[0]); + for (StringRef S : V.slice(1)) + T += "|" + toRegex(S); + return Regex(T + ")$"); } // Converts a hex string (e.g. "deadbeef") to a vector. diff --git a/lld/ELF/Strings.h b/lld/ELF/Strings.h index 4948e9dbd56b..ad99eb9bae9b 100644 --- a/lld/ELF/Strings.h +++ b/lld/ELF/Strings.h @@ -11,11 +11,13 @@ #define LLD_COFF_STRINGS_H #include "lld/Core/LLVM.h" +#include "llvm/Support/Regex.h" #include namespace lld { namespace elf { -bool globMatch(StringRef S, StringRef T); +llvm::Regex compileGlobPatterns(ArrayRef V); +bool hasWildcard(StringRef S); std::vector parseHex(StringRef S); bool isValidCIdentifier(StringRef S); diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 3d8cbc1a4452..c3a8eae9b906 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -483,13 +483,14 @@ template SymbolBody *SymbolTable::find(StringRef Name) { return SymVector[V.Idx]->body(); } -// Returns a list of defined symbols that match with a given glob pattern. +// Returns a list of defined symbols that match with a given regex. template -std::vector SymbolTable::findAll(StringRef Pattern) { +std::vector SymbolTable::findAll(const Regex &Re) { std::vector Res; for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); - if (!B->isUndefined() && globMatch(Pattern, B->getName())) + StringRef Name = B->getName(); + if (!B->isUndefined() && const_cast(Re).match(Name)) Res.push_back(B); } return Res; @@ -578,10 +579,6 @@ template void SymbolTable::scanDynamicList() { B->symbol()->ExportDynamic = true; } -static bool hasWildcard(StringRef S) { - return S.find_first_of("?*") != StringRef::npos; -} - static void setVersionId(SymbolBody *Body, StringRef VersionName, StringRef Name, uint16_t Version) { if (!Body || Body->isUndefined()) { @@ -625,11 +622,11 @@ static SymbolBody *findDemangled(const std::map &D, static std::vector findAllDemangled(const std::map &D, - StringRef Pattern) { + const Regex &Re) { std::vector Res; for (auto &P : D) { SymbolBody *Body = P.second; - if (!Body->isUndefined() && globMatch(Pattern, P.first)) + if (!Body->isUndefined() && const_cast(Re).match(P.first)) Res.push_back(Body); } return Res; @@ -682,8 +679,9 @@ template void SymbolTable::scanVersionScript() { if (!hasWildcard(Sym.Name)) continue; std::vector All = - Sym.IsExternCpp ? findAllDemangled(Demangled, Sym.Name) - : findAll(Sym.Name); + Sym.IsExternCpp + ? findAllDemangled(Demangled, compileGlobPatterns({Sym.Name})) + : findAll(compileGlobPatterns({Sym.Name})); for (SymbolBody *B : All) if (B->symbol()->VersionId == Config->DefaultSymbolVersion) diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index cf4838ce8041..8a7bba16eccf 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -13,6 +13,7 @@ #include "InputFiles.h" #include "LTO.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Regex.h" namespace lld { namespace elf { @@ -91,7 +92,7 @@ public: void wrap(StringRef Name); private: - std::vector findAll(StringRef Pattern); + std::vector findAll(const llvm::Regex &Re); std::pair insert(StringRef &Name); std::pair insert(StringRef &Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, diff --git a/lld/test/ELF/wildcards2.s b/lld/test/ELF/wildcards2.s new file mode 100644 index 000000000000..e8a0f58c52e4 --- /dev/null +++ b/lld/test/ELF/wildcards2.s @@ -0,0 +1,25 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +## Check that aabc is not included in text. +# RUN: echo "SECTIONS { \ +# RUN: .text : { *(.abc) } }" > %t.script +# RUN: ld.lld -o %t.out --script %t.script %t +# RUN: llvm-objdump -section-headers %t.out | \ +# RUN: FileCheck %s +# CHECK: Sections: +# CHECK-NEXT: Idx Name Size Address Type +# CHECK-NEXT: 0 00000000 0000000000000000 +# CHECK-NEXT: 1 .text 00000004 0000000000000120 TEXT DATA +# CHECK-NEXT: 2 aabc 00000004 0000000000000124 TEXT DATA + +.text +.section .abc,"ax",@progbits +.long 0 + +.text +.section aabc,"ax",@progbits +.long 0 + +.globl _start +_start: