[ELF] - Use llvm::Regex instead of hand-written logic in elf::globMatch()

Use llvm::Regex instead of the hand-written glob matcher.

Patch based on code and ideas of Rui Ueyama.

Differential revision: https://reviews.llvm.org/D23829

llvm-svn: 280544
This commit is contained in:
George Rimar 2016-09-02 21:17:20 +00:00
parent 54150db232
commit c91930a17f
7 changed files with 93 additions and 67 deletions

View File

@ -92,44 +92,35 @@ template <class ELFT> LinkerScript<ELFT>::~LinkerScript() {}
template <class ELFT> template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
for (StringRef Pat : Opt.KeptSections) for (Regex *Re : Opt.KeptSections)
if (globMatch(Pat, S->getSectionName())) if (Re->match(S->getSectionName()))
return true;
return false;
}
static bool match(ArrayRef<StringRef> Patterns, StringRef S) {
for (StringRef Pat : Patterns)
if (globMatch(Pat, S))
return true; return true;
return false; return false;
} }
static bool fileMatches(const InputSectionDescription *Desc, static bool fileMatches(const InputSectionDescription *Desc,
StringRef Filename) { StringRef Filename) {
if (!globMatch(Desc->FilePattern, Filename)) return const_cast<Regex &>(Desc->FileRe).match(Filename) &&
return false; !const_cast<Regex &>(Desc->ExcludedFileRe).match(Filename);
return Desc->ExcludedFiles.empty() || !match(Desc->ExcludedFiles, Filename);
} }
// Returns input sections filtered by given glob patterns. // Returns input sections filtered by given glob patterns.
template <class ELFT> template <class ELFT>
std::vector<InputSectionBase<ELFT> *> std::vector<InputSectionBase<ELFT> *>
LinkerScript<ELFT>::getInputSections(const InputSectionDescription *I) { LinkerScript<ELFT>::getInputSections(const InputSectionDescription *I) {
ArrayRef<StringRef> Patterns = I->SectionPatterns; const Regex &Re = I->SectionRe;
std::vector<InputSectionBase<ELFT> *> Ret; std::vector<InputSectionBase<ELFT> *> Ret;
for (const std::unique_ptr<ObjectFile<ELFT>> &F : for (const std::unique_ptr<ObjectFile<ELFT>> &F :
Symtab<ELFT>::X->getObjectFiles()) { Symtab<ELFT>::X->getObjectFiles()) {
if (fileMatches(I, sys::path::filename(F->getName()))) if (fileMatches(I, sys::path::filename(F->getName())))
for (InputSectionBase<ELFT> *S : F->getSections()) for (InputSectionBase<ELFT> *S : F->getSections())
if (!isDiscarded(S) && !S->OutSec && if (!isDiscarded(S) && !S->OutSec &&
match(Patterns, S->getSectionName())) const_cast<Regex &>(Re).match(S->getSectionName()))
Ret.push_back(S); Ret.push_back(S);
} }
if (llvm::find(Patterns, "COMMON") != Patterns.end()) if (const_cast<Regex &>(Re).match("COMMON"))
Ret.push_back(CommonInputSection<ELFT>::X); Ret.push_back(CommonInputSection<ELFT>::X);
return Ret; return Ret;
} }
@ -634,7 +625,7 @@ private:
std::vector<uint8_t> readOutputSectionFiller(); std::vector<uint8_t> readOutputSectionFiller();
std::vector<StringRef> readOutputSectionPhdrs(); std::vector<StringRef> readOutputSectionPhdrs();
InputSectionDescription *readInputSectionDescription(StringRef Tok); InputSectionDescription *readInputSectionDescription(StringRef Tok);
std::vector<StringRef> readInputFilePatterns(); Regex readFilePatterns();
InputSectionDescription *readInputSectionRules(StringRef FilePattern); InputSectionDescription *readInputSectionRules(StringRef FilePattern);
unsigned readPhdrType(); unsigned readPhdrType();
SortKind readSortKind(); SortKind readSortKind();
@ -908,11 +899,11 @@ static int precedence(StringRef Op) {
.Default(-1); .Default(-1);
} }
std::vector<StringRef> ScriptParser::readInputFilePatterns() { Regex ScriptParser::readFilePatterns() {
std::vector<StringRef> V; std::vector<StringRef> V;
while (!Error && !skip(")")) while (!Error && !skip(")"))
V.push_back(next()); V.push_back(next());
return V; return compileGlobPatterns(V);
} }
SortKind ScriptParser::readSortKind() { SortKind ScriptParser::readSortKind() {
@ -925,15 +916,13 @@ SortKind ScriptParser::readSortKind() {
InputSectionDescription * InputSectionDescription *
ScriptParser::readInputSectionRules(StringRef FilePattern) { ScriptParser::readInputSectionRules(StringRef FilePattern) {
auto *Cmd = new InputSectionDescription; auto *Cmd = new InputSectionDescription(FilePattern);
Cmd->FilePattern = FilePattern;
expect("("); expect("(");
// Read EXCLUDE_FILE(). // Read EXCLUDE_FILE().
if (skip("EXCLUDE_FILE")) { if (skip("EXCLUDE_FILE")) {
expect("("); expect("(");
while (!Error && !skip(")")) Cmd->ExcludedFileRe = readFilePatterns();
Cmd->ExcludedFiles.push_back(next());
} }
// Read SORT(). // Read SORT().
@ -943,16 +932,16 @@ ScriptParser::readInputSectionRules(StringRef FilePattern) {
if (SortKind K2 = readSortKind()) { if (SortKind K2 = readSortKind()) {
Cmd->SortInner = K2; Cmd->SortInner = K2;
expect("("); expect("(");
Cmd->SectionPatterns = readInputFilePatterns(); Cmd->SectionRe = readFilePatterns();
expect(")"); expect(")");
} else { } else {
Cmd->SectionPatterns = readInputFilePatterns(); Cmd->SectionRe = readFilePatterns();
} }
expect(")"); expect(")");
return Cmd; return Cmd;
} }
Cmd->SectionPatterns = readInputFilePatterns(); Cmd->SectionRe = readFilePatterns();
return Cmd; return Cmd;
} }
@ -965,9 +954,7 @@ ScriptParser::readInputSectionDescription(StringRef Tok) {
StringRef FilePattern = next(); StringRef FilePattern = next();
InputSectionDescription *Cmd = readInputSectionRules(FilePattern); InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
expect(")"); expect(")");
Opt.KeptSections.insert(Opt.KeptSections.end(), Opt.KeptSections.push_back(&Cmd->SectionRe);
Cmd->SectionPatterns.begin(),
Cmd->SectionPatterns.end());
return Cmd; return Cmd;
} }
return readInputSectionRules(Tok); return readInputSectionRules(Tok);

View File

@ -10,12 +10,14 @@
#ifndef LLD_ELF_LINKER_SCRIPT_H #ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H #define LLD_ELF_LINKER_SCRIPT_H
#include "Strings.h"
#include "Writer.h" #include "Writer.h"
#include "lld/Core/LLVM.h" #include "lld/Core/LLVM.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h" #include "llvm/ADT/MapVector.h"
#include "llvm/Support/Allocator.h" #include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include <functional> #include <functional>
namespace lld { namespace lld {
@ -94,13 +96,15 @@ struct OutputSectionCommand : BaseCommand {
enum SortKind { SortNone, SortByName, SortByAlignment }; enum SortKind { SortNone, SortByName, SortByAlignment };
struct InputSectionDescription : BaseCommand { struct InputSectionDescription : BaseCommand {
InputSectionDescription() : BaseCommand(InputSectionKind) {} InputSectionDescription(StringRef FilePattern)
: BaseCommand(InputSectionKind),
FileRe(compileGlobPatterns({FilePattern})) {}
static bool classof(const BaseCommand *C); static bool classof(const BaseCommand *C);
StringRef FilePattern; llvm::Regex FileRe;
SortKind SortOuter = SortNone; SortKind SortOuter = SortNone;
SortKind SortInner = SortNone; SortKind SortInner = SortNone;
std::vector<StringRef> ExcludedFiles; llvm::Regex ExcludedFileRe;
std::vector<StringRef> SectionPatterns; llvm::Regex SectionRe;
}; };
struct AssertCommand : BaseCommand { struct AssertCommand : BaseCommand {
@ -133,7 +137,7 @@ struct ScriptConfiguration {
// List of section patterns specified with KEEP commands. They will // List of section patterns specified with KEEP commands. They will
// be kept even if they are unused and --gc-sections is specified. // be kept even if they are unused and --gc-sections is specified.
std::vector<StringRef> KeptSections; std::vector<llvm::Regex *> KeptSections;
}; };
extern ScriptConfiguration *ScriptConfig; extern ScriptConfiguration *ScriptConfig;

View File

@ -9,6 +9,7 @@
#include "Strings.h" #include "Strings.h"
#include "Error.h" #include "Error.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h" #include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" #include "llvm/Config/config.h"
@ -22,28 +23,36 @@ using namespace llvm;
using namespace lld; using namespace lld;
using namespace lld::elf; using namespace lld::elf;
// Returns true if S matches T. S can contain glob meta-characters. bool elf::hasWildcard(StringRef S) {
// The asterisk ('*') matches zero or more characters, and the question return S.find_first_of("?*") != StringRef::npos;
// mark ('?') matches one character. }
bool elf::globMatch(StringRef S, StringRef T) {
for (;;) { static std::string toRegex(StringRef S) {
if (S.empty()) if (S.find_first_of("[]") != StringRef::npos)
return T.empty(); warning("unsupported wildcard: " + S);
if (S[0] == '*') {
S = S.substr(1); std::string T;
if (S.empty()) while (!S.empty()) {
// Fast path. If a pattern is '*', it matches anything. char C = S.front();
return true; if (C == '*')
for (size_t I = 0, E = T.size(); I < E; ++I) T += ".*";
if (globMatch(S, T.substr(I))) else if (C == '?')
return true; T += '.';
return false; else if (StringRef(".+^${}()|/\\[]").find_first_of(C) != StringRef::npos)
} T += std::string("\\") + C;
if (T.empty() || (S[0] != T[0] && S[0] != '?')) else
return false; T += C;
S = S.substr(1); S = S.substr(1);
T = T.substr(1);
} }
return T;
}
// Takes multiple glob patterns and converts them into regex object.
Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) {
std::string T = "^(" + toRegex(V[0]);
for (StringRef S : V.slice(1))
T += "|" + toRegex(S);
return Regex(T + ")$");
} }
// Converts a hex string (e.g. "deadbeef") to a vector. // Converts a hex string (e.g. "deadbeef") to a vector.

View File

@ -11,11 +11,13 @@
#define LLD_COFF_STRINGS_H #define LLD_COFF_STRINGS_H
#include "lld/Core/LLVM.h" #include "lld/Core/LLVM.h"
#include "llvm/Support/Regex.h"
#include <vector> #include <vector>
namespace lld { namespace lld {
namespace elf { namespace elf {
bool globMatch(StringRef S, StringRef T); llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V);
bool hasWildcard(StringRef S);
std::vector<uint8_t> parseHex(StringRef S); std::vector<uint8_t> parseHex(StringRef S);
bool isValidCIdentifier(StringRef S); bool isValidCIdentifier(StringRef S);

View File

@ -483,13 +483,14 @@ template <class ELFT> SymbolBody *SymbolTable<ELFT>::find(StringRef Name) {
return SymVector[V.Idx]->body(); return SymVector[V.Idx]->body();
} }
// Returns a list of defined symbols that match with a given glob pattern. // Returns a list of defined symbols that match with a given regex.
template <class ELFT> template <class ELFT>
std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) { std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) {
std::vector<SymbolBody *> Res; std::vector<SymbolBody *> Res;
for (Symbol *Sym : SymVector) { for (Symbol *Sym : SymVector) {
SymbolBody *B = Sym->body(); SymbolBody *B = Sym->body();
if (!B->isUndefined() && globMatch(Pattern, B->getName())) StringRef Name = B->getName();
if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name))
Res.push_back(B); Res.push_back(B);
} }
return Res; return Res;
@ -578,10 +579,6 @@ template <class ELFT> void SymbolTable<ELFT>::scanDynamicList() {
B->symbol()->ExportDynamic = true; B->symbol()->ExportDynamic = true;
} }
static bool hasWildcard(StringRef S) {
return S.find_first_of("?*") != StringRef::npos;
}
static void setVersionId(SymbolBody *Body, StringRef VersionName, static void setVersionId(SymbolBody *Body, StringRef VersionName,
StringRef Name, uint16_t Version) { StringRef Name, uint16_t Version) {
if (!Body || Body->isUndefined()) { if (!Body || Body->isUndefined()) {
@ -625,11 +622,11 @@ static SymbolBody *findDemangled(const std::map<std::string, SymbolBody *> &D,
static std::vector<SymbolBody *> static std::vector<SymbolBody *>
findAllDemangled(const std::map<std::string, SymbolBody *> &D, findAllDemangled(const std::map<std::string, SymbolBody *> &D,
StringRef Pattern) { const Regex &Re) {
std::vector<SymbolBody *> Res; std::vector<SymbolBody *> Res;
for (auto &P : D) { for (auto &P : D) {
SymbolBody *Body = P.second; SymbolBody *Body = P.second;
if (!Body->isUndefined() && globMatch(Pattern, P.first)) if (!Body->isUndefined() && const_cast<Regex &>(Re).match(P.first))
Res.push_back(Body); Res.push_back(Body);
} }
return Res; return Res;
@ -682,8 +679,9 @@ template <class ELFT> void SymbolTable<ELFT>::scanVersionScript() {
if (!hasWildcard(Sym.Name)) if (!hasWildcard(Sym.Name))
continue; continue;
std::vector<SymbolBody *> All = std::vector<SymbolBody *> All =
Sym.IsExternCpp ? findAllDemangled(Demangled, Sym.Name) Sym.IsExternCpp
: findAll(Sym.Name); ? findAllDemangled(Demangled, compileGlobPatterns({Sym.Name}))
: findAll(compileGlobPatterns({Sym.Name}));
for (SymbolBody *B : All) for (SymbolBody *B : All)
if (B->symbol()->VersionId == Config->DefaultSymbolVersion) if (B->symbol()->VersionId == Config->DefaultSymbolVersion)

View File

@ -13,6 +13,7 @@
#include "InputFiles.h" #include "InputFiles.h"
#include "LTO.h" #include "LTO.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Regex.h"
namespace lld { namespace lld {
namespace elf { namespace elf {
@ -91,7 +92,7 @@ public:
void wrap(StringRef Name); void wrap(StringRef Name);
private: private:
std::vector<SymbolBody *> findAll(StringRef Pattern); std::vector<SymbolBody *> findAll(const llvm::Regex &Re);
std::pair<Symbol *, bool> insert(StringRef &Name); std::pair<Symbol *, bool> insert(StringRef &Name);
std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type, std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type,
uint8_t Visibility, bool CanOmitFromDynSym, uint8_t Visibility, bool CanOmitFromDynSym,

25
lld/test/ELF/wildcards2.s Normal file
View File

@ -0,0 +1,25 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
## Check that aabc is not included in text.
# RUN: echo "SECTIONS { \
# RUN: .text : { *(.abc) } }" > %t.script
# RUN: ld.lld -o %t.out --script %t.script %t
# RUN: llvm-objdump -section-headers %t.out | \
# RUN: FileCheck %s
# CHECK: Sections:
# CHECK-NEXT: Idx Name Size Address Type
# CHECK-NEXT: 0 00000000 0000000000000000
# CHECK-NEXT: 1 .text 00000004 0000000000000120 TEXT DATA
# CHECK-NEXT: 2 aabc 00000004 0000000000000124 TEXT DATA
.text
.section .abc,"ax",@progbits
.long 0
.text
.section aabc,"ax",@progbits
.long 0
.globl _start
_start: