forked from OSchip/llvm-project
228 lines
6.1 KiB
C++
228 lines
6.1 KiB
C++
//===- Strings.cpp -------------------------------------------------------===//
|
|
//
|
|
// The LLVM Linker
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Strings.h"
|
|
#include "Config.h"
|
|
#include "Error.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/Config/config.h"
|
|
#include "llvm/Demangle/Demangle.h"
|
|
#include <algorithm>
|
|
|
|
using namespace llvm;
|
|
using namespace lld;
|
|
using namespace lld::elf;
|
|
|
|
// This is a scanner for the glob pattern.
|
|
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
|
|
// (which is a negative form of "[<chars>]"), or a non-meta character.
|
|
// This function returns the first token in S.
|
|
BitVector GlobPattern::scan(StringRef &S) {
|
|
switch (S[0]) {
|
|
case '*':
|
|
S = S.substr(1);
|
|
// '*' is represented by an empty bitvector.
|
|
// All other bitvectors are 256-bit long.
|
|
return BitVector();
|
|
case '?':
|
|
S = S.substr(1);
|
|
return BitVector(256, true);
|
|
case '[': {
|
|
size_t End = S.find(']', 1);
|
|
if (End == StringRef::npos) {
|
|
error("invalid glob pattern: " + Original);
|
|
return BitVector(256, false);
|
|
}
|
|
StringRef Chars = S.substr(1, End - 1);
|
|
S = S.substr(End + 1);
|
|
if (Chars.startswith("^"))
|
|
return expand(Chars.substr(1)).flip();
|
|
return expand(Chars);
|
|
}
|
|
default:
|
|
BitVector BV(256, false);
|
|
BV[S[0]] = true;
|
|
S = S.substr(1);
|
|
return BV;
|
|
}
|
|
}
|
|
|
|
// Expands character ranges and returns a bitmap.
|
|
// For example, "a-cf-hz" is expanded to "abcfghz".
|
|
BitVector GlobPattern::expand(StringRef S) {
|
|
BitVector BV(256, false);
|
|
|
|
// Expand "x-y".
|
|
for (;;) {
|
|
if (S.size() < 3)
|
|
break;
|
|
|
|
// If it doesn't start with something like "x-y",
|
|
// consume the first character and proceed.
|
|
if (S[1] != '-') {
|
|
BV[S[0]] = true;
|
|
S = S.substr(1);
|
|
continue;
|
|
}
|
|
|
|
// It must be in the form of "x-y".
|
|
// Validate it and then interpret the range.
|
|
if (S[0] > S[2]) {
|
|
error("invalid glob pattern: " + Original);
|
|
return BV;
|
|
}
|
|
for (int C = S[0]; C <= S[2]; ++C)
|
|
BV[C] = true;
|
|
S = S.substr(3);
|
|
}
|
|
|
|
for (char C : S)
|
|
BV[C] = true;
|
|
return BV;
|
|
}
|
|
|
|
// Builds a matcher for glob pattern S. Three cheap special cases are
// recognized up front (no metacharacters, "foo*", "*foo"); anything else
// is tokenized for the general matcher.
GlobPattern::GlobPattern(StringRef S) : Original(S) {
  // Without any metacharacter, plain string equality is sufficient.
  if (!hasWildcard(S)) {
    Exact = S;
    return;
  }

  // From here on S contains a metacharacter, so it is not empty and
  // dropping one character from either end is safe.

  // "foo*": a literal followed by a single trailing star is a prefix test.
  StringRef Head = S.drop_back();
  if (S.endswith("*") && !hasWildcard(Head)) {
    Prefix = Head;
    return;
  }

  // "*foo": a single leading star followed by a literal is a suffix test.
  StringRef Tail = S.drop_front();
  if (S.startswith("*") && !hasWildcard(Tail)) {
    Suffix = Tail;
    return;
  }

  // General case: parse the pattern into tokens now so that match()
  // does not have to.
  while (!S.empty())
    Tokens.push_back(scan(S));
}
|
|
|
|
// Returns true if S matches this pattern. Uses the precomputed exact,
// prefix or suffix form when one is set, and the token matcher otherwise.
bool GlobPattern::match(StringRef S) const {
  const bool IsGeneral = !Exact && !Prefix && !Suffix;
  if (IsGeneral)
    return matchOne(Tokens, S);

  if (Exact)
    return *Exact == S;
  if (Prefix)
    return S.startswith(*Prefix);
  // Only the suffix form is left.
  return S.endswith(*Suffix);
}
|
|
|
|
// Runs glob pattern Pats against string S.
|
|
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
|
|
for (;;) {
|
|
if (Pats.empty())
|
|
return S.empty();
|
|
|
|
// If Pats[0] is '*', try to match Pats[1..] against all possible
|
|
// substrings of S to see at least one pattern succeeds.
|
|
if (Pats[0].size() == 0) {
|
|
Pats = Pats.slice(1);
|
|
if (Pats.empty())
|
|
// Fast path. If a pattern is '*', it matches anything.
|
|
return true;
|
|
for (size_t I = 0, E = S.size(); I < E; ++I)
|
|
if (matchOne(Pats, S.substr(I)))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
// If Pats[0] is not '*', it must consume one character.
|
|
if (S.empty() || !Pats[0][S[0]])
|
|
return false;
|
|
Pats = Pats.slice(1);
|
|
S = S.substr(1);
|
|
}
|
|
}
|
|
|
|
// Compiles every pattern string in Pat into a GlobPattern, preserving
// the order in which they were given.
StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
  for (auto It = Pat.begin(), End = Pat.end(); It != End; ++It)
    Patterns.push_back(GlobPattern(*It));
}
|
|
|
|
// Returns true if S matches at least one of the stored patterns.
bool StringMatcher::match(StringRef S) const {
  return std::any_of(Patterns.begin(), Patterns.end(),
                     [S](const GlobPattern &Pat) { return Pat.match(S); });
}
|
|
|
|
// If an input string is in the form of "foo.N" where N is a number,
|
|
// return N. Otherwise, returns 65536, which is one greater than the
|
|
// lowest priority.
|
|
int elf::getPriority(StringRef S) {
|
|
size_t Pos = S.rfind('.');
|
|
if (Pos == StringRef::npos)
|
|
return 65536;
|
|
int V;
|
|
if (S.substr(Pos + 1).getAsInteger(10, V))
|
|
return 65536;
|
|
return V;
|
|
}
|
|
|
|
// Returns true if S contains any of the glob metacharacters
// '?', '*' or '['.
bool elf::hasWildcard(StringRef S) {
  for (char C : S)
    if (C == '?' || C == '*' || C == '[')
      return true;
  return false;
}
|
|
|
|
// Strips the surrounding double quotes from S.
// If S does not begin with '"' it is returned unchanged. Otherwise the
// first and the last character are removed; the last character is not
// checked, so the input is expected to carry a matching closing quote.
StringRef elf::unquote(StringRef S) {
  const bool Quoted = S.startswith("\"");
  return Quoted ? S.substr(1, S.size() - 2) : S;
}
|
|
|
|
// Converts a hex string (e.g. "deadbeef") to a vector.
|
|
std::vector<uint8_t> elf::parseHex(StringRef S) {
|
|
std::vector<uint8_t> Hex;
|
|
while (!S.empty()) {
|
|
StringRef B = S.substr(0, 2);
|
|
S = S.substr(2);
|
|
uint8_t H;
|
|
if (B.getAsInteger(16, H)) {
|
|
error("not a hexadecimal value: " + B);
|
|
return {};
|
|
}
|
|
Hex.push_back(H);
|
|
}
|
|
return Hex;
|
|
}
|
|
|
|
// Returns true if C may start a C identifier: an ASCII letter or '_'.
static bool isAlpha(char C) {
  if (C == '_')
    return true;
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}
|
|
|
|
// Returns true if C may appear after the first character of a C
// identifier: anything isAlpha() accepts, or an ASCII digit.
static bool isAlnum(char C) {
  const bool IsDigit = C >= '0' && C <= '9';
  return isAlpha(C) || IsDigit;
}
|
|
|
|
// Returns true if S is valid as a C language identifier.
|
|
bool elf::isValidCIdentifier(StringRef S) {
|
|
return !S.empty() && isAlpha(S[0]) &&
|
|
std::all_of(S.begin() + 1, S.end(), isAlnum);
|
|
}
|
|
|
|
// Returns the demangled C++ symbol name for Name.
|
|
std::string elf::demangle(StringRef Name) {
|
|
// __cxa_demangle can be used to demangle strings other than symbol
|
|
// names which do not necessarily start with "_Z". Name can be
|
|
// either a C or C++ symbol. Don't call __cxa_demangle if the name
|
|
// does not look like a C++ symbol name to avoid getting unexpected
|
|
// result for a C symbol that happens to match a mangled type name.
|
|
if (!Name.startswith("_Z"))
|
|
return Name;
|
|
|
|
char *Buf = itaniumDemangle(Name.str().c_str(), nullptr, nullptr, nullptr);
|
|
if (!Buf)
|
|
return Name;
|
|
std::string S(Buf);
|
|
free(Buf);
|
|
return S;
|
|
}
|