forked from OSchip/llvm-project
[ADT] Add a single-character version of the small vector split routine
on StringRef. Finding and splitting on a single character is substantially faster than doing it on even a single character StringRef -- we immediately get to a *very* tuned memchr call this way. Even nicer, we get to this even in a debug build, shaving 18% off the runtime of TripleTest.Normalization, helping PR23676 some more. llvm-svn: 247244
This commit is contained in:
parent
93d5d3b5db
commit
477121721b
|
@ -489,6 +489,23 @@ namespace llvm {
|
|||
StringRef Separator, int MaxSplit = -1,
|
||||
bool KeepEmpty = true) const;
|
||||
|
||||
/// Split into substrings around the occurrences of a separator character.
|
||||
///
|
||||
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
|
||||
/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
|
||||
/// elements are added to A.
|
||||
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
|
||||
/// still count when considering \p MaxSplit.
|
||||
/// A useful invariant is that
|
||||
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
|
||||
///
|
||||
/// \param A - Where to put the substrings.
|
||||
/// \param Separator - The character to split on.
|
||||
/// \param MaxSplit - The maximum number of times the string is split.
|
||||
/// \param KeepEmpty - True if empty substring should be added.
|
||||
void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
|
||||
bool KeepEmpty = true) const;
|
||||
|
||||
/// Split into two substrings around the last occurrence of a separator
|
||||
/// character.
|
||||
///
|
||||
|
|
|
@ -294,6 +294,26 @@ void StringRef::split(SmallVectorImpl<StringRef> &A,
|
|||
A.push_back(rest);
|
||||
}
|
||||
|
||||
// Split this string around occurrences of the single character Separator,
// appending each piece to A in order.
//
// A negative MaxSplit places no limit on the number of splits. Otherwise the
// loop below runs at most MaxSplit times and the remaining tail is appended
// afterwards, so at most MaxSplit + 1 pieces are added. When KeepEmpty is
// false, empty pieces are not appended, but they still count as a split.
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
|
||||
int MaxSplit, bool KeepEmpty) const {
|
||||
StringRef rest = *this;
|
||||
|
||||
// rest.data() is used to distinguish cases like "a," that splits into
|
||||
// "a" + "" and "a" that splits into "a" + 0.
// A null rest.data() therefore means the previous split found no separator,
// which ends the loop and suppresses the tail push below.
|
||||
for (int splits = 0;
|
||||
rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
|
||||
++splits) {
|
||||
// Cut off the piece in front of the next Separator; p.second is what
// remains after it.
std::pair<StringRef, StringRef> p = rest.split(Separator);
|
||||
|
||||
// Empty pieces are dropped when KeepEmpty is false; splits is still
// incremented for them by the loop header.
if (KeepEmpty || p.first.size() != 0)
|
||||
A.push_back(p.first);
|
||||
rest = p.second;
|
||||
}
|
||||
// If we have a tail left, add it. This is reached with a non-null tail when
// the MaxSplit limit stopped the loop; an empty tail is only added when
// KeepEmpty is true.
|
||||
if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
|
||||
A.push_back(rest);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helpful Algorithms
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -624,7 +624,7 @@ std::string Triple::normalize(StringRef Str) {
|
|||
|
||||
// Parse into components.
|
||||
SmallVector<StringRef, 4> Components;
|
||||
Str.split(Components, "-");
|
||||
Str.split(Components, '-');
|
||||
|
||||
// If the first component corresponds to a known architecture, preferentially
|
||||
// use it for the architecture. If the second component corresponds to a
|
||||
|
|
|
@ -225,6 +225,11 @@ TEST(StringRefTest, Split2) {
|
|||
expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
|
||||
StringRef("a,,b,c").split(parts, ",", 3, false);
|
||||
EXPECT_TRUE(parts == expected);
|
||||
|
||||
expected.clear(); parts.clear();
|
||||
expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
|
||||
StringRef("a,,b,c").split(parts, ',', 3, false);
|
||||
EXPECT_TRUE(parts == expected);
|
||||
}
|
||||
|
||||
TEST(StringRefTest, Trim) {
|
||||
|
|
Loading…
Reference in New Issue