forked from OSchip/llvm-project
[ADT] Add edit_distance_insensitive to StringRef
In some instances its advantageous to calculate edit distances without worrying about casing. Currently to achieve this both strings need to be converted to the same case first, then edit distance can be calculated. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D126159
This commit is contained in:
parent
95a134254a
commit
a13b61f7f0
|
@ -240,6 +240,10 @@ namespace llvm {
|
|||
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
|
||||
unsigned MaxEditDistance = 0) const;
|
||||
|
||||
LLVM_NODISCARD unsigned
|
||||
edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
|
||||
unsigned MaxEditDistance = 0) const;
|
||||
|
||||
/// str - Get the contents as an std::string.
|
||||
LLVM_NODISCARD
|
||||
std::string str() const {
|
||||
|
|
|
@ -28,6 +28,9 @@ namespace llvm {
|
|||
///
|
||||
/// \param ToArray the second sequence to compare.
|
||||
///
|
||||
/// \param Map A Functor to apply to each item of the sequences before
|
||||
/// comparison.
|
||||
///
|
||||
/// \param AllowReplacements whether to allow element replacements (change one
|
||||
/// element into another) as a single operation, rather than as two operations
|
||||
/// (an insertion and a removal).
|
||||
|
@ -39,10 +42,10 @@ namespace llvm {
|
|||
/// \returns the minimum number of element insertions, removals, or (if
|
||||
/// \p AllowReplacements is \c true) replacements needed to transform one of
|
||||
/// the given sequences into the other. If zero, the sequences are identical.
|
||||
template<typename T>
|
||||
unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
|
||||
bool AllowReplacements = true,
|
||||
unsigned MaxEditDistance = 0) {
|
||||
template <typename T, typename Functor>
|
||||
unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
|
||||
Functor Map, bool AllowReplacements = true,
|
||||
unsigned MaxEditDistance = 0) {
|
||||
// The algorithm implemented below is the "classic"
|
||||
// dynamic-programming algorithm for computing the Levenshtein
|
||||
// distance, which is described here:
|
||||
|
@ -75,15 +78,16 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
|
|||
unsigned BestThisRow = Row[0];
|
||||
|
||||
unsigned Previous = y - 1;
|
||||
const auto &CurItem = Map(FromArray[y - 1]);
|
||||
for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
|
||||
int OldRow = Row[x];
|
||||
if (AllowReplacements) {
|
||||
Row[x] = std::min(
|
||||
Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
|
||||
std::min(Row[x-1], Row[x])+1);
|
||||
Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u),
|
||||
std::min(Row[x - 1], Row[x]) + 1);
|
||||
}
|
||||
else {
|
||||
if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
|
||||
if (CurItem == Map(ToArray[x - 1]))
|
||||
Row[x] = Previous;
|
||||
else Row[x] = std::min(Row[x-1], Row[x]) + 1;
|
||||
}
|
||||
Previous = OldRow;
|
||||
|
@ -98,6 +102,15 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
|
|||
return Result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
|
||||
bool AllowReplacements = true,
|
||||
unsigned MaxEditDistance = 0) {
|
||||
return ComputeMappedEditDistance(
|
||||
FromArray, ToArray, [](const T &X) -> const T & { return X; },
|
||||
AllowReplacements, MaxEditDistance);
|
||||
}
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -98,6 +98,13 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
|
|||
AllowReplacements, MaxEditDistance);
|
||||
}
|
||||
|
||||
unsigned llvm::StringRef::edit_distance_insensitive(
|
||||
StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
|
||||
return llvm::ComputeMappedEditDistance(
|
||||
makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()),
|
||||
llvm::toLower, AllowReplacements, MaxEditDistance);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -584,6 +584,15 @@ TEST(StringRefTest, EditDistance) {
|
|||
"people soiled our green "));
|
||||
}
|
||||
|
||||
TEST(StringRefTest, EditDistanceInsensitive) {
|
||||
StringRef Hello("HELLO");
|
||||
EXPECT_EQ(2U, Hello.edit_distance_insensitive("hill"));
|
||||
EXPECT_EQ(0U, Hello.edit_distance_insensitive("hello"));
|
||||
|
||||
StringRef Industry("InDuStRy");
|
||||
EXPECT_EQ(6U, Industry.edit_distance_insensitive("iNtErEsT"));
|
||||
}
|
||||
|
||||
TEST(StringRefTest, Misc) {
|
||||
std::string Storage;
|
||||
raw_string_ostream OS(Storage);
|
||||
|
|
Loading…
Reference in New Issue