[ADT] Add edit_distance_insensitive to StringRef

In some instances it's advantageous to calculate edit distances without worrying about casing.
Currently, to achieve this, both strings need to be converted to the same case first; only then can the edit distance be calculated.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D126159
This commit is contained in:
Nathan James 2022-06-05 12:03:08 +01:00
parent 95a134254a
commit a13b61f7f0
No known key found for this signature in database
GPG Key ID: CC007AFCDA90AA5F
4 changed files with 41 additions and 8 deletions

View File

@ -240,6 +240,10 @@ namespace llvm {
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
/// Determine the edit distance between this string and \p Other, comparing
/// characters after mapping each one through llvm::toLower, so that
/// differences in letter case are not counted as edits.
///
/// \param Other the string to compare this string against.
///
/// \param AllowReplacements whether to allow character replacements (change
/// one character into another) as a single operation, rather than as two
/// operations (an insertion and a removal).
///
/// \param MaxEditDistance forwarded unchanged to the underlying
/// edit-distance computation; the default is 0.
///
/// \returns the minimum number of character insertions, removals, or (if
/// \p AllowReplacements is \c true) replacements needed to transform one of
/// the strings into the other, ignoring case.
LLVM_NODISCARD unsigned
edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
/// str - Get the contents as an std::string.
LLVM_NODISCARD
std::string str() const {

View File

@ -28,6 +28,9 @@ namespace llvm {
///
/// \param ToArray the second sequence to compare.
///
/// \param Map A Functor to apply to each item of the sequences before
/// comparison.
///
/// \param AllowReplacements whether to allow element replacements (change one
/// element into another) as a single operation, rather than as two operations
/// (an insertion and a removal).
@ -39,10 +42,10 @@ namespace llvm {
/// \returns the minimum number of element insertions, removals, or (if
/// \p AllowReplacements is \c true) replacements needed to transform one of
/// the given sequences into the other. If zero, the sequences are identical.
template<typename T>
unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
bool AllowReplacements = true,
unsigned MaxEditDistance = 0) {
template <typename T, typename Functor>
unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
Functor Map, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) {
// The algorithm implemented below is the "classic"
// dynamic-programming algorithm for computing the Levenshtein
// distance, which is described here:
@ -75,15 +78,16 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
unsigned BestThisRow = Row[0];
unsigned Previous = y - 1;
const auto &CurItem = Map(FromArray[y - 1]);
for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
int OldRow = Row[x];
if (AllowReplacements) {
Row[x] = std::min(
Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
std::min(Row[x-1], Row[x])+1);
Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u),
std::min(Row[x - 1], Row[x]) + 1);
}
else {
if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
if (CurItem == Map(ToArray[x - 1]))
Row[x] = Previous;
else Row[x] = std::min(Row[x-1], Row[x]) + 1;
}
Previous = OldRow;
@ -98,6 +102,15 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
return Result;
}
/// Compute the edit distance between \p FromArray and \p ToArray, comparing
/// the elements themselves with no per-element mapping.
///
/// This is a thin wrapper that forwards to ComputeMappedEditDistance with an
/// identity functor; \p AllowReplacements and \p MaxEditDistance are passed
/// through unchanged.
template <typename T>
unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
                             bool AllowReplacements = true,
                             unsigned MaxEditDistance = 0) {
  // Identity mapping: hand every element back by const reference.
  auto Identity = [](const T &Item) -> const T & { return Item; };
  return ComputeMappedEditDistance(FromArray, ToArray, Identity,
                                   AllowReplacements, MaxEditDistance);
}
} // End llvm namespace
#endif

View File

@ -98,6 +98,13 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
AllowReplacements, MaxEditDistance);
}
// Case-insensitive edit distance: both strings are viewed as character arrays
// and every character is passed through llvm::toLower before comparison.
unsigned llvm::StringRef::edit_distance_insensitive(
    StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
  const auto ThisChars = makeArrayRef(data(), size());
  const auto OtherChars = makeArrayRef(Other.data(), Other.size());
  return llvm::ComputeMappedEditDistance(ThisChars, OtherChars, llvm::toLower,
                                         AllowReplacements, MaxEditDistance);
}
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//

View File

@ -584,6 +584,15 @@ TEST(StringRefTest, EditDistance) {
"people soiled our green "));
}
// Checks that edit_distance_insensitive ignores letter case when measuring
// the distance between two strings.
TEST(StringRefTest, EditDistanceInsensitive) {
  // Upper-case subject against lower-case candidates.
  const StringRef UpperHello("HELLO");
  EXPECT_EQ(2U, UpperHello.edit_distance_insensitive("hill"));
  EXPECT_EQ(0U, UpperHello.edit_distance_insensitive("hello"));

  // Mixed-case subject against a differently mixed-case candidate.
  const StringRef MixedIndustry("InDuStRy");
  EXPECT_EQ(6U, MixedIndustry.edit_distance_insensitive("iNtErEsT"));
}
TEST(StringRefTest, Misc) {
std::string Storage;
raw_string_ostream OS(Storage);