[ADT][NFC] Early bail out for ComputeEditDistance

The minimum bound on the number of edits is the size difference between the two arrays.
If MaxEditDistance is smaller than this, we can bail out early without needing to traverse any of the arrays.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D127070
This commit is contained in:
Nathan James 2022-06-08 08:20:28 +01:00
parent d48479791f
commit 638b0fb4d6
No known key found for this signature in database
GPG Key ID: CC007AFCDA90AA5F
3 changed files with 73 additions and 0 deletions

View File

@ -61,6 +61,15 @@ unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
typename ArrayRef<T>::size_type m = FromArray.size(); typename ArrayRef<T>::size_type m = FromArray.size();
typename ArrayRef<T>::size_type n = ToArray.size(); typename ArrayRef<T>::size_type n = ToArray.size();
if (MaxEditDistance) {
// If the difference in size between the 2 arrays is larger than the max
// distance allowed, we can bail out as we will always need more than
// MaxEditDistance insertions or removals.
typename ArrayRef<T>::size_type AbsDiff = m > n ? m - n : n - m;
if (AbsDiff > MaxEditDistance)
return MaxEditDistance + 1;
}
const unsigned SmallBufferSize = 64; const unsigned SmallBufferSize = 64;
unsigned SmallBuffer[SmallBufferSize]; unsigned SmallBuffer[SmallBufferSize];
std::unique_ptr<unsigned[]> Allocated; std::unique_ptr<unsigned[]> Allocated;

View File

@ -22,6 +22,7 @@ add_llvm_unittest(ADTTests
DenseSetTest.cpp DenseSetTest.cpp
DepthFirstIteratorTest.cpp DepthFirstIteratorTest.cpp
DirectedGraphTest.cpp DirectedGraphTest.cpp
EditDistanceTest.cpp
EnumeratedArrayTest.cpp EnumeratedArrayTest.cpp
EquivalenceClassesTest.cpp EquivalenceClassesTest.cpp
FallibleIteratorTest.cpp FallibleIteratorTest.cpp

View File

@ -0,0 +1,63 @@
//===- llvm/unittest/ADT/EditDistanceTest.cpp - Edit distance tests -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/edit_distance.h"
#include "gtest/gtest.h"
#include <cstdlib>
using namespace llvm;
namespace {

/// Pair of observations gathered from one instrumented edit-distance run.
struct Result {
  /// How many times the element-mapping functor was invoked.
  unsigned NumMaps;

  /// The distance value handed back by the edit-distance routine.
  unsigned EditDist;
};

} // namespace
/// Runs llvm::ComputeMappedEditDistance on the characters of \p A and \p B
/// with an identity mapping functor that counts its own invocations.
///
/// \param A               Source string.
/// \param B               Target string.
/// \param MaxEditDistance Forwarded unchanged to ComputeMappedEditDistance;
///                        defaults to 0.
/// \returns a Result holding the number of functor invocations and the
///          distance value produced by the call.
static Result editDistanceAndMaps(StringRef A, StringRef B,
                                  unsigned MaxEditDistance = 0) {
  unsigned MapCalls = 0;

  // Identity mapping: its only side effect is bumping the call counter, which
  // lets callers see whether any element was ever looked at.
  auto CountingIdentity = [&MapCalls](char C) {
    ++MapCalls;
    return C;
  };

  const auto From = makeArrayRef(A.data(), A.size());
  const auto To = makeArrayRef(B.data(), B.size());
  const unsigned Distance = llvm::ComputeMappedEditDistance(
      From, To, CountingIdentity, true, MaxEditDistance);

  // The counter is read only after the computation has finished.
  return Result{MapCalls, Distance};
}
// Verifies the early bail-out: when the length difference between the inputs
// exceeds a non-zero MaxEditDistance, the result is MaxEditDistance + 1 and
// the mapping functor is never invoked. "Hello" and "HelloWorld" differ in
// length by exactly 5.
TEST(EditDistance, VerifyShortCircuit) {
  const StringRef Hello = "Hello";
  const StringRef HelloWorld = "HelloWorld";

  // Limit equal to the length difference: no bail-out, elements are mapped.
  {
    const Result R = editDistanceAndMaps(Hello, HelloWorld, 5);
    EXPECT_EQ(R.EditDist, 5U);
    EXPECT_GT(R.NumMaps, 0U);
  }

  // Default limit of 0: full computation, elements are mapped.
  {
    const Result R = editDistanceAndMaps(Hello, HelloWorld);
    EXPECT_EQ(R.EditDist, 5U);
    EXPECT_GT(R.NumMaps, 0U);
  }

  // Limit one below the length difference: bail out with limit + 1 and no
  // mapping, whichever input is the longer one.
  {
    const Result R = editDistanceAndMaps(Hello, HelloWorld, 4);
    EXPECT_EQ(R.EditDist, 5U);
    EXPECT_EQ(R.NumMaps, 0U);
  }
  {
    const Result R = editDistanceAndMaps(HelloWorld, Hello, 4);
    EXPECT_EQ(R.EditDist, 5U);
    EXPECT_EQ(R.NumMaps, 0U);
  }

  // Limit far below the length difference: still limit + 1 and no mapping,
  // in both argument orders.
  {
    const Result R = editDistanceAndMaps(Hello, HelloWorld, 1);
    EXPECT_EQ(R.EditDist, 2U);
    EXPECT_EQ(R.NumMaps, 0U);
  }
  {
    const Result R = editDistanceAndMaps(HelloWorld, Hello, 1);
    EXPECT_EQ(R.EditDist, 2U);
    EXPECT_EQ(R.NumMaps, 0U);
  }
}