[clangd] Implement range patching heuristics for cross-file rename.

Reviewers: sammccall, ilya-biryukov

Reviewed By: sammccall

Subscribers: merge_guards_bot, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70594
This commit is contained in:
Haojian Wu 2019-12-09 17:00:51 +01:00
parent 2ea6ab6777
commit 891f82222b
3 changed files with 491 additions and 2 deletions

View File

@ -18,9 +18,11 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
namespace clang {
namespace clangd {
@ -355,9 +357,22 @@ llvm::Expected<FileEdits> renameOutsideFile(
elog("Fail to read file content: {0}", AffectedFileCode.takeError());
continue;
}
auto RenameRanges =
adjustRenameRanges(*AffectedFileCode, RenameDecl.getNameAsString(),
std::move(FileAndOccurrences.second),
RenameDecl.getASTContext().getLangOpts());
if (!RenameRanges) {
// Our heuristics failed to adjust the rename ranges to the current state
// of the file; most likely the index is stale, so we give up the entire
// rename.
return llvm::make_error<llvm::StringError>(
llvm::formatv("Index results don't match the content of file {0} "
"(the index may be stale)",
FilePath),
llvm::inconvertibleErrorCode());
}
auto RenameEdit =
buildRenameEdit(FilePath, *AffectedFileCode,
std::move(FileAndOccurrences.second), NewName);
buildRenameEdit(FilePath, *AffectedFileCode, *RenameRanges, NewName);
if (!RenameEdit) {
return llvm::make_error<llvm::StringError>(
llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath,
@ -370,6 +385,44 @@ llvm::Expected<FileEdits> renameOutsideFile(
return Results;
}
// Tells whether going from LHS to RHS is a "simple" edit: at least one of the
// two coordinates (line, column) is the same in both positions.
bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
  const bool SameLine = LHS.line == RHS.line;
  const bool SameColumn = LHS.character == RHS.character;
  return SameLine || SameColumn;
}
// Performs a DFS to enumerate all possible near-miss matches.
// It finds the locations where the indexed occurrences are now spelled in
// Lexed occurrences, a near miss is defined as:
// - a near miss maps all of the **name** occurrences from the index onto a
// *subset* of lexed occurrences (we allow a single name refers to more
// than one symbol)
// - all indexed occurrences must be mapped, and Result must be distinct and
// preseve order (only support detecting simple edits to ensure a
// robust mapping)
// - each indexed -> lexed occurrences mapping correspondence may change the
// *line* or *column*, but not both (increases chance of a robust mapping)
void findNearMiss(
std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
if (--Fuel < 0)
return;
if (IndexedRest.size() > LexedRest.size())
return;
if (IndexedRest.empty()) {
MatchedCB(PartialMatch);
return;
}
if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
PartialMatch.push_back(LexedIndex);
findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
LexedIndex + 1, Fuel, MatchedCB);
PartialMatch.pop_back();
}
findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
LexedIndex + 1, Fuel, MatchedCB);
}
} // namespace
llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) {
@ -504,5 +557,112 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
return Edit(InitialCode, std::move(RenameEdit));
}
// How it works:
//  - the draft code is lexed to collect all rename candidates; this yields a
//    superset of the real occurrences.
//  - range patching heuristics then produce the "authoritative" occurrences.
//    Two situations are handled:
//    (a) the index returns a subset of the candidates: the indexed results
//        are used as they are.
//        - fully equal: we are sure the index is up-to-date.
//        - proper subset: the index is correct in most cases; there may be
//          false positives (e.g. candidates got appended), but the rename is
//          still safe.
//    (b) the index returns results that are not candidates: we try to map the
//        indexed ranges onto candidates in a plausible way (e.g. by guessing
//        that lines were inserted). If such a "near miss" is found, the
//        rename is still possible.
llvm::Optional<std::vector<Range>>
adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
                   std::vector<Range> Indexed, const LangOptions &LangOpts) {
  assert(!Indexed.empty());
  // getMappedRanges asserts that both of its inputs are sorted.
  llvm::sort(Indexed);
  std::vector<Range> Lexed =
      collectIdentifierRanges(Identifier, DraftCode, LangOpts);
  llvm::sort(Lexed);
  return getMappedRanges(Indexed, Lexed);
}
// Maps each indexed occurrence onto a lexed occurrence of the current file.
//
// Both Indexed and Lexed must be sorted, and Indexed must be non-empty.
// Returns:
//  - Indexed itself, when every indexed range is also a lexed range;
//  - otherwise the unique cheapest near miss found by findNearMiss, ranked by
//    renameRangeAdjustmentCost;
//  - None when there are fewer lexed than indexed ranges, when no near miss
//    is found, or when the cheapest near miss is not unique.
llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
                                                   ArrayRef<Range> Lexed) {
  assert(!Indexed.empty());
  assert(std::is_sorted(Indexed.begin(), Indexed.end()));
  assert(std::is_sorted(Lexed.begin(), Lexed.end()));

  if (Indexed.size() > Lexed.size()) {
    vlog("The number of lexed occurrences is less than indexed occurrences");
    return llvm::None;
  }
  // Fast check for the special subset case.
  // std::includes(A, B) answers "is B contained in A?", so the superset
  // (Lexed) must be passed first. With the arguments the other way round the
  // check only succeeds when both sets are equal, and the proper-subset case
  // needlessly goes through the fuel-limited search below.
  if (std::includes(Lexed.begin(), Lexed.end(), Indexed.begin(), Indexed.end()))
    return Indexed.vec();

  std::vector<size_t> Best;
  size_t BestCost = std::numeric_limits<size_t>::max();
  bool HasMultiple = false;
  std::vector<size_t> ResultStorage;
  // Upper bound on the number of search steps, to keep the DFS cheap.
  int Fuel = 10000;
  findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
               [&](const std::vector<size_t> &Matched) {
                 size_t MCost =
                     renameRangeAdjustmentCost(Indexed, Lexed, Matched);
                 if (MCost < BestCost) {
                   BestCost = MCost;
                   // Copy on purpose: Matched aliases the scratch buffer the
                   // search keeps mutating.
                   Best = Matched;
                   HasMultiple = false; // reset
                   return;
                 }
                 if (MCost == BestCost)
                   HasMultiple = true;
               });
  if (HasMultiple) {
    vlog("The best near miss is not unique.");
    return llvm::None;
  }
  // Indexed is non-empty, so an empty Best means no match was reported.
  if (Best.empty()) {
    vlog("Didn't find a near miss.");
    return llvm::None;
  }
  std::vector<Range> Mapped;
  Mapped.reserve(Best.size());
  for (size_t I : Best)
    Mapped.push_back(Lexed[I]);
  return Mapped;
}
// Scores a mapping: the cost is the sum of the implied edit sizes between
// successive diffs. Only simple edits are taken into account:
//   - inserting/removing a line (changes the line offset)
//   - inserting/removing a character on an existing line (changes the column
//     offset)
//
// Example I, total cost is 1 + 1 = 2.
//   diff[0]: line + 1   <- insert a line before edit 0.
//   diff[1]: line + 1
//   diff[2]: line + 1
//   diff[3]: line + 2   <- insert a line before edits 2 and 3.
//
// Example II, total cost is 1 + 1 + 1 = 3.
//   diff[0]: line + 1   <- insert a line before edit 0.
//   diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a
//                          character on edit 1.
//
// MappedIndex[I] is the index in Lexed that Indexed[I] is mapped to.
size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
                                 ArrayRef<size_t> MappedIndex) {
  assert(Indexed.size() == MappedIndex.size());
  assert(std::is_sorted(Indexed.begin(), Indexed.end()));
  assert(std::is_sorted(Lexed.begin(), Lexed.end()));

  int PrevLine = -1;
  int PrevLineDelta = 0;
  int PrevColumnDelta = 0;
  int Total = 0;
  for (size_t I = 0, E = Indexed.size(); I != E; ++I) {
    const auto &From = Indexed[I].start;
    const auto &To = Lexed[MappedIndex[I]].start;
    const int LineDelta = From.line - To.line;
    const int ColumnDelta = From.character - To.character;
    const int CurLine = From.line;
    // Column offsets don't carry over from one line to the next.
    if (CurLine != PrevLine)
      PrevColumnDelta = 0;
    // Only the change relative to the previous occurrence is charged.
    Total += abs(LineDelta - PrevLineDelta) +
             abs(ColumnDelta - PrevColumnDelta);
    PrevLine = CurLine;
    PrevLineDelta = LineDelta;
    PrevColumnDelta = ColumnDelta;
  }
  return Total;
}
} // namespace clangd
} // namespace clang

View File

@ -12,6 +12,7 @@
#include "Path.h"
#include "Protocol.h"
#include "SourceCode.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Error.h"
@ -55,6 +56,36 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
std::vector<Range> Occurrences,
llvm::StringRef NewName);
/// Adjusts indexed occurrences to match the current state of the file.
///
/// The Index is not always up to date. Blindly editing at the locations
/// reported by the index may mangle the code in such cases.
/// This function determines whether the indexed occurrences can be applied to
/// this file, and heuristically repairs the occurrences if necessary.
///
/// The occurrences of \p Identifier lexed from \p DraftCode are the candidates
/// the indexed ranges are mapped onto. Returns the adjusted ranges, or None if
/// no unambiguous mapping is found (e.g. the index is too stale).
///
/// The API assumes that Indexed contains only named occurrences (each
/// occurrence has the same length).
///
/// REQUIRED: Indexed is not empty (it does not need to be sorted).
llvm::Optional<std::vector<Range>>
adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
std::vector<Range> Indexed, const LangOptions &LangOpts);
/// Calculates the lexed occurrences that the given indexed occurrences map to.
/// Returns None if we don't find a mapping, which includes the case where the
/// best mapping is not unique and the case where there are fewer lexed than
/// indexed occurrences.
///
/// Exposed for testing only.
///
/// REQUIRED: Indexed and Lexed are sorted, Indexed is not empty.
llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
ArrayRef<Range> Lexed);
/// Evaluates how good the mapped result is. 0 indicates a perfect match.
///
/// MappedIndex[I] is the index in Lexed of the occurrence that Indexed[I] is
/// mapped to. Only the start positions of the ranges are compared.
///
/// Exposed for testing only.
///
/// REQUIRED: Indexed and Lexed are sorted, Indexed and MappedIndex have the
/// same size.
size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
ArrayRef<size_t> MappedIndex);
} // namespace clangd
} // namespace clang

View File

@ -14,9 +14,11 @@
#include "index/Ref.h"
#include "refactor/Rename.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <algorithm>
namespace clang {
namespace clangd {
@ -24,7 +26,9 @@ namespace {
using testing::Eq;
using testing::Pair;
using testing::IsEmpty;
using testing::UnorderedElementsAre;
using testing::UnorderedElementsAreArray;
// Build a RefSlab from all marked ranges in the annotation. The ranges are
// assumed to associate with the given SymbolName.
@ -853,6 +857,300 @@ TEST(CrossFileRenameTests, BuildRenameEdits) {
expectedResult(Code, expectedResult(T, "abc")));
}
// End-to-end check of adjustRenameRanges: the draft code is lexed for "x" and
// the indexed ranges are mapped onto the lexed occurrences.
TEST(CrossFileRenameTests, adjustRenameRanges) {
// Ranges in IndexedCode indicate the indexed occurrences;
// ranges in DraftCode indicate the expected mapped result, empty indicates
// we expect no matched result found.
struct {
llvm::StringRef IndexedCode;
llvm::StringRef DraftCode;
} Tests[] = {
{
// both line and column are changed, not a near miss.
R"cpp(
int [[x]] = 0;
)cpp",
R"cpp(
// insert a line.
double x = 0;
)cpp",
},
{
// subset.
R"cpp(
int [[x]] = 0;
)cpp",
R"cpp(
int [[x]] = 0;
{int x = 0; }
)cpp",
},
{
// shift columns.
R"cpp(int [[x]] = 0; void foo(int x);)cpp",
R"cpp(double [[x]] = 0; void foo(double x);)cpp",
},
{
// shift lines.
R"cpp(
int [[x]] = 0;
void foo(int x);
)cpp",
R"cpp(
// insert a line.
int [[x]] = 0;
void foo(int x);
)cpp",
},
};
LangOptions LangOpts;
LangOpts.CPlusPlus = true;
for (const auto &T : Tests) {
Annotations Draft(T.DraftCode);
// The identifier being renamed is always "x"; the indexed ranges come from
// the annotations of IndexedCode.
auto ActualRanges = adjustRenameRanges(
Draft.code(), "x", Annotations(T.IndexedCode).ranges(), LangOpts);
// No result is only acceptable when the draft carries no expected ranges.
if (!ActualRanges)
EXPECT_THAT(Draft.ranges(), testing::IsEmpty());
else
EXPECT_THAT(Draft.ranges(),
testing::UnorderedElementsAreArray(*ActualRanges))
<< T.DraftCode;
}
}
// Exercises getMappedRanges directly on annotated ranges: the ranges of
// IndexedCode are the indexed occurrences, the ranges of LexedCode are the
// lexed occurrences.
TEST(RangePatchingHeuristic, GetMappedRanges) {
// ^ in LexedCode marks the ranges we expect to be mapped; no ^ indicates
// there are no mapped ranges.
struct {
llvm::StringRef IndexedCode;
llvm::StringRef LexedCode;
} Tests[] = {
{
// no lexed ranges.
"[[]]",
"",
},
{
// both line and column are changed, not a near miss.
R"([[]])",
R"(
[[]]
)",
},
{
// subset.
"[[]]",
"^[[]] [[]]"
},
{
// shift columns.
"[[]] [[]]",
" ^[[]] ^[[]] [[]]"
},
{
// a line is inserted at the top of the lexed code, all ranges are mapped.
R"(
[[]]
[[]] [[]]
)",
R"(
// insert a line
^[[]]
^[[]] ^[[]]
)",
},
{
// a line is inserted and a column is shifted, all ranges are mapped.
R"(
[[]]
[[]] [[]]
)",
R"(
// insert a line
^[[]]
^[[]] ^[[]] // column is shifted.
)",
},
{
// a line is inserted, but no mapping is expected.
R"(
[[]]
[[]] [[]]
)",
R"(
// insert a line
[[]]
[[]] [[]] // not mapped (both line and column are changed).
)",
},
{
// the lexed code has additional ranges that must stay unmapped.
R"(
[[]]
[[]]
[[]]
[[]]
}
)",
R"(
// insert a new line
^[[]]
^[[]]
[[]] // additional range
^[[]]
^[[]]
[[]] // additional range
)",
},
{
// non-distinct result (two best results), not a near miss
R"(
[[]]
[[]]
[[]]
)",
R"(
[[]]
[[]]
[[]]
[[]]
)",
}
};
for (const auto &T : Tests) {
auto Lexed = Annotations(T.LexedCode);
auto LexedRanges = Lexed.ranges();
// Collect the lexed ranges flagged with ^: each point must sit exactly at
// the start of a lexed range.
std::vector<Range> ExpectedMatches;
for (auto P : Lexed.points()) {
auto Match = llvm::find_if(LexedRanges, [&P](const Range& R) {
return R.start == P;
});
ASSERT_NE(Match, LexedRanges.end());
ExpectedMatches.push_back(*Match);
}
auto Mapped =
getMappedRanges(Annotations(T.IndexedCode).ranges(), LexedRanges);
// No mapping is only acceptable when no range was flagged with ^.
if (!Mapped)
EXPECT_THAT(ExpectedMatches, IsEmpty());
else
EXPECT_THAT(ExpectedMatches, UnorderedElementsAreArray(*Mapped))
<< T.IndexedCode;
}
}
// Checks renameRangeAdjustmentCost on hand-written mappings. In RangeCode the
// ranges named "idx" are the indexed occurrences and the ranges named "lex"
// are the lexed occurrences.
TEST(CrossFileRenameTests, adjustmentCost) {
struct {
llvm::StringRef RangeCode;
size_t ExpectedCost;
} Tests[] = {
{
// identical positions, nothing to pay.
R"(
$idx[[]]$lex[[]] // diff: 0
)",
0,
},
{
R"(
$idx[[]]
$lex[[]] // line diff: +1
$idx[[]]
$lex[[]] // line diff: +1
$idx[[]]
$lex[[]] // line diff: +1
$idx[[]]
$lex[[]] // line diff: +2
)",
1 + 1
},
{
R"(
$idx[[]]
$lex[[]] // line diff: +1
$idx[[]]
$lex[[]] // line diff: +2
$idx[[]]
$lex[[]] // line diff: +3
)",
1 + 1 + 1
},
{
R"(
$idx[[]]
$lex[[]] // line diff: +3
$idx[[]]
$lex[[]] // line diff: +2
$idx[[]]
$lex[[]] // line diff: +1
)",
3 + 1 + 1
},
{
R"(
$idx[[]]
$lex[[]] // line diff: +1
$lex[[]] // line diff: -2
$idx[[]]
$idx[[]]
$lex[[]] // line diff: +3
)",
1 + 3 + 5
},
{
R"(
$idx[[]] $lex[[]] // column diff: +1
$idx[[]]$lex[[]] // diff: 0
)",
1
},
{
R"(
$idx[[]]
$lex[[]] // diff: +1
$idx[[]] $lex[[]] // column diff: +1
$idx[[]]$lex[[]] // diff: 0
)",
1 + 1 + 1
},
{
R"(
$idx[[]] $lex[[]] // column diff: +1
)",
1
},
{
R"(
// column diffs: +1, +2, +3
$idx[[]] $lex[[]] $idx[[]] $lex[[]] $idx[[]] $lex[[]]
)",
1 + 1 + 1,
},
};
for (const auto &T : Tests) {
Annotations C(T.RangeCode);
// Identity mapping: the I-th "idx" range is paired with the I-th "lex"
// range.
std::vector<size_t> MappedIndex;
for (size_t I = 0; I < C.ranges("lex").size(); ++I)
MappedIndex.push_back(I);
EXPECT_EQ(renameRangeAdjustmentCost(C.ranges("idx"), C.ranges("lex"),
MappedIndex),
T.ExpectedCost) << T.RangeCode;
}
}
} // namespace
} // namespace clangd
} // namespace clang