[clangd] Perform merging for stale symbols in MergeIndex

Clangd drops symbols from static index whenever the dynamic index is
authoritative for the file. This results in regressions when static and
dynamic index contains different set of information, e.g.
IncludeHeaders.

After this patch, we'll choose to merge symbols from static index with
dynamic one rather than just dropping. This implies correctness problems
when the definition/documentation of the symbol is deleted. But seems
like it is worth having in more cases.

We still drop symbols if dynamic index owns the file and didn't report
the symbol, which means symbol is deleted.

Differential Revision: https://reviews.llvm.org/D98538
This commit is contained in:
Kadir Cetinkaya 2021-03-12 19:49:40 +01:00
parent 6919c58262
commit 6d2fb3cefb
No known key found for this signature in database
GPG Key ID: E39E36B8D2057ED6
3 changed files with 106 additions and 29 deletions

View File

@ -149,8 +149,9 @@ public:
/// Returns function which checks if the specified file was used to build this /// Returns function which checks if the specified file was used to build this
/// index or not. The function must only be called while the index is alive. /// index or not. The function must only be called while the index is alive.
virtual llvm::unique_function<IndexContents(llvm::StringRef) const> using IndexedFiles =
indexedFiles() const = 0; llvm::unique_function<IndexContents(llvm::StringRef) const>;
virtual IndexedFiles indexedFiles() const = 0;
/// Returns estimated size of index (in bytes). /// Returns estimated size of index (in bytes).
virtual size_t estimateMemoryUsage() const = 0; virtual size_t estimateMemoryUsage() const = 0;

View File

@ -22,6 +22,19 @@
namespace clang { namespace clang {
namespace clangd { namespace clangd {
namespace {
// Returns true if file defining/declaring \p S is covered by \p Index.
bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index,
const Symbol &S) {
// We expect the definition to see the canonical declaration, so it seems to
// be enough to check only the definition if it exists.
const char *OwningFile =
S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI;
return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None;
}
} // namespace
bool MergedIndex::fuzzyFind( bool MergedIndex::fuzzyFind(
const FuzzyFindRequest &Req, const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const { llvm::function_ref<void(const Symbol &)> Callback) const {
@ -37,36 +50,44 @@ bool MergedIndex::fuzzyFind(
unsigned DynamicCount = 0; unsigned DynamicCount = 0;
unsigned StaticCount = 0; unsigned StaticCount = 0;
unsigned MergedCount = 0; unsigned MergedCount = 0;
// Number of results ignored due to staleness.
unsigned StaticDropped = 0;
More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) { More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
++DynamicCount; ++DynamicCount;
DynB.insert(S); DynB.insert(S);
}); });
SymbolSlab Dyn = std::move(DynB).build(); SymbolSlab Dyn = std::move(DynB).build();
llvm::DenseSet<SymbolID> SeenDynamicSymbols; llvm::DenseSet<SymbolID> ReportedDynSymbols;
{ {
auto DynamicContainsFile = Dynamic->indexedFiles(); auto DynamicContainsFile = Dynamic->indexedFiles();
More |= Static->fuzzyFind(Req, [&](const Symbol &S) { More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
// We expect the definition to see the canonical declaration, so it seems
// to be enough to check only the definition if it exists.
if ((DynamicContainsFile(S.Definition ? S.Definition.FileURI
: S.CanonicalDeclaration.FileURI) &
IndexContents::Symbols) != IndexContents::None)
return;
auto DynS = Dyn.find(S.ID);
++StaticCount; ++StaticCount;
if (DynS == Dyn.end()) auto DynS = Dyn.find(S.ID);
return Callback(S); // If symbol also exist in the dynamic index, just merge and report.
++MergedCount; if (DynS != Dyn.end()) {
SeenDynamicSymbols.insert(S.ID); ++MergedCount;
Callback(mergeSymbol(*DynS, S)); ReportedDynSymbols.insert(S.ID);
return Callback(mergeSymbol(*DynS, S));
}
// Otherwise, if the dynamic index owns the symbol's file, it means static
// index is stale just drop the symbol.
if (isIndexAuthoritative(DynamicContainsFile, S)) {
++StaticDropped;
return;
}
// If not just report the symbol from static index as is.
return Callback(S);
}); });
} }
SPAN_ATTACH(Tracer, "dynamic", DynamicCount); SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
SPAN_ATTACH(Tracer, "static", StaticCount); SPAN_ATTACH(Tracer, "static", StaticCount);
SPAN_ATTACH(Tracer, "static_dropped", StaticDropped);
SPAN_ATTACH(Tracer, "merged", MergedCount); SPAN_ATTACH(Tracer, "merged", MergedCount);
for (const Symbol &S : Dyn) for (const Symbol &S : Dyn)
if (!SeenDynamicSymbols.count(S.ID)) if (!ReportedDynSymbols.count(S.ID))
Callback(S); Callback(S);
return More; return More;
} }
@ -83,18 +104,21 @@ void MergedIndex::lookup(
{ {
auto DynamicContainsFile = Dynamic->indexedFiles(); auto DynamicContainsFile = Dynamic->indexedFiles();
Static->lookup(Req, [&](const Symbol &S) { Static->lookup(Req, [&](const Symbol &S) {
// We expect the definition to see the canonical declaration, so it seems // If we've seen the symbol before, just merge.
// to be enough to check only the definition if it exists. if (const Symbol *Sym = B.find(S.ID)) {
if ((DynamicContainsFile(S.Definition ? S.Definition.FileURI RemainingIDs.erase(S.ID);
: S.CanonicalDeclaration.FileURI) & return Callback(mergeSymbol(*Sym, S));
IndexContents::Symbols) != IndexContents::None) }
// If symbol is missing in dynamic index, and dynamic index owns the
// symbol's file. Static index is stale, just drop the symbol.
if (isIndexAuthoritative(DynamicContainsFile, S))
return; return;
const Symbol *Sym = B.find(S.ID);
// Dynamic index doesn't know about this file, just use the symbol from
// static index.
RemainingIDs.erase(S.ID); RemainingIDs.erase(S.ID);
if (!Sym) Callback(S);
Callback(S);
else
Callback(mergeSymbol(*Sym, S));
}); });
} }
for (const auto &ID : RemainingIDs) for (const auto &ID : RemainingIDs)

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Annotations.h" #include "Annotations.h"
#include "SyncAPI.h"
#include "TestIndex.h" #include "TestIndex.h"
#include "TestTU.h" #include "TestTU.h"
#include "index/FileIndex.h" #include "index/FileIndex.h"
@ -17,6 +18,7 @@
#include "clang/Index/IndexSymbol.h" #include "clang/Index/IndexSymbol.h"
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include <utility>
using ::testing::_; using ::testing::_;
using ::testing::AllOf; using ::testing::AllOf;
@ -312,16 +314,28 @@ TEST(MergeIndexTest, LookupRemovedDefinition) {
AST = Test.build(); AST = Test.build();
DynamicIndex.updateMain(testPath(Test.Filename), AST); DynamicIndex.updateMain(testPath(Test.Filename), AST);
// Merged index should not return the symbol definition if this definition // Even though the definition is actually deleted in the newer version of the
// location is inside a file from the dynamic index. // file, we still chose to merge with information coming from static index.
// This seems wrong, but is generic behavior we want for e.g. include headers
// which are always missing from the dynamic index
LookupRequest LookupReq; LookupRequest LookupReq;
LookupReq.IDs = {Foo.ID}; LookupReq.IDs = {Foo.ID};
unsigned SymbolCounter = 0; unsigned SymbolCounter = 0;
Merge.lookup(LookupReq, [&](const Symbol &Sym) { Merge.lookup(LookupReq, [&](const Symbol &Sym) {
++SymbolCounter; ++SymbolCounter;
EXPECT_FALSE(Sym.Definition); EXPECT_TRUE(Sym.Definition);
}); });
EXPECT_EQ(SymbolCounter, 1u); EXPECT_EQ(SymbolCounter, 1u);
// Drop the symbol completely.
Test.Code = "class Bar {};";
AST = Test.build();
DynamicIndex.updateMain(testPath(Test.Filename), AST);
// Now we don't expect to see the symbol at all.
SymbolCounter = 0;
Merge.lookup(LookupReq, [&](const Symbol &Sym) { ++SymbolCounter; });
EXPECT_EQ(SymbolCounter, 0u);
} }
TEST(MergeIndexTest, FuzzyFind) { TEST(MergeIndexTest, FuzzyFind) {
@ -585,6 +599,44 @@ TEST(MergeTest, MergeIncludesOnDifferentDefinitions) {
IncludeHeaderWithRef("new", 1u))); IncludeHeaderWithRef("new", 1u)));
} }
TEST(MergeIndexTest, IncludeHeadersMerged) {
auto S = symbol("Z");
S.Definition.FileURI = "unittest:///foo.cc";
SymbolSlab::Builder DynB;
S.IncludeHeaders.clear();
DynB.insert(S);
SymbolSlab DynSymbols = std::move(DynB).build();
RefSlab DynRefs;
auto DynSize = DynSymbols.bytes() + DynRefs.bytes();
auto DynData = std::make_pair(std::move(DynSymbols), std::move(DynRefs));
llvm::StringSet<> DynFiles = {S.Definition.FileURI};
MemIndex DynIndex(std::move(DynData.first), std::move(DynData.second),
RelationSlab(), std::move(DynFiles), IndexContents::Symbols,
std::move(DynData), DynSize);
SymbolSlab::Builder StaticB;
S.IncludeHeaders.push_back({"<header>", 0});
StaticB.insert(S);
auto StaticIndex =
MemIndex::build(std::move(StaticB).build(), RefSlab(), RelationSlab());
MergedIndex Merge(&DynIndex, StaticIndex.get());
EXPECT_THAT(runFuzzyFind(Merge, S.Name),
ElementsAre(testing::Field(
&Symbol::IncludeHeaders,
ElementsAre(IncludeHeaderWithRef("<header>", 0u)))));
LookupRequest Req;
Req.IDs = {S.ID};
std::string IncludeHeader;
Merge.lookup(Req, [&](const Symbol &S) {
EXPECT_TRUE(IncludeHeader.empty());
ASSERT_EQ(S.IncludeHeaders.size(), 1u);
IncludeHeader = S.IncludeHeaders.front().IncludeHeader.str();
});
EXPECT_EQ(IncludeHeader, "<header>");
}
} // namespace } // namespace
} // namespace clangd } // namespace clangd
} // namespace clang } // namespace clang