[clangd] Ignore the static index refs from the dynamic index files.

This patch fixes the following problem:
- open a file with references to the symbol `Foo`
- remove all references to `Foo` (from the dynamic index).
- `MergedIndex::refs()` result will contain positions of removed references (from the static index).

The idea of this patch is to keep a set of files which were used during index build inside the index.
Thus at processing the static index references we can check if the file of processing reference is a part of the dynamic index or not.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D93393
This commit is contained in:
Aleksandr Platonov 2020-12-18 15:14:15 +03:00
parent c15c296521
commit e35f9229dc
16 changed files with 199 additions and 20 deletions

View File

@ -266,11 +266,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
std::vector<std::shared_ptr<RefSlab>> RefSlabs;
std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
llvm::StringSet<> Files;
std::vector<RefSlab *> MainFileRefs;
{
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &FileAndSymbols : SymbolsSnapshot)
for (const auto &FileAndSymbols : SymbolsSnapshot) {
SymbolSlabs.push_back(FileAndSymbols.second);
Files.insert(FileAndSymbols.first());
}
for (const auto &FileAndRefs : RefsSnapshot) {
RefSlabs.push_back(FileAndRefs.second.Slab);
if (FileAndRefs.second.CountReferences)
@ -372,14 +375,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
case IndexType::Light:
return std::make_unique<MemIndex>(
llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
std::move(AllRelations),
std::move(AllRelations), std::move(Files),
std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
std::move(RefsStorage), std::move(SymsStorage)),
StorageSize);
case IndexType::Heavy:
return std::make_unique<dex::Dex>(
llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
std::move(AllRelations),
std::move(AllRelations), std::move(Files),
std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
std::move(RefsStorage), std::move(SymsStorage)),
StorageSize);

View File

@ -76,6 +76,11 @@ void SwapIndex::relations(
return snapshot()->relations(R, CB);
}
llvm::unique_function<bool(llvm::StringRef) const>
SwapIndex::indexedFiles() const {
return snapshot()->indexedFiles();
}
size_t SwapIndex::estimateMemoryUsage() const {
return snapshot()->estimateMemoryUsage();
}

View File

@ -14,6 +14,7 @@
#include "Symbol.h"
#include "SymbolID.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/JSON.h"
@ -121,6 +122,11 @@ public:
llvm::function_ref<void(const SymbolID &Subject, const Symbol &Object)>
Callback) const = 0;
/// Returns function which checks if the specified file was used to build this
/// index or not. The function must only be called while the index is alive.
virtual llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const = 0;
/// Returns estimated size of index (in bytes).
virtual size_t estimateMemoryUsage() const = 0;
};
@ -145,6 +151,9 @@ public:
llvm::function_ref<void(const SymbolID &, const Symbol &)>)
const override;
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override;
size_t estimateMemoryUsage() const override;
private:

View File

@ -109,6 +109,18 @@ void MemIndex::relations(
}
}
llvm::unique_function<bool(llvm::StringRef) const>
MemIndex::indexedFiles() const {
return [this](llvm::StringRef FileURI) {
auto Path = URI::resolve(FileURI);
if (!Path) {
llvm::consumeError(Path.takeError());
return false;
}
return Files.contains(*Path);
};
}
size_t MemIndex::estimateMemoryUsage() const {
return Index.getMemorySize() + Refs.getMemorySize() +
Relations.getMemorySize() + BackingDataSize;

View File

@ -10,6 +10,7 @@
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
#include "Index.h"
#include "llvm/ADT/StringSet.h"
#include <mutex>
namespace clang {
@ -44,6 +45,17 @@ public:
this->BackingDataSize = BackingDataSize;
}
template <typename SymbolRange, typename RefRange, typename RelationRange,
typename FileRange, typename Payload>
MemIndex(SymbolRange &&Symbols, RefRange &&Refs, RelationRange &&Relations,
FileRange &&Files, Payload &&BackingData, size_t BackingDataSize)
: MemIndex(std::forward<SymbolRange>(Symbols),
std::forward<RefRange>(Refs),
std::forward<RelationRange>(Relations),
std::forward<Payload>(BackingData), BackingDataSize) {
this->Files = std::forward<FileRange>(Files);
}
/// Builds an index from slabs. The index takes ownership of the data.
static std::unique_ptr<SymbolIndex> build(SymbolSlab Symbols, RefSlab Refs,
RelationSlab Relations);
@ -62,6 +74,9 @@ public:
llvm::function_ref<void(const SymbolID &, const Symbol &)>
Callback) const override;
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override;
size_t estimateMemoryUsage() const override;
private:
@ -73,6 +88,8 @@ private:
static_assert(sizeof(RelationKind) == sizeof(uint8_t),
"RelationKind should be of same size as a uint8_t");
llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
// Set of files which were used during this index build.
llvm::StringSet<> Files;
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
// Size of memory retained by KeepAlive.
size_t BackingDataSize = 0;

View File

@ -99,23 +99,18 @@ bool MergedIndex::refs(const RefsRequest &Req,
// and we can't reliably deduplicate them because offsets may differ slightly.
// We consider the dynamic index authoritative and report all its refs,
// and only report static index refs from other files.
//
// FIXME: The heuristic fails if the dynamic index contains a file, but all
// refs were removed (we will report stale ones from the static index).
// Ultimately we should explicit check which index has the file instead.
llvm::StringSet<> DynamicIndexFileURIs;
More |= Dynamic->refs(Req, [&](const Ref &O) {
DynamicIndexFileURIs.insert(O.Location.FileURI);
Callback(O);
assert(Remaining != 0);
--Remaining;
});
if (Remaining == 0 && More)
return More;
auto DynamicContainsFile = Dynamic->indexedFiles();
// We return less than Req.Limit if static index returns more refs for dirty
// files.
bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
if (DynamicIndexFileURIs.count(O.Location.FileURI))
bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
if (DynamicContainsFile(O.Location.FileURI))
return; // ignore refs that have been seen from dynamic index.
if (Remaining == 0) {
More = true;
@ -127,6 +122,14 @@ bool MergedIndex::refs(const RefsRequest &Req,
return More || StaticHadMore;
}
llvm::unique_function<bool(llvm::StringRef) const>
MergedIndex::indexedFiles() const {
return [DynamicContainsFile{Dynamic->indexedFiles()},
StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) {
return DynamicContainsFile(FileURI) || StaticContainsFile(FileURI);
};
}
void MergedIndex::relations(
const RelationsRequest &Req,
llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {

View File

@ -45,6 +45,8 @@ public:
void relations(const RelationsRequest &,
llvm::function_ref<void(const SymbolID &, const Symbol &)>)
const override;
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override;
size_t estimateMemoryUsage() const override {
return Dynamic->estimateMemoryUsage() + Static->estimateMemoryUsage();
}

View File

@ -54,6 +54,9 @@ public:
llvm::function_ref<void(const SymbolID &, const Symbol &)>
Callback) const override;
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override;
ProjectAwareIndex(IndexFactory Gen) : Gen(std::move(Gen)) {}
private:
@ -112,6 +115,14 @@ void ProjectAwareIndex::relations(
return Idx->relations(Req, Callback);
}
llvm::unique_function<bool(llvm::StringRef) const>
ProjectAwareIndex::indexedFiles() const {
trace::Span Tracer("ProjectAwareIndex::indexedFiles");
if (auto *Idx = getIndex())
return Idx->indexedFiles();
return [](llvm::StringRef) { return false; };
}
SymbolIndex *ProjectAwareIndex::getIndex() const {
const auto &C = Config::current();
if (!C.Index.External)

View File

@ -313,6 +313,17 @@ void Dex::relations(
}
}
llvm::unique_function<bool(llvm::StringRef) const> Dex::indexedFiles() const {
return [this](llvm::StringRef FileURI) {
auto Path = URI::resolve(FileURI);
if (!Path) {
llvm::consumeError(Path.takeError());
return false;
}
return Files.contains(*Path);
};
}
size_t Dex::estimateMemoryUsage() const {
size_t Bytes = Symbols.size() * sizeof(const Symbol *);
Bytes += SymbolQuality.size() * sizeof(float);

View File

@ -67,6 +67,16 @@ public:
this->BackingDataSize = BackingDataSize;
}
template <typename SymbolRange, typename RefsRange, typename RelationsRange,
typename FileRange, typename Payload>
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
FileRange &&Files, Payload &&BackingData, size_t BackingDataSize)
: Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
std::forward<RelationsRange>(Relations),
std::forward<Payload>(BackingData), BackingDataSize) {
this->Files = std::forward<FileRange>(Files);
}
/// Builds an index from slabs. The index takes ownership of the slab.
static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
@ -84,6 +94,9 @@ public:
llvm::function_ref<void(const SymbolID &, const Symbol &)>
Callback) const override;
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override;
size_t estimateMemoryUsage() const override;
private:
@ -112,6 +125,8 @@ private:
"RelationKind should be of same size as a uint8_t");
llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
// Set of files which were used during this index build.
llvm::StringSet<> Files;
// Size of memory retained by KeepAlive.
size_t BackingDataSize = 0;
};

View File

@ -152,6 +152,14 @@ public:
});
}
llvm::unique_function<bool(llvm::StringRef) const> indexedFiles() const {
// FIXME: For now we always return "false" regardless of whether the file
// was indexed or not. A possible implementation could be based on
// the idea that we do not want to send a request at every
// call of a function returned by IndexClient::indexedFiles().
return [](llvm::StringRef) { return false; };
}
// IndexClient does not take any space since the data is stored on the
// server.
size_t estimateMemoryUsage() const override { return 0; }

View File

@ -1349,6 +1349,11 @@ public:
llvm::function_ref<void(const SymbolID &, const Symbol &)>)
const override {}
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override {
return [](llvm::StringRef) { return false; };
}
// This is incorrect, but IndexRequestCollector is not an actual index and it
// isn't used in production code.
size_t estimateMemoryUsage() const override { return 0; }

View File

@ -732,6 +732,20 @@ TEST(DexTests, Relations) {
EXPECT_THAT(Results, UnorderedElementsAre(Child1.ID, Child2.ID));
}
TEST(DexIndex, IndexedFiles) {
SymbolSlab Symbols;
RefSlab Refs;
auto Size = Symbols.bytes() + Refs.bytes();
auto Data = std::make_pair(std::move(Symbols), std::move(Refs));
llvm::StringSet<> Files = {testPath("foo.cc"), testPath("bar.cc")};
Dex I(std::move(Data.first), std::move(Data.second), RelationSlab(),
std::move(Files), std::move(Data), Size);
auto ContainsFile = I.indexedFiles();
EXPECT_TRUE(ContainsFile("unittest:///foo.cc"));
EXPECT_TRUE(ContainsFile("unittest:///bar.cc"));
EXPECT_FALSE(ContainsFile("unittest:///foobar.cc"));
}
TEST(DexTest, PreferredTypesBoosting) {
auto Sym1 = symbol("t1");
Sym1.Type = "T1";

View File

@ -224,6 +224,20 @@ TEST(MemIndexTest, Lookup) {
EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
}
TEST(MemIndexTest, IndexedFiles) {
SymbolSlab Symbols;
RefSlab Refs;
auto Size = Symbols.bytes() + Refs.bytes();
auto Data = std::make_pair(std::move(Symbols), std::move(Refs));
llvm::StringSet<> Files = {testPath("foo.cc"), testPath("bar.cc")};
MemIndex I(std::move(Data.first), std::move(Data.second), RelationSlab(),
std::move(Files), std::move(Data), Size);
auto ContainsFile = I.indexedFiles();
EXPECT_TRUE(ContainsFile("unittest:///foo.cc"));
EXPECT_TRUE(ContainsFile("unittest:///bar.cc"));
EXPECT_FALSE(ContainsFile("unittest:///foobar.cc"));
}
TEST(MemIndexTest, TemplateSpecialization) {
SymbolSlab::Builder B;
@ -367,7 +381,7 @@ TEST(MergeIndexTest, Refs) {
Test.Code = std::string(Test1Code.code());
Test.Filename = "test.cc";
auto AST = Test.build();
Dyn.updateMain(Test.Filename, AST);
Dyn.updateMain(testPath(Test.Filename), AST);
// Build static index for test.cc.
Test.HeaderCode = HeaderCode;
@ -375,7 +389,7 @@ TEST(MergeIndexTest, Refs) {
Test.Filename = "test.cc";
auto StaticAST = Test.build();
// Add stale refs for test.cc.
StaticIndex.updateMain(Test.Filename, StaticAST);
StaticIndex.updateMain(testPath(Test.Filename), StaticAST);
// Add refs for test2.cc
Annotations Test2Code(R"(class $Foo[[Foo]] {};)");
@ -384,7 +398,7 @@ TEST(MergeIndexTest, Refs) {
Test2.Code = std::string(Test2Code.code());
Test2.Filename = "test2.cc";
StaticAST = Test2.build();
StaticIndex.updateMain(Test2.Filename, StaticAST);
StaticIndex.updateMain(testPath(Test2.Filename), StaticAST);
RefsRequest Request;
Request.IDs = {Foo.ID};
@ -403,10 +417,47 @@ TEST(MergeIndexTest, Refs) {
RefSlab::Builder Results2;
EXPECT_TRUE(
Merge.refs(Request, [&](const Ref &O) { Results2.insert(Foo.ID, O); }));
EXPECT_THAT(std::move(Results2).build(),
ElementsAre(Pair(
_, ElementsAre(AnyOf(FileURI("unittest:///test.cc"),
FileURI("unittest:///test2.cc"))))));
// Remove all refs for test.cc from dynamic index,
// merged index should not return results from static index for test.cc.
Test.Code = "";
AST = Test.build();
Dyn.updateMain(testPath(Test.Filename), AST);
Request.Limit = llvm::None;
RefSlab::Builder Results3;
EXPECT_FALSE(
Merge.refs(Request, [&](const Ref &O) { Results3.insert(Foo.ID, O); }));
EXPECT_THAT(std::move(Results3).build(),
ElementsAre(Pair(_, UnorderedElementsAre(AllOf(
RefRange(Test2Code.range("Foo")),
FileURI("unittest:///test2.cc"))))));
}
TEST(MergeIndexTest, IndexedFiles) {
SymbolSlab DynSymbols;
RefSlab DynRefs;
auto DynSize = DynSymbols.bytes() + DynRefs.bytes();
auto DynData = std::make_pair(std::move(DynSymbols), std::move(DynRefs));
llvm::StringSet<> DynFiles = {testPath("foo.cc")};
MemIndex DynIndex(std::move(DynData.first), std::move(DynData.second),
RelationSlab(), std::move(DynFiles), std::move(DynData),
DynSize);
SymbolSlab StaticSymbols;
RefSlab StaticRefs;
auto StaticData =
std::make_pair(std::move(StaticSymbols), std::move(StaticRefs));
llvm::StringSet<> StaticFiles = {testPath("bar.cc")};
MemIndex StaticIndex(std::move(StaticData.first),
std::move(StaticData.second), RelationSlab(),
std::move(StaticFiles), std::move(StaticData),
StaticSymbols.bytes() + StaticRefs.bytes());
MergedIndex Merge(&DynIndex, &StaticIndex);
auto ContainsFile = Merge.indexedFiles();
EXPECT_TRUE(ContainsFile("unittest:///foo.cc"));
EXPECT_TRUE(ContainsFile("unittest:///bar.cc"));
EXPECT_FALSE(ContainsFile("unittest:///foobar.cc"));
}
TEST(MergeIndexTest, NonDocumentation) {

View File

@ -1237,6 +1237,12 @@ TEST(CrossFileRenameTests, DirtyBuffer) {
void relations(const RelationsRequest &Req,
llvm::function_ref<void(const SymbolID &, const Symbol &)>
Callback) const override {}
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override {
return [](llvm::StringRef) { return false; };
}
size_t estimateMemoryUsage() const override { return 0; }
} PIndex;
Results = rename({MainCode.point(),
@ -1285,6 +1291,12 @@ TEST(CrossFileRenameTests, DeduplicateRefsFromIndex) {
void relations(const RelationsRequest &,
llvm::function_ref<void(const SymbolID &, const Symbol &)>)
const override {}
llvm::unique_function<bool(llvm::StringRef) const>
indexedFiles() const override {
return [](llvm::StringRef) { return false; };
}
size_t estimateMemoryUsage() const override { return 0; }
Ref ReturnedRef;
} DIndex(XRefInBarCC);

View File

@ -99,8 +99,9 @@ public:
llvm::Expected<std::string>
getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
llvm::StringRef HintPath) const override {
if (!HintPath.startswith(testRoot()))
return error("Hint path doesn't start with test root: {0}", HintPath);
if (!HintPath.empty() && !HintPath.startswith(testRoot()))
return error("Hint path is not empty and doesn't start with {0}: {1}",
testRoot(), HintPath);
if (!Body.consume_front("/"))
return error("Body of an unittest: URI must start with '/'");
llvm::SmallString<16> Path(Body.begin(), Body.end());