[clangd] Factor out the data-swapping functionality from MemIndex/DexIndex.

Summary:
This is now handled by a wrapper class SwapIndex, so MemIndex/DexIndex can be
immutable and focus on their job.

Old and busted:
 I have a MemIndex, which holds a shared_ptr<vector<Symbol*>>, which keeps the
 symbol slab alive. I update by calling build(shared_ptr<vector<Symbol*>>).

New hotness: I have a SwapIndex, which holds a unique_ptr<SymbolIndex>, which
 holds a MemIndex, which holds a shared_ptr<void>, which keeps backing
 data alive.
 I update by building a new MemIndex and calling SwapIndex::reset().

Reviewers: kbobyrev, ioeric

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D51422

llvm-svn: 341318
This commit is contained in:
Sam McCall 2018-09-03 14:37:43 +00:00
parent ca25b58957
commit 9c7624e14b
13 changed files with 374 additions and 516 deletions

View File

@ -71,7 +71,9 @@ indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
}
FileIndex::FileIndex(std::vector<std::string> URISchemes)
: URISchemes(std::move(URISchemes)) {}
: URISchemes(std::move(URISchemes)) {
reset(FSymbols.buildMemIndex());
}
void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Slab,
std::unique_ptr<SymbolOccurrenceSlab> Occurrences) {
@ -86,52 +88,32 @@ void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Slab,
FileToOccurrenceSlabs[Path] = std::move(Occurrences);
}
std::shared_ptr<std::vector<const Symbol *>> FileSymbols::allSymbols() {
// The snapshot manages life time of symbol slabs and provides pointers of all
// symbols in all slabs.
struct Snapshot {
std::vector<const Symbol *> Pointers;
std::vector<std::shared_ptr<SymbolSlab>> KeepAlive;
};
auto Snap = std::make_shared<Snapshot>();
std::unique_ptr<SymbolIndex> FileSymbols::buildMemIndex() {
std::vector<std::shared_ptr<SymbolSlab>> Slabs;
std::vector<std::shared_ptr<SymbolOccurrenceSlab>> OccurrenceSlabs;
{
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &FileAndSlab : FileToSlabs) {
Snap->KeepAlive.push_back(FileAndSlab.second);
for (const auto &Iter : *FileAndSlab.second)
Snap->Pointers.push_back(&Iter);
}
for (const auto &FileAndSlab : FileToSlabs)
Slabs.push_back(FileAndSlab.second);
for (const auto &FileAndOccurrenceSlab : FileToOccurrenceSlabs)
OccurrenceSlabs.push_back(FileAndOccurrenceSlab.second);
}
auto *Pointers = &Snap->Pointers;
// Use aliasing constructor to keep the snapshot alive along with the
// pointers.
return {std::move(Snap), Pointers};
}
std::shared_ptr<MemIndex::OccurrenceMap> FileSymbols::allOccurrences() const {
// The snapshot manages life time of symbol occurrence slabs and provides
// pointers to all occurrences in all occurrence slabs.
struct Snapshot {
MemIndex::OccurrenceMap Occurrences; // ID => {Occurrence}
std::vector<std::shared_ptr<SymbolOccurrenceSlab>> KeepAlive;
};
auto Snap = std::make_shared<Snapshot>();
{
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &FileAndSlab : FileToOccurrenceSlabs) {
Snap->KeepAlive.push_back(FileAndSlab.second);
for (const auto &IDAndOccurrences : *FileAndSlab.second) {
auto &Occurrences = Snap->Occurrences[IDAndOccurrences.first];
for (const auto &Occurrence : IDAndOccurrences.second)
Occurrences.push_back(&Occurrence);
}
std::vector<const Symbol *> AllSymbols;
for (const auto &Slab : Slabs)
for (const auto &Sym : *Slab)
AllSymbols.push_back(&Sym);
MemIndex::OccurrenceMap AllOccurrences;
for (const auto &OccurrenceSlab : OccurrenceSlabs)
for (const auto &Sym : *OccurrenceSlab) {
auto &Entry = AllOccurrences[Sym.first];
for (const auto &Occ : Sym.second)
Entry.push_back(&Occ);
}
}
return {std::move(Snap), &Snap->Occurrences};
// Index must keep the slabs alive.
return llvm::make_unique<MemIndex>(
llvm::make_pointee_range(AllSymbols), std::move(AllOccurrences),
std::make_pair(std::move(Slabs), std::move(OccurrenceSlabs)));
}
void FileIndex::update(PathRef Path, ASTContext *AST,
@ -148,30 +130,7 @@ void FileIndex::update(PathRef Path, ASTContext *AST,
indexAST(*AST, PP, TopLevelDecls, URISchemes);
FSymbols.update(Path, std::move(Slab), std::move(OccurrenceSlab));
}
auto Symbols = FSymbols.allSymbols();
Index.build(std::move(Symbols), FSymbols.allOccurrences());
}
bool FileIndex::fuzzyFind(
const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
return Index.fuzzyFind(Req, Callback);
}
void FileIndex::lookup(
const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
Index.lookup(Req, Callback);
}
void FileIndex::findOccurrences(
const OccurrencesRequest &Req,
llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
Index.findOccurrences(Req, Callback);
}
size_t FileIndex::estimateMemoryUsage() const {
return Index.estimateMemoryUsage();
reset(FSymbols.buildMemIndex());
}
} // namespace clangd

View File

@ -24,7 +24,7 @@
namespace clang {
namespace clangd {
/// \brief A container of Symbols from several source files. It can be updated
/// A container of Symbols from several source files. It can be updated
/// at source-file granularity, replacing all symbols from one file with a new
/// set.
///
@ -39,35 +39,31 @@ namespace clangd {
/// locking when we swap or obtain references to snapshots.
class FileSymbols {
public:
/// \brief Updates all symbols and occurrences in a file.
/// If \p Slab (Occurrence) is nullptr, symbols (occurrences) for \p Path
/// will be removed.
/// Updates all symbols and occurrences in a file.
/// If either is nullptr, corresponding data for \p Path will be removed.
void update(PathRef Path, std::unique_ptr<SymbolSlab> Slab,
std::unique_ptr<SymbolOccurrenceSlab> Occurrences);
// The shared_ptr keeps the symbols alive
std::shared_ptr<std::vector<const Symbol *>> allSymbols();
/// Returns all symbol occurrences for all active files.
std::shared_ptr<MemIndex::OccurrenceMap> allOccurrences() const;
// The index keeps the symbols alive.
std::unique_ptr<SymbolIndex> buildMemIndex();
private:
mutable std::mutex Mutex;
/// \brief Stores the latest snapshots for all active files.
/// Stores the latest snapshots for all active files.
llvm::StringMap<std::shared_ptr<SymbolSlab>> FileToSlabs;
/// Stores the latest occurrence slabs for all active files.
llvm::StringMap<std::shared_ptr<SymbolOccurrenceSlab>> FileToOccurrenceSlabs;
};
/// \brief This manages symbols from files and an in-memory index on all symbols.
class FileIndex : public SymbolIndex {
/// This manages symbols from files and an in-memory index on all symbols.
class FileIndex : public SwapIndex {
public:
/// If URISchemes is empty, the default schemes in SymbolCollector will be
/// used.
FileIndex(std::vector<std::string> URISchemes = {});
/// \brief Update symbols in \p Path with symbols in \p AST. If \p AST is
/// Update symbols in \p Path with symbols in \p AST. If \p AST is
/// nullptr, this removes all symbols in the file.
/// If \p AST is not null, \p PP cannot be null and it should be the
/// preprocessor that was used to build \p AST.
@ -77,23 +73,11 @@ public:
update(PathRef Path, ASTContext *AST, std::shared_ptr<Preprocessor> PP,
llvm::Optional<llvm::ArrayRef<Decl *>> TopLevelDecls = llvm::None);
bool
fuzzyFind(const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void findOccurrences(const OccurrencesRequest &Req,
llvm::function_ref<void(const SymbolOccurrence &)>
Callback) const override;
size_t estimateMemoryUsage() const override;
private:
// Only update() should swap the index.
using SwapIndex::reset;
FileSymbols FSymbols;
MemIndex Index;
std::vector<std::string> URISchemes;
};

View File

@ -165,5 +165,36 @@ void SymbolOccurrenceSlab::freeze() {
Frozen = true;
}
void SwapIndex::reset(std::unique_ptr<SymbolIndex> Index) {
// Keep the old index alive, so we don't destroy it under lock (may be slow).
std::shared_ptr<SymbolIndex> Pin;
{
std::lock_guard<std::mutex> Lock(Mutex);
Pin = std::move(this->Index);
this->Index = std::move(Index);
}
}
std::shared_ptr<SymbolIndex> SwapIndex::snapshot() const {
std::lock_guard<std::mutex> Lock(Mutex);
return Index;
}
bool SwapIndex::fuzzyFind(const FuzzyFindRequest &R,
llvm::function_ref<void(const Symbol &)> CB) const {
return snapshot()->fuzzyFind(R, CB);
}
void SwapIndex::lookup(const LookupRequest &R,
llvm::function_ref<void(const Symbol &)> CB) const {
return snapshot()->lookup(R, CB);
}
void SwapIndex::findOccurrences(
const OccurrencesRequest &R,
llvm::function_ref<void(const SymbolOccurrence &)> CB) const {
return snapshot()->findOccurrences(R, CB);
}
size_t SwapIndex::estimateMemoryUsage() const {
return snapshot()->estimateMemoryUsage();
}
} // namespace clangd
} // namespace clang

View File

@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/StringSaver.h"
#include <array>
#include <mutex>
#include <string>
#include <tuple>
@ -331,6 +332,7 @@ inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A,
return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(A) &
static_cast<uint8_t>(B));
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &, SymbolOccurrenceKind);
static const SymbolOccurrenceKind AllOccurrenceKinds =
SymbolOccurrenceKind::Declaration | SymbolOccurrenceKind::Definition |
SymbolOccurrenceKind::Reference;
@ -447,10 +449,10 @@ struct LookupRequest {
struct OccurrencesRequest {
llvm::DenseSet<SymbolID> IDs;
SymbolOccurrenceKind Filter;
SymbolOccurrenceKind Filter = AllOccurrenceKinds;
};
/// \brief Interface for symbol indexes that can be used for searching or
/// Interface for symbol indexes that can be used for searching or
/// matching symbols among a set of symbols based on names or unique IDs.
class SymbolIndex {
public:
@ -489,6 +491,31 @@ public:
virtual size_t estimateMemoryUsage() const = 0;
};
// Delegating implementation of SymbolIndex whose delegate can be swapped out.
class SwapIndex : public SymbolIndex {
public:
// If an index is not provided, reset() must be called.
SwapIndex(std::unique_ptr<SymbolIndex> Index = nullptr)
: Index(std::move(Index)) {}
void reset(std::unique_ptr<SymbolIndex>);
// SymbolIndex methods delegate to the current index, which is kept alive
// until the call returns (even if reset() is called).
bool fuzzyFind(const FuzzyFindRequest &,
llvm::function_ref<void(const Symbol &)>) const override;
void lookup(const LookupRequest &,
llvm::function_ref<void(const Symbol &)>) const override;
void findOccurrences(
const OccurrencesRequest &,
llvm::function_ref<void(const SymbolOccurrence &)>) const override;
size_t estimateMemoryUsage() const override;
private:
std::shared_ptr<SymbolIndex> snapshot() const;
mutable std::mutex Mutex;
std::shared_ptr<SymbolIndex> Index;
};
} // namespace clangd
} // namespace clang

View File

@ -15,49 +15,16 @@
namespace clang {
namespace clangd {
static std::shared_ptr<MemIndex::OccurrenceMap>
getOccurrencesFromSlab(SymbolOccurrenceSlab OccurrencesSlab) {
struct Snapshot {
SymbolOccurrenceSlab Slab;
MemIndex::OccurrenceMap Occurrences;
};
auto Snap = std::make_shared<Snapshot>();
Snap->Slab = std::move(OccurrencesSlab);
for (const auto &IDAndOccurrences : Snap->Slab) {
auto &Occurrences = Snap->Occurrences[IDAndOccurrences.first];
for (const auto &Occurrence : IDAndOccurrences.second)
Occurrences.push_back(&Occurrence);
}
return {std::move(Snap), &Snap->Occurrences};
}
void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms,
std::shared_ptr<OccurrenceMap> AllOccurrences) {
assert(Syms && "Syms must be set when build MemIndex");
assert(AllOccurrences && "Occurrences must be set when build MemIndex");
llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
for (const Symbol *Sym : *Syms)
TempIndex[Sym->ID] = Sym;
// Swap out the old symbols and index.
{
std::lock_guard<std::mutex> Lock(Mutex);
Index = std::move(TempIndex);
Symbols = std::move(Syms); // Release old symbols.
Occurrences = std::move(AllOccurrences);
}
vlog("Built MemIndex with estimated memory usage {0} bytes.",
estimateMemoryUsage());
}
std::unique_ptr<SymbolIndex> MemIndex::build(SymbolSlab Symbols,
std::unique_ptr<SymbolIndex> MemIndex::build(SymbolSlab Slab,
SymbolOccurrenceSlab Occurrences) {
auto Idx = llvm::make_unique<MemIndex>();
Idx->build(getSymbolsFromSlab(std::move(Symbols)),
getOccurrencesFromSlab(std::move(Occurrences)));
return std::move(Idx);
OccurrenceMap M;
for (const auto &SymbolAndOccurrences : Occurrences) {
auto &Entry = M[SymbolAndOccurrences.first];
for (const auto &Occurrence : SymbolAndOccurrences.second)
Entry.push_back(&Occurrence);
}
auto Data = std::make_pair(std::move(Slab), std::move(Occurrences));
return llvm::make_unique<MemIndex>(Data.first, std::move(M), std::move(Data));
}
bool MemIndex::fuzzyFind(
@ -69,34 +36,30 @@ bool MemIndex::fuzzyFind(
std::priority_queue<std::pair<float, const Symbol *>> Top;
FuzzyMatcher Filter(Req.Query);
bool More = false;
{
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto Pair : Index) {
const Symbol *Sym = Pair.second;
for (const auto Pair : Index) {
const Symbol *Sym = Pair.second;
// Exact match against all possible scopes.
if (!Req.Scopes.empty() && !llvm::is_contained(Req.Scopes, Sym->Scope))
continue;
if (Req.RestrictForCodeCompletion && !Sym->IsIndexedForCodeCompletion)
continue;
// Exact match against all possible scopes.
if (!Req.Scopes.empty() && !llvm::is_contained(Req.Scopes, Sym->Scope))
continue;
if (Req.RestrictForCodeCompletion && !Sym->IsIndexedForCodeCompletion)
continue;
if (auto Score = Filter.match(Sym->Name)) {
Top.emplace(-*Score * quality(*Sym), Sym);
if (Top.size() > Req.MaxCandidateCount) {
More = true;
Top.pop();
}
if (auto Score = Filter.match(Sym->Name)) {
Top.emplace(-*Score * quality(*Sym), Sym);
if (Top.size() > Req.MaxCandidateCount) {
More = true;
Top.pop();
}
}
for (; !Top.empty(); Top.pop())
Callback(*Top.top().second);
}
for (; !Top.empty(); Top.pop())
Callback(*Top.top().second);
return More;
}
void MemIndex::lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &ID : Req.IDs) {
auto I = Index.find(ID);
if (I != Index.end())
@ -107,10 +70,9 @@ void MemIndex::lookup(const LookupRequest &Req,
void MemIndex::findOccurrences(
const OccurrencesRequest &Req,
llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &ReqID : Req.IDs) {
auto FoundOccurrences = Occurrences->find(ReqID);
if (FoundOccurrences == Occurrences->end())
auto FoundOccurrences = Occurrences.find(ReqID);
if (FoundOccurrences == Occurrences.end())
continue;
for (const auto *O : FoundOccurrences->second) {
if (static_cast<int>(Req.Filter & O->Kind))
@ -119,22 +81,7 @@ void MemIndex::findOccurrences(
}
}
std::shared_ptr<std::vector<const Symbol *>>
getSymbolsFromSlab(SymbolSlab Slab) {
struct Snapshot {
SymbolSlab Slab;
std::vector<const Symbol *> Pointers;
};
auto Snap = std::make_shared<Snapshot>();
Snap->Slab = std::move(Slab);
for (auto &Sym : Snap->Slab)
Snap->Pointers.push_back(&Sym);
return std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap),
&Snap->Pointers);
}
size_t MemIndex::estimateMemoryUsage() const {
std::lock_guard<std::mutex> Lock(Mutex);
return Index.getMemorySize();
}

View File

@ -16,8 +16,7 @@
namespace clang {
namespace clangd {
/// \brief This implements an index for a (relatively small) set of symbols (or
/// symbol occurrences) that can be easily managed in memory.
/// MemIndex is a naive in-memory index suitable for a small set of symbols.
class MemIndex : public SymbolIndex {
public:
/// Maps from a symbol ID to all corresponding symbol occurrences.
@ -25,23 +24,33 @@ public:
using OccurrenceMap =
llvm::DenseMap<SymbolID, std::vector<const SymbolOccurrence *>>;
/// \brief (Re-)Build index for `Symbols` and update `Occurrences`.
/// All symbol pointers and symbol occurrence pointers must remain accessible
/// as long as `Symbols` and `Occurrences` are kept alive.
void build(std::shared_ptr<std::vector<const Symbol *>> Symbols,
std::shared_ptr<OccurrenceMap> Occurrences);
MemIndex() = default;
// All symbols and occurrences must outlive this index.
// TODO: find a better type for Occurrences here.
template <typename SymbolRange>
MemIndex(SymbolRange &&Symbols, OccurrenceMap Occurrences)
: Occurrences(std::move(Occurrences)) {
for (const Symbol &S : Symbols)
Index[S.ID] = &S;
}
// Symbols are owned by BackingData, Index takes ownership.
template <typename Range, typename Payload>
MemIndex(Range &&Symbols, OccurrenceMap Occurrences, Payload &&BackingData)
: MemIndex(std::forward<Range>(Symbols), std::move(Occurrences)) {
KeepAlive = std::shared_ptr<void>(
std::make_shared<Payload>(std::move(BackingData)), nullptr);
}
/// \brief Build index from a symbol slab and a symbol occurrence slab.
static std::unique_ptr<SymbolIndex> build(SymbolSlab Symbols,
/// Builds an index from a slab. The index takes ownership of the data.
static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab,
SymbolOccurrenceSlab Occurrences);
bool
fuzzyFind(const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void
lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const override;
void findOccurrences(const OccurrencesRequest &Req,
llvm::function_ref<void(const SymbolOccurrence &)>
@ -50,21 +59,13 @@ public:
size_t estimateMemoryUsage() const override;
private:
std::shared_ptr<std::vector<const Symbol *>> Symbols;
// Index is a set of symbols that are deduplicated by symbol IDs.
// FIXME: build smarter index structure.
llvm::DenseMap<SymbolID, const Symbol *> Index;
// A map from symbol ID to symbol occurrences, support query by IDs.
std::shared_ptr<OccurrenceMap> Occurrences;
mutable std::mutex Mutex;
OccurrenceMap Occurrences;
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
};
// Returns pointers to the symbols in given slab and bundles slab lifetime with
// returned symbol pointers so that the pointers are never invalid.
std::shared_ptr<std::vector<const Symbol *>>
getSymbolsFromSlab(SymbolSlab Slab);
} // namespace clangd
} // namespace clang

View File

@ -36,48 +36,30 @@ std::vector<Token> generateSearchTokens(const Symbol &Sym) {
} // namespace
void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
llvm::DenseMap<SymbolID, const Symbol *> TempLookupTable;
llvm::DenseMap<const Symbol *, float> TempSymbolQuality;
for (const Symbol *Sym : *Syms) {
TempLookupTable[Sym->ID] = Sym;
TempSymbolQuality[Sym] = quality(*Sym);
void DexIndex::buildIndex() {
for (const Symbol *Sym : Symbols) {
LookupTable[Sym->ID] = Sym;
SymbolQuality[Sym] = quality(*Sym);
}
// Symbols are sorted by symbol qualities so that items in the posting lists
// are stored in the descending order of symbol quality.
std::sort(begin(*Syms), end(*Syms),
std::sort(begin(Symbols), end(Symbols),
[&](const Symbol *LHS, const Symbol *RHS) {
return TempSymbolQuality[LHS] > TempSymbolQuality[RHS];
return SymbolQuality[LHS] > SymbolQuality[RHS];
});
llvm::DenseMap<Token, PostingList> TempInvertedIndex;
// Populate TempInvertedIndex with posting lists for index symbols.
for (DocID SymbolRank = 0; SymbolRank < Syms->size(); ++SymbolRank) {
const auto *Sym = (*Syms)[SymbolRank];
for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
const auto *Sym = Symbols[SymbolRank];
for (const auto &Token : generateSearchTokens(*Sym))
TempInvertedIndex[Token].push_back(SymbolRank);
}
{
std::lock_guard<std::mutex> Lock(Mutex);
// Replace outdated index with the new one.
LookupTable = std::move(TempLookupTable);
Symbols = std::move(Syms);
InvertedIndex = std::move(TempInvertedIndex);
SymbolQuality = std::move(TempSymbolQuality);
InvertedIndex[Token].push_back(SymbolRank);
}
vlog("Built DexIndex with estimated memory usage {0} bytes.",
estimateMemoryUsage());
}
std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
auto Idx = llvm::make_unique<DexIndex>();
Idx->build(getSymbolsFromSlab(std::move(Slab)));
return std::move(Idx);
}
/// Constructs iterators over tokens extracted from the query and exhausts it
/// while applying Callback to each symbol in the order of decreasing quality
/// of the matched symbols.
@ -92,75 +74,69 @@ bool DexIndex::fuzzyFind(
std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
{
std::lock_guard<std::mutex> Lock(Mutex);
// Generate query trigrams and construct AND iterator over all query
// trigrams.
std::vector<std::unique_ptr<Iterator>> TrigramIterators;
for (const auto &Trigram : TrigramTokens) {
const auto It = InvertedIndex.find(Trigram);
if (It != InvertedIndex.end())
TrigramIterators.push_back(create(It->second));
}
if (!TrigramIterators.empty())
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
// Generate query trigrams and construct AND iterator over all query
// trigrams.
std::vector<std::unique_ptr<Iterator>> TrigramIterators;
for (const auto &Trigram : TrigramTokens) {
const auto It = InvertedIndex.find(Trigram);
if (It != InvertedIndex.end())
TrigramIterators.push_back(create(It->second));
// Generate scope tokens for search query.
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
for (const auto &Scope : Req.Scopes) {
const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
if (It != InvertedIndex.end())
ScopeIterators.push_back(create(It->second));
}
// Add OR iterator for scopes if there are any Scope Iterators.
if (!ScopeIterators.empty())
TopLevelChildren.push_back(createOr(move(ScopeIterators)));
// Use TRUE iterator if both trigrams and scopes from the query are not
// present in the symbol index.
auto QueryIterator = TopLevelChildren.empty()
? createTrue(Symbols.size())
: createAnd(move(TopLevelChildren));
// Retrieve more items than it was requested: some of the items with high
// final score might not be retrieved otherwise.
// FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
// using 100x of the requested number might not be good in practice, e.g.
// when the requested number of items is small.
const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
// FIXME(kbobyrev): Add boosting to the query and utilize retrieved
// boosting scores.
std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
// Retrieve top Req.MaxCandidateCount items.
std::priority_queue<std::pair<float, const Symbol *>> Top;
for (const auto &P : SymbolDocIDs) {
const DocID SymbolDocID = P.first;
const auto *Sym = Symbols[SymbolDocID];
const llvm::Optional<float> Score = Filter.match(Sym->Name);
if (!Score)
continue;
// Multiply score by a negative factor so that Top stores items with the
// highest actual score.
Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
if (Top.size() > Req.MaxCandidateCount) {
More = true;
Top.pop();
}
if (!TrigramIterators.empty())
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
// Generate scope tokens for search query.
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
for (const auto &Scope : Req.Scopes) {
const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
if (It != InvertedIndex.end())
ScopeIterators.push_back(create(It->second));
}
// Add OR iterator for scopes if there are any Scope Iterators.
if (!ScopeIterators.empty())
TopLevelChildren.push_back(createOr(move(ScopeIterators)));
// Use TRUE iterator if both trigrams and scopes from the query are not
// present in the symbol index.
auto QueryIterator = TopLevelChildren.empty()
? createTrue(Symbols->size())
: createAnd(move(TopLevelChildren));
// Retrieve more items than it was requested: some of the items with high
// final score might not be retrieved otherwise.
// FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
// using 100x of the requested number might not be good in practice, e.g.
// when the requested number of items is small.
const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
// FIXME(kbobyrev): Add boosting to the query and utilize retrieved
// boosting scores.
std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
// Retrieve top Req.MaxCandidateCount items.
std::priority_queue<std::pair<float, const Symbol *>> Top;
for (const auto &P : SymbolDocIDs) {
const DocID SymbolDocID = P.first;
const auto *Sym = (*Symbols)[SymbolDocID];
const llvm::Optional<float> Score = Filter.match(Sym->Name);
if (!Score)
continue;
// Multiply score by a negative factor so that Top stores items with the
// highest actual score.
Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
if (Top.size() > Req.MaxCandidateCount) {
More = true;
Top.pop();
}
}
// Apply callback to the top Req.MaxCandidateCount items.
for (; !Top.empty(); Top.pop())
Callback(*Top.top().second);
}
// Apply callback to the top Req.MaxCandidateCount items.
for (; !Top.empty(); Top.pop())
Callback(*Top.top().second);
return More;
}
void DexIndex::lookup(const LookupRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
std::lock_guard<std::mutex> Lock(Mutex);
for (const auto &ID : Req.IDs) {
auto I = LookupTable.find(ID);
if (I != LookupTable.end())
@ -175,8 +151,6 @@ void DexIndex::findOccurrences(
}
size_t DexIndex::estimateMemoryUsage() const {
std::lock_guard<std::mutex> Lock(Mutex);
size_t Bytes =
LookupTable.size() * sizeof(std::pair<SymbolID, const Symbol *>);
Bytes += SymbolQuality.size() * sizeof(std::pair<const Symbol *, float>);

View File

@ -25,7 +25,6 @@
#include "Iterator.h"
#include "Token.h"
#include "Trigram.h"
#include <mutex>
namespace clang {
namespace clangd {
@ -39,12 +38,24 @@ namespace dex {
// index on disk and then load it if available.
class DexIndex : public SymbolIndex {
public:
/// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
/// accessible as long as `Symbols` is kept alive.
void build(std::shared_ptr<std::vector<const Symbol *>> Syms);
// All symbols must outlive this index.
template <typename Range> DexIndex(Range &&Symbols) {
for (auto &&Sym : Symbols)
this->Symbols.push_back(&Sym);
buildIndex();
}
// Symbols are owned by BackingData, Index takes ownership.
template <typename Range, typename Payload>
DexIndex(Range &&Symbols, Payload &&BackingData)
: DexIndex(std::forward<Range>(Symbols)) {
KeepAlive = std::shared_ptr<void>(
std::make_shared<Payload>(std::move(BackingData)), nullptr);
}
/// \brief Build index from a symbol slab.
static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
/// Builds an index from a slab. The index takes ownership of the slab.
static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab) {
return llvm::make_unique<DexIndex>(Slab, std::move(Slab));
}
bool
fuzzyFind(const FuzzyFindRequest &Req,
@ -60,19 +71,19 @@ public:
size_t estimateMemoryUsage() const override;
private:
void buildIndex();
mutable std::mutex Mutex;
std::shared_ptr<std::vector<const Symbol *>> Symbols /*GUARDED_BY(Mutex)*/;
llvm::DenseMap<SymbolID, const Symbol *> LookupTable /*GUARDED_BY(Mutex)*/;
llvm::DenseMap<const Symbol *, float> SymbolQuality /*GUARDED_BY(Mutex)*/;
std::vector<const Symbol *> Symbols;
llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
llvm::DenseMap<const Symbol *, float> SymbolQuality;
// Inverted index is a mapping from the search token to the posting list,
// which contains all items which can be characterized by such search token.
// For example, if the search token is scope "std::", the corresponding
// posting list would contain all indices of symbols defined in namespace std.
// Inverted index is used to retrieve posting lists which are processed during
// the fuzzyFind process.
llvm::DenseMap<Token, PostingList> InvertedIndex /*GUARDED_BY(Mutex)*/;
llvm::DenseMap<Token, PostingList> InvertedIndex;
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
};
} // namespace dex

View File

@ -23,6 +23,7 @@
using ::testing::ElementsAre;
using ::testing::UnorderedElementsAre;
using namespace llvm;
namespace clang {
namespace clangd {
@ -408,171 +409,140 @@ TEST(DexIndexTrigrams, QueryTrigrams) {
}
TEST(DexIndex, Lookup) {
DexIndex I;
I.build(generateSymbols({"ns::abc", "ns::xyz"}));
EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::abc", "ns::xyz"));
EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::xyz"));
EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
}
TEST(DexIndex, FuzzyFind) {
DexIndex Index;
Index.build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
"other::ABC", "other::A"}));
auto Index = DexIndex::build(
generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
"other::ABC", "other::A"}));
FuzzyFindRequest Req;
Req.Query = "ABC";
Req.Scopes = {"ns::"};
EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
EXPECT_THAT(match(*Index, Req), UnorderedElementsAre("ns::ABC"));
Req.Scopes = {"ns::", "ns::nested::"};
EXPECT_THAT(match(Index, Req),
EXPECT_THAT(match(*Index, Req),
UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
Req.Query = "A";
Req.Scopes = {"other::"};
EXPECT_THAT(match(Index, Req),
EXPECT_THAT(match(*Index, Req),
UnorderedElementsAre("other::A", "other::ABC"));
Req.Query = "";
Req.Scopes = {};
EXPECT_THAT(match(Index, Req),
EXPECT_THAT(match(*Index, Req),
UnorderedElementsAre("ns::ABC", "ns::BCD", "::ABC",
"ns::nested::ABC", "other::ABC",
"other::A"));
}
TEST(DexIndexTest, FuzzyMatchQ) {
DexIndex I;
I.build(
auto I = DexIndex::build(
generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
FuzzyFindRequest Req;
Req.Query = "lol";
Req.MaxCandidateCount = 2;
EXPECT_THAT(match(I, Req),
EXPECT_THAT(match(*I, Req),
UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
}
TEST(DexIndexTest, DexIndexSymbolsRecycled) {
DexIndex I;
std::weak_ptr<SlabAndPointers> Symbols;
I.build(generateNumSymbols(0, 10, &Symbols));
FuzzyFindRequest Req;
Req.Query = "7";
EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
EXPECT_FALSE(Symbols.expired());
// Release old symbols.
I.build(generateNumSymbols(0, 0));
EXPECT_TRUE(Symbols.expired());
}
// FIXME(kbobyrev): This test is different for DexIndex and MemIndex: while
// MemIndex manages response deduplication, DexIndex simply returns all matched
// symbols which means there might be equivalent symbols in the response.
// Before drop-in replacement of MemIndex with DexIndex happens, FileIndex
// should handle deduplication instead.
TEST(DexIndexTest, DexIndexDeduplicate) {
auto Symbols = generateNumSymbols(0, 10);
// Inject duplicates.
auto Sym = symbol("7");
Symbols->push_back(&Sym);
Symbols->push_back(&Sym);
Symbols->push_back(&Sym);
std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
symbol("2") /* duplicate */};
FuzzyFindRequest Req;
Req.Query = "7";
DexIndex I;
I.build(std::move(Symbols));
auto Matches = match(I, Req);
EXPECT_EQ(Matches.size(), 4u);
Req.Query = "2";
DexIndex I(Symbols);
EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
}
TEST(DexIndexTest, DexIndexLimitedNumMatches) {
DexIndex I;
I.build(generateNumSymbols(0, 100));
auto I = DexIndex::build(generateNumSymbols(0, 100));
FuzzyFindRequest Req;
Req.Query = "5";
Req.MaxCandidateCount = 3;
bool Incomplete;
auto Matches = match(I, Req, &Incomplete);
auto Matches = match(*I, Req, &Incomplete);
EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
EXPECT_TRUE(Incomplete);
}
TEST(DexIndexTest, FuzzyMatch) {
DexIndex I;
I.build(
auto I = DexIndex::build(
generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
FuzzyFindRequest Req;
Req.Query = "lol";
Req.MaxCandidateCount = 2;
EXPECT_THAT(match(I, Req),
EXPECT_THAT(match(*I, Req),
UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
}
TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
DexIndex I;
I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
FuzzyFindRequest Req;
Req.Query = "y";
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
}
TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
DexIndex I;
I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {""};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3"));
}
TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
DexIndex I;
I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
auto I = DexIndex::build(
generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2"));
}
TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
DexIndex I;
I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
auto I = DexIndex::build(
generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::", "b::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
}
TEST(DexIndexTest, NoMatchNestedScopes) {
DexIndex I;
I.build(generateSymbols({"a::y1", "a::b::y2"}));
auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}));
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1"));
}
TEST(DexIndexTest, IgnoreCases) {
DexIndex I;
I.build(generateSymbols({"ns::ABC", "ns::abc"}));
auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}));
FuzzyFindRequest Req;
Req.Query = "AB";
Req.Scopes = {"ns::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
}
TEST(DexIndexTest, Lookup) {
DexIndex I;
I.build(generateSymbols({"ns::abc", "ns::xyz"}));
EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::abc", "ns::xyz"));
EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::xyz"));
EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
}
} // namespace

View File

@ -54,23 +54,27 @@ std::unique_ptr<SymbolOccurrenceSlab> occurrenceSlab(const SymbolID &ID,
auto Slab = llvm::make_unique<SymbolOccurrenceSlab>();
SymbolOccurrence Occurrence;
Occurrence.Location.FileURI = Path;
Occurrence.Kind = SymbolOccurrenceKind::Reference;
Slab->insert(ID, Occurrence);
return Slab;
}
std::vector<std::string>
getSymbolNames(const std::vector<const Symbol *> &Symbols) {
std::vector<std::string> getSymbolNames(const SymbolIndex &I,
std::string Query = "") {
FuzzyFindRequest Req;
Req.Query = Query;
std::vector<std::string> Names;
for (const Symbol *Sym : Symbols)
Names.push_back(Sym->Name);
I.fuzzyFind(Req, [&](const Symbol &S) { Names.push_back(S.Name); });
return Names;
}
std::vector<std::string>
getOccurrencePath(const std::vector<const SymbolOccurrence *> &Occurrences) {
std::vector<std::string> getOccurrencePaths(const SymbolIndex &I, SymbolID ID) {
OccurrencesRequest Req;
Req.IDs = {ID};
std::vector<std::string> Paths;
for (const auto *O : Occurrences)
Paths.push_back(O->Location.FileURI);
I.findOccurrences(Req, [&](const SymbolOccurrence &S) {
Paths.push_back(S.Location.FileURI);
});
return Paths;
}
@ -82,15 +86,12 @@ std::unique_ptr<SymbolOccurrenceSlab> emptyOccurrence() {
TEST(FileSymbolsTest, UpdateAndGet) {
FileSymbols FS;
EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre());
EXPECT_TRUE(FS.allOccurrences()->empty());
EXPECT_THAT(getSymbolNames(*FS.buildMemIndex()), UnorderedElementsAre());
SymbolID ID("1");
FS.update("f1", numSlab(1, 3), occurrenceSlab(ID, "f1.cc"));
EXPECT_THAT(getSymbolNames(*FS.allSymbols()),
FS.update("f1", numSlab(1, 3), occurrenceSlab(SymbolID("1"), "f1.cc"));
EXPECT_THAT(getSymbolNames(*FS.buildMemIndex()),
UnorderedElementsAre("1", "2", "3"));
auto Occurrences = FS.allOccurrences();
EXPECT_THAT(getOccurrencePath((*Occurrences)[ID]),
EXPECT_THAT(getOccurrencePaths(*FS.buildMemIndex(), SymbolID("1")),
UnorderedElementsAre("f1.cc"));
}
@ -98,8 +99,8 @@ TEST(FileSymbolsTest, Overlap) {
FileSymbols FS;
FS.update("f1", numSlab(1, 3), emptyOccurrence());
FS.update("f2", numSlab(3, 5), emptyOccurrence());
EXPECT_THAT(getSymbolNames(*FS.allSymbols()),
UnorderedElementsAre("1", "2", "3", "3", "4", "5"));
EXPECT_THAT(getSymbolNames(*FS.buildMemIndex()),
UnorderedElementsAre("1", "2", "3", "4", "5"));
}
TEST(FileSymbolsTest, SnapshotAliveAfterRemove) {
@ -108,19 +109,17 @@ TEST(FileSymbolsTest, SnapshotAliveAfterRemove) {
SymbolID ID("1");
FS.update("f1", numSlab(1, 3), occurrenceSlab(ID, "f1.cc"));
auto Symbols = FS.allSymbols();
auto Symbols = FS.buildMemIndex();
EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3"));
auto Occurrences = FS.allOccurrences();
EXPECT_THAT(getOccurrencePath((*Occurrences)[ID]),
UnorderedElementsAre("f1.cc"));
EXPECT_THAT(getOccurrencePaths(*Symbols, ID), UnorderedElementsAre("f1.cc"));
FS.update("f1", nullptr, nullptr);
EXPECT_THAT(getSymbolNames(*FS.allSymbols()), UnorderedElementsAre());
EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3"));
auto Empty = FS.buildMemIndex();
EXPECT_THAT(getSymbolNames(*Empty), UnorderedElementsAre());
EXPECT_THAT(getOccurrencePaths(*Empty, ID), UnorderedElementsAre());
EXPECT_TRUE(FS.allOccurrences()->empty());
EXPECT_THAT(getOccurrencePath((*Occurrences)[ID]),
UnorderedElementsAre("f1.cc"));
EXPECT_THAT(getSymbolNames(*Symbols), UnorderedElementsAre("1", "2", "3"));
EXPECT_THAT(getOccurrencePaths(*Symbols, ID), UnorderedElementsAre("f1.cc"));
}
std::vector<std::string> match(const SymbolIndex &I,

View File

@ -17,18 +17,16 @@
#include "index/Merge.h"
#include "gtest/gtest.h"
using testing::AllOf;
using testing::ElementsAre;
using testing::Pointee;
using testing::UnorderedElementsAre;
using testing::AllOf;
using namespace llvm;
namespace clang {
namespace clangd {
namespace {
std::shared_ptr<MemIndex::OccurrenceMap> emptyOccurrences() {
return llvm::make_unique<MemIndex::OccurrenceMap>();
}
MATCHER_P(Named, N, "") { return arg.Name == N; }
MATCHER_P(OccurrenceRange, Range, "") {
return std::tie(arg.Location.Start.Line, arg.Location.Start.Column,
@ -54,155 +52,139 @@ TEST(SymbolSlab, FindAndIterate) {
EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
}
TEST(MemIndexTest, MemIndexSymbolsRecycled) {
MemIndex I;
std::weak_ptr<SlabAndPointers> Symbols;
I.build(generateNumSymbols(0, 10, &Symbols), emptyOccurrences());
FuzzyFindRequest Req;
Req.Query = "7";
EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
TEST(SwapIndexTest, OldIndexRecycled) {
auto Token = std::make_shared<int>();
std::weak_ptr<int> WeakToken = Token;
EXPECT_FALSE(Symbols.expired());
// Release old symbols.
I.build(generateNumSymbols(0, 0), emptyOccurrences());
EXPECT_TRUE(Symbols.expired());
SwapIndex S(make_unique<MemIndex>(SymbolSlab(), MemIndex::OccurrenceMap(),
std::move(Token)));
EXPECT_FALSE(WeakToken.expired()); // Current MemIndex keeps it alive.
S.reset(make_unique<MemIndex>()); // Now the MemIndex is destroyed.
EXPECT_TRUE(WeakToken.expired()); // So the token is too.
}
TEST(MemIndexTest, MemIndexDeduplicate) {
auto Symbols = generateNumSymbols(0, 10);
// Inject some duplicates and make sure we only match the same symbol once.
auto Sym = symbol("7");
Symbols->push_back(&Sym);
Symbols->push_back(&Sym);
Symbols->push_back(&Sym);
std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
symbol("2") /* duplicate */};
FuzzyFindRequest Req;
Req.Query = "7";
MemIndex I;
I.build(std::move(Symbols), emptyOccurrences());
auto Matches = match(I, Req);
EXPECT_EQ(Matches.size(), 1u);
Req.Query = "2";
MemIndex I(Symbols, MemIndex::OccurrenceMap());
EXPECT_THAT(match(I, Req), ElementsAre("2"));
}
TEST(MemIndexTest, MemIndexLimitedNumMatches) {
MemIndex I;
I.build(generateNumSymbols(0, 100), emptyOccurrences());
auto I = MemIndex::build(generateNumSymbols(0, 100), SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "5";
Req.MaxCandidateCount = 3;
bool Incomplete;
auto Matches = match(I, Req, &Incomplete);
auto Matches = match(*I, Req, &Incomplete);
EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
EXPECT_TRUE(Incomplete);
}
TEST(MemIndexTest, FuzzyMatch) {
MemIndex I;
I.build(
auto I = MemIndex::build(
generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
emptyOccurrences());
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "lol";
Req.MaxCandidateCount = 2;
EXPECT_THAT(match(I, Req),
EXPECT_THAT(match(*I, Req),
UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
}
TEST(MemIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
MemIndex I;
I.build(generateSymbols({"a::y1", "b::y2", "y3"}), emptyOccurrences());
auto I = MemIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "y";
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
}
TEST(MemIndexTest, MatchQualifiedNamesWithGlobalScope) {
MemIndex I;
I.build(generateSymbols({"a::y1", "b::y2", "y3"}), emptyOccurrences());
auto I = MemIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {""};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3"));
}
TEST(MemIndexTest, MatchQualifiedNamesWithOneScope) {
MemIndex I;
I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}),
emptyOccurrences());
auto I = MemIndex::build(
generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2"));
}
TEST(MemIndexTest, MatchQualifiedNamesWithMultipleScopes) {
MemIndex I;
I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}),
emptyOccurrences());
auto I = MemIndex::build(
generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::", "b::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
}
TEST(MemIndexTest, NoMatchNestedScopes) {
MemIndex I;
I.build(generateSymbols({"a::y1", "a::b::y2"}), emptyOccurrences());
auto I = MemIndex::build(generateSymbols({"a::y1", "a::b::y2"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "y";
Req.Scopes = {"a::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1"));
}
TEST(MemIndexTest, IgnoreCases) {
MemIndex I;
I.build(generateSymbols({"ns::ABC", "ns::abc"}), emptyOccurrences());
auto I = MemIndex::build(generateSymbols({"ns::ABC", "ns::abc"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Query = "AB";
Req.Scopes = {"ns::"};
EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
}
TEST(MemIndexTest, Lookup) {
MemIndex I;
I.build(generateSymbols({"ns::abc", "ns::xyz"}), emptyOccurrences());
EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
auto I = MemIndex::build(generateSymbols({"ns::abc", "ns::xyz"}),
SymbolOccurrenceSlab());
EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::abc", "ns::xyz"));
EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
UnorderedElementsAre("ns::xyz"));
EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
}
TEST(MergeIndexTest, Lookup) {
MemIndex I, J;
I.build(generateSymbols({"ns::A", "ns::B"}), emptyOccurrences());
J.build(generateSymbols({"ns::B", "ns::C"}), emptyOccurrences());
EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")),
UnorderedElementsAre("ns::A"));
EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")),
UnorderedElementsAre("ns::B"));
EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")),
UnorderedElementsAre("ns::C"));
EXPECT_THAT(
lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}),
UnorderedElementsAre("ns::A", "ns::B"));
EXPECT_THAT(
lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}),
UnorderedElementsAre("ns::A", "ns::C"));
EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")),
UnorderedElementsAre());
EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre());
auto I = MemIndex::build(generateSymbols({"ns::A", "ns::B"}),
SymbolOccurrenceSlab()),
J = MemIndex::build(generateSymbols({"ns::B", "ns::C"}),
SymbolOccurrenceSlab());
auto M = mergeIndex(I.get(), J.get());
EXPECT_THAT(lookup(*M, SymbolID("ns::A")), UnorderedElementsAre("ns::A"));
EXPECT_THAT(lookup(*M, SymbolID("ns::B")), UnorderedElementsAre("ns::B"));
EXPECT_THAT(lookup(*M, SymbolID("ns::C")), UnorderedElementsAre("ns::C"));
EXPECT_THAT(lookup(*M, {SymbolID("ns::A"), SymbolID("ns::B")}),
UnorderedElementsAre("ns::A", "ns::B"));
EXPECT_THAT(lookup(*M, {SymbolID("ns::A"), SymbolID("ns::C")}),
UnorderedElementsAre("ns::A", "ns::C"));
EXPECT_THAT(lookup(*M, SymbolID("ns::D")), UnorderedElementsAre());
EXPECT_THAT(lookup(*M, {}), UnorderedElementsAre());
}
TEST(MergeIndexTest, FuzzyFind) {
MemIndex I, J;
I.build(generateSymbols({"ns::A", "ns::B"}), emptyOccurrences());
J.build(generateSymbols({"ns::B", "ns::C"}), emptyOccurrences());
auto I = MemIndex::build(generateSymbols({"ns::A", "ns::B"}),
SymbolOccurrenceSlab()),
J = MemIndex::build(generateSymbols({"ns::B", "ns::C"}),
SymbolOccurrenceSlab());
FuzzyFindRequest Req;
Req.Scopes = {"ns::"};
EXPECT_THAT(match(*mergeIndex(&I, &J), Req),
EXPECT_THAT(match(*mergeIndex(I.get(), J.get()), Req),
UnorderedElementsAre("ns::A", "ns::B", "ns::C"));
}

View File

@ -26,30 +26,18 @@ Symbol symbol(llvm::StringRef QName) {
return Sym;
}
std::shared_ptr<std::vector<const Symbol *>>
generateSymbols(std::vector<std::string> QualifiedNames,
std::weak_ptr<SlabAndPointers> *WeakSymbols) {
SymbolSlab generateSymbols(std::vector<std::string> QualifiedNames) {
SymbolSlab::Builder Slab;
for (llvm::StringRef QName : QualifiedNames)
Slab.insert(symbol(QName));
auto Storage = std::make_shared<SlabAndPointers>();
Storage->Slab = std::move(Slab).build();
for (const auto &Sym : Storage->Slab)
Storage->Pointers.push_back(&Sym);
if (WeakSymbols)
*WeakSymbols = Storage;
auto *Pointers = &Storage->Pointers;
return {std::move(Storage), Pointers};
return std::move(Slab).build();
}
std::shared_ptr<std::vector<const Symbol *>>
generateNumSymbols(int Begin, int End,
std::weak_ptr<SlabAndPointers> *WeakSymbols) {
SymbolSlab generateNumSymbols(int Begin, int End) {
std::vector<std::string> Names;
for (int i = Begin; i <= End; i++)
Names.push_back(std::to_string(i));
return generateSymbols(Names, WeakSymbols);
return generateSymbols(Names);
}
std::string getQualifiedName(const Symbol &Sym) {

View File

@ -23,26 +23,11 @@ namespace clangd {
// Creates Symbol instance and sets SymbolID to given QualifiedName.
Symbol symbol(llvm::StringRef QName);
// Bundles symbol pointers with the actual symbol slab the pointers refer to in
// order to ensure that the slab isn't destroyed while it's used by and index.
struct SlabAndPointers {
SymbolSlab Slab;
std::vector<const Symbol *> Pointers;
};
// Create a slab of symbols with the given qualified names as IDs and names.
SymbolSlab generateSymbols(std::vector<std::string> QualifiedNames);
// Create a slab of symbols with the given qualified names as both IDs and
// names. The life time of the slab is managed by the returned shared pointer.
// If \p WeakSymbols is provided, it will be pointed to the managed object in
// the returned shared pointer.
std::shared_ptr<std::vector<const Symbol *>>
generateSymbols(std::vector<std::string> QualifiedNames,
std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
// to the `generateSymbols` above.
std::shared_ptr<std::vector<const Symbol *>>
generateNumSymbols(int Begin, int End,
std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
// Create a slab of symbols with IDs and names [Begin, End].
SymbolSlab generateNumSymbols(int Begin, int End);
// Returns fully-qualified name out of given symbol.
std::string getQualifiedName(const Symbol &Sym);