forked from OSchip/llvm-project
[clangd] Dex iterator printer shows query structure, not iterator state.
Summary: This makes it suitable for logging (which immediately found a bug, to be fixed in the next patch...) Reviewers: ioeric Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D52715 llvm-svn: 343580
This commit is contained in:
parent
26a2536f8b
commit
7402836042
|
@ -16,6 +16,7 @@
|
|||
#include "index/Index.h"
|
||||
#include "index/dex/Iterator.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/Support/ScopedPrinter.h"
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
|
||||
|
@ -85,13 +86,13 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
|
|||
// ProximityPaths. Boosting factor should depend on the distance to the
|
||||
// Proximity Path: the closer processed path is, the higher boosting factor.
|
||||
for (const auto &ParentURI : ParentURIs.keys()) {
|
||||
const auto It =
|
||||
InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
|
||||
Token Tok(Token::Kind::ProximityURI, ParentURI);
|
||||
const auto It = InvertedIndex.find(Tok);
|
||||
if (It != InvertedIndex.end()) {
|
||||
// FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
|
||||
PathProximitySignals.SymbolURI = ParentURI;
|
||||
BoostingIterators.push_back(
|
||||
createBoost(It->second.iterator(), PathProximitySignals.evaluate()));
|
||||
BoostingIterators.push_back(createBoost(It->second.iterator(&It->first),
|
||||
PathProximitySignals.evaluate()));
|
||||
}
|
||||
}
|
||||
return BoostingIterators;
|
||||
|
@ -142,7 +143,6 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
|||
llvm::function_ref<void(const Symbol &)> Callback) const {
|
||||
assert(!StringRef(Req.Query).contains("::") &&
|
||||
"There must be no :: in query.");
|
||||
// FIXME: attach the query tree to the trace span.
|
||||
trace::Span Tracer("Dex fuzzyFind");
|
||||
FuzzyMatcher Filter(Req.Query);
|
||||
bool More = false;
|
||||
|
@ -156,7 +156,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
|||
for (const auto &Trigram : TrigramTokens) {
|
||||
const auto It = InvertedIndex.find(Trigram);
|
||||
if (It != InvertedIndex.end())
|
||||
TrigramIterators.push_back(It->second.iterator());
|
||||
TrigramIterators.push_back(It->second.iterator(&It->first));
|
||||
}
|
||||
if (!TrigramIterators.empty())
|
||||
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
|
||||
|
@ -164,9 +164,10 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
|||
// Generate scope tokens for search query.
|
||||
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
|
||||
for (const auto &Scope : Req.Scopes) {
|
||||
const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
|
||||
Token Tok(Token::Kind::Scope, Scope);
|
||||
const auto It = InvertedIndex.find(Tok);
|
||||
if (It != InvertedIndex.end())
|
||||
ScopeIterators.push_back(It->second.iterator());
|
||||
ScopeIterators.push_back(It->second.iterator(&It->first));
|
||||
}
|
||||
if (Req.AnyScope)
|
||||
ScopeIterators.push_back(createBoost(createTrue(Symbols.size()),
|
||||
|
@ -189,7 +190,8 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
|||
|
||||
if (Req.RestrictForCodeCompletion)
|
||||
TopLevelChildren.push_back(
|
||||
InvertedIndex.find(RestrictedForCodeCompletion)->second.iterator());
|
||||
InvertedIndex.find(RestrictedForCodeCompletion)
|
||||
->second.iterator(&RestrictedForCodeCompletion));
|
||||
|
||||
// Use TRUE iterator if both trigrams and scopes from the query are not
|
||||
// present in the symbol index.
|
||||
|
@ -203,6 +205,8 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
|||
// when the requested number of items is small.
|
||||
auto Root = Req.Limit ? createLimit(move(QueryIterator), *Req.Limit * 100)
|
||||
: move(QueryIterator);
|
||||
SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));
|
||||
vlog("Dex query tree: {0}", *Root);
|
||||
|
||||
using IDAndScore = std::pair<DocID, float>;
|
||||
std::vector<IDAndScore> IDAndScores = consume(*Root);
|
||||
|
|
|
@ -243,8 +243,7 @@ public:
|
|||
|
||||
private:
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << "(TRUE {" << Index << "} out of " << Size << ")";
|
||||
return OS;
|
||||
return OS << "true";
|
||||
}
|
||||
|
||||
DocID Index = 0;
|
||||
|
@ -273,8 +272,7 @@ public:
|
|||
|
||||
private:
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << "(BOOST " << Factor << ' ' << *Child << ')';
|
||||
return OS;
|
||||
return OS << "(* " << Factor << ' ' << *Child << ')';
|
||||
}
|
||||
|
||||
std::unique_ptr<Iterator> Child;
|
||||
|
@ -314,8 +312,7 @@ public:
|
|||
|
||||
private:
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
|
||||
return OS;
|
||||
return OS << "(LIMIT " << Limit << " " << *Child << ')';
|
||||
}
|
||||
|
||||
std::unique_ptr<Iterator> Child;
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "PostingList.h"
|
||||
#include "Iterator.h"
|
||||
#include "Token.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
|
@ -23,8 +24,8 @@ namespace {
|
|||
/// them on-the-fly when the contents of chunk are to be seen.
|
||||
class ChunkIterator : public Iterator {
|
||||
public:
|
||||
explicit ChunkIterator(llvm::ArrayRef<Chunk> Chunks)
|
||||
: Chunks(Chunks), CurrentChunk(Chunks.begin()) {
|
||||
explicit ChunkIterator(const Token *Tok, llvm::ArrayRef<Chunk> Chunks)
|
||||
: Tok(Tok), Chunks(Chunks), CurrentChunk(Chunks.begin()) {
|
||||
if (!Chunks.empty()) {
|
||||
DecompressedChunk = CurrentChunk->decompress();
|
||||
CurrentID = DecompressedChunk.begin();
|
||||
|
@ -71,15 +72,16 @@ public:
|
|||
|
||||
private:
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
if (Tok != nullptr)
|
||||
return OS << *Tok;
|
||||
OS << '[';
|
||||
if (CurrentChunk != Chunks.begin() ||
|
||||
(CurrentID != DecompressedChunk.begin() && !DecompressedChunk.empty()))
|
||||
OS << "... ";
|
||||
OS << (reachedEnd() ? "END" : std::to_string(*CurrentID));
|
||||
if (!reachedEnd() && CurrentID < DecompressedChunk.end() - 1)
|
||||
OS << " ...";
|
||||
OS << ']';
|
||||
return OS;
|
||||
const char *Sep = "";
|
||||
for (const Chunk &C : Chunks)
|
||||
for (const DocID Doc : C.decompress()) {
|
||||
OS << Sep << Doc;
|
||||
Sep = " ";
|
||||
}
|
||||
return OS << ']';
|
||||
}
|
||||
|
||||
/// If the cursor is at the end of a chunk, place it at the start of the next
|
||||
|
@ -110,6 +112,7 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
const Token *Tok;
|
||||
llvm::ArrayRef<Chunk> Chunks;
|
||||
/// Iterator over chunks.
|
||||
/// If CurrentChunk is valid, then DecompressedChunk is
|
||||
|
@ -217,8 +220,8 @@ llvm::SmallVector<DocID, Chunk::PayloadSize + 1> Chunk::decompress() const {
|
|||
PostingList::PostingList(llvm::ArrayRef<DocID> Documents)
|
||||
: Chunks(encodeStream(Documents)) {}
|
||||
|
||||
std::unique_ptr<Iterator> PostingList::iterator() const {
|
||||
return llvm::make_unique<ChunkIterator>(Chunks);
|
||||
std::unique_ptr<Iterator> PostingList::iterator(const Token *Tok) const {
|
||||
return llvm::make_unique<ChunkIterator>(Tok, Chunks);
|
||||
}
|
||||
|
||||
} // namespace dex
|
||||
|
|
|
@ -33,8 +33,7 @@
|
|||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace dex {
|
||||
|
||||
class Iterator;
|
||||
struct Token;
|
||||
|
||||
/// NOTE: This is an implementation detail.
|
||||
///
|
||||
|
@ -64,7 +63,8 @@ public:
|
|||
|
||||
/// Constructs DocumentIterator over given posting list. DocumentIterator will
|
||||
/// go through the chunks and decompress them on-the-fly when necessary.
|
||||
std::unique_ptr<Iterator> iterator() const;
|
||||
/// If given, Tok is only used for the string representation.
|
||||
std::unique_ptr<Iterator> iterator(const Token *Tok = nullptr) const;
|
||||
|
||||
/// Returns in-memory size of external storage.
|
||||
size_t bytes() const { return Chunks.capacity() * sizeof(Chunk); }
|
||||
|
|
|
@ -82,6 +82,20 @@ struct Token {
|
|||
Kind TokenKind;
|
||||
|
||||
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
|
||||
switch (T.TokenKind) {
|
||||
case Kind::Trigram:
|
||||
OS << "T=";
|
||||
break;
|
||||
case Kind::Scope:
|
||||
OS << "S=";
|
||||
break;
|
||||
case Kind::ProximityURI:
|
||||
OS << "U=";
|
||||
break;
|
||||
case Kind::Sentinel:
|
||||
OS << "?=";
|
||||
break;
|
||||
}
|
||||
return OS << T.Data;
|
||||
}
|
||||
|
||||
|
|
|
@ -226,18 +226,20 @@ TEST(DexIterators, QueryTree) {
|
|||
}
|
||||
|
||||
TEST(DexIterators, StringRepresentation) {
|
||||
const PostingList L0({4, 7, 8, 20, 42, 100});
|
||||
const PostingList L1({1, 3, 5, 8, 9});
|
||||
const PostingList L2({1, 5, 7, 9});
|
||||
const PostingList L3({0, 5});
|
||||
const PostingList L4({0, 1, 5});
|
||||
const PostingList L1({1, 3, 5});
|
||||
const PostingList L2({1, 7, 9});
|
||||
|
||||
EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4 ...]");
|
||||
auto It = L0.iterator();
|
||||
It->advanceTo(19);
|
||||
EXPECT_EQ(llvm::to_string(*It), "[... 20 ...]");
|
||||
It->advanceTo(9000);
|
||||
EXPECT_EQ(llvm::to_string(*It), "[... END]");
|
||||
// No token given, prints full posting list.
|
||||
auto I1 = L1.iterator();
|
||||
EXPECT_EQ(llvm::to_string(*I1), "[1 3 5]");
|
||||
|
||||
// Token given, uses token's string representation.
|
||||
Token Tok(Token::Kind::Trigram, "L2");
|
||||
auto I2 = L1.iterator(&Tok);
|
||||
EXPECT_EQ(llvm::to_string(*I2), "T=L2");
|
||||
|
||||
auto Tree = createLimit(createAnd(move(I1), move(I2)), 10);
|
||||
EXPECT_EQ(llvm::to_string(*Tree), "(LIMIT 10 (& [1 3 5] T=L2))");
|
||||
}
|
||||
|
||||
TEST(DexIterators, Limit) {
|
||||
|
|
Loading…
Reference in New Issue