[clangd] Dex iterator printer shows query structure, not iterator state.

Summary:
This makes it suitable for logging (which immediately found a bug, to
be fixed in the next patch...)

Reviewers: ioeric

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52715

llvm-svn: 343580
This commit is contained in:
Sam McCall 2018-10-02 11:51:36 +00:00
parent 26a2536f8b
commit 7402836042
6 changed files with 61 additions and 41 deletions

View File

@ -16,6 +16,7 @@
#include "index/Index.h"
#include "index/dex/Iterator.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <queue>
@ -85,13 +86,13 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
// ProximityPaths. Boosting factor should depend on the distance to the
// Proximity Path: the closer processed path is, the higher boosting factor.
for (const auto &ParentURI : ParentURIs.keys()) {
const auto It =
InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
Token Tok(Token::Kind::ProximityURI, ParentURI);
const auto It = InvertedIndex.find(Tok);
if (It != InvertedIndex.end()) {
// FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
PathProximitySignals.SymbolURI = ParentURI;
BoostingIterators.push_back(
createBoost(It->second.iterator(), PathProximitySignals.evaluate()));
BoostingIterators.push_back(createBoost(It->second.iterator(&It->first),
PathProximitySignals.evaluate()));
}
}
return BoostingIterators;
@ -142,7 +143,6 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
llvm::function_ref<void(const Symbol &)> Callback) const {
assert(!StringRef(Req.Query).contains("::") &&
"There must be no :: in query.");
// FIXME: attach the query tree to the trace span.
trace::Span Tracer("Dex fuzzyFind");
FuzzyMatcher Filter(Req.Query);
bool More = false;
@ -156,7 +156,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
for (const auto &Trigram : TrigramTokens) {
const auto It = InvertedIndex.find(Trigram);
if (It != InvertedIndex.end())
TrigramIterators.push_back(It->second.iterator());
TrigramIterators.push_back(It->second.iterator(&It->first));
}
if (!TrigramIterators.empty())
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
@ -164,9 +164,10 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
// Generate scope tokens for search query.
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
for (const auto &Scope : Req.Scopes) {
const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
Token Tok(Token::Kind::Scope, Scope);
const auto It = InvertedIndex.find(Tok);
if (It != InvertedIndex.end())
ScopeIterators.push_back(It->second.iterator());
ScopeIterators.push_back(It->second.iterator(&It->first));
}
if (Req.AnyScope)
ScopeIterators.push_back(createBoost(createTrue(Symbols.size()),
@ -189,7 +190,8 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
if (Req.RestrictForCodeCompletion)
TopLevelChildren.push_back(
InvertedIndex.find(RestrictedForCodeCompletion)->second.iterator());
InvertedIndex.find(RestrictedForCodeCompletion)
->second.iterator(&RestrictedForCodeCompletion));
// Use TRUE iterator if both trigrams and scopes from the query are not
// present in the symbol index.
@ -203,6 +205,8 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
// when the requested number of items is small.
auto Root = Req.Limit ? createLimit(move(QueryIterator), *Req.Limit * 100)
: move(QueryIterator);
SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));
vlog("Dex query tree: {0}", *Root);
using IDAndScore = std::pair<DocID, float>;
std::vector<IDAndScore> IDAndScores = consume(*Root);

View File

@ -243,8 +243,7 @@ public:
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
OS << "(TRUE {" << Index << "} out of " << Size << ")";
return OS;
return OS << "true";
}
DocID Index = 0;
@ -273,8 +272,7 @@ public:
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
OS << "(BOOST " << Factor << ' ' << *Child << ')';
return OS;
return OS << "(* " << Factor << ' ' << *Child << ')';
}
std::unique_ptr<Iterator> Child;
@ -314,8 +312,7 @@ public:
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
return OS;
return OS << "(LIMIT " << Limit << " " << *Child << ')';
}
std::unique_ptr<Iterator> Child;

View File

@ -9,6 +9,7 @@
#include "PostingList.h"
#include "Iterator.h"
#include "Token.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
@ -23,8 +24,8 @@ namespace {
/// them on-the-fly when the contents of chunk are to be seen.
class ChunkIterator : public Iterator {
public:
explicit ChunkIterator(llvm::ArrayRef<Chunk> Chunks)
: Chunks(Chunks), CurrentChunk(Chunks.begin()) {
explicit ChunkIterator(const Token *Tok, llvm::ArrayRef<Chunk> Chunks)
: Tok(Tok), Chunks(Chunks), CurrentChunk(Chunks.begin()) {
if (!Chunks.empty()) {
DecompressedChunk = CurrentChunk->decompress();
CurrentID = DecompressedChunk.begin();
@ -71,15 +72,16 @@ public:
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
if (Tok != nullptr)
return OS << *Tok;
OS << '[';
if (CurrentChunk != Chunks.begin() ||
(CurrentID != DecompressedChunk.begin() && !DecompressedChunk.empty()))
OS << "... ";
OS << (reachedEnd() ? "END" : std::to_string(*CurrentID));
if (!reachedEnd() && CurrentID < DecompressedChunk.end() - 1)
OS << " ...";
OS << ']';
return OS;
const char *Sep = "";
for (const Chunk &C : Chunks)
for (const DocID Doc : C.decompress()) {
OS << Sep << Doc;
Sep = " ";
}
return OS << ']';
}
/// If the cursor is at the end of a chunk, place it at the start of the next
@ -110,6 +112,7 @@ private:
}
}
const Token *Tok;
llvm::ArrayRef<Chunk> Chunks;
/// Iterator over chunks.
/// If CurrentChunk is valid, then DecompressedChunk is
@ -217,8 +220,8 @@ llvm::SmallVector<DocID, Chunk::PayloadSize + 1> Chunk::decompress() const {
PostingList::PostingList(llvm::ArrayRef<DocID> Documents)
: Chunks(encodeStream(Documents)) {}
std::unique_ptr<Iterator> PostingList::iterator() const {
return llvm::make_unique<ChunkIterator>(Chunks);
std::unique_ptr<Iterator> PostingList::iterator(const Token *Tok) const {
return llvm::make_unique<ChunkIterator>(Tok, Chunks);
}
} // namespace dex

View File

@ -33,8 +33,7 @@
namespace clang {
namespace clangd {
namespace dex {
class Iterator;
struct Token;
/// NOTE: This is an implementation detail.
///
@ -64,7 +63,8 @@ public:
/// Constructs DocumentIterator over given posting list. DocumentIterator will
/// go through the chunks and decompress them on-the-fly when necessary.
std::unique_ptr<Iterator> iterator() const;
/// If given, Tok is only used for the string representation.
std::unique_ptr<Iterator> iterator(const Token *Tok = nullptr) const;
/// Returns in-memory size of external storage.
size_t bytes() const { return Chunks.capacity() * sizeof(Chunk); }

View File

@ -82,6 +82,20 @@ struct Token {
Kind TokenKind;
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
switch (T.TokenKind) {
case Kind::Trigram:
OS << "T=";
break;
case Kind::Scope:
OS << "S=";
break;
case Kind::ProximityURI:
OS << "U=";
break;
case Kind::Sentinel:
OS << "?=";
break;
}
return OS << T.Data;
}

View File

@ -226,18 +226,20 @@ TEST(DexIterators, QueryTree) {
}
TEST(DexIterators, StringRepresentation) {
const PostingList L0({4, 7, 8, 20, 42, 100});
const PostingList L1({1, 3, 5, 8, 9});
const PostingList L2({1, 5, 7, 9});
const PostingList L3({0, 5});
const PostingList L4({0, 1, 5});
const PostingList L1({1, 3, 5});
const PostingList L2({1, 7, 9});
EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4 ...]");
auto It = L0.iterator();
It->advanceTo(19);
EXPECT_EQ(llvm::to_string(*It), "[... 20 ...]");
It->advanceTo(9000);
EXPECT_EQ(llvm::to_string(*It), "[... END]");
// No token given, prints full posting list.
auto I1 = L1.iterator();
EXPECT_EQ(llvm::to_string(*I1), "[1 3 5]");
// Token given, uses token's string representation.
Token Tok(Token::Kind::Trigram, "L2");
auto I2 = L1.iterator(&Tok);
EXPECT_EQ(llvm::to_string(*I2), "T=L2");
auto Tree = createLimit(createAnd(move(I1), move(I2)), 10);
EXPECT_EQ(llvm::to_string(*Tree), "(LIMIT 10 (& [1 3 5] T=L2))");
}
TEST(DexIterators, Limit) {