forked from OSchip/llvm-project
[clangd] Proof-of-concept query iterators for Dex symbol index
This patch introduces three essential types of query iterators: `DocumentIterator`, `AndIterator`, `OrIterator`. It provides a convenient API for query tree generation and serves as a building block for the next generation symbol index - Dex. Currently, many optimizations are missed to improve code readability and to serve as the reference implementation. Potential improvements are briefly mentioned in `FIXME`s and will be addressed in the following patches. Dex RFC in the mailing list: http://lists.llvm.org/pipermail/clangd-dev/2018-July/000022.html Iterators, their applications and potential extensions are explained in detail in the design proposal: https://docs.google.com/document/d/1C-A6PGT6TynyaX4PXyExNMiGmJ2jL1UwV91Kyx11gOI/edit#heading=h.903u1zon9nkj Reviewers: ioeric, sammccall, ilya-biryukov Subscribers: cfe-commits, klimek, jfb, mgrang, mgorny, MaskRay, jkorous, arphaman Differential Revision: https://reviews.llvm.org/D49546 llvm-svn: 338017
This commit is contained in:
parent
1387159b93
commit
bea258d3d7
|
@ -43,6 +43,7 @@ add_clang_library(clangDaemon
|
|||
index/SymbolCollector.cpp
|
||||
index/SymbolYAML.cpp
|
||||
|
||||
index/dex/Iterator.cpp
|
||||
index/dex/Trigram.cpp
|
||||
|
||||
LINK_LIBS
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Iterator.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <numeric>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace dex {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Implements Iterator over a PostingList. DocumentIterator is the most basic
|
||||
/// iterator: it doesn't have any children (hence it is the leaf of iterator
|
||||
/// tree) and is simply a wrapper around PostingList::const_iterator.
|
||||
class DocumentIterator : public Iterator {
|
||||
public:
|
||||
DocumentIterator(PostingListRef Documents)
|
||||
: Documents(Documents), Index(std::begin(Documents)) {}
|
||||
|
||||
bool reachedEnd() const override { return Index == std::end(Documents); }
|
||||
|
||||
/// Advances cursor to the next item.
|
||||
void advance() override {
|
||||
assert(!reachedEnd() && "DocumentIterator can't advance at the end.");
|
||||
++Index;
|
||||
}
|
||||
|
||||
/// Applies binary search to advance cursor to the next item with DocID equal
|
||||
/// or higher than the given one.
|
||||
void advanceTo(DocID ID) override {
|
||||
assert(!reachedEnd() && "DocumentIterator can't advance at the end.");
|
||||
Index = std::lower_bound(Index, std::end(Documents), ID);
|
||||
}
|
||||
|
||||
DocID peek() const override {
|
||||
assert(!reachedEnd() && "DocumentIterator can't call peek() at the end.");
|
||||
return *Index;
|
||||
}
|
||||
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << '[';
|
||||
auto Separator = "";
|
||||
for (const auto &ID : Documents) {
|
||||
OS << Separator << ID;
|
||||
Separator = ", ";
|
||||
}
|
||||
OS << ']';
|
||||
return OS;
|
||||
}
|
||||
|
||||
private:
|
||||
PostingListRef Documents;
|
||||
PostingListRef::const_iterator Index;
|
||||
};
|
||||
|
||||
/// Implements Iterator over the intersection of other iterators.
|
||||
///
|
||||
/// AndIterator iterates through common items among all children. It becomes
|
||||
/// exhausted as soon as any child becomes exhausted. After each mutation, the
|
||||
/// iterator restores the invariant: all children must point to the same item.
|
||||
class AndIterator : public Iterator {
|
||||
public:
|
||||
AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
|
||||
: Children(std::move(AllChildren)) {
|
||||
assert(!Children.empty() && "AndIterator should have at least one child.");
|
||||
// Establish invariants.
|
||||
sync();
|
||||
}
|
||||
|
||||
bool reachedEnd() const override { return ReachedEnd; }
|
||||
|
||||
/// Advances all children to the next common item.
|
||||
void advance() override {
|
||||
assert(!reachedEnd() && "AndIterator can't call advance() at the end.");
|
||||
Children.front()->advance();
|
||||
sync();
|
||||
}
|
||||
|
||||
/// Advances all children to the next common item with DocumentID >= ID.
|
||||
void advanceTo(DocID ID) override {
|
||||
assert(!reachedEnd() && "AndIterator can't call advanceTo() at the end.");
|
||||
Children.front()->advanceTo(ID);
|
||||
sync();
|
||||
}
|
||||
|
||||
DocID peek() const override { return Children.front()->peek(); }
|
||||
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << "(& ";
|
||||
auto Separator = "";
|
||||
for (const auto &Child : Children) {
|
||||
OS << Separator << *Child;
|
||||
Separator = " ";
|
||||
}
|
||||
OS << ')';
|
||||
return OS;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Restores class invariants: each child will point to the same element after
|
||||
/// sync.
|
||||
void sync() {
|
||||
ReachedEnd |= Children.front()->reachedEnd();
|
||||
if (ReachedEnd)
|
||||
return;
|
||||
auto SyncID = Children.front()->peek();
|
||||
// Indicates whether any child needs to be advanced to new SyncID.
|
||||
bool NeedsAdvance = false;
|
||||
do {
|
||||
NeedsAdvance = false;
|
||||
for (auto &Child : Children) {
|
||||
Child->advanceTo(SyncID);
|
||||
ReachedEnd |= Child->reachedEnd();
|
||||
// If any child reaches end And iterator can not match any other items.
|
||||
// In this case, just terminate the process.
|
||||
if (ReachedEnd)
|
||||
return;
|
||||
// If any child goes beyond given ID (i.e. ID is not the common item),
|
||||
// all children should be advanced to the next common item.
|
||||
// FIXME(kbobyrev): This is not a very optimized version; after costs
|
||||
// are introduced, cycle should break whenever ID exceeds current one
|
||||
// and cheapest children should be advanced over again.
|
||||
if (Child->peek() > SyncID) {
|
||||
SyncID = Child->peek();
|
||||
NeedsAdvance = true;
|
||||
}
|
||||
}
|
||||
} while (NeedsAdvance);
|
||||
}
|
||||
|
||||
/// AndIterator owns its children and ensures that all of them point to the
|
||||
/// same element. As soon as one child gets exhausted, AndIterator can no
|
||||
/// longer advance and has reached its end.
|
||||
std::vector<std::unique_ptr<Iterator>> Children;
|
||||
/// Indicates whether any child is exhausted. It is cheaper to maintain and
|
||||
/// update the field, rather than traversing the whole subtree in each
|
||||
/// reachedEnd() call.
|
||||
bool ReachedEnd = false;
|
||||
};
|
||||
|
||||
/// Implements Iterator over the union of other iterators.
|
||||
///
|
||||
/// OrIterator iterates through all items which can be pointed to by at least
|
||||
/// one child. To preserve the sorted order, this iterator always advances the
|
||||
/// child with smallest Child->peek() value. OrIterator becomes exhausted as
|
||||
/// soon as all of its children are exhausted.
|
||||
class OrIterator : public Iterator {
|
||||
public:
|
||||
OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
|
||||
: Children(std::move(AllChildren)) {
|
||||
assert(Children.size() > 0 && "Or Iterator must have at least one child.");
|
||||
}
|
||||
|
||||
/// Returns true if all children are exhausted.
|
||||
bool reachedEnd() const override {
|
||||
return std::all_of(begin(Children), end(Children),
|
||||
[](const std::unique_ptr<Iterator> &Child) {
|
||||
return Child->reachedEnd();
|
||||
});
|
||||
}
|
||||
|
||||
/// Moves each child pointing to the smallest DocID to the next item.
|
||||
void advance() override {
|
||||
assert(!reachedEnd() &&
|
||||
"OrIterator must have at least one child to advance().");
|
||||
const auto SmallestID = peek();
|
||||
for (const auto &Child : Children)
|
||||
if (!Child->reachedEnd() && Child->peek() == SmallestID)
|
||||
Child->advance();
|
||||
}
|
||||
|
||||
/// Advances each child to the next existing element with DocumentID >= ID.
|
||||
void advanceTo(DocID ID) override {
|
||||
assert(!reachedEnd() && "Can't advance iterator after it reached the end.");
|
||||
for (const auto &Child : Children)
|
||||
if (!Child->reachedEnd())
|
||||
Child->advanceTo(ID);
|
||||
}
|
||||
|
||||
/// Returns the element under cursor of the child with smallest Child->peek()
|
||||
/// value.
|
||||
DocID peek() const override {
|
||||
assert(!reachedEnd() &&
|
||||
"OrIterator must have at least one child to peek().");
|
||||
DocID Result = std::numeric_limits<DocID>::max();
|
||||
|
||||
for (const auto &Child : Children)
|
||||
if (!Child->reachedEnd())
|
||||
Result = std::min(Result, Child->peek());
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
|
||||
OS << "(| ";
|
||||
auto Separator = "";
|
||||
for (const auto &Child : Children) {
|
||||
OS << Separator << *Child;
|
||||
Separator = " ";
|
||||
}
|
||||
OS << ')';
|
||||
return OS;
|
||||
}
|
||||
|
||||
private:
|
||||
// FIXME(kbobyrev): Would storing Children in min-heap be faster?
|
||||
std::vector<std::unique_ptr<Iterator>> Children;
|
||||
};
|
||||
|
||||
} // end namespace
|
||||
|
||||
std::vector<DocID> consume(Iterator &It) {
|
||||
std::vector<DocID> Result;
|
||||
for (; !It.reachedEnd(); It.advance())
|
||||
Result.push_back(It.peek());
|
||||
return Result;
|
||||
}
|
||||
|
||||
std::unique_ptr<Iterator> create(PostingListRef Documents) {
|
||||
return llvm::make_unique<DocumentIterator>(Documents);
|
||||
}
|
||||
|
||||
std::unique_ptr<Iterator>
|
||||
createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
|
||||
return llvm::make_unique<AndIterator>(move(Children));
|
||||
}
|
||||
|
||||
std::unique_ptr<Iterator>
|
||||
createOr(std::vector<std::unique_ptr<Iterator>> Children) {
|
||||
return llvm::make_unique<OrIterator>(move(Children));
|
||||
}
|
||||
|
||||
} // namespace dex
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
|
@ -0,0 +1,143 @@
|
|||
//===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Symbol index queries consist of specific requirements for the requested
|
||||
// symbol, such as high fuzzy matching score, scope, type etc. The lists of all
|
||||
// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope)
|
||||
// are expressed in a form of Search Tokens which are stored in the inverted
|
||||
// index. Inverted index maps these tokens to the posting lists - sorted ( by
|
||||
// symbol quality) sequences of symbol IDs matching the token, e.g. scope token
|
||||
// "clangd::clangd::" is mapped to the list of IDs of all symbols which are
|
||||
// declared in this namespace. Search queries are build from a set of
|
||||
// requirements which can be combined with each other forming the query trees.
|
||||
// The leafs of such trees are posting lists, and the nodes are operations on
|
||||
// these posting lists, e.g. intersection or union. Efficient processing of
|
||||
// these multi-level queries is handled by Iterators. Iterators advance through
|
||||
// all leaf posting lists producing the result of search query, which preserves
|
||||
// the sorted order of IDs. Having the resulting IDs sorted is important,
|
||||
// because it allows receiving a certain number of the most valuable items (e.g.
|
||||
// symbols with highest quality which was the sorting key in the first place)
|
||||
// without processing all items with requested properties (this might not be
|
||||
// computationally effective if search request is not very restrictive).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace dex {
|
||||
|
||||
/// Symbol position in the list of all index symbols sorted by a pre-computed
|
||||
/// symbol quality.
|
||||
using DocID = uint32_t;
|
||||
/// Contains sorted sequence of DocIDs all of which belong to symbols matching
|
||||
/// certain criteria, i.e. containing a Search Token. PostingLists are values
|
||||
/// for the inverted index.
|
||||
using PostingList = std::vector<DocID>;
|
||||
/// Immutable reference to PostingList object.
|
||||
using PostingListRef = llvm::ArrayRef<DocID>;
|
||||
|
||||
/// Iterator is the interface for Query Tree node. The simplest type of Iterator
|
||||
/// is DocumentIterator which is simply a wrapper around PostingList iterator
|
||||
/// and serves as the Query Tree leaf. More sophisticated examples of iterators
|
||||
/// can manage intersection, union of the elements produced by other iterators
|
||||
/// (their children) to form a multi-level Query Tree. The interface is designed
|
||||
/// to be extensible in order to support multiple types of iterators.
|
||||
class Iterator {
|
||||
// FIXME(kbobyrev): Provide callback for matched documents.
|
||||
// FIXME(kbobyrev): Implement new types of iterators: Label, Boost (with
|
||||
// scoring), Limit.
|
||||
// FIXME(kbobyrev): Implement iterator cost, an estimate of advance() calls
|
||||
// before iterator exhaustion.
|
||||
public:
|
||||
/// Returns true if all valid DocIDs were processed and hence the iterator is
|
||||
/// exhausted.
|
||||
virtual bool reachedEnd() const = 0;
|
||||
/// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted
|
||||
/// and proceeds to the END.
|
||||
///
|
||||
/// Note: reachedEnd() must be false.
|
||||
virtual void advance() = 0;
|
||||
/// Moves to the first valid DocID which is equal or higher than given ID. If
|
||||
/// it doesn't exist, the iterator is exhausted and proceeds to the END.
|
||||
///
|
||||
/// Note: reachedEnd() must be false.
|
||||
virtual void advanceTo(DocID ID) = 0;
|
||||
/// Returns the current element this iterator points to.
|
||||
///
|
||||
/// Note: reachedEnd() must be false.
|
||||
virtual DocID peek() const = 0;
|
||||
|
||||
virtual ~Iterator() {}
|
||||
|
||||
/// Prints a convenient human-readable iterator representation by recursively
|
||||
/// dumping iterators in the following format:
|
||||
///
|
||||
/// (Type Child1 Child2 ...)
|
||||
///
|
||||
/// Where Type is the iterator type representation: "&" for And, "|" for Or,
|
||||
/// ChildN is N-th iterator child. Raw iterators over PostingList are
|
||||
/// represented as "[ID1, ID2, ...]" where IDN is N-th PostingList entry.
|
||||
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
||||
const Iterator &Iterator) {
|
||||
return Iterator.dump(OS);
|
||||
}
|
||||
|
||||
private:
|
||||
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
|
||||
};
|
||||
|
||||
/// Exhausts given iterator and returns all processed DocIDs. The result
|
||||
/// contains sorted DocumentIDs.
|
||||
std::vector<DocID> consume(Iterator &It);
|
||||
|
||||
/// Returns a document iterator over given PostingList.
|
||||
std::unique_ptr<Iterator> create(PostingListRef Documents);
|
||||
|
||||
/// Returns AND Iterator which performs the intersection of the PostingLists of
|
||||
/// its children.
|
||||
std::unique_ptr<Iterator>
|
||||
createAnd(std::vector<std::unique_ptr<Iterator>> Children);
|
||||
|
||||
/// Returns OR Iterator which performs the union of the PostingLists of its
|
||||
/// children.
|
||||
std::unique_ptr<Iterator>
|
||||
createOr(std::vector<std::unique_ptr<Iterator>> Children);
|
||||
|
||||
/// This allows createAnd({create(...), create(...)}) syntax.
|
||||
template <size_t Size>
|
||||
std::unique_ptr<Iterator>
|
||||
createAnd(std::unique_ptr<Iterator>(&&Children)[Size]) {
|
||||
std::vector<std::unique_ptr<Iterator>> Elements;
|
||||
std::move(begin(Children), end(Children), std::back_inserter(Elements));
|
||||
return createAnd(move(Elements));
|
||||
}
|
||||
|
||||
/// This allows createOr({create(...), create(...)}) syntax.
|
||||
template <size_t Size>
|
||||
std::unique_ptr<Iterator>
|
||||
createOr(std::unique_ptr<Iterator>(&&Children)[Size]) {
|
||||
std::vector<std::unique_ptr<Iterator>> Elements;
|
||||
std::move(begin(Children), end(Children), std::back_inserter(Elements));
|
||||
return createOr(move(Elements));
|
||||
}
|
||||
|
||||
} // namespace dex
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
|
@ -7,11 +7,13 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "index/dex/Iterator.h"
|
||||
#include "index/dex/Token.h"
|
||||
#include "index/dex/Trigram.h"
|
||||
#include "llvm/Support/ScopedPrinter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -19,6 +21,215 @@ namespace clang {
|
|||
namespace clangd {
|
||||
namespace dex {
|
||||
|
||||
using ::testing::ElementsAre;
|
||||
|
||||
TEST(DexIndexIterators, DocumentIterator) {
|
||||
auto DocIterator = create({4, 7, 8, 20, 42, 100});
|
||||
|
||||
EXPECT_EQ(DocIterator->peek(), 4U);
|
||||
EXPECT_EQ(DocIterator->reachedEnd(), false);
|
||||
|
||||
DocIterator->advance();
|
||||
EXPECT_EQ(DocIterator->peek(), 7U);
|
||||
EXPECT_EQ(DocIterator->reachedEnd(), false);
|
||||
|
||||
DocIterator->advanceTo(20);
|
||||
EXPECT_EQ(DocIterator->peek(), 20U);
|
||||
EXPECT_EQ(DocIterator->reachedEnd(), false);
|
||||
|
||||
DocIterator->advanceTo(65);
|
||||
EXPECT_EQ(DocIterator->peek(), 100U);
|
||||
EXPECT_EQ(DocIterator->reachedEnd(), false);
|
||||
|
||||
DocIterator->advanceTo(420);
|
||||
EXPECT_EQ(DocIterator->reachedEnd(), true);
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, AndWithEmpty) {
|
||||
const PostingList L0;
|
||||
const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
|
||||
auto AndEmpty = createAnd({create(L0)});
|
||||
EXPECT_EQ(AndEmpty->reachedEnd(), true);
|
||||
|
||||
auto AndWithEmpty = createAnd({create(L0), create(L1)});
|
||||
EXPECT_EQ(AndWithEmpty->reachedEnd(), true);
|
||||
|
||||
EXPECT_THAT(consume(*AndWithEmpty), ElementsAre());
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, AndTwoLists) {
|
||||
const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
|
||||
|
||||
auto And = createAnd({create(L1), create(L0)});
|
||||
|
||||
EXPECT_EQ(And->reachedEnd(), false);
|
||||
EXPECT_THAT(consume(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
|
||||
|
||||
And = createAnd({create(L0), create(L1)});
|
||||
|
||||
And->advanceTo(0);
|
||||
EXPECT_EQ(And->peek(), 0U);
|
||||
And->advanceTo(5);
|
||||
EXPECT_EQ(And->peek(), 7U);
|
||||
And->advanceTo(10);
|
||||
EXPECT_EQ(And->peek(), 10U);
|
||||
And->advanceTo(42);
|
||||
EXPECT_EQ(And->peek(), 320U);
|
||||
And->advanceTo(8999);
|
||||
EXPECT_EQ(And->peek(), 9000U);
|
||||
And->advanceTo(9001);
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, AndThreeLists) {
|
||||
const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
|
||||
const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
|
||||
|
||||
auto And = createAnd({create(L0), create(L1), create(L2)});
|
||||
EXPECT_EQ(And->peek(), 7U);
|
||||
And->advanceTo(300);
|
||||
EXPECT_EQ(And->peek(), 320U);
|
||||
And->advanceTo(100000);
|
||||
|
||||
EXPECT_EQ(And->reachedEnd(), true);
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, OrWithEmpty) {
|
||||
const PostingList L0;
|
||||
const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
|
||||
auto OrEmpty = createOr({create(L0)});
|
||||
EXPECT_EQ(OrEmpty->reachedEnd(), true);
|
||||
|
||||
auto OrWithEmpty = createOr({create(L0), create(L1)});
|
||||
EXPECT_EQ(OrWithEmpty->reachedEnd(), false);
|
||||
|
||||
EXPECT_THAT(consume(*OrWithEmpty),
|
||||
ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, OrTwoLists) {
|
||||
const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
|
||||
|
||||
auto Or = createOr({create(L0), create(L1)});
|
||||
|
||||
EXPECT_EQ(Or->reachedEnd(), false);
|
||||
EXPECT_EQ(Or->peek(), 0U);
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 4U);
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 5U);
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 7U);
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 10U);
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 30U);
|
||||
Or->advanceTo(42);
|
||||
EXPECT_EQ(Or->peek(), 42U);
|
||||
Or->advanceTo(300);
|
||||
EXPECT_EQ(Or->peek(), 320U);
|
||||
Or->advanceTo(9000);
|
||||
EXPECT_EQ(Or->peek(), 9000U);
|
||||
Or->advanceTo(9001);
|
||||
EXPECT_EQ(Or->reachedEnd(), true);
|
||||
|
||||
Or = createOr({create(L0), create(L1)});
|
||||
|
||||
EXPECT_THAT(consume(*Or),
|
||||
ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, OrThreeLists) {
|
||||
const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
|
||||
const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
|
||||
const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
|
||||
|
||||
auto Or = createOr({create(L0), create(L1), create(L2)});
|
||||
|
||||
EXPECT_EQ(Or->reachedEnd(), false);
|
||||
EXPECT_EQ(Or->peek(), 0U);
|
||||
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 1U);
|
||||
|
||||
Or->advance();
|
||||
EXPECT_EQ(Or->peek(), 4U);
|
||||
|
||||
Or->advanceTo(7);
|
||||
|
||||
Or->advanceTo(59);
|
||||
EXPECT_EQ(Or->peek(), 60U);
|
||||
|
||||
Or->advanceTo(9001);
|
||||
EXPECT_EQ(Or->reachedEnd(), true);
|
||||
}
|
||||
|
||||
// FIXME(kbobyrev): The testcase below is similar to what is expected in real
|
||||
// queries. It should be updated once new iterators (such as boosting, limiting,
|
||||
// etc iterators) appear. However, it is not exhaustive and it would be
|
||||
// beneficial to implement automatic generation of query trees for more
|
||||
// comprehensive testing.
|
||||
TEST(DexIndexIterators, QueryTree) {
|
||||
// An example of more complicated query
|
||||
//
|
||||
// +-----------------+
|
||||
// |And Iterator:1, 5|
|
||||
// +--------+--------+
|
||||
// |
|
||||
// |
|
||||
// +------------------------------------+
|
||||
// | |
|
||||
// | |
|
||||
// +----------v----------+ +----------v---------+
|
||||
// |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 5|
|
||||
// +----------+----------+ +----------+---------+
|
||||
// | |
|
||||
// +------+-----+ +---------+-----------+
|
||||
// | | | | |
|
||||
// +-------v-----+ +----v-----+ +--v--+ +-V--+ +---v---+
|
||||
// |1, 3, 5, 8, 9| |1, 5, 7, 9| |Empty| |0, 5| |0, 1, 5|
|
||||
// +-------------+ +----------+ +-----+ +----+ +-------+
|
||||
|
||||
// Root of the query tree: [1, 5]
|
||||
auto Root = createAnd({
|
||||
// Lower And Iterator: [1, 5, 9]
|
||||
createAnd({create({1, 3, 5, 8, 9}), create({1, 5, 7, 9})}),
|
||||
// Lower Or Iterator: [0, 1, 5]
|
||||
createOr({create({0, 5}), create({0, 1, 5}), create({})}),
|
||||
});
|
||||
|
||||
EXPECT_EQ(Root->reachedEnd(), false);
|
||||
EXPECT_EQ(Root->peek(), 1U);
|
||||
Root->advanceTo(0);
|
||||
// Advance multiple times. Shouldn't do anything.
|
||||
Root->advanceTo(1);
|
||||
Root->advanceTo(0);
|
||||
EXPECT_EQ(Root->peek(), 1U);
|
||||
Root->advance();
|
||||
EXPECT_EQ(Root->peek(), 5U);
|
||||
Root->advanceTo(5);
|
||||
EXPECT_EQ(Root->peek(), 5U);
|
||||
Root->advanceTo(9000);
|
||||
EXPECT_EQ(Root->reachedEnd(), true);
|
||||
}
|
||||
|
||||
TEST(DexIndexIterators, StringRepresentation) {
|
||||
EXPECT_EQ(llvm::to_string(*(create({4, 7, 8, 20, 42, 100}))),
|
||||
"[4, 7, 8, 20, 42, 100]");
|
||||
|
||||
auto Nested = createAnd({
|
||||
createAnd({create({1, 3, 5, 8, 9}), create({1, 5, 7, 9})}),
|
||||
createOr({create({0, 5}), create({0, 1, 5}), create({})}),
|
||||
});
|
||||
|
||||
EXPECT_EQ(llvm::to_string(*Nested),
|
||||
"(& (& [1, 3, 5, 8, 9] [1, 5, 7, 9]) (| [0, 5] [0, 1, 5] []))");
|
||||
}
|
||||
|
||||
testing::Matcher<std::vector<Token>>
|
||||
trigramsAre(std::initializer_list<std::string> Trigrams) {
|
||||
std::vector<Token> Tokens;
|
||||
|
|
Loading…
Reference in New Issue