forked from OSchip/llvm-project
Reland r343589 "[clangd] Dex: add Corpus factory for iterators, rename, fold constant. NFC"
This reverts commit r343610. llvm-svn: 343622
This commit is contained in:
parent
42425ccf50
commit
a659d779f8
|
@ -56,7 +56,8 @@ std::vector<Token> generateSearchTokens(const Symbol &Sym) {
|
||||||
std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
|
std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
|
||||||
llvm::ArrayRef<std::string> ProximityPaths,
|
llvm::ArrayRef<std::string> ProximityPaths,
|
||||||
llvm::ArrayRef<std::string> URISchemes,
|
llvm::ArrayRef<std::string> URISchemes,
|
||||||
const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
|
const llvm::DenseMap<Token, PostingList> &InvertedIndex,
|
||||||
|
const Corpus &Corpus) {
|
||||||
std::vector<std::unique_ptr<Iterator>> BoostingIterators;
|
std::vector<std::unique_ptr<Iterator>> BoostingIterators;
|
||||||
// Deduplicate parent URIs extracted from the ProximityPaths.
|
// Deduplicate parent URIs extracted from the ProximityPaths.
|
||||||
llvm::StringSet<> ParentURIs;
|
llvm::StringSet<> ParentURIs;
|
||||||
|
@ -91,8 +92,8 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
|
||||||
if (It != InvertedIndex.end()) {
|
if (It != InvertedIndex.end()) {
|
||||||
// FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
|
// FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
|
||||||
PathProximitySignals.SymbolURI = ParentURI;
|
PathProximitySignals.SymbolURI = ParentURI;
|
||||||
BoostingIterators.push_back(createBoost(It->second.iterator(&It->first),
|
BoostingIterators.push_back(Corpus.boost(
|
||||||
PathProximitySignals.evaluate()));
|
It->second.iterator(&It->first), PathProximitySignals.evaluate()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return BoostingIterators;
|
return BoostingIterators;
|
||||||
|
@ -101,6 +102,7 @@ std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void Dex::buildIndex() {
|
void Dex::buildIndex() {
|
||||||
|
this->Corpus = dex::Corpus(Symbols.size());
|
||||||
std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
|
std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
|
||||||
|
|
||||||
for (size_t I = 0; I < Symbols.size(); ++I) {
|
for (size_t I = 0; I < Symbols.size(); ++I) {
|
||||||
|
@ -159,7 +161,7 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
||||||
TrigramIterators.push_back(It->second.iterator(&It->first));
|
TrigramIterators.push_back(It->second.iterator(&It->first));
|
||||||
}
|
}
|
||||||
if (!TrigramIterators.empty())
|
if (!TrigramIterators.empty())
|
||||||
TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
|
TopLevelChildren.push_back(Corpus.intersect(move(TrigramIterators)));
|
||||||
|
|
||||||
// Generate scope tokens for search query.
|
// Generate scope tokens for search query.
|
||||||
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
|
std::vector<std::unique_ptr<Iterator>> ScopeIterators;
|
||||||
|
@ -170,22 +172,22 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
||||||
ScopeIterators.push_back(It->second.iterator(&It->first));
|
ScopeIterators.push_back(It->second.iterator(&It->first));
|
||||||
}
|
}
|
||||||
if (Req.AnyScope)
|
if (Req.AnyScope)
|
||||||
ScopeIterators.push_back(createBoost(createTrue(Symbols.size()),
|
ScopeIterators.push_back(
|
||||||
ScopeIterators.empty() ? 1.0 : 0.2));
|
Corpus.boost(Corpus.all(), ScopeIterators.empty() ? 1.0 : 0.2));
|
||||||
|
|
||||||
// Add OR iterator for scopes if there are any Scope Iterators.
|
// Add OR iterator for scopes if there are any Scope Iterators.
|
||||||
if (!ScopeIterators.empty())
|
if (!ScopeIterators.empty())
|
||||||
TopLevelChildren.push_back(createOr(move(ScopeIterators)));
|
TopLevelChildren.push_back(Corpus.unionOf(move(ScopeIterators)));
|
||||||
|
|
||||||
// Add proximity paths boosting.
|
// Add proximity paths boosting.
|
||||||
auto BoostingIterators = createFileProximityIterators(
|
auto BoostingIterators = createFileProximityIterators(
|
||||||
Req.ProximityPaths, URISchemes, InvertedIndex);
|
Req.ProximityPaths, URISchemes, InvertedIndex, Corpus);
|
||||||
// Boosting iterators do not actually filter symbols. In order to preserve
|
// Boosting iterators do not actually filter symbols. In order to preserve
|
||||||
// the validity of resulting query, TRUE iterator should be added along
|
// the validity of resulting query, TRUE iterator should be added along
|
||||||
// BOOSTs.
|
// BOOSTs.
|
||||||
if (!BoostingIterators.empty()) {
|
if (!BoostingIterators.empty()) {
|
||||||
BoostingIterators.push_back(createTrue(Symbols.size()));
|
BoostingIterators.push_back(Corpus.all());
|
||||||
TopLevelChildren.push_back(createOr(move(BoostingIterators)));
|
TopLevelChildren.push_back(Corpus.unionOf(move(BoostingIterators)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Req.RestrictForCodeCompletion)
|
if (Req.RestrictForCodeCompletion)
|
||||||
|
@ -196,14 +198,14 @@ bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
|
||||||
// Use TRUE iterator if both trigrams and scopes from the query are not
|
// Use TRUE iterator if both trigrams and scopes from the query are not
|
||||||
// present in the symbol index.
|
// present in the symbol index.
|
||||||
auto QueryIterator = TopLevelChildren.empty()
|
auto QueryIterator = TopLevelChildren.empty()
|
||||||
? createTrue(Symbols.size())
|
? Corpus.all()
|
||||||
: createAnd(move(TopLevelChildren));
|
: Corpus.intersect(move(TopLevelChildren));
|
||||||
// Retrieve more items than it was requested: some of the items with high
|
// Retrieve more items than it was requested: some of the items with high
|
||||||
// final score might not be retrieved otherwise.
|
// final score might not be retrieved otherwise.
|
||||||
// FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
|
// FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
|
||||||
// using 100x of the requested number might not be good in practice, e.g.
|
// using 100x of the requested number might not be good in practice, e.g.
|
||||||
// when the requested number of items is small.
|
// when the requested number of items is small.
|
||||||
auto Root = Req.Limit ? createLimit(move(QueryIterator), *Req.Limit * 100)
|
auto Root = Req.Limit ? Corpus.limit(move(QueryIterator), *Req.Limit * 100)
|
||||||
: move(QueryIterator);
|
: move(QueryIterator);
|
||||||
SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));
|
SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));
|
||||||
vlog("Dex query tree: {0}", *Root);
|
vlog("Dex query tree: {0}", *Root);
|
||||||
|
|
|
@ -44,7 +44,7 @@ public:
|
||||||
// All symbols must outlive this index.
|
// All symbols must outlive this index.
|
||||||
template <typename Range>
|
template <typename Range>
|
||||||
Dex(Range &&Symbols, llvm::ArrayRef<std::string> Schemes)
|
Dex(Range &&Symbols, llvm::ArrayRef<std::string> Schemes)
|
||||||
: URISchemes(Schemes) {
|
: Corpus(0), URISchemes(Schemes) {
|
||||||
// If Schemes don't contain any items, fall back to SymbolCollector's
|
// If Schemes don't contain any items, fall back to SymbolCollector's
|
||||||
// default URI schemes.
|
// default URI schemes.
|
||||||
if (URISchemes.empty()) {
|
if (URISchemes.empty()) {
|
||||||
|
@ -101,6 +101,7 @@ private:
|
||||||
/// std. Inverted index is used to retrieve posting lists which are processed
|
/// std. Inverted index is used to retrieve posting lists which are processed
|
||||||
/// during the fuzzyFind process.
|
/// during the fuzzyFind process.
|
||||||
llvm::DenseMap<Token, PostingList> InvertedIndex;
|
llvm::DenseMap<Token, PostingList> InvertedIndex;
|
||||||
|
dex::Corpus Corpus;
|
||||||
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
|
std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
|
||||||
// Size of memory retained by KeepAlive.
|
// Size of memory retained by KeepAlive.
|
||||||
size_t BackingDataSize = 0;
|
size_t BackingDataSize = 0;
|
||||||
|
|
|
@ -64,7 +64,7 @@ public:
|
||||||
|
|
||||||
float consume() override {
|
float consume() override {
|
||||||
assert(!reachedEnd() && "AND iterator can't consume() at the end.");
|
assert(!reachedEnd() && "AND iterator can't consume() at the end.");
|
||||||
float Boost = DEFAULT_BOOST_SCORE;
|
float Boost = 1;
|
||||||
for (const auto &Child : Children)
|
for (const auto &Child : Children)
|
||||||
Boost *= Child->consume();
|
Boost *= Child->consume();
|
||||||
return Boost;
|
return Boost;
|
||||||
|
@ -175,12 +175,12 @@ public:
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the maximum boosting score among all Children when iterator is not
|
// Returns the maximum boosting score among all Children when iterator
|
||||||
// exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
|
// points to the current ID.
|
||||||
float consume() override {
|
float consume() override {
|
||||||
assert(!reachedEnd() && "OR iterator can't consume() at the end.");
|
assert(!reachedEnd() && "OR iterator can't consume() at the end.");
|
||||||
const DocID ID = peek();
|
const DocID ID = peek();
|
||||||
float Boost = DEFAULT_BOOST_SCORE;
|
float Boost = 1;
|
||||||
for (const auto &Child : Children)
|
for (const auto &Child : Children)
|
||||||
if (!Child->reachedEnd() && Child->peek() == ID)
|
if (!Child->reachedEnd() && Child->peek() == ID)
|
||||||
Boost = std::max(Boost, Child->consume());
|
Boost = std::max(Boost, Child->consume());
|
||||||
|
@ -236,7 +236,7 @@ public:
|
||||||
|
|
||||||
float consume() override {
|
float consume() override {
|
||||||
assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
|
assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
|
||||||
return DEFAULT_BOOST_SCORE;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t estimateSize() const override { return Size; }
|
size_t estimateSize() const override { return Size; }
|
||||||
|
@ -330,30 +330,30 @@ std::vector<std::pair<DocID, float>> consume(Iterator &It) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Iterator>
|
std::unique_ptr<Iterator>
|
||||||
createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
|
Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
|
||||||
// If there is exactly one child, pull it one level up: AND(Child) -> Child.
|
// If there is exactly one child, pull it one level up: AND(Child) -> Child.
|
||||||
return Children.size() == 1 ? std::move(Children.front())
|
return Children.size() == 1 ? std::move(Children.front())
|
||||||
: llvm::make_unique<AndIterator>(move(Children));
|
: llvm::make_unique<AndIterator>(move(Children));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Iterator>
|
std::unique_ptr<Iterator>
|
||||||
createOr(std::vector<std::unique_ptr<Iterator>> Children) {
|
Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
|
||||||
// If there is exactly one child, pull it one level up: OR(Child) -> Child.
|
// If there is exactly one child, pull it one level up: OR(Child) -> Child.
|
||||||
return Children.size() == 1 ? std::move(Children.front())
|
return Children.size() == 1 ? std::move(Children.front())
|
||||||
: llvm::make_unique<OrIterator>(move(Children));
|
: llvm::make_unique<OrIterator>(move(Children));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Iterator> createTrue(DocID Size) {
|
std::unique_ptr<Iterator> Corpus::all() const {
|
||||||
return llvm::make_unique<TrueIterator>(Size);
|
return llvm::make_unique<TrueIterator>(Size);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
|
std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
|
||||||
float Factor) {
|
float Factor) const {
|
||||||
return llvm::make_unique<BoostIterator>(move(Child), Factor);
|
return llvm::make_unique<BoostIterator>(move(Child), Factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
|
std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
|
||||||
size_t Limit) {
|
size_t Limit) const {
|
||||||
return llvm::make_unique<LimitIterator>(move(Child), Limit);
|
return llvm::make_unique<LimitIterator>(move(Child), Limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -98,8 +98,6 @@ public:
|
||||||
return Iterator.dump(OS);
|
return Iterator.dump(OS);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static float DEFAULT_BOOST_SCORE = 1;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
|
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
|
||||||
};
|
};
|
||||||
|
@ -114,69 +112,74 @@ private:
|
||||||
/// to acquire preliminary scores of requested items.
|
/// to acquire preliminary scores of requested items.
|
||||||
std::vector<std::pair<DocID, float>> consume(Iterator &It);
|
std::vector<std::pair<DocID, float>> consume(Iterator &It);
|
||||||
|
|
||||||
/// Returns AND Iterator which performs the intersection of the PostingLists of
|
namespace detail {
|
||||||
/// its children.
|
// Variadic template machinery.
|
||||||
///
|
inline void populateChildren(std::vector<std::unique_ptr<Iterator>> &) {}
|
||||||
/// consume(): AND Iterator returns the product of Childrens' boosting scores
|
template <typename... TailT>
|
||||||
/// when not exhausted and DEFAULT_BOOST_SCORE otherwise.
|
|
||||||
std::unique_ptr<Iterator>
|
|
||||||
createAnd(std::vector<std::unique_ptr<Iterator>> Children);
|
|
||||||
|
|
||||||
/// Returns OR Iterator which performs the union of the PostingLists of its
|
|
||||||
/// children.
|
|
||||||
///
|
|
||||||
/// consume(): OR Iterator returns the highest boost value among children
|
|
||||||
/// pointing to requested item when not exhausted and DEFAULT_BOOST_SCORE
|
|
||||||
/// otherwise.
|
|
||||||
std::unique_ptr<Iterator>
|
|
||||||
createOr(std::vector<std::unique_ptr<Iterator>> Children);
|
|
||||||
|
|
||||||
/// Returns TRUE Iterator which iterates over "virtual" PostingList containing
|
|
||||||
/// all items in range [0, Size) in an efficient manner.
|
|
||||||
///
|
|
||||||
/// TRUE returns DEFAULT_BOOST_SCORE for each processed item.
|
|
||||||
std::unique_ptr<Iterator> createTrue(DocID Size);
|
|
||||||
|
|
||||||
/// Returns BOOST iterator which multiplies the score of each item by given
|
|
||||||
/// factor. Boosting can be used as a computationally inexpensive filtering.
|
|
||||||
/// Users can return significantly more items using consumeAndBoost() and then
|
|
||||||
/// trim Top K using retrieval score.
|
|
||||||
std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
|
|
||||||
float Factor);
|
|
||||||
|
|
||||||
/// Returns LIMIT iterator, which yields up to N elements of its child iterator.
|
|
||||||
/// Elements only count towards the limit if they are part of the final result
|
|
||||||
/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2),
|
|
||||||
/// not ().
|
|
||||||
std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
|
|
||||||
size_t Limit);
|
|
||||||
|
|
||||||
/// This allows createAnd(create(...), create(...)) syntax.
|
|
||||||
template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
|
|
||||||
std::vector<std::unique_ptr<Iterator>> Children;
|
|
||||||
populateChildren(Children, args...);
|
|
||||||
return createAnd(move(Children));
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This allows createOr(create(...), create(...)) syntax.
|
|
||||||
template <typename... Args> std::unique_ptr<Iterator> createOr(Args... args) {
|
|
||||||
std::vector<std::unique_ptr<Iterator>> Children;
|
|
||||||
populateChildren(Children, args...);
|
|
||||||
return createOr(move(Children));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename HeadT, typename... TailT>
|
|
||||||
void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
|
void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
|
||||||
HeadT &Head, TailT &... Tail) {
|
std::unique_ptr<Iterator> Head, TailT... Tail) {
|
||||||
Children.push_back(move(Head));
|
Children.push_back(move(Head));
|
||||||
populateChildren(Children, Tail...);
|
populateChildren(Children, move(Tail)...);
|
||||||
}
|
}
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
template <typename HeadT>
|
// A corpus is a set of documents, and a factory for iterators over them.
|
||||||
void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
|
class Corpus {
|
||||||
HeadT &Head) {
|
DocID Size;
|
||||||
Children.push_back(move(Head));
|
|
||||||
}
|
public:
|
||||||
|
explicit Corpus(DocID Size) : Size(Size) {}
|
||||||
|
|
||||||
|
/// Returns AND Iterator which performs the intersection of the PostingLists
|
||||||
|
/// of its children.
|
||||||
|
///
|
||||||
|
/// consume(): AND Iterator returns the product of Childrens' boosting
|
||||||
|
/// scores.
|
||||||
|
std::unique_ptr<Iterator>
|
||||||
|
intersect(std::vector<std::unique_ptr<Iterator>> Children) const;
|
||||||
|
|
||||||
|
/// Returns OR Iterator which performs the union of the PostingLists of its
|
||||||
|
/// children.
|
||||||
|
///
|
||||||
|
/// consume(): OR Iterator returns the highest boost value among children
|
||||||
|
/// containing the requested item.
|
||||||
|
std::unique_ptr<Iterator>
|
||||||
|
unionOf(std::vector<std::unique_ptr<Iterator>> Children) const;
|
||||||
|
|
||||||
|
/// Returns TRUE Iterator which iterates over "virtual" PostingList
|
||||||
|
/// containing all items in range [0, Size) in an efficient manner.
|
||||||
|
std::unique_ptr<Iterator> all() const;
|
||||||
|
|
||||||
|
/// Returns BOOST iterator which multiplies the score of each item by given
|
||||||
|
/// factor. Boosting can be used as a computationally inexpensive filtering.
|
||||||
|
/// Users can return significantly more items using consumeAndBoost() and
|
||||||
|
/// then trim Top K using retrieval score.
|
||||||
|
std::unique_ptr<Iterator> boost(std::unique_ptr<Iterator> Child,
|
||||||
|
float Factor) const;
|
||||||
|
|
||||||
|
/// Returns LIMIT iterator, which yields up to N elements of its child
|
||||||
|
/// iterator. Elements only count towards the limit if they are part of the
|
||||||
|
/// final result set. Therefore the following iterator (AND (2) (LIMIT (1 2)
|
||||||
|
/// 1)) yields (2), not ().
|
||||||
|
std::unique_ptr<Iterator> limit(std::unique_ptr<Iterator> Child,
|
||||||
|
size_t Limit) const;
|
||||||
|
|
||||||
|
/// This allows intersect(create(...), create(...)) syntax.
|
||||||
|
template <typename... Args>
|
||||||
|
std::unique_ptr<Iterator> intersect(Args... args) const {
|
||||||
|
std::vector<std::unique_ptr<Iterator>> Children;
|
||||||
|
detail::populateChildren(Children, std::forward<Args>(args)...);
|
||||||
|
return intersect(move(Children));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This allows unionOf(create(...), create(...)) syntax.
|
||||||
|
template <typename... Args>
|
||||||
|
std::unique_ptr<Iterator> unionOf(Args... args) const {
|
||||||
|
std::vector<std::unique_ptr<Iterator>> Children;
|
||||||
|
detail::populateChildren(Children, std::forward<Args>(args)...);
|
||||||
|
return unionOf(move(Children));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace dex
|
} // namespace dex
|
||||||
} // namespace clangd
|
} // namespace clangd
|
||||||
|
|
|
@ -63,7 +63,7 @@ public:
|
||||||
float consume() override {
|
float consume() override {
|
||||||
assert(!reachedEnd() &&
|
assert(!reachedEnd() &&
|
||||||
"Posting List iterator can't consume() at the end.");
|
"Posting List iterator can't consume() at the end.");
|
||||||
return DEFAULT_BOOST_SCORE;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t estimateSize() const override {
|
size_t estimateSize() const override {
|
||||||
|
|
|
@ -70,15 +70,16 @@ TEST(DexIterators, DocumentIterator) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, AndTwoLists) {
|
TEST(DexIterators, AndTwoLists) {
|
||||||
|
Corpus C{10000};
|
||||||
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
||||||
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
||||||
|
|
||||||
auto And = createAnd(L1.iterator(), L0.iterator());
|
auto And = C.intersect(L1.iterator(), L0.iterator());
|
||||||
|
|
||||||
EXPECT_FALSE(And->reachedEnd());
|
EXPECT_FALSE(And->reachedEnd());
|
||||||
EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
|
EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
|
||||||
|
|
||||||
And = createAnd(L0.iterator(), L1.iterator());
|
And = C.intersect(L0.iterator(), L1.iterator());
|
||||||
|
|
||||||
And->advanceTo(0);
|
And->advanceTo(0);
|
||||||
EXPECT_EQ(And->peek(), 0U);
|
EXPECT_EQ(And->peek(), 0U);
|
||||||
|
@ -94,11 +95,12 @@ TEST(DexIterators, AndTwoLists) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, AndThreeLists) {
|
TEST(DexIterators, AndThreeLists) {
|
||||||
|
Corpus C{10000};
|
||||||
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
||||||
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
||||||
const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
|
const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
|
||||||
|
|
||||||
auto And = createAnd(L0.iterator(), L1.iterator(), L2.iterator());
|
auto And = C.intersect(L0.iterator(), L1.iterator(), L2.iterator());
|
||||||
EXPECT_EQ(And->peek(), 7U);
|
EXPECT_EQ(And->peek(), 7U);
|
||||||
And->advanceTo(300);
|
And->advanceTo(300);
|
||||||
EXPECT_EQ(And->peek(), 320U);
|
EXPECT_EQ(And->peek(), 320U);
|
||||||
|
@ -108,10 +110,11 @@ TEST(DexIterators, AndThreeLists) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, OrTwoLists) {
|
TEST(DexIterators, OrTwoLists) {
|
||||||
|
Corpus C{10000};
|
||||||
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
||||||
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
||||||
|
|
||||||
auto Or = createOr(L0.iterator(), L1.iterator());
|
auto Or = C.unionOf(L0.iterator(), L1.iterator());
|
||||||
|
|
||||||
EXPECT_FALSE(Or->reachedEnd());
|
EXPECT_FALSE(Or->reachedEnd());
|
||||||
EXPECT_EQ(Or->peek(), 0U);
|
EXPECT_EQ(Or->peek(), 0U);
|
||||||
|
@ -134,18 +137,19 @@ TEST(DexIterators, OrTwoLists) {
|
||||||
Or->advanceTo(9001);
|
Or->advanceTo(9001);
|
||||||
EXPECT_TRUE(Or->reachedEnd());
|
EXPECT_TRUE(Or->reachedEnd());
|
||||||
|
|
||||||
Or = createOr(L0.iterator(), L1.iterator());
|
Or = C.unionOf(L0.iterator(), L1.iterator());
|
||||||
|
|
||||||
EXPECT_THAT(consumeIDs(*Or),
|
EXPECT_THAT(consumeIDs(*Or),
|
||||||
ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
|
ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, OrThreeLists) {
|
TEST(DexIterators, OrThreeLists) {
|
||||||
|
Corpus C{10000};
|
||||||
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
const PostingList L0({0, 5, 7, 10, 42, 320, 9000});
|
||||||
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
|
||||||
const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
|
const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
|
||||||
|
|
||||||
auto Or = createOr(L0.iterator(), L1.iterator(), L2.iterator());
|
auto Or = C.unionOf(L0.iterator(), L1.iterator(), L2.iterator());
|
||||||
|
|
||||||
EXPECT_FALSE(Or->reachedEnd());
|
EXPECT_FALSE(Or->reachedEnd());
|
||||||
EXPECT_EQ(Or->peek(), 0U);
|
EXPECT_EQ(Or->peek(), 0U);
|
||||||
|
@ -194,17 +198,18 @@ TEST(DexIterators, QueryTree) {
|
||||||
// |1, 5, 7, 9| |1, 5| |0, 3, 5|
|
// |1, 5, 7, 9| |1, 5| |0, 3, 5|
|
||||||
// +----------+ +----+ +-------+
|
// +----------+ +----+ +-------+
|
||||||
//
|
//
|
||||||
|
Corpus C{10};
|
||||||
const PostingList L0({1, 3, 5, 8, 9});
|
const PostingList L0({1, 3, 5, 8, 9});
|
||||||
const PostingList L1({1, 5, 7, 9});
|
const PostingList L1({1, 5, 7, 9});
|
||||||
const PostingList L2({1, 5});
|
const PostingList L2({1, 5});
|
||||||
const PostingList L3({0, 3, 5});
|
const PostingList L3({0, 3, 5});
|
||||||
|
|
||||||
// Root of the query tree: [1, 5]
|
// Root of the query tree: [1, 5]
|
||||||
auto Root = createAnd(
|
auto Root = C.intersect(
|
||||||
// Lower And Iterator: [1, 5, 9]
|
// Lower And Iterator: [1, 5, 9]
|
||||||
createAnd(L0.iterator(), createBoost(L1.iterator(), 2U)),
|
C.intersect(L0.iterator(), C.boost(L1.iterator(), 2U)),
|
||||||
// Lower Or Iterator: [0, 1, 5]
|
// Lower Or Iterator: [0, 1, 5]
|
||||||
createOr(createBoost(L2.iterator(), 3U), createBoost(L3.iterator(), 4U)));
|
C.unionOf(C.boost(L2.iterator(), 3U), C.boost(L3.iterator(), 4U)));
|
||||||
|
|
||||||
EXPECT_FALSE(Root->reachedEnd());
|
EXPECT_FALSE(Root->reachedEnd());
|
||||||
EXPECT_EQ(Root->peek(), 1U);
|
EXPECT_EQ(Root->peek(), 1U);
|
||||||
|
@ -226,6 +231,7 @@ TEST(DexIterators, QueryTree) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, StringRepresentation) {
|
TEST(DexIterators, StringRepresentation) {
|
||||||
|
Corpus C{10};
|
||||||
const PostingList L1({1, 3, 5});
|
const PostingList L1({1, 3, 5});
|
||||||
const PostingList L2({1, 7, 9});
|
const PostingList L2({1, 7, 9});
|
||||||
|
|
||||||
|
@ -238,56 +244,60 @@ TEST(DexIterators, StringRepresentation) {
|
||||||
auto I2 = L1.iterator(&Tok);
|
auto I2 = L1.iterator(&Tok);
|
||||||
EXPECT_EQ(llvm::to_string(*I2), "T=L2");
|
EXPECT_EQ(llvm::to_string(*I2), "T=L2");
|
||||||
|
|
||||||
auto Tree = createLimit(createAnd(move(I1), move(I2)), 10);
|
auto Tree = C.limit(C.intersect(move(I1), move(I2)), 10);
|
||||||
EXPECT_EQ(llvm::to_string(*Tree), "(LIMIT 10 (& [1 3 5] T=L2))");
|
EXPECT_EQ(llvm::to_string(*Tree), "(LIMIT 10 (& [1 3 5] T=L2))");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, Limit) {
|
TEST(DexIterators, Limit) {
|
||||||
|
Corpus C{10000};
|
||||||
const PostingList L0({3, 6, 7, 20, 42, 100});
|
const PostingList L0({3, 6, 7, 20, 42, 100});
|
||||||
const PostingList L1({1, 3, 5, 6, 7, 30, 100});
|
const PostingList L1({1, 3, 5, 6, 7, 30, 100});
|
||||||
const PostingList L2({0, 3, 5, 7, 8, 100});
|
const PostingList L2({0, 3, 5, 7, 8, 100});
|
||||||
|
|
||||||
auto DocIterator = createLimit(L0.iterator(), 42);
|
auto DocIterator = C.limit(L0.iterator(), 42);
|
||||||
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
|
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
|
||||||
|
|
||||||
DocIterator = createLimit(L0.iterator(), 3);
|
DocIterator = C.limit(L0.iterator(), 3);
|
||||||
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
|
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
|
||||||
|
|
||||||
DocIterator = createLimit(L0.iterator(), 0);
|
DocIterator = C.limit(L0.iterator(), 0);
|
||||||
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
|
EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
|
||||||
|
|
||||||
auto AndIterator = createAnd(
|
auto AndIterator =
|
||||||
createLimit(createTrue(9000), 343), createLimit(L0.iterator(), 2),
|
C.intersect(C.limit(C.all(), 343), C.limit(L0.iterator(), 2),
|
||||||
createLimit(L1.iterator(), 3), createLimit(L2.iterator(), 42));
|
C.limit(L1.iterator(), 3), C.limit(L2.iterator(), 42));
|
||||||
EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
|
EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, True) {
|
TEST(DexIterators, True) {
|
||||||
auto TrueIterator = createTrue(0U);
|
Corpus C{0};
|
||||||
|
auto TrueIterator = C.all();
|
||||||
EXPECT_TRUE(TrueIterator->reachedEnd());
|
EXPECT_TRUE(TrueIterator->reachedEnd());
|
||||||
EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
|
EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
|
||||||
|
|
||||||
|
C = Corpus{7};
|
||||||
const PostingList L0({1, 2, 5, 7});
|
const PostingList L0({1, 2, 5, 7});
|
||||||
TrueIterator = createTrue(7U);
|
TrueIterator = C.all();
|
||||||
EXPECT_THAT(TrueIterator->peek(), 0);
|
EXPECT_THAT(TrueIterator->peek(), 0);
|
||||||
auto AndIterator = createAnd(L0.iterator(), move(TrueIterator));
|
auto AndIterator = C.intersect(L0.iterator(), move(TrueIterator));
|
||||||
EXPECT_FALSE(AndIterator->reachedEnd());
|
EXPECT_FALSE(AndIterator->reachedEnd());
|
||||||
EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
|
EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DexIterators, Boost) {
|
TEST(DexIterators, Boost) {
|
||||||
auto BoostIterator = createBoost(createTrue(5U), 42U);
|
Corpus C{5};
|
||||||
|
auto BoostIterator = C.boost(C.all(), 42U);
|
||||||
EXPECT_FALSE(BoostIterator->reachedEnd());
|
EXPECT_FALSE(BoostIterator->reachedEnd());
|
||||||
auto ElementBoost = BoostIterator->consume();
|
auto ElementBoost = BoostIterator->consume();
|
||||||
EXPECT_THAT(ElementBoost, 42U);
|
EXPECT_THAT(ElementBoost, 42U);
|
||||||
|
|
||||||
const PostingList L0({2, 4});
|
const PostingList L0({2, 4});
|
||||||
const PostingList L1({1, 4});
|
const PostingList L1({1, 4});
|
||||||
auto Root = createOr(createTrue(5U), createBoost(L0.iterator(), 2U),
|
auto Root = C.unionOf(C.all(), C.boost(L0.iterator(), 2U),
|
||||||
createBoost(L1.iterator(), 3U));
|
C.boost(L1.iterator(), 3U));
|
||||||
|
|
||||||
ElementBoost = Root->consume();
|
ElementBoost = Root->consume();
|
||||||
EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
|
EXPECT_THAT(ElementBoost, 1);
|
||||||
Root->advance();
|
Root->advance();
|
||||||
EXPECT_THAT(Root->peek(), 1U);
|
EXPECT_THAT(Root->peek(), 1U);
|
||||||
ElementBoost = Root->consume();
|
ElementBoost = Root->consume();
|
||||||
|
|
Loading…
Reference in New Issue