// File: llvm-project/clang-tools-extra/clangd/Quality.h

//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
///
/// Some operations such as code completion produce a set of candidates.
/// Usually the user can choose between them, but we should put the best options
/// at the top (they're easier to select, and more likely to be seen).
///
/// This file defines building blocks for ranking candidates.
/// It's used by the features directly and also in the implementation of
/// indexes, as indexes also need to heuristically limit their results.
///
/// The facilities here are:
///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString.
///     These are structured in a way that they can be debugged, and are fairly
///     consistent regardless of the source.
///   - computing scores from scoring signals. These are suitable for sorting.
///   - sorting utilities like the TopN container.
/// These could be split up further to isolate dependencies if we care.
///
//===---------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <functional>
#include <vector>
// Forward declaration: this header only names llvm::raw_ostream by reference
// (in the operator<< declarations), so the full definition is not needed here.
namespace llvm {
class raw_ostream;
}
namespace clang {
// Forward declaration; this header only takes CodeCompletionResult by const
// reference in the merge() overloads of the signal structs.
class CodeCompletionResult;
namespace clangd {
// Forward declarations of clangd types that this header only refers to by
// reference (Symbol) or by pointer (URIDistance).
struct Symbol;
class URIDistance;
// Signals structs are designed to be aggregated from 0 or more sources.
// A default instance has neutral signals, and sources are merged into it.
// They can be dumped for debugging, and evaluate()d into a score.
/// Attributes of a symbol that affect how much we like it.
struct SymbolQualitySignals {
bool Deprecated = false;
bool ReservedName = false; // __foo, _Foo are usually implementation details.
// FIXME: make these findable once user types _.
unsigned References = 0;
enum SymbolCategory {
Unknown = 0,
Variable,
Macro,
Type,
Function,
[clangd] Tune down quality score for class constructors so that it's ranked after class types. Summary: Currently, class constructors have the same score as the class types, and they are often ranked before class types. This is often not desireable and can be annoying when snippet is enabled and constructor signatures are added. Metrics: ``` ================================================================================================== OVERALL ================================================================================================== Total measurements: 111117 (+0) All measurements: MRR: 64.06 (+0.20) Top-5: 75.73% (+0.14%) Top-100: 93.71% (+0.01%) Full identifiers: MRR: 98.25 (+0.55) Top-5: 99.04% (+0.03%) Top-100: 99.16% (+0.00%) Filter length 0-5: MRR: 15.23 (+0.02) 50.50 (-0.02) 65.04 (+0.11) 70.75 (+0.19) 74.37 (+0.25) 79.43 (+0.32) Top-5: 40.90% (+0.03%) 74.52% (+0.03%) 87.23% (+0.15%) 91.68% (+0.08%) 93.68% (+0.14%) 95.87% (+0.12%) Top-100: 68.21% (+0.02%) 96.28% (+0.07%) 98.43% (+0.00%) 98.72% (+0.00%) 98.74% (+0.01%) 98.81% (+0.00%) ================================================================================================== DEFAULT ================================================================================================== Total measurements: 57535 (+0) All measurements: MRR: 58.07 (+0.37) Top-5: 69.94% (+0.26%) Top-100: 90.14% (+0.03%) Full identifiers: MRR: 97.13 (+1.05) Top-5: 98.14% (+0.06%) Top-100: 98.34% (+0.00%) Filter length 0-5: MRR: 13.91 (+0.00) 38.53 (+0.01) 55.58 (+0.21) 63.63 (+0.30) 69.23 (+0.47) 72.87 (+0.60) Top-5: 24.99% (+0.00%) 62.70% (+0.06%) 82.80% (+0.30%) 88.66% (+0.16%) 92.02% (+0.27%) 93.53% (+0.21%) Top-100: 51.56% (+0.05%) 93.19% (+0.13%) 97.30% (+0.00%) 97.81% (+0.00%) 97.85% (+0.01%) 97.79% (+0.00%) ``` Remark: - The full-id completions have +1.05 MRR improvement. - There is no noticeable impact on EXPLICIT_MEMBER_ACCESS and WANT_LOCAL. 
Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D49667 llvm-svn: 337816
2018-07-24 16:51:52 +08:00
Constructor,
Namespace,
Keyword,
} Category = Unknown;
void merge(const CodeCompletionResult &SemaCCResult);
void merge(const Symbol &IndexResult);
// Condense these signals down to a single number, higher is better.
float evaluate() const;
};
/// Writes the quality signals to the given stream; intended for dumping the
/// signals while debugging.
/// NOTE(review): presumably returns the same stream to allow chaining — the
/// definition is not visible in this header.
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const SymbolQualitySignals &);
/// Attributes of a symbol-query pair that affect how much we like it.
/// Every field has an in-class default; each merge() call folds one source
/// into the object, and evaluate() reduces the result to a single number.
struct SymbolRelevanceSignals {
/// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
float NameMatch = 1;
bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
// NOTE(review): raw pointer, null by default; ownership is not visible in
// this header — presumably non-owning, confirm against the code that sets it.
URIDistance *FileProximityMatch = nullptr;
/// This is used to calculate proximity between the index symbol and the
/// query.
// NOTE(review): llvm::StringRef is a non-owning view, so the underlying
// string presumably has to outlive any use of this field — confirm at callers.
llvm::StringRef SymbolURI;
/// Proximity between best declaration and the query. [0-1], 1 is closest.
/// FIXME: unify with index proximity score - signals should be
/// source-independent.
float SemaProximityScore = 0;
// An approximate measure of where we expect the symbol to be used.
// Defaults to GlobalScope.
enum AccessibleScope {
FunctionScope,
ClassScope,
FileScope,
GlobalScope,
} Scope = GlobalScope;
// The kind of query the symbol is being ranked for. Defaults to Generic.
enum QueryType {
CodeComplete,
Generic,
} Query = Generic;
// Code completion context kind; defaults to CCC_Other.
// NOTE(review): presumably only meaningful when Query == CodeComplete —
// confirm in evaluate().
CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
// Whether symbol is an instance member of a class.
bool IsInstanceMember = false;
// Folds signals taken from a Sema code-completion result into this object.
void merge(const CodeCompletionResult &SemaResult);
// Folds signals taken from an index symbol into this object.
void merge(const Symbol &IndexResult);
// Condense these signals down to a single number, higher is better.
float evaluate() const;
};
/// Writes the relevance signals to the given stream; intended for dumping the
/// signals while debugging.
/// NOTE(review): presumably returns the same stream to allow chaining — the
/// definition is not visible in this header.
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const SymbolRelevanceSignals &);
/// Combine symbol quality and relevance into a single score.
/// NOTE(review): the arguments are presumably the results of
/// SymbolQualitySignals::evaluate() and SymbolRelevanceSignals::evaluate();
/// the combining formula is not visible in this header — confirm in the
/// definition.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
/// TopN<T> is a lossy container that preserves only the "best" N elements.
///
/// "Best" is defined by Compare: an element A is better than B when
/// Compare(A, B) is true (std::greater<T> by default, i.e. larger is better).
template <typename T, typename Compare = std::greater<T>> class TopN {
public:
  using value_type = T;

  /// Creates a container that retains at most N elements, ordered by Greater.
  TopN(size_t N, Compare Greater = Compare())
      : N(N), Greater(std::move(Greater)) {}

  /// Offers a candidate to the set.
  /// Returns true if the set was already at capacity, meaning some candidate
  /// (either the new one or the current worst) had to be dropped to stay
  /// within N.
  bool push(value_type &&V) {
    const bool Full = Heap.size() >= N;
    if (!Full) {
      // Still room: simply add the candidate to the heap.
      Heap.push_back(std::move(V));
      std::push_heap(Heap.begin(), Heap.end(), Greater);
    } else if (N > 0 && Greater(V, Heap.front())) {
      // At capacity and the candidate beats the current worst element (the
      // heap root): evict the root and put the candidate in its place.
      std::pop_heap(Heap.begin(), Heap.end(), Greater);
      Heap.back() = std::move(V);
      std::push_heap(Heap.begin(), Heap.end(), Greater);
    }
    // Otherwise the candidate itself is the one that gets dropped.
    assert(Heap.size() <= N);
    assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
    return Full;
  }

  /// Consumes the container and returns the retained candidates ordered from
  /// best to worst. Only callable on an rvalue (e.g. std::move(Top).items()).
  std::vector<value_type> items() && {
    std::sort_heap(Heap.begin(), Heap.end(), Greater);
    assert(Heap.size() <= N);
    return std::move(Heap);
  }

private:
  const size_t N;                // Maximum number of retained elements.
  std::vector<value_type> Heap;  // Heap ordered by Greater: worst kept at front.
  Compare Greater;               // Returns true if its first argument is better.
};
/// Returns a string that sorts in the same order as (-Score, Tiebreak), for
/// LSP. (The highest score compares smallest so it sorts at the top).
/// Tiebreak defaults to the empty string when the caller has no secondary key.
std::string sortText(float Score, llvm::StringRef Tiebreak = "");
} // namespace clangd
} // namespace clang
#endif