[clangd] Support scope proximity in code completion.

Summary:
This should make all-scope completion more usable. Scope proximity for
indexes will be added in followup patch.

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D53131

llvm-svn: 344688
This commit is contained in:
Eric Liu 2018-10-17 11:19:02 +00:00
parent e2566b5d87
commit 3fac4ef1fd
10 changed files with 233 additions and 39 deletions

View File

@ -54,6 +54,14 @@ std::string printQualifiedName(const NamedDecl &ND) {
return QName;
}
std::string printNamespaceScope(const DeclContext &DC) {
for (const auto *Ctx = &DC; Ctx != nullptr; Ctx = Ctx->getParent())
if (const auto *NS = dyn_cast<NamespaceDecl>(Ctx))
if (!NS->isAnonymousNamespace() && !NS->isInlineNamespace())
return printQualifiedName(*NS) + "::";
return "";
}
llvm::Optional<SymbolID> getSymbolID(const Decl *D) {
llvm::SmallString<128> USR;
if (index::generateUSRForDecl(D, USR))

View File

@ -34,6 +34,9 @@ SourceLocation findNameLoc(const clang::Decl *D);
/// like inline namespaces.
std::string printQualifiedName(const NamedDecl &ND);
/// Returns the first enclosing namespace scope starting from \p DC.
std::string printNamespaceScope(const DeclContext &DC);
/// Gets the symbol ID for a declaration, if possible.
llvm::Optional<SymbolID> getSymbolID(const Decl *D);

View File

@ -34,6 +34,8 @@
#include "Trace.h"
#include "URI.h"
#include "index/Index.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
@ -44,6 +46,7 @@
#include "clang/Sema/CodeCompleteConsumer.h"
#include "clang/Sema/Sema.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Error.h"
@ -549,9 +552,10 @@ struct SpecifiedScope {
};
// Get all scopes that will be queried in indexes and whether symbols from
// any scope is allowed.
// any scope is allowed. The first scope in the list is the preferred scope
// (e.g. enclosing namespace).
std::pair<std::vector<std::string>, bool>
getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM,
getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
const CodeCompleteOptions &Opts) {
auto GetAllAccessibleScopes = [](CodeCompletionContext &CCContext) {
SpecifiedScope Info;
@ -559,7 +563,7 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM,
if (isa<TranslationUnitDecl>(Context))
Info.AccessibleScopes.push_back(""); // global namespace
else if (const auto *NS = dyn_cast<NamespaceDecl>(Context))
Info.AccessibleScopes.push_back(NS->getQualifiedNameAsString() + "::");
Info.AccessibleScopes.push_back(printNamespaceScope(*Context));
}
return Info;
};
@ -568,12 +572,13 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM,
// Unqualified completion (e.g. "vec^").
if (!SS) {
// FIXME: Once we can insert namespace qualifiers and use the in-scope
// namespaces for scoring, search in all namespaces.
// FIXME: Capture scopes and use for scoring, for example,
// "using namespace std; namespace foo {v^}" =>
// foo::value > std::vector > boost::variant
auto Scopes = GetAllAccessibleScopes(CCContext).scopesForIndexQuery();
std::vector<std::string> Scopes;
std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
Scopes.push_back(EnclosingScope);
for (auto &S : GetAllAccessibleScopes(CCContext).scopesForIndexQuery()) {
if (EnclosingScope != S)
Scopes.push_back(std::move(S));
}
// Allow AllScopes completion only for there is no explicit scope qualifier.
return {Scopes, Opts.AllScopes};
}
@ -592,8 +597,8 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM,
Info.AccessibleScopes.push_back(""); // global namespace
Info.UnresolvedQualifier =
Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()), SM,
clang::LangOptions())
Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()),
CCSema.SourceMgr, clang::LangOptions())
.ltrim("::");
// Sema excludes the trailing "::".
if (!Info.UnresolvedQualifier->empty())
@ -1216,6 +1221,8 @@ class CodeCompleteFlow {
bool Incomplete = false; // Would more be available with a higher limit?
llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
std::vector<std::string> QueryScopes; // Initialized once Sema runs.
// Initialized once QueryScopes is initialized, if there are scopes.
llvm::Optional<ScopeDistance> ScopeProximity;
// Whether to query symbols from any scope. Initialized once Sema runs.
bool AllScopes = false;
// Include-insertion and proximity scoring rely on the include structure.
@ -1341,8 +1348,10 @@ private:
}
Filter = FuzzyMatcher(
Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
std::tie(QueryScopes, AllScopes) = getQueryScopes(
Recorder->CCContext, Recorder->CCSema->getSourceManager(), Opts);
std::tie(QueryScopes, AllScopes) =
getQueryScopes(Recorder->CCContext, *Recorder->CCSema, Opts);
if (!QueryScopes.empty())
ScopeProximity.emplace(QueryScopes);
// Sema provides the needed context to query the index.
// FIXME: in addition to querying for extra/overlapping symbols, we should
// explicitly request symbols corresponding to Sema results.
@ -1479,7 +1488,8 @@ private:
Relevance.Context = Recorder->CCContext.getKind();
Relevance.Query = SymbolRelevanceSignals::CodeComplete;
Relevance.FileProximityMatch = FileProximity.getPointer();
// FIXME: incorparate scope proximity into relevance score.
if (ScopeProximity)
Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
auto &First = Bundle.front();
if (auto FuzzyScore = fuzzyScore(First))

View File

@ -176,5 +176,46 @@ FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
return *Delegate;
}
static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
SmallVector<StringRef, 4> Split;
Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
return {"/" + llvm::join(Split, "/"), Split.size()};
}
static FileDistance
createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
FileDistanceOptions Opts;
Opts.UpCost = 2;
Opts.DownCost = 4;
Opts.AllowDownTraversalFromRoot = false;
StringMap<SourceParams> Sources;
StringRef Preferred = QueryScopes.empty() ? "" : QueryScopes.front().c_str();
for (StringRef S : QueryScopes) {
SourceParams Param;
// Penalize the global scope even it's preferred, as all projects can define
// symbols in it, and there is pattern where using-namespace is used in
// place of enclosing namespaces (e.g. in implementation files).
if (S == Preferred)
Param.Cost = S == "" ? 2 : 0;
else if (Preferred.startswith(S) && !S.empty())
continue; // just rely on up-traversals.
else
Param.Cost = S == "" ? 5 : 2;
auto Path = scopeToPath(S);
// The global namespace is not 'near' its children.
Param.MaxUpTraversals = std::max(Path.second - 1, 0);
Sources[Path.first] = std::move(Param);
}
return FileDistance(Sources, Opts);
}
ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
: Distance(createScopeFileDistance(QueryScopes)) {}
unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
return Distance.distance(scopeToPath(SymbolScope).first);
}
} // namespace clangd
} // namespace clang

View File

@ -48,6 +48,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <memory>
namespace clang {
namespace clangd {
@ -111,6 +112,19 @@ private:
FileDistanceOptions Opts;
};
/// Support lookups like FileDistance, but the lookup keys are symbol scopes.
/// For example, a scope "na::nb::" is converted to "/na/nb".
class ScopeDistance {
public:
/// QueryScopes[0] is the preferred scope.
ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);
unsigned distance(llvm::StringRef SymbolScope);
private:
FileDistance Distance;
};
} // namespace clangd
} // namespace clang

View File

@ -18,10 +18,16 @@
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
namespace clang {
@ -268,6 +274,7 @@ void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
// relevant to non-completion requests, we should recognize class members etc.
SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
SymbolScope = IndexResult.Scope;
IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
}
@ -277,6 +284,7 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
Forbidden = true;
if (SemaCCResult.Declaration) {
SemaSaysInScope = true;
// We boost things that have decls in the main file. We give a fixed score
// for all other declarations in sema as they are already included in the
// translation unit.
@ -284,7 +292,7 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
hasUsingDeclInMainFile(SemaCCResult))
? 1.0
: 0.6;
SemaProximityScore = std::max(DeclProximity, SemaProximityScore);
SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
}
@ -295,8 +303,8 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
NeedsFixIts = !SemaCCResult.FixIts.empty();
}
static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
URIDistance *D) {
static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI,
URIDistance *D) {
if (!D || SymbolURI.empty())
return {0.f, 0u};
unsigned Distance = D->distance(SymbolURI);
@ -304,6 +312,16 @@ static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
}
static float scopeBoost(ScopeDistance &Distance,
llvm::Optional<llvm::StringRef> SymbolScope) {
if (!SymbolScope)
return 1;
auto D = Distance.distance(*SymbolScope);
if (D == FileDistance::Unreachable)
return 0.4;
return std::max(0.5, 2.0 * std::pow(0.6, D / 2.0));
}
float SymbolRelevanceSignals::evaluate() const {
float Score = 1;
@ -312,10 +330,18 @@ float SymbolRelevanceSignals::evaluate() const {
Score *= NameMatch;
// Proximity scores are [0,1] and we translate them into a multiplier in the
// range from 1 to 3.
Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
SemaProximityScore);
// File proximity scores are [0,1] and we translate them into a multiplier in
// the range from 1 to 3.
Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first,
SemaFileProximityScore);
if (ScopeProximityMatch)
// Use a constant scope boost for sema results, as scopes of sema results
// can be tricky (e.g. class/function scope). Set to the max boost as we
// don't load top-level symbols from the preamble and sema results are
// always in the accessible scope.
Score *=
SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
// Symbols like local variables may only be referenced within their scope.
// Conversely if we're in that scope, it's likely we'll reference them.
@ -358,15 +384,25 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
OS << formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
OS << formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
OS << formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
if (S.FileProximityMatch) {
auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
Score.second);
}
OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
OS << formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
OS << formatv("\tSymbol scope: {0}\n",
S.SymbolScope ? *S.SymbolScope : "<None>");
if (S.FileProximityMatch) {
auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch);
OS << formatv("\tIndex URI proximity: {0} (distance={1})\n", Score.first,
Score.second);
}
OS << formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
OS << formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
if (S.ScopeProximityMatch)
OS << formatv("\tIndex scope boost: {0}\n",
scopeBoost(*S.ScopeProximityMatch, S.SymbolScope));
return OS;
}

View File

@ -28,6 +28,7 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#include "FileDistance.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@ -87,13 +88,19 @@ struct SymbolRelevanceSignals {
bool NeedsFixIts = false;
URIDistance *FileProximityMatch = nullptr;
/// This is used to calculate proximity between the index symbol and the
/// These are used to calculate proximity between the index symbol and the
/// query.
llvm::StringRef SymbolURI;
/// Proximity between best declaration and the query. [0-1], 1 is closest.
/// FIXME: unify with index proximity score - signals should be
/// source-independent.
float SemaProximityScore = 0;
/// Proximity between best declaration and the query. [0-1], 1 is closest.
float SemaFileProximityScore = 0;
// Scope proximity is only considered (both index and sema) when this is set.
ScopeDistance *ScopeProximityMatch = nullptr;
llvm::Optional<llvm::StringRef> SymbolScope;
// A symbol from sema should be accessible from the current scope.
bool SemaSaysInScope = false;
// An approximate measure of where we expect the symbol to be used.
enum AccessibleScope {

View File

@ -1040,6 +1040,28 @@ TEST(CompletionTest, UnqualifiedIdQuery) {
UnorderedElementsAre("", "ns::", "std::"))));
}
TEST(CompletionTest, EnclosingScopeComesFirst) {
auto Requests = captureIndexRequests(R"cpp(
namespace std {}
using namespace std;
namespace nx {
namespace ns {
namespace {
void f() {
vec^
}
}
}
}
)cpp");
EXPECT_THAT(Requests,
ElementsAre(Field(
&FuzzyFindRequest::Scopes,
UnorderedElementsAre("", "std::", "nx::ns::", "nx::"))));
EXPECT_EQ(Requests[0].Scopes[0], "nx::ns::");
}
TEST(CompletionTest, ResolvedQualifiedIdQuery) {
auto Requests = captureIndexRequests(R"cpp(
namespace ns1 {}

View File

@ -109,6 +109,16 @@ TEST(FileDistance, DisallowDownTraversalsFromRoot) {
EXPECT_EQ(D.distance("/x"), FileDistance::Unreachable);
}
TEST(ScopeDistance, Smoke) {
ScopeDistance D({"x::y::z", "x::", "", "a::"});
EXPECT_EQ(D.distance("x::y::z::"), 0u);
EXPECT_GT(D.distance("x::y::"), D.distance("x::y::z::"));
EXPECT_GT(D.distance("x::"), D.distance("x::y::"));
EXPECT_GT(D.distance("x::y::z::down::"), D.distance("x::y::"));
EXPECT_GT(D.distance(""), D.distance("a::"));
EXPECT_GT(D.distance("x::"), D.distance("a::"));
}
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -28,6 +28,7 @@
#include "llvm/Support/Casting.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <vector>
namespace clang {
namespace clangd {
@ -117,13 +118,14 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) {
Relevance = {};
Relevance.merge(CodeCompletionResult(&findDecl(AST, "main"), 42));
EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f) << "Decl in current file";
EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f)
<< "Decl in current file";
Relevance = {};
Relevance.merge(CodeCompletionResult(&findDecl(AST, "header"), 42));
EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 0.6f) << "Decl from header";
EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 0.6f) << "Decl from header";
Relevance = {};
Relevance.merge(CodeCompletionResult(&findDecl(AST, "header_main"), 42));
EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f)
EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f)
<< "Current file and header";
auto constructShadowDeclCompletionResult = [&](const std::string DeclName) {
@ -146,10 +148,10 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) {
Relevance = {};
Relevance.merge(constructShadowDeclCompletionResult("Bar"));
EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f)
EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f)
<< "Using declaration in main file";
Relevance.merge(constructShadowDeclCompletionResult("FLAGS_FOO"));
EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f)
EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f)
<< "Using declaration in main file";
Relevance = {};
@ -210,9 +212,21 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) {
PoorNameMatch.NameMatch = 0.2f;
EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
SymbolRelevanceSignals WithSemaProximity;
WithSemaProximity.SemaProximityScore = 0.2f;
EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate());
SymbolRelevanceSignals WithSemaFileProximity;
WithSemaFileProximity.SemaFileProximityScore = 0.2f;
EXPECT_GT(WithSemaFileProximity.evaluate(), Default.evaluate());
ScopeDistance ScopeProximity({"x::y::"});
SymbolRelevanceSignals WithSemaScopeProximity;
WithSemaScopeProximity.ScopeProximityMatch = &ScopeProximity;
WithSemaScopeProximity.SemaSaysInScope = true;
EXPECT_GT(WithSemaScopeProximity.evaluate(), Default.evaluate());
SymbolRelevanceSignals WithIndexScopeProximity;
WithIndexScopeProximity.ScopeProximityMatch = &ScopeProximity;
WithIndexScopeProximity.SymbolScope = "x::";
EXPECT_GT(WithSemaScopeProximity.evaluate(), Default.evaluate());
SymbolRelevanceSignals IndexProximate;
IndexProximate.SymbolURI = "unittest:/foo/bar.h";
@ -242,6 +256,35 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) {
EXPECT_EQ(Instance.evaluate(), Default.evaluate());
}
TEST(QualityTests, ScopeProximity) {
SymbolRelevanceSignals Relevance;
ScopeDistance ScopeProximity({"x::y::z::", "x::", "llvm::", ""});
Relevance.ScopeProximityMatch = &ScopeProximity;
Relevance.SymbolScope = "other::";
float NotMatched = Relevance.evaluate();
Relevance.SymbolScope = "";
float Global = Relevance.evaluate();
EXPECT_GT(Global, NotMatched);
Relevance.SymbolScope = "llvm::";
float NonParent = Relevance.evaluate();
EXPECT_GT(NonParent, Global);
Relevance.SymbolScope = "x::";
float GrandParent = Relevance.evaluate();
EXPECT_GT(GrandParent, Global);
Relevance.SymbolScope = "x::y::";
float Parent = Relevance.evaluate();
EXPECT_GT(Parent, GrandParent);
Relevance.SymbolScope = "x::y::z::";
float Enclosing = Relevance.evaluate();
EXPECT_GT(Enclosing, Parent);
}
TEST(QualityTests, SortText) {
EXPECT_LT(sortText(std::numeric_limits<float>::infinity()),
sortText(1000.2f));