diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp index 661c6aaf4032..ffaf63ae7603 100644 --- a/clang-tools-extra/clangd/AST.cpp +++ b/clang-tools-extra/clangd/AST.cpp @@ -54,6 +54,14 @@ std::string printQualifiedName(const NamedDecl &ND) { return QName; } +std::string printNamespaceScope(const DeclContext &DC) { + for (const auto *Ctx = &DC; Ctx != nullptr; Ctx = Ctx->getParent()) + if (const auto *NS = dyn_cast(Ctx)) + if (!NS->isAnonymousNamespace() && !NS->isInlineNamespace()) + return printQualifiedName(*NS) + "::"; + return ""; +} + llvm::Optional getSymbolID(const Decl *D) { llvm::SmallString<128> USR; if (index::generateUSRForDecl(D, USR)) diff --git a/clang-tools-extra/clangd/AST.h b/clang-tools-extra/clangd/AST.h index 88244f3e947a..32cab4c08cb1 100644 --- a/clang-tools-extra/clangd/AST.h +++ b/clang-tools-extra/clangd/AST.h @@ -34,6 +34,9 @@ SourceLocation findNameLoc(const clang::Decl *D); /// like inline namespaces. std::string printQualifiedName(const NamedDecl &ND); +/// Returns the first enclosing namespace scope starting from \p DC. +std::string printNamespaceScope(const DeclContext &DC); + /// Gets the symbol ID for a declaration, if possible. llvm::Optional getSymbolID(const Decl *D); diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 92018b193254..91a3b04c5fd4 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -34,6 +34,8 @@ #include "Trace.h" #include "URI.h" #include "index/Index.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" @@ -44,6 +46,7 @@ #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/Sema.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" @@ -549,9 +552,10 @@ struct SpecifiedScope { }; // Get all scopes that will be queried in indexes and whether symbols from -// any scope is allowed. +// any scope is allowed. The first scope in the list is the preferred scope +// (e.g. enclosing namespace). std::pair, bool> -getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM, +getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema, const CodeCompleteOptions &Opts) { auto GetAllAccessibleScopes = [](CodeCompletionContext &CCContext) { SpecifiedScope Info; @@ -559,7 +563,7 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM, if (isa(Context)) Info.AccessibleScopes.push_back(""); // global namespace else if (const auto *NS = dyn_cast(Context)) - Info.AccessibleScopes.push_back(NS->getQualifiedNameAsString() + "::"); + Info.AccessibleScopes.push_back(printNamespaceScope(*Context)); } return Info; }; @@ -568,12 +572,13 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM, // Unqualified completion (e.g. "vec^"). if (!SS) { - // FIXME: Once we can insert namespace qualifiers and use the in-scope - // namespaces for scoring, search in all namespaces. - // FIXME: Capture scopes and use for scoring, for example, - // "using namespace std; namespace foo {v^}" => - // foo::value > std::vector > boost::variant - auto Scopes = GetAllAccessibleScopes(CCContext).scopesForIndexQuery(); + std::vector Scopes; + std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext); + Scopes.push_back(EnclosingScope); + for (auto &S : GetAllAccessibleScopes(CCContext).scopesForIndexQuery()) { + if (EnclosingScope != S) + Scopes.push_back(std::move(S)); + } // Allow AllScopes completion only for there is no explicit scope qualifier. return {Scopes, Opts.AllScopes}; } @@ -592,8 +597,8 @@ getQueryScopes(CodeCompletionContext &CCContext, const SourceManager &SM, Info.AccessibleScopes.push_back(""); // global namespace Info.UnresolvedQualifier = - Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()), SM, - clang::LangOptions()) + Lexer::getSourceText(CharSourceRange::getCharRange((*SS)->getRange()), + CCSema.SourceMgr, clang::LangOptions()) .ltrim("::"); // Sema excludes the trailing "::". if (!Info.UnresolvedQualifier->empty()) @@ -1216,6 +1221,8 @@ class CodeCompleteFlow { bool Incomplete = false; // Would more be available with a higher limit? llvm::Optional Filter; // Initialized once Sema runs. std::vector QueryScopes; // Initialized once Sema runs. + // Initialized once QueryScopes is initialized, if there are scopes. + llvm::Optional ScopeProximity; // Whether to query symbols from any scope. Initialized once Sema runs. bool AllScopes = false; // Include-insertion and proximity scoring rely on the include structure. @@ -1341,8 +1348,10 @@ private: } Filter = FuzzyMatcher( Recorder->CCSema->getPreprocessor().getCodeCompletionFilter()); - std::tie(QueryScopes, AllScopes) = getQueryScopes( - Recorder->CCContext, Recorder->CCSema->getSourceManager(), Opts); + std::tie(QueryScopes, AllScopes) = + getQueryScopes(Recorder->CCContext, *Recorder->CCSema, Opts); + if (!QueryScopes.empty()) + ScopeProximity.emplace(QueryScopes); // Sema provides the needed context to query the index. // FIXME: in addition to querying for extra/overlapping symbols, we should // explicitly request symbols corresponding to Sema results. @@ -1479,7 +1488,8 @@ private: Relevance.Context = Recorder->CCContext.getKind(); Relevance.Query = SymbolRelevanceSignals::CodeComplete; Relevance.FileProximityMatch = FileProximity.getPointer(); - // FIXME: incorparate scope proximity into relevance score. + if (ScopeProximity) + Relevance.ScopeProximityMatch = ScopeProximity.getPointer(); auto &First = Bundle.front(); if (auto FuzzyScore = fuzzyScore(First)) diff --git a/clang-tools-extra/clangd/FileDistance.cpp b/clang-tools-extra/clangd/FileDistance.cpp index 2ae9043a0fcb..ecf8200f7f2e 100644 --- a/clang-tools-extra/clangd/FileDistance.cpp +++ b/clang-tools-extra/clangd/FileDistance.cpp @@ -176,5 +176,46 @@ FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { return *Delegate; } +static std::pair scopeToPath(llvm::StringRef Scope) { + SmallVector Split; + Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + return {"/" + llvm::join(Split, "/"), Split.size()}; +} + +static FileDistance +createScopeFileDistance(llvm::ArrayRef QueryScopes) { + FileDistanceOptions Opts; + Opts.UpCost = 2; + Opts.DownCost = 4; + Opts.AllowDownTraversalFromRoot = false; + + StringMap Sources; + StringRef Preferred = QueryScopes.empty() ? "" : QueryScopes.front().c_str(); + for (StringRef S : QueryScopes) { + SourceParams Param; + // Penalize the global scope even it's preferred, as all projects can define + // symbols in it, and there is pattern where using-namespace is used in + // place of enclosing namespaces (e.g. in implementation files). + if (S == Preferred) + Param.Cost = S == "" ? 2 : 0; + else if (Preferred.startswith(S) && !S.empty()) + continue; // just rely on up-traversals. + else + Param.Cost = S == "" ? 5 : 2; + auto Path = scopeToPath(S); + // The global namespace is not 'near' its children. + Param.MaxUpTraversals = std::max(Path.second - 1, 0); + Sources[Path.first] = std::move(Param); + } + return FileDistance(Sources, Opts); +} + +ScopeDistance::ScopeDistance(llvm::ArrayRef QueryScopes) + : Distance(createScopeFileDistance(QueryScopes)) {} + +unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { + return Distance.distance(scopeToPath(SymbolScope).first); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/FileDistance.h b/clang-tools-extra/clangd/FileDistance.h index 520b78c36c3d..d60afa28ce3e 100644 --- a/clang-tools-extra/clangd/FileDistance.h +++ b/clang-tools-extra/clangd/FileDistance.h @@ -48,6 +48,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" +#include namespace clang { namespace clangd { @@ -111,6 +112,19 @@ private: FileDistanceOptions Opts; }; +/// Support lookups like FileDistance, but the lookup keys are symbol scopes. +/// For example, a scope "na::nb::" is converted to "/na/nb". +class ScopeDistance { +public: + /// QueryScopes[0] is the preferred scope. + ScopeDistance(llvm::ArrayRef QueryScopes); + + unsigned distance(llvm::StringRef SymbolScope); + +private: + FileDistance Distance; +}; + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index 5060a14d2aa1..1fb47d5654ef 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -18,10 +18,16 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceManager.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include #include namespace clang { @@ -268,6 +274,7 @@ void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { // relevant to non-completion requests, we should recognize class members etc. SymbolURI = IndexResult.CanonicalDeclaration.FileURI; + SymbolScope = IndexResult.Scope; IsInstanceMember |= isInstanceMember(IndexResult.SymInfo); } @@ -277,6 +284,7 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { Forbidden = true; if (SemaCCResult.Declaration) { + SemaSaysInScope = true; // We boost things that have decls in the main file. We give a fixed score // for all other declarations in sema as they are already included in the // translation unit. @@ -284,7 +292,7 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { hasUsingDeclInMainFile(SemaCCResult)) ? 1.0 : 0.6; - SemaProximityScore = std::max(DeclProximity, SemaProximityScore); + SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore); IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration); } @@ -295,8 +303,8 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { NeedsFixIts = !SemaCCResult.FixIts.empty(); } -static std::pair proximityScore(llvm::StringRef SymbolURI, - URIDistance *D) { +static std::pair uriProximity(llvm::StringRef SymbolURI, + URIDistance *D) { if (!D || SymbolURI.empty()) return {0.f, 0u}; unsigned Distance = D->distance(SymbolURI); @@ -304,6 +312,16 @@ static std::pair proximityScore(llvm::StringRef SymbolURI, return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; } +static float scopeBoost(ScopeDistance &Distance, + llvm::Optional SymbolScope) { + if (!SymbolScope) + return 1; + auto D = Distance.distance(*SymbolScope); + if (D == FileDistance::Unreachable) + return 0.4; + return std::max(0.5, 2.0 * std::pow(0.6, D / 2.0)); +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -312,10 +330,18 @@ float SymbolRelevanceSignals::evaluate() const { Score *= NameMatch; - // Proximity scores are [0,1] and we translate them into a multiplier in the - // range from 1 to 3. - Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first, - SemaProximityScore); + // File proximity scores are [0,1] and we translate them into a multiplier in + // the range from 1 to 3. + Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first, + SemaFileProximityScore); + + if (ScopeProximityMatch) + // Use a constant scope boost for sema results, as scopes of sema results + // can be tricky (e.g. class/function scope). Set to the max boost as we + // don't load top-level symbols from the preamble and sema results are + // always in the accessible scope. + Score *= + SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. @@ -358,15 +384,25 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) { OS << formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts); OS << formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember); OS << formatv("\tContext: {0}\n", getCompletionKindString(S.Context)); - OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI); - if (S.FileProximityMatch) { - auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch); - OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first, - Score.second); - } - OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore); OS << formatv("\tQuery type: {0}\n", static_cast(S.Query)); OS << formatv("\tScope: {0}\n", static_cast(S.Scope)); + + OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI); + OS << formatv("\tSymbol scope: {0}\n", + S.SymbolScope ? *S.SymbolScope : ""); + + if (S.FileProximityMatch) { + auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch); + OS << formatv("\tIndex URI proximity: {0} (distance={1})\n", Score.first, + Score.second); + } + OS << formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore); + + OS << formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope); + if (S.ScopeProximityMatch) + OS << formatv("\tIndex scope boost: {0}\n", + scopeBoost(*S.ScopeProximityMatch, S.SymbolScope)); + return OS; } diff --git a/clang-tools-extra/clangd/Quality.h b/clang-tools-extra/clangd/Quality.h index fda90560c433..a692917d4823 100644 --- a/clang-tools-extra/clangd/Quality.h +++ b/clang-tools-extra/clangd/Quality.h @@ -28,6 +28,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H +#include "FileDistance.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" @@ -87,13 +88,19 @@ struct SymbolRelevanceSignals { bool NeedsFixIts = false; URIDistance *FileProximityMatch = nullptr; - /// This is used to calculate proximity between the index symbol and the + /// These are used to calculate proximity between the index symbol and the /// query. llvm::StringRef SymbolURI; - /// Proximity between best declaration and the query. [0-1], 1 is closest. /// FIXME: unify with index proximity score - signals should be /// source-independent. - float SemaProximityScore = 0; + /// Proximity between best declaration and the query. [0-1], 1 is closest. + float SemaFileProximityScore = 0; + + // Scope proximity is only considered (both index and sema) when this is set. + ScopeDistance *ScopeProximityMatch = nullptr; + llvm::Optional SymbolScope; + // A symbol from sema should be accessible from the current scope. + bool SemaSaysInScope = false; // An approximate measure of where we expect the symbol to be used. enum AccessibleScope { diff --git a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp index e056699ab553..c0e59e800dbb 100644 --- a/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp +++ b/clang-tools-extra/unittests/clangd/CodeCompleteTests.cpp @@ -1040,6 +1040,28 @@ TEST(CompletionTest, UnqualifiedIdQuery) { UnorderedElementsAre("", "ns::", "std::")))); } +TEST(CompletionTest, EnclosingScopeComesFirst) { + auto Requests = captureIndexRequests(R"cpp( + namespace std {} + using namespace std; + namespace nx { + namespace ns { + namespace { + void f() { + vec^ + } + } + } + } + )cpp"); + + EXPECT_THAT(Requests, + ElementsAre(Field( + &FuzzyFindRequest::Scopes, + UnorderedElementsAre("", "std::", "nx::ns::", "nx::")))); + EXPECT_EQ(Requests[0].Scopes[0], "nx::ns::"); +} + TEST(CompletionTest, ResolvedQualifiedIdQuery) { auto Requests = captureIndexRequests(R"cpp( namespace ns1 {} diff --git a/clang-tools-extra/unittests/clangd/FileDistanceTests.cpp b/clang-tools-extra/unittests/clangd/FileDistanceTests.cpp index bb5d3d2f00ba..6d7d4777930e 100644 --- a/clang-tools-extra/unittests/clangd/FileDistanceTests.cpp +++ b/clang-tools-extra/unittests/clangd/FileDistanceTests.cpp @@ -109,6 +109,16 @@ TEST(FileDistance, DisallowDownTraversalsFromRoot) { EXPECT_EQ(D.distance("/x"), FileDistance::Unreachable); } +TEST(ScopeDistance, Smoke) { + ScopeDistance D({"x::y::z", "x::", "", "a::"}); + EXPECT_EQ(D.distance("x::y::z::"), 0u); + EXPECT_GT(D.distance("x::y::"), D.distance("x::y::z::")); + EXPECT_GT(D.distance("x::"), D.distance("x::y::")); + EXPECT_GT(D.distance("x::y::z::down::"), D.distance("x::y::")); + EXPECT_GT(D.distance(""), D.distance("a::")); + EXPECT_GT(D.distance("x::"), D.distance("a::")); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/unittests/clangd/QualityTests.cpp b/clang-tools-extra/unittests/clangd/QualityTests.cpp index ca13d4b793c0..9dd992c2c101 100644 --- a/clang-tools-extra/unittests/clangd/QualityTests.cpp +++ b/clang-tools-extra/unittests/clangd/QualityTests.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Casting.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -117,13 +118,14 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) { Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "main"), 42)); - EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f) << "Decl in current file"; + EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f) + << "Decl in current file"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header"), 42)); - EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 0.6f) << "Decl from header"; + EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 0.6f) << "Decl from header"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header_main"), 42)); - EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f) + EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f) << "Current file and header"; auto constructShadowDeclCompletionResult = [&](const std::string DeclName) { @@ -146,10 +148,10 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) { Relevance = {}; Relevance.merge(constructShadowDeclCompletionResult("Bar")); - EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f) + EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f) << "Using declaration in main file"; Relevance.merge(constructShadowDeclCompletionResult("FLAGS_FOO")); - EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0f) + EXPECT_FLOAT_EQ(Relevance.SemaFileProximityScore, 1.0f) << "Using declaration in main file"; Relevance = {}; @@ -210,9 +212,21 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) { PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithSemaProximity; - WithSemaProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaFileProximity; + WithSemaFileProximity.SemaFileProximityScore = 0.2f; + EXPECT_GT(WithSemaFileProximity.evaluate(), Default.evaluate()); + + ScopeDistance ScopeProximity({"x::y::"}); + + SymbolRelevanceSignals WithSemaScopeProximity; + WithSemaScopeProximity.ScopeProximityMatch = &ScopeProximity; + WithSemaScopeProximity.SemaSaysInScope = true; + EXPECT_GT(WithSemaScopeProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals WithIndexScopeProximity; + WithIndexScopeProximity.ScopeProximityMatch = &ScopeProximity; + WithIndexScopeProximity.SymbolScope = "x::"; + EXPECT_GT(WithSemaScopeProximity.evaluate(), Default.evaluate()); SymbolRelevanceSignals IndexProximate; IndexProximate.SymbolURI = "unittest:/foo/bar.h"; @@ -242,6 +256,35 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) { EXPECT_EQ(Instance.evaluate(), Default.evaluate()); } +TEST(QualityTests, ScopeProximity) { + SymbolRelevanceSignals Relevance; + ScopeDistance ScopeProximity({"x::y::z::", "x::", "llvm::", ""}); + Relevance.ScopeProximityMatch = &ScopeProximity; + + Relevance.SymbolScope = "other::"; + float NotMatched = Relevance.evaluate(); + + Relevance.SymbolScope = ""; + float Global = Relevance.evaluate(); + EXPECT_GT(Global, NotMatched); + + Relevance.SymbolScope = "llvm::"; + float NonParent = Relevance.evaluate(); + EXPECT_GT(NonParent, Global); + + Relevance.SymbolScope = "x::"; + float GrandParent = Relevance.evaluate(); + EXPECT_GT(GrandParent, Global); + + Relevance.SymbolScope = "x::y::"; + float Parent = Relevance.evaluate(); + EXPECT_GT(Parent, GrandParent); + + Relevance.SymbolScope = "x::y::z::"; + float Enclosing = Relevance.evaluate(); + EXPECT_GT(Enclosing, Parent); +} + TEST(QualityTests, SortText) { EXPECT_LT(sortText(std::numeric_limits::infinity()), sortText(1000.2f));