forked from OSchip/llvm-project
[clangd] Boost code completion results that were named in the last few lines.
Summary: The hope is this will catch a few patterns with repetition: SomeClass* S = ^SomeClass::Create() int getFrobnicator() { return ^frobnicator_; } // discard the factory, it's no longer valid. ^MyFactory.reset(); Without triggering antipatterns too often: return Point(x.first, x.^second); I'm going to gather some data on whether this turns out to be a win overall. Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, jfb, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D61537 llvm-svn: 360030
This commit is contained in:
parent
4c3d579096
commit
9fb22b2c86
|
@ -54,7 +54,9 @@
|
|||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
|
@ -1215,6 +1217,7 @@ class CodeCompleteFlow {
|
|||
llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
|
||||
// Whether to query symbols from any scope. Initialized once Sema runs.
|
||||
bool AllScopes = false;
|
||||
llvm::StringSet<> ContextWords;
|
||||
// Include-insertion and proximity scoring rely on the include structure.
|
||||
// This is available after Sema has run.
|
||||
llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema.
|
||||
|
@ -1237,6 +1240,7 @@ public:
|
|||
trace::Span Tracer("CodeCompleteFlow");
|
||||
HeuristicPrefix =
|
||||
guessCompletionPrefix(SemaCCInput.Contents, SemaCCInput.Offset);
|
||||
populateContextWords(SemaCCInput.Contents);
|
||||
if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
|
||||
assert(!SpecFuzzyFind->Result.valid());
|
||||
SpecReq = speculativeFuzzyFindRequestForCompletion(
|
||||
|
@ -1323,6 +1327,7 @@ public:
|
|||
trace::Span Tracer("CodeCompleteWithoutSema");
|
||||
// Fill in fields normally set by runWithSema()
|
||||
HeuristicPrefix = guessCompletionPrefix(Content, Offset);
|
||||
populateContextWords(Content);
|
||||
CCContextKind = CodeCompletionContext::CCC_Recovery;
|
||||
Filter = FuzzyMatcher(HeuristicPrefix.Name);
|
||||
auto Pos = offsetToPosition(Content, Offset);
|
||||
|
@ -1380,6 +1385,24 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
void populateContextWords(llvm::StringRef Content) {
|
||||
// Take last 3 lines before the completion point.
|
||||
unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
|
||||
RangeBegin = RangeEnd;
|
||||
for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
|
||||
auto PrevNL = Content.rfind('\n', RangeBegin - 1);
|
||||
if (PrevNL == StringRef::npos) {
|
||||
RangeBegin = 0;
|
||||
break;
|
||||
}
|
||||
RangeBegin = PrevNL + 1;
|
||||
}
|
||||
|
||||
ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
|
||||
dlog("Completion context words: {0}",
|
||||
llvm::join(ContextWords.keys(), ", "));
|
||||
}
|
||||
|
||||
// This is called by run() once Sema code completion is done, but before the
|
||||
// Sema data structures are torn down. It does all the real work.
|
||||
CodeCompleteResult runWithSema() {
|
||||
|
@ -1563,12 +1586,14 @@ private:
|
|||
SymbolQualitySignals Quality;
|
||||
SymbolRelevanceSignals Relevance;
|
||||
Relevance.Context = CCContextKind;
|
||||
Relevance.Name = Bundle.front().Name;
|
||||
Relevance.Query = SymbolRelevanceSignals::CodeComplete;
|
||||
Relevance.FileProximityMatch = FileProximity.getPointer();
|
||||
if (ScopeProximity)
|
||||
Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
|
||||
if (PreferredType)
|
||||
Relevance.HadContextType = true;
|
||||
Relevance.ContextWords = &ContextWords;
|
||||
|
||||
auto &First = Bundle.front();
|
||||
if (auto FuzzyScore = fuzzyScore(First))
|
||||
|
|
|
@ -100,6 +100,7 @@ getWorkspaceSymbols(llvm::StringRef Query, int Limit,
|
|||
SymbolQualitySignals Quality;
|
||||
Quality.merge(Sym);
|
||||
SymbolRelevanceSignals Relevance;
|
||||
Relevance.Name = Sym.Name;
|
||||
Relevance.Query = SymbolRelevanceSignals::Generic;
|
||||
if (auto NameMatch = Filter.match(Sym.Name))
|
||||
Relevance.NameMatch = *NameMatch;
|
||||
|
|
|
@ -336,6 +336,15 @@ static float scopeBoost(ScopeDistance &Distance,
|
|||
return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0));
|
||||
}
|
||||
|
||||
static llvm::Optional<llvm::StringRef>
|
||||
wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
|
||||
if (ContextWords)
|
||||
for (const auto& Word : ContextWords->keys())
|
||||
if (Name.contains_lower(Word))
|
||||
return Word;
|
||||
return llvm::None;
|
||||
}
|
||||
|
||||
float SymbolRelevanceSignals::evaluate() const {
|
||||
float Score = 1;
|
||||
|
||||
|
@ -357,6 +366,9 @@ float SymbolRelevanceSignals::evaluate() const {
|
|||
Score *=
|
||||
SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
|
||||
|
||||
if (wordMatching(Name, ContextWords))
|
||||
Score *= 1.5;
|
||||
|
||||
// Symbols like local variables may only be referenced within their scope.
|
||||
// Conversely if we're in that scope, it's likely we'll reference them.
|
||||
if (Query == CodeComplete) {
|
||||
|
@ -413,7 +425,12 @@ float SymbolRelevanceSignals::evaluate() const {
|
|||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
||||
const SymbolRelevanceSignals &S) {
|
||||
OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate());
|
||||
OS << llvm::formatv("\tName: {0}\n", S.Name);
|
||||
OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
|
||||
if (S.ContextWords)
|
||||
OS << llvm::formatv(
|
||||
"\tMatching context word: {0}\n",
|
||||
wordMatching(S.Name, S.ContextWords).getValueOr("<none>"));
|
||||
OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
|
||||
OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
|
||||
OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
|
||||
|
|
|
@ -32,13 +32,14 @@
|
|||
#include "clang/Sema/CodeCompleteConsumer.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
class raw_ostream;
|
||||
}
|
||||
} // namespace llvm
|
||||
|
||||
namespace clang {
|
||||
class CodeCompletionResult;
|
||||
|
@ -84,8 +85,12 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
|||
|
||||
/// Attributes of a symbol-query pair that affect how much we like it.
|
||||
struct SymbolRelevanceSignals {
|
||||
/// The name of the symbol (for ContextWords). Must be explicitly assigned.
|
||||
llvm::StringRef Name;
|
||||
/// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
|
||||
float NameMatch = 1;
|
||||
/// Lowercase words relevant to the context (e.g. near the completion point).
|
||||
llvm::StringSet<>* ContextWords = nullptr;
|
||||
bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
|
||||
/// Whether fixits needs to be applied for that completion or not.
|
||||
bool NeedsFixIts = false;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "SourceCode.h"
|
||||
|
||||
#include "Context.h"
|
||||
#include "FuzzyMatch.h"
|
||||
#include "Logger.h"
|
||||
#include "Protocol.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
|
@ -18,6 +19,7 @@
|
|||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
@ -602,5 +604,43 @@ std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
|
|||
return Found;
|
||||
}
|
||||
|
||||
llvm::StringSet<> collectWords(llvm::StringRef Content) {
|
||||
// We assume short words are not significant.
|
||||
// We may want to consider other stopwords, e.g. language keywords.
|
||||
// (A very naive implementation showed no benefit, but lexing might do better)
|
||||
static constexpr int MinWordLength = 4;
|
||||
|
||||
std::vector<CharRole> Roles(Content.size());
|
||||
calculateRoles(Content, Roles);
|
||||
|
||||
llvm::StringSet<> Result;
|
||||
llvm::SmallString<256> Word;
|
||||
auto Flush = [&] {
|
||||
if (Word.size() >= MinWordLength) {
|
||||
for (char &C : Word)
|
||||
C = llvm::toLower(C);
|
||||
Result.insert(Word);
|
||||
}
|
||||
Word.clear();
|
||||
};
|
||||
for (unsigned I = 0; I < Content.size(); ++I) {
|
||||
switch (Roles[I]) {
|
||||
case Head:
|
||||
Flush();
|
||||
LLVM_FALLTHROUGH;
|
||||
case Tail:
|
||||
Word.push_back(Content[I]);
|
||||
break;
|
||||
case Unknown:
|
||||
case Separator:
|
||||
Flush();
|
||||
break;
|
||||
}
|
||||
}
|
||||
Flush();
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "clang/Basic/SourceManager.h"
|
||||
#include "clang/Format/Format.h"
|
||||
#include "clang/Tooling/Core/Replacement.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/SHA1.h"
|
||||
|
||||
|
@ -165,6 +166,13 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
|
|||
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
|
||||
const format::FormatStyle &Style);
|
||||
|
||||
/// Collects words from the source code.
|
||||
/// Unlike collectIdentifiers:
|
||||
/// - also finds text in comments:
|
||||
/// - splits text into words
|
||||
/// - drops stopwords like "get" and "for"
|
||||
llvm::StringSet<> collectWords(llvm::StringRef Content);
|
||||
|
||||
/// Heuristically determine namespaces visible at a point, without parsing Code.
|
||||
/// This considers using-directives and enclosing namespace-declarations that
|
||||
/// are visible (and not obfuscated) in the file itself (not headers).
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
# CHECK-NEXT: "insertTextFormat": 1,
|
||||
# CHECK-NEXT: "kind": 5,
|
||||
# CHECK-NEXT: "label": " size",
|
||||
# CHECK-NEXT: "sortText": "3eacccccsize",
|
||||
# CHECK-NEXT: "sortText": "{{.*}}size",
|
||||
# CHECK-NEXT: "textEdit": {
|
||||
# CHECK-NEXT: "newText": "size",
|
||||
# CHECK-NEXT: "range": {
|
||||
|
@ -45,7 +45,7 @@
|
|||
# CHECK-NEXT: "insertTextFormat": 1,
|
||||
# CHECK-NEXT: "kind": 10,
|
||||
# CHECK-NEXT: "label": " default_capacity",
|
||||
# CHECK-NEXT: "sortText": "3fd70a3ddefault_capacity",
|
||||
# CHECK-NEXT: "sortText": "{{.*}}default_capacity",
|
||||
# CHECK-NEXT: "textEdit": {
|
||||
# CHECK-NEXT: "newText": "default_capacity",
|
||||
# CHECK-NEXT: "range": {
|
||||
|
@ -84,7 +84,7 @@
|
|||
# CHECK-NEXT: "insertTextFormat": 1,
|
||||
# CHECK-NEXT: "kind": 6,
|
||||
# CHECK-NEXT: "label": " ns_member",
|
||||
# CHECK-NEXT: "sortText": "3f2cccccns_member",
|
||||
# CHECK-NEXT: "sortText": "{{.*}}ns_member",
|
||||
# CHECK-NEXT: "textEdit": {
|
||||
# CHECK-NEXT: "newText": "ns_member",
|
||||
# CHECK-NEXT: "range": {
|
||||
|
|
|
@ -174,6 +174,7 @@ struct ClassWithMembers {
|
|||
int BBB();
|
||||
int CCC();
|
||||
};
|
||||
|
||||
int main() { ClassWithMembers().^ }
|
||||
)cpp",
|
||||
/*IndexSymbols=*/{}, Opts);
|
||||
|
@ -324,7 +325,7 @@ TEST(CompletionTest, CompletionOptions) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(CompletionTest, Priorities) {
|
||||
TEST(CompletionTest, Accessible) {
|
||||
auto Internal = completions(R"cpp(
|
||||
class Foo {
|
||||
public: void pub();
|
||||
|
@ -334,7 +335,7 @@ TEST(CompletionTest, Priorities) {
|
|||
void Foo::pub() { this->^ }
|
||||
)cpp");
|
||||
EXPECT_THAT(Internal.Completions,
|
||||
HasSubsequence(Named("priv"), Named("prot"), Named("pub")));
|
||||
AllOf(Has("priv"), Has("prot"), Has("pub")));
|
||||
|
||||
auto External = completions(R"cpp(
|
||||
class Foo {
|
||||
|
@ -502,6 +503,21 @@ TEST(CompletionTest, ReferencesAffectRanking) {
|
|||
HasSubsequence(Named("absl"), Named("absb")));
|
||||
}
|
||||
|
||||
TEST(CompletionTest, ContextWords) {
|
||||
auto Results = completions(R"cpp(
|
||||
enum class Color { RED, YELLOW, BLUE };
|
||||
|
||||
// (blank lines so the definition above isn't "context")
|
||||
|
||||
// "It was a yellow car," he said. "Big yellow car, new."
|
||||
auto Finish = Color::^
|
||||
)cpp");
|
||||
// Yellow would normally sort last (alphabetic).
|
||||
// But the recent mention shuold bump it up.
|
||||
ASSERT_THAT(Results.Completions,
|
||||
HasSubsequence(Named("YELLOW"), Named("BLUE")));
|
||||
}
|
||||
|
||||
TEST(CompletionTest, GlobalQualified) {
|
||||
auto Results = completions(
|
||||
R"cpp(
|
||||
|
|
|
@ -292,6 +292,16 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) {
|
|||
SymbolRelevanceSignals InBaseClass;
|
||||
InBaseClass.InBaseClass = true;
|
||||
EXPECT_LT(InBaseClass.evaluate(), Default.evaluate());
|
||||
|
||||
llvm::StringSet<> Words = {"one", "two", "three"};
|
||||
SymbolRelevanceSignals WithoutMatchingWord;
|
||||
WithoutMatchingWord.ContextWords = &Words;
|
||||
WithoutMatchingWord.Name = "four";
|
||||
EXPECT_EQ(WithoutMatchingWord.evaluate(), Default.evaluate());
|
||||
SymbolRelevanceSignals WithMatchingWord;
|
||||
WithMatchingWord.ContextWords = &Words;
|
||||
WithMatchingWord.Name = "TheTwoTowers";
|
||||
EXPECT_GT(WithMatchingWord.evaluate(), Default.evaluate());
|
||||
}
|
||||
|
||||
TEST(QualityTests, ScopeProximity) {
|
||||
|
|
|
@ -22,6 +22,7 @@ namespace {
|
|||
|
||||
using llvm::Failed;
|
||||
using llvm::HasValue;
|
||||
using ::testing::UnorderedElementsAreArray;
|
||||
|
||||
MATCHER_P2(Pos, Line, Col, "") {
|
||||
return arg.line == int(Line) && arg.character == int(Col);
|
||||
|
@ -322,6 +323,19 @@ TEST(SourceCodeTests, CollectIdentifiers) {
|
|||
EXPECT_EQ(IDs["foo"], 2u);
|
||||
}
|
||||
|
||||
TEST(SourceCodeTests, CollectWords) {
|
||||
auto Words = collectWords(R"cpp(
|
||||
#define FIZZ_BUZZ
|
||||
// this is a comment
|
||||
std::string getSomeText() { return "magic word"; }
|
||||
)cpp");
|
||||
std::set<std::string> ActualWords(Words.keys().begin(), Words.keys().end());
|
||||
std::set<std::string> ExpectedWords = {"define", "fizz", "buzz", "this",
|
||||
"comment", "string", "some", "text",
|
||||
"return", "magic", "word"};
|
||||
EXPECT_EQ(ActualWords, ExpectedWords);
|
||||
}
|
||||
|
||||
TEST(SourceCodeTests, VisibleNamespaces) {
|
||||
std::vector<std::pair<const char *, std::vector<std::string>>> Cases = {
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue