[clangd] Implement getBeginning for overloaded operators.

Summary: This will fix some bugs where navigation doesn't work on cases like `std::cout <^< "hello"`. Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D67695 llvm-svn: 373323
2019-10-01 11:03:56 +00:00 · 2019-10-01 11:03:56 +00:00 · 9f2bf666bc
parent 339b1b5bb0
commit 9f2bf666bc
4 changed files with 113 additions and 20 deletions
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@ -237,6 +237,45 @@ llvm::Optional<Range> getTokenRange(const SourceManager &SM,
  return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
 }

+namespace {
+
+enum TokenFlavor { Identifier, Operator, Whitespace, Other };
+
+bool isOverloadedOperator(const Token &Tok) {
+  switch (Tok.getKind()) {
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly)     \
+  case tok::Token:
+#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly)
+#include "clang/Basic/OperatorKinds.def" // emits the case labels defined above
+    return true; // Tok's kind matched one of the generated case labels.
+
+  default:
+    break;
+  }
+  return false; // OVERLOADED_OPERATOR_MULTI entries expand to no case label.
+}
+
+TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  Token Tok;
+  Tok.setKind(tok::NUM_TOKENS); // sentinel, checked after the lexer call
+  if (Lexer::getRawToken(Loc, Tok, SM, LangOpts,
+                         /*IgnoreWhiteSpace*/ false))
+    return Other; // getRawToken returned true
+
+  // If the call returned false but Tok still holds the NUM_TOKENS sentinel,
+  // Tok was never filled in; this code treats that as whitespace at Loc.
+  if (Tok.is(tok::TokenKind::NUM_TOKENS))
+    return Whitespace; // NOTE(review): confirm this path is reachable.
+  if (Tok.is(tok::TokenKind::raw_identifier))
+    return Identifier;
+  if (isOverloadedOperator(Tok))
+    return Operator;
+  return Other; // lexed, but neither identifier nor overloaded operator
+}
+
+} // namespace
+
 SourceLocation getBeginningOfIdentifier(const Position &Pos,
                                        const SourceManager &SM,
                                        const LangOptions &LangOpts) {
@ -247,27 +286,57 @@ SourceLocation getBeginningOfIdentifier(const Position &Pos,
    return SourceLocation();
  }

-  // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
-  // if the cursor is at the end of the identifier.
-  // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
-  //  1) at the beginning of an identifier, we'll be looking at something
-  //  that isn't an identifier.
-  //  2) at the middle or end of an identifier, we get the identifier.
-  //  3) anywhere outside an identifier, we'll get some non-identifier thing.
-  // We can't actually distinguish cases 1 and 3, but returning the original
-  // location is correct for both!
+  // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
+  // thing if the cursor is at the end of the token (identifier or operator).
+  // The cases are:
+  //   1) at the beginning of the token
+  //   2) at the middle of the token
+  //   3) at the end of the token
+  //   4) anywhere outside the identifier or operator
+  // To distinguish all cases, we lex at both
+  // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc). For
+  // cases 1 and 4, we just return the original location.
  SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset);
-  if (*Offset == 0) // Case 1 or 3.
+  if (*Offset == 0) // Case 1 or 4.
    return InputLoc;
  SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1);
+  SourceLocation BeforeTokBeginning =
+      Lexer::GetBeginningOfToken(Before, SM, LangOpts);
+  TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts);

-  Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts);
-  Token Tok;
-  if (Before.isValid() &&
-      !Lexer::getRawToken(Before, Tok, SM, LangOpts, false) &&
-      Tok.is(tok::raw_identifier))
-    return Before; // Case 2.
-  return InputLoc; // Case 1 or 3.
+  SourceLocation CurrentTokBeginning =
+      Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts);
+  TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts);
+
+  // At the middle of the token.
+  if (BeforeTokBeginning == CurrentTokBeginning) {
+    // For an interesting token (identifier/operator), return its beginning.
+    if (CurrentKind == Identifier || CurrentKind == Operator)
+      return CurrentTokBeginning;
+    // otherwise, we return the original loc.
+    return InputLoc;
+  }
+
+  // Whitespace is not interesting.
+  if (BeforeKind == Whitespace)
+    return CurrentTokBeginning;
+  if (CurrentKind == Whitespace)
+    return BeforeTokBeginning;
+
+  // The cursor is at a token boundary, e.g. "Before^Current"; we prefer
+  // identifiers to other tokens.
+  if (CurrentKind == Identifier)
+    return CurrentTokBeginning;
+  if (BeforeKind == Identifier)
+    return BeforeTokBeginning;
+  // Then prefer overloaded operators to other tokens.
+  if (CurrentKind == Operator)
+    return CurrentTokBeginning;
+  if (BeforeKind == Operator)
+    return BeforeTokBeginning;
+
+  // Non-interesting case, we just return the original location.
+  return InputLoc;
 }

 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@ -79,7 +79,7 @@ llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
                                                        Position P);

 /// Get the beginning SourceLocation at a specified \p Pos in the main file.
-/// May be invalid if Pos is, or if there's no identifier.
+/// May be invalid if Pos is, or if there's no identifier or operator.
 /// The returned position is in the main file, callers may prefer to
 /// obtain the macro expansion location.
 SourceLocation getBeginningOfIdentifier(const Position &Pos,
--- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
@ -319,14 +319,29 @@ struct Bar { int func(); };
 Bar* bar;
  )cpp";
  // First ^ is the expected beginning, last is the search position.
-  for (std::string Text : std::vector<std::string>{
+  for (const std::string &Text : std::vector<std::string>{
           "int ^f^oo();", // inside identifier
           "int ^foo();",  // beginning of identifier
           "int ^foo^();", // end of identifier
           "int foo(^);",  // non-identifier
           "^int foo();",  // beginning of file (can't back up)
           "int ^f0^0();", // after a digit (lexing at N-1 is wrong)
-           "int ^λλ^λ();", // UTF-8 handled properly when backing up
+           "/^/ comments", // non-interesting token
+           "void f(int abc) { abc ^ ++; }",    // whitespace
+           "void f(int abc) { ^abc^++; }",     // range of identifier
+           "void f(int abc) { ++^abc^; }",     // range of identifier
+           "void f(int abc) { ++^abc; }",      // range of identifier
+           "void f(int abc) { ^+^+abc; }",     // range of operator
+           "void f(int abc) { ^abc^ ++; }",    // range of identifier
+           "void f(int abc) { abc ^++^; }",    // range of operator
+           "void f(int abc) { ^++^ abc; }",    // range of operator
+           "void f(int abc) { ++ ^abc^; }",    // range of identifier
+           "void f(int abc) { ^++^/**/abc; }", // range of operator
+           "void f(int abc) { ++/**/^abc; }",  // range of identifier
+           "void f(int abc) { ^abc^/**/++; }", // range of identifier
+           "void f(int abc) { abc/**/^++; }",  // range of operator
+           "void f() {^ }", // outside of identifier and operator
+           "int ^λλ^λ();",  // UTF-8 handled properly when backing up

           // identifier in macro arg
           "MACRO(bar->^func())",  // beginning of identifier
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@ -441,6 +441,15 @@ TEST(LocateSymbol, All) {
          auto x = m^akeX();
        }
      )cpp",
+
+      R"cpp(
+        struct X {
+          X& [[operator]]++() {}
+        };
+        void foo(X& x) {
+          +^+x;
+        }
+      )cpp",
  };
  for (const char *Test : Tests) {
    Annotations T(Test);