[clangd] Support textDocument/semanticTokens/edits

Summary:
This returns incremental highlights as a set of edits against the
previous highlights.

Server-side, we compute the full set of highlights; this just saves
wire-format size.

For now, the diff used is trivial: everything from the first change to
the last change is sent as a single edit.

The wire format is grungy: the replacement offset/length refer to
positions in the encoded integer array rather than in the logical list of
tokens. We use token-oriented structs internally and translate to the LSP
form when serializing. This departs from LSP (but is consistent with how
semanticTokens is handled today).
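
For illustration only (a standalone sketch, not the clangd code; the
Tok/Edit/diff names here are made up), the snippet below shows the trivial
first-to-last-change diff and the x5 translation from token counts to the
encoded-array start/deleteCount that LSP expects, using the same
"int x = 2;" -> append "int y = 3;" scenario as the lit test in this patch:

  // Standalone illustration only -- not clangd's implementation.
  #include <cstdio>
  #include <vector>

  // Five integers per token, mirroring the LSP encoding.
  struct Tok { unsigned deltaLine, deltaStart, length, type, modifiers; };
  bool operator==(const Tok &L, const Tok &R) {
    return L.deltaLine == R.deltaLine && L.deltaStart == R.deltaStart &&
           L.length == R.length && L.type == R.type &&
           L.modifiers == R.modifiers;
  }

  // Token-oriented edit, in the spirit of the patch's SemanticTokensEdit.
  struct Edit { unsigned startToken, deleteTokens; std::vector<Tok> tokens; };

  // Trivial diff: strip the common prefix and suffix, replace the middle.
  Edit diff(const std::vector<Tok> &Old, const std::vector<Tok> &New) {
    unsigned Prefix = 0;
    while (Prefix < Old.size() && Prefix < New.size() &&
           Old[Prefix] == New[Prefix])
      ++Prefix;
    unsigned Suffix = 0;
    while (Suffix < Old.size() - Prefix && Suffix < New.size() - Prefix &&
           Old[Old.size() - 1 - Suffix] == New[New.size() - 1 - Suffix])
      ++Suffix;
    return {Prefix, unsigned(Old.size() - Prefix - Suffix),
            {New.begin() + Prefix, New.end() - Suffix}};
  }

  int main() {
    std::vector<Tok> Old = {{0, 4, 1, 0, 0}};                  // int x = 2;
    std::vector<Tok> New = {{0, 4, 1, 0, 0}, {1, 4, 1, 0, 0}}; // + int y = 3;
    Edit E = diff(Old, New);
    // Each token serializes to 5 integers, so wire offsets scale by 5.
    std::printf("start=%u deleteCount=%u data=%zu ints\n", 5 * E.startToken,
                5 * E.deleteTokens, 5 * E.tokens.size());
    // Prints: start=5 deleteCount=0 data=5 ints
  }

clangd's actual translation happens in toJSON(SemanticTokensEdit), and the
diff in diffTokens(), both added by this patch.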

Tested in VSCode insiders (with a patched client to enable experimental
features).

Reviewers: hokein

Subscribers: ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77225

Sam McCall, 2020-04-01 16:21:44 +02:00
commit 9e3063eace (parent fc830106e1)
9 changed files with 298 additions and 42 deletions

@@ -586,7 +586,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
}},
{"semanticTokensProvider",
llvm::json::Object{
{"documentProvider", true},
{"documentProvider", llvm::json::Object{{"edits", true}}},
{"rangeProvider", false},
{"legend",
llvm::json::Object{{"tokenTypes", semanticTokenTypes()},
@@ -833,6 +833,10 @@ void ClangdLSPServer::onDocumentDidClose(
std::lock_guard<std::mutex> HLock(HighlightingsMutex);
FileToHighlightings.erase(File);
}
{
std::lock_guard<std::mutex> HLock(SemanticTokensMutex);
LastSemanticTokens.erase(File);
}
// clangd will not send updates for this file anymore, so we empty out the
// list of diagnostics shown on the client (e.g. in the "Problems" pane of
// VSCode). Note that this cannot race with actual diagnostics responses
@@ -1243,16 +1247,71 @@ void ClangdLSPServer::onDocumentLink(
});
}
// Increment a numeric string: "" -> 1 -> 2 -> ... -> 9 -> 10 -> 11 ...
static void increment(std::string &S) {
for (char &C : llvm::reverse(S)) {
if (C != '9') {
++C;
return;
}
C = '0';
}
S.insert(S.begin(), '1');
}
void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params,
Callback<SemanticTokens> CB) {
Server->semanticHighlights(
Params.textDocument.uri.file(),
[CB(std::move(CB))](
llvm::Expected<std::vector<HighlightingToken>> Toks) mutable {
if (!Toks)
return CB(Toks.takeError());
[this, File(Params.textDocument.uri.file().str()), CB(std::move(CB))](
llvm::Expected<std::vector<HighlightingToken>> HT) mutable {
if (!HT)
return CB(HT.takeError());
SemanticTokens Result;
Result.data = toSemanticTokens(*Toks);
Result.tokens = toSemanticTokens(*HT);
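// Cache the tokens and bump the resultId so that a later
// textDocument/semanticTokens/edits request can compute a delta against them.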
{
std::lock_guard<std::mutex> Lock(SemanticTokensMutex);
auto& Last = LastSemanticTokens[File];
Last.tokens = Result.tokens;
increment(Last.resultId);
Result.resultId = Last.resultId;
}
CB(std::move(Result));
});
}
void ClangdLSPServer::onSemanticTokensEdits(
const SemanticTokensEditsParams &Params,
Callback<SemanticTokensOrEdits> CB) {
Server->semanticHighlights(
Params.textDocument.uri.file(),
[this, PrevResultID(Params.previousResultId),
File(Params.textDocument.uri.file().str()), CB(std::move(CB))](
llvm::Expected<std::vector<HighlightingToken>> HT) mutable {
if (!HT)
return CB(HT.takeError());
std::vector<SemanticToken> Toks = toSemanticTokens(*HT);
SemanticTokensOrEdits Result;
{
std::lock_guard<std::mutex> Lock(SemanticTokensMutex);
auto& Last = LastSemanticTokens[File];
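// If the client's baseline matches our cached result, send just a delta;
// otherwise fall back to the full token list.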
if (PrevResultID == Last.resultId) {
Result.edits = diffTokens(Last.tokens, Toks);
} else {
vlog("semanticTokens/edits: wanted edits vs {0} but last result "
"had ID {1}. Returning full token list.",
PrevResultID, Last.resultId);
Result.tokens = Toks;
}
Last.tokens = std::move(Toks);
increment(Last.resultId);
Result.resultId = Last.resultId;
}
CB(std::move(Result));
});
}
@@ -1305,6 +1364,7 @@ ClangdLSPServer::ClangdLSPServer(
MsgHandler->bind("textDocument/selectionRange", &ClangdLSPServer::onSelectionRange);
MsgHandler->bind("textDocument/documentLink", &ClangdLSPServer::onDocumentLink);
MsgHandler->bind("textDocument/semanticTokens", &ClangdLSPServer::onSemanticTokens);
MsgHandler->bind("textDocument/semanticTokens/edits", &ClangdLSPServer::onSemanticTokensEdits);
// clang-format on
}

@@ -120,6 +120,8 @@ private:
void onDocumentLink(const DocumentLinkParams &,
Callback<std::vector<DocumentLink>>);
void onSemanticTokens(const SemanticTokensParams &, Callback<SemanticTokens>);
void onSemanticTokensEdits(const SemanticTokensEditsParams &,
Callback<SemanticTokensOrEdits>);
std::vector<Fix> getFixes(StringRef File, const clangd::Diagnostic &D);
@@ -162,6 +164,9 @@ private:
llvm::StringMap<DiagnosticToReplacementMap> FixItsMap;
std::mutex HighlightingsMutex;
llvm::StringMap<std::vector<HighlightingToken>> FileToHighlightings;
// Last semantic-tokens response, for incremental requests.
std::mutex SemanticTokensMutex;
llvm::StringMap<SemanticTokens> LastSemanticTokens;
// Most code should not deal with Transport directly.
// MessageHandler deals with incoming messages, use call() etc for outgoing.

@@ -986,21 +986,45 @@ llvm::json::Value toJSON(const FileStatus &FStatus) {
};
}
void SemanticToken::encode(std::vector<unsigned int> &Out) const {
Out.push_back(deltaLine);
Out.push_back(deltaStart);
Out.push_back(length);
Out.push_back(tokenType);
Out.push_back(tokenModifiers);
constexpr unsigned SemanticTokenEncodingSize = 5;
static llvm::json::Value encodeTokens(llvm::ArrayRef<SemanticToken> Toks) {
llvm::json::Array Result;
for (const auto &Tok : Toks) {
Result.push_back(Tok.deltaLine);
Result.push_back(Tok.deltaStart);
Result.push_back(Tok.length);
Result.push_back(Tok.tokenType);
Result.push_back(Tok.tokenModifiers);
}
assert(Result.size() == SemanticTokenEncodingSize * Toks.size());
return Result;
}
bool operator==(const SemanticToken &L, const SemanticToken &R) {
return std::tie(L.deltaLine, L.deltaStart, L.length, L.tokenType,
L.tokenModifiers) == std::tie(R.deltaLine, R.deltaStart,
R.length, R.tokenType,
R.tokenModifiers);
}
llvm::json::Value toJSON(const SemanticTokens &Tokens) {
std::vector<unsigned> Data;
for (const auto &Tok : Tokens.data)
Tok.encode(Data);
llvm::json::Object Result{{"data", std::move(Data)}};
if (Tokens.resultId)
Result["resultId"] = *Tokens.resultId;
return llvm::json::Object{{"resultId", Tokens.resultId},
{"data", encodeTokens(Tokens.tokens)}};
}
llvm::json::Value toJSON(const SemanticTokensEdit &Edit) {
return llvm::json::Object{
{"start", SemanticTokenEncodingSize * Edit.startToken},
{"deleteCount", SemanticTokenEncodingSize * Edit.deleteTokens},
{"data", encodeTokens(Edit.tokens)}};
}
llvm::json::Value toJSON(const SemanticTokensOrEdits &TE) {
llvm::json::Object Result{{"resultId", TE.resultId}};
if (TE.edits)
Result["edits"] = *TE.edits;
if (TE.tokens)
Result["data"] = encodeTokens(*TE.tokens);
return Result;
}
@@ -1009,6 +1033,12 @@ bool fromJSON(const llvm::json::Value &Params, SemanticTokensParams &R) {
return O && O.map("textDocument", R.textDocument);
}
bool fromJSON(const llvm::json::Value &Params, SemanticTokensEditsParams &R) {
llvm::json::ObjectMapper O(Params);
return O && O.map("textDocument", R.textDocument) &&
O.map("previousResultId", R.previousResultId);
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &O,
const DocumentHighlight &V) {
O << V.range;

@@ -1362,9 +1362,8 @@ struct SemanticToken {
unsigned tokenType = 0;
/// each set bit will be looked up in `SemanticTokensLegend.tokenModifiers`
unsigned tokenModifiers = 0;
void encode(std::vector<unsigned> &Out) const;
};
bool operator==(const SemanticToken &, const SemanticToken &);
/// A versioned set of tokens.
struct SemanticTokens {
@@ -1372,12 +1371,12 @@ struct SemanticTokens {
// the client will include the result id in the next semantic token request.
// A server can then instead of computing all semantic tokens again simply
// send a delta.
llvm::Optional<std::string> resultId;
std::string resultId;
/// The actual tokens. For a detailed description about how the data is
/// structured pls see
/// https://github.com/microsoft/vscode-extension-samples/blob/5ae1f7787122812dcc84e37427ca90af5ee09f14/semantic-tokens-sample/vscode.proposed.d.ts#L71
std::vector<SemanticToken> data;
std::vector<SemanticToken> tokens;
};
llvm::json::Value toJSON(const SemanticTokens &);
@@ -1387,6 +1386,37 @@ struct SemanticTokensParams {
};
bool fromJSON(const llvm::json::Value &, SemanticTokensParams &);
/// Requests the changes in semantic tokens since a previous response.
struct SemanticTokensEditsParams {
/// The text document.
TextDocumentIdentifier textDocument;
/// The previous result id.
std::string previousResultId;
};
bool fromJSON(const llvm::json::Value &Params, SemanticTokensEditsParams &R);
/// Describes a replacement of a contiguous range of semanticTokens.
struct SemanticTokensEdit {
// LSP specifies `start` and `deleteCount` which are relative to the array
// encoding of the previous tokens.
// We use token counts instead, and translate when serializing this struct.
unsigned startToken = 0;
unsigned deleteTokens = 0;
std::vector<SemanticToken> tokens;
};
llvm::json::Value toJSON(const SemanticTokensEdit &);
/// This models LSP SemanticTokensEdits | SemanticTokens, which is the result of
/// textDocument/semanticTokens/edits.
struct SemanticTokensOrEdits {
std::string resultId;
/// Set if we computed edits relative to a previous set of tokens.
llvm::Optional<std::vector<SemanticTokensEdit>> edits;
/// Set if we computed a fresh set of tokens.
llvm::Optional<std::vector<SemanticToken>> tokens;
};
llvm::json::Value toJSON(const SemanticTokensOrEdits &);
/// Represents a semantic highlighting information that has to be applied on a
/// specific line of the text document.
struct TheiaSemanticHighlightingInformation {

@@ -600,5 +600,31 @@ llvm::StringRef toTextMateScope(HighlightingKind Kind) {
llvm_unreachable("unhandled HighlightingKind");
}
std::vector<SemanticTokensEdit>
diffTokens(llvm::ArrayRef<SemanticToken> Old,
llvm::ArrayRef<SemanticToken> New) {
// For now, just replace everything from the first change to the last change.
// FIXME: use a real diff instead; this is bad with include-insertion.
unsigned Offset = 0;
while (!Old.empty() && !New.empty() && Old.front() == New.front()) {
++Offset;
Old = Old.drop_front();
New = New.drop_front();
}
while (!Old.empty() && !New.empty() && Old.back() == New.back()) {
Old = Old.drop_back();
New = New.drop_back();
}
if (Old.empty() && New.empty())
return {};
SemanticTokensEdit Edit;
Edit.startToken = Offset;
Edit.deleteTokens = Old.size();
Edit.tokens = New;
return {std::move(Edit)};
}
} // namespace clangd
} // namespace clang

@@ -90,6 +90,8 @@ std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST);
std::vector<SemanticToken> toSemanticTokens(llvm::ArrayRef<HighlightingToken>);
llvm::StringRef toSemanticTokenType(HighlightingKind Kind);
std::vector<SemanticTokensEdit> diffTokens(llvm::ArrayRef<SemanticToken> Before,
llvm::ArrayRef<SemanticToken> After);
/// Converts a HighlightingKind to a corresponding TextMate scope
/// (https://manual.macromates.com/en/language_grammars).

@@ -39,7 +39,9 @@
# CHECK-NEXT: "renameProvider": true,
# CHECK-NEXT: "selectionRangeProvider": true,
# CHECK-NEXT: "semanticTokensProvider": {
# CHECK-NEXT: "documentProvider": true,
# CHECK-NEXT: "documentProvider": {
# CHECK-NEXT: "edits": true
# CHECK-NEXT: },
# CHECK-NEXT: "legend": {
# CHECK-NEXT: "tokenModifiers": [],
# CHECK-NEXT: "tokenTypes": [

@@ -6,8 +6,13 @@
"semanticTokens":{"dynamicRegistration":true}
}}}}
---
{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///foo.cpp","languageId":"cpp","text":"int x = 2;"}}}
{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{
"uri": "test:///foo.cpp",
"languageId": "cpp",
"text": "int x = 2;"
}}}
---
# Non-incremental token request.
{"jsonrpc":"2.0","id":1,"method":"textDocument/semanticTokens","params":{"textDocument":{"uri":"test:///foo.cpp"}}}
# CHECK: "id": 1,
# CHECK-NEXT: "jsonrpc": "2.0",
@@ -19,9 +24,64 @@
# CHECK-NEXT: 1,
# CHECK-NEXT: 0,
# CHECK-NEXT: 0
# CHECK-NEXT: ]
# CHECK-NEXT: ],
# CHECK-NEXT: "resultId": "1"
# CHECK-NEXT: }
---
{"jsonrpc":"2.0","id":2,"method":"shutdown"}
{"jsonrpc":"2.0","method":"textDocument/didChange","params":{
"textDocument": {"uri":"test:///foo.cpp","version":2},
"contentChanges":[{"text":"int x = 2;\nint y = 3;"}]
}}
---
# Incremental token request, based on previous response.
{"jsonrpc":"2.0","id":2,"method":"textDocument/semanticTokens/edits","params":{
"textDocument": {"uri":"test:///foo.cpp"},
"previousResultId": "1"
}}
# CHECK: "id": 2,
# CHECK-NEXT: "jsonrpc": "2.0",
# CHECK-NEXT: "result": {
# CHECK-NEXT: "edits": [
# CHECK-NEXT: {
# CHECK-NEXT: "data": [
# Next line, char 5, variable, no modifiers
# CHECK-NEXT: 1,
# CHECK-NEXT: 4,
# CHECK-NEXT: 1,
# CHECK-NEXT: 0,
# CHECK-NEXT: 0
# CHECK-NEXT: ],
# Inserted at position 1
# CHECK-NEXT: "deleteCount": 0,
# CHECK-NEXT: "start": 5
# CHECK-NEXT: }
# CHECK-NEXT: ],
# CHECK-NEXT: "resultId": "2"
# CHECK-NEXT: }
---
# Incremental token request with incorrect baseline => full tokens list.
{"jsonrpc":"2.0","id":2,"method":"textDocument/semanticTokens/edits","params":{
"textDocument": {"uri":"test:///foo.cpp"},
"previousResultId": "bogus"
}}
# CHECK: "id": 2,
# CHECK-NEXT: "jsonrpc": "2.0",
# CHECK-NEXT: "result": {
# CHECK-NEXT: "data": [
# CHECK-NEXT: 0,
# CHECK-NEXT: 4,
# CHECK-NEXT: 1,
# CHECK-NEXT: 0,
# CHECK-NEXT: 0,
# CHECK-NEXT: 1,
# CHECK-NEXT: 4,
# CHECK-NEXT: 1,
# CHECK-NEXT: 0,
# CHECK-NEXT: 0
# CHECK-NEXT: ],
# CHECK-NEXT: "resultId": "3"
# CHECK-NEXT: }
---
{"jsonrpc":"2.0","id":3,"method":"shutdown"}
---
{"jsonrpc":"2.0","method":"exit"}

@@ -14,8 +14,10 @@
#include "TestFS.h"
#include "TestTU.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include <algorithm>
@@ -23,6 +25,9 @@ namespace clang {
namespace clangd {
namespace {
using testing::IsEmpty;
using testing::SizeIs;
MATCHER_P(LineNumber, L, "") { return arg.Line == L; }
MATCHER(EmptyHighlightings, "") { return arg.Tokens.empty(); }
@@ -720,25 +725,29 @@ TEST(SemanticHighlighting, GeneratesHighlightsWhenFileChange) {
ASSERT_EQ(Counter.Count, 1);
}
// Ranges are highlighted as variables, unless highlighted as $Function etc.
std::vector<HighlightingToken> tokens(llvm::StringRef MarkedText) {
Annotations A(MarkedText);
std::vector<HighlightingToken> Results;
for (const Range& R : A.ranges())
Results.push_back({HighlightingKind::Variable, R});
for (unsigned I = 0; I < static_cast<unsigned>(HighlightingKind::LastKind); ++I) {
HighlightingKind Kind = static_cast<HighlightingKind>(I);
for (const Range& R : A.ranges(llvm::to_string(Kind)))
Results.push_back({Kind, R});
}
llvm::sort(Results);
return Results;
}
TEST(SemanticHighlighting, toSemanticTokens) {
auto CreatePosition = [](int Line, int Character) -> Position {
Position Pos;
Pos.line = Line;
Pos.character = Character;
return Pos;
};
auto Results = toSemanticTokens(tokens(R"(
[[blah]]
std::vector<HighlightingToken> Tokens = {
{HighlightingKind::Variable,
Range{CreatePosition(1, 1), CreatePosition(1, 5)}},
{HighlightingKind::Function,
Range{CreatePosition(3, 4), CreatePosition(3, 7)}},
{HighlightingKind::Variable,
Range{CreatePosition(3, 8), CreatePosition(3, 12)}},
};
$Function[[big]] [[bang]]
)"));
std::vector<SemanticToken> Results = toSemanticTokens(Tokens);
EXPECT_EQ(Tokens.size(), Results.size());
ASSERT_THAT(Results, SizeIs(3));
EXPECT_EQ(Results[0].tokenType, unsigned(HighlightingKind::Variable));
EXPECT_EQ(Results[0].deltaLine, 1u);
EXPECT_EQ(Results[0].deltaStart, 1u);
@@ -755,6 +764,38 @@ TEST(SemanticHighlighting, toSemanticTokens) {
EXPECT_EQ(Results[2].length, 4u);
}
TEST(SemanticHighlighting, diffSemanticTokens) {
auto Before = toSemanticTokens(tokens(R"(
[[foo]] [[bar]] [[baz]]
[[one]] [[two]] [[three]]
)"));
EXPECT_THAT(diffTokens(Before, Before), IsEmpty());
auto After = toSemanticTokens(tokens(R"(
[[foo]] [[hello]] [[world]] [[baz]]
[[one]] [[two]] [[three]]
)"));
// Replace [bar, baz] with [hello, world, baz]
auto Diff = diffTokens(Before, After);
ASSERT_THAT(Diff, SizeIs(1));
EXPECT_EQ(1u, Diff.front().startToken);
EXPECT_EQ(2u, Diff.front().deleteTokens);
ASSERT_THAT(Diff.front().tokens, SizeIs(3));
// hello
EXPECT_EQ(0u, Diff.front().tokens[0].deltaLine);
EXPECT_EQ(4u, Diff.front().tokens[0].deltaStart);
EXPECT_EQ(5u, Diff.front().tokens[0].length);
// world
EXPECT_EQ(0u, Diff.front().tokens[1].deltaLine);
EXPECT_EQ(6u, Diff.front().tokens[1].deltaStart);
EXPECT_EQ(5u, Diff.front().tokens[1].length);
// baz
EXPECT_EQ(0u, Diff.front().tokens[2].deltaLine);
EXPECT_EQ(6u, Diff.front().tokens[2].deltaStart);
EXPECT_EQ(3u, Diff.front().tokens[2].length);
}
TEST(SemanticHighlighting, toTheiaSemanticHighlightingInformation) {
auto CreatePosition = [](int Line, int Character) -> Position {
Position Pos;