From d8ba6b4ab3eceb6bbcdf4371d4ffaab9d1a5cebe Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov <platonov.aleksandr@huawei.com>
Date: Tue, 29 Sep 2020 19:54:33 +0300
Subject: [PATCH] [clangd] findNearbyIdentifier(): guaranteed to give up after
 2^N lines

As @kadircet mentions in D84912#2184144, `findNearbyIdentifier()` traverses the whole file if there is no identifier for the word.
This patch ensures give up after 2^N lines in any case.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D87891
---
 clang-tools-extra/clangd/XRefs.cpp            | 29 ++++++++++++++-----
 .../clangd/unittests/XRefsTests.cpp           |  5 ++++
 2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 9e8791c2a765..9532e1418cca 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -562,19 +562,34 @@ const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
   auto Cost = [&](SourceLocation Loc) -> unsigned {
     assert(SM.getFileID(Loc) == File && "spelled token in wrong file?");
     unsigned Line = SM.getSpellingLineNumber(Loc);
-    if (Line > WordLine)
-      return 1 + llvm::Log2_64(Line - WordLine);
-    if (Line < WordLine)
-      return 2 + llvm::Log2_64(WordLine - Line);
-    return 0;
+    return Line >= WordLine ? Line - WordLine : 2 * (WordLine - Line);
   };
   const syntax::Token *BestTok = nullptr;
-  // Search bounds are based on word length: 2^N lines forward.
-  unsigned BestCost = Word.Text.size() + 1;
+  unsigned BestCost = -1;
+  // Search bounds are based on word length:
+  // - forward: 2^N lines
+  // - backward: 2^(N-1) lines.
+  unsigned MaxDistance =
+      1U << std::min<unsigned>(Word.Text.size(),
+                               std::numeric_limits<unsigned>::digits - 1);
+  // Line number for SM.translateLineCol() should be one-based, also
+  // SM.translateLineCol() can handle line number greater than
+  // number of lines in the file.
+  // - LineMin = max(1, WordLine + 1 - 2^(N-1))
+  // - LineMax = WordLine + 1 + 2^N
+  unsigned LineMin =
+      WordLine + 1 <= MaxDistance / 2 ? 1 : WordLine + 1 - MaxDistance / 2;
+  unsigned LineMax = WordLine + 1 + MaxDistance;
+  SourceLocation LocMin = SM.translateLineCol(File, LineMin, 1);
+  assert(LocMin.isValid());
+  SourceLocation LocMax = SM.translateLineCol(File, LineMax, 1);
+  assert(LocMax.isValid());
 
   // Updates BestTok and BestCost if Tok is a good candidate.
   // May return true if the cost is too high for this token.
   auto Consider = [&](const syntax::Token &Tok) {
+    if (Tok.location() < LocMin || Tok.location() > LocMax)
+      return true; // we are too far from the word, break the outer loop.
     if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word.Text))
       return false;
     // No point guessing the same location we started with.
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index a48bb9c8c182..40637b5fa758 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -1428,6 +1428,11 @@ TEST(LocateSymbol, NearbyIdentifier) {
 
 
       // h^i
+
+
+
+
+      int x = hi;
     )cpp",
       R"cpp(
       // prefer nearest occurrence even if several matched tokens