Re-apply r357823 "[Lexer] NFC: Fix an off-by-one bug in getAsCharRange()."

It now comes with a follow-up fix for the clients of this API in clangd and clang-tidy. Differential Revision: https://reviews.llvm.org/D59977 llvm-svn: 359035
2019-04-23 21:15:26 +00:00 · 2019-04-23 21:15:26 +00:00 · 8c099ce72d
parent fc79ab9857
commit 8c099ce72d
6 changed files with 31 additions and 4 deletions
--- a/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp
@ -39,6 +39,7 @@ static bool alreadyConcatenated(std::size_t NumCandidates,
                                const SourceRange &ReplacementRange,
                                const SourceManager &Sources,
                                const LangOptions &LangOpts) {
+  // FIXME: This logic breaks when there is a comment with ':'s in the middle.
  CharSourceRange TextRange =
      Lexer::getAsCharRange(ReplacementRange, Sources, LangOpts);
  StringRef CurrentNamespacesText =
--- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp
@ -102,11 +102,14 @@ void NamespaceCommentCheck::check(const MatchFinder::MatchResult &Result) {
    }
  }

+  // FIXME: This probably breaks on comments between the namespace and its '{'.
  auto TextRange =
      Lexer::getAsCharRange(SourceRange(NestedNamespaceBegin, LBracketLocation),
                            Sources, getLangOpts());
  StringRef NestedNamespaceName =
-      Lexer::getSourceText(TextRange, Sources, getLangOpts()).rtrim();
+      Lexer::getSourceText(TextRange, Sources, getLangOpts())
+          .rtrim('{') // Drop the { itself.
+          .rtrim();   // Drop any whitespace before it.
  bool IsNested = NestedNamespaceName.contains(':');

  if (IsNested)
--- a/clang-tools-extra/unittests/clangd/SelectionTests.cpp
+++ b/clang-tools-extra/unittests/clangd/SelectionTests.cpp
@ -45,7 +45,7 @@ Range nodeRange(const SelectionTree::Node *N, ParsedAST &AST) {
  CharSourceRange R =
      Lexer::getAsCharRange(SR, SM, AST.getASTContext().getLangOpts());
  return Range{offsetToPosition(Buffer, SM.getFileOffset(R.getBegin())),
-               offsetToPosition(Buffer, SM.getFileOffset(R.getEnd()) + 1)};
+               offsetToPosition(Buffer, SM.getFileOffset(R.getEnd()))};
 }

 std::string nodeKind(const SelectionTree::Node *N) {
--- a/clang/include/clang/Basic/PlistSupport.h
+++ b/clang/include/clang/Basic/PlistSupport.h
@ -127,7 +127,11 @@ inline void EmitRange(raw_ostream &o, const SourceManager &SM,
  assert(R.isCharRange() && "cannot handle a token range");
  Indent(o, indent) << "<array>\n";
  EmitLocation(o, SM, R.getBegin(), FM, indent + 1);
-  EmitLocation(o, SM, R.getEnd(), FM, indent + 1);
+
+  // The ".getLocWithOffset(-1)" emulates the behavior of an off-by-one bug
+  // in Lexer that is already fixed. It is here for backwards compatibility
+  // even though it is incorrect.
+  EmitLocation(o, SM, R.getEnd().getLocWithOffset(-1), FM, indent + 1);
  Indent(o, indent) << "</array>\n";
 }

--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@ -382,7 +382,7 @@ public:
    SourceLocation End = getLocForEndOfToken(Range.getEnd(), 0, SM, LangOpts);
    return End.isInvalid() ? CharSourceRange()
                           : CharSourceRange::getCharRange(
-                                 Range.getBegin(), End.getLocWithOffset(-1));
+                                 Range.getBegin(), End);
  }
  static CharSourceRange getAsCharRange(CharSourceRange Range,
                                        const SourceManager &SM,
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@ -513,4 +513,23 @@ TEST_F(LexerTest, StringizingRasString) {
  EXPECT_EQ(String6, R"(a\\\n\n\n    \\\\b)");
 }

+TEST_F(LexerTest, CharRangeOffByOne) {
+  std::vector<Token> toks = Lex(R"(#define MOO 1
+    void foo() { MOO; })");
+  const Token &moo = toks[5];
+
+  EXPECT_EQ(getSourceText(moo, moo), "MOO");
+
+  SourceRange R{moo.getLocation(), moo.getLocation()};
+
+  EXPECT_TRUE(
+      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
+  EXPECT_TRUE(
+      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
+
+  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);
+
+  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
+}
+
 } // anonymous namespace