[clang-format] Fix misformatted macro definitions after D86959

After D86959 the code `#define lambda [](const decltype(x) &ptr) {}` was formatted as `#define lambda [](const decltype(x) & ptr) {}` due to now parsing the '&' token as a BinaryOperator. The problem was caused by the condition `Line.InPPDirective && (!Left->Previous || !Left->Previous->is(tok::identifier))) {` being matched and therefore not performing the checks for "previous token is one of decltype/_Atomic/etc.". This patch moves those checks after the existing if/else chain to ensure the left-parent token classification is always run after checking whether the contents of the parens is an expression or not. This change also introduces a new TokenAnnotatorTest that checks the token kind and Role of Tokens after analyzing them. This is used to check for TT_PointerOrReference, in addition to indirectly testing this based on the resulting formatting. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D88956
2020-10-27 10:05:55 +00:00 · 2020-10-27 10:05:55 +00:00 · 850325348a
parent d323c8f791
commit 850325348a
5 changed files with 133 additions and 20 deletions
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@ -256,16 +256,6 @@ private:
    } else if (Contexts[Contexts.size() - 2].CaretFound) {
      // This is the parameter list of an ObjC block.
      Contexts.back().IsExpression = false;
-    } else if (PrevNonComment && PrevNonComment->is(tok::kw___attribute)) {
-      Left->setType(TT_AttributeParen);
-    } else if (PrevNonComment &&
-               PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
-                                       tok::kw_typeof, tok::kw__Atomic,
-                                       tok::kw___underlying_type)) {
-      Left->setType(TT_TypeDeclarationParen);
-      // decltype() and typeof() usually contain expressions.
-      if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
-        Contexts.back().IsExpression = true;
    } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
      // The first argument to a foreach macro is a declaration.
      Contexts.back().IsForEachMacro = true;
@ -279,6 +269,21 @@ private:
      Contexts.back().IsExpression = !IsForOrCatch;
    }

+    // Infer the role of the l_paren based on the previous token if we haven't
+    // detected one one yet.
+    if (PrevNonComment && Left->is(TT_Unknown)) {
+      if (PrevNonComment->is(tok::kw___attribute)) {
+        Left->setType(TT_AttributeParen);
+      } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
+                                         tok::kw_typeof, tok::kw__Atomic,
+                                         tok::kw___underlying_type)) {
+        Left->setType(TT_TypeDeclarationParen);
+        // decltype() and typeof() usually contain expressions.
+        if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
+          Contexts.back().IsExpression = true;
+      }
+    }
+
    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->setType(TT_ObjCMethodExpr);
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@ -21,6 +21,7 @@ add_clang_unittest(FormatTests
  SortImportsTestJava.cpp
  SortIncludesTest.cpp
  UsingDeclarationsSorterTest.cpp
+  TokenAnnotatorTest.cpp
  )

 clang_target_link_libraries(FormatTests
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@ -8197,6 +8197,12 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) {
  verifyFormat("void f() { MACRO(A * B); }");
  verifyFormat("void f() { MACRO(A & B); }");

+  // This lambda was mis-formatted after D88956 (treating it as a binop):
+  verifyFormat("auto x = [](const decltype(x) &ptr) {};");
+  verifyFormat("auto x = [](const decltype(x) *ptr) {};");
+  verifyFormat("#define lambda [](const decltype(x) &ptr) {}");
+  verifyFormat("#define lambda [](const decltype(x) *ptr) {}");
+
  verifyFormat("DatumHandle const *operator->() const { return input_; }");
  verifyFormat("return options != nullptr && operator==(*options);");

--- a/clang/unittests/Format/TestLexer.h
+++ b/clang/unittests/Format/TestLexer.h
@ -16,6 +16,9 @@
 #define CLANG_UNITTESTS_FORMAT_TESTLEXER_H

 #include "../../lib/Format/FormatTokenLexer.h"
+#include "../../lib/Format/TokenAnalyzer.h"
+#include "../../lib/Format/TokenAnnotator.h"
+#include "../../lib/Format/UnwrappedLineParser.h"

 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
@ -29,7 +32,8 @@ namespace format {
 typedef llvm::SmallVector<FormatToken *, 8> TokenList;

 inline std::ostream &operator<<(std::ostream &Stream, const FormatToken &Tok) {
-  Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\")";
+  Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\" , "
+         << getTokenTypeName(Tok.getType()) << ")";
  return Stream;
 }
 inline std::ostream &operator<<(std::ostream &Stream, const TokenList &Tokens) {
@ -37,7 +41,7 @@ inline std::ostream &operator<<(std::ostream &Stream, const TokenList &Tokens) {
  for (size_t I = 0, E = Tokens.size(); I != E; ++I) {
    Stream << (I > 0 ? ", " : "") << *Tokens[I];
  }
-  Stream << "}";
+  Stream << "} (" << Tokens.size() << " tokens)";
  return Stream;
 }

@ -53,35 +57,62 @@ inline std::string text(llvm::ArrayRef<FormatToken *> Tokens) {
                         });
 }

-class TestLexer {
+class TestLexer : public UnwrappedLineConsumer {
 public:
  TestLexer(FormatStyle Style = getLLVMStyle())
      : Style(Style), SourceMgr("test.cpp", ""),
        IdentTable(getFormattingLangOpts(Style)) {}

  TokenList lex(llvm::StringRef Code) {
-    Buffers.push_back(
-        llvm::MemoryBuffer::getMemBufferCopy(Code, "<scratch space>"));
-    clang::FileID FID =
-        SourceMgr.get().createFileID(Buffers.back()->getMemBufferRef());
-    FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator,
-                         IdentTable);
-    auto Result = Lex.lex();
+    auto Result = getNewLexer(Code).lex();
    return TokenList(Result.begin(), Result.end());
  }

+  TokenList annotate(llvm::StringRef Code) {
+    FormatTokenLexer Lex = getNewLexer(Code);
+    auto Tokens = Lex.lex();
+    UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
+    Parser.parse();
+    TokenAnnotator Annotator(Style, Lex.getKeywords());
+    for (auto &Line : UnwrappedLines) {
+      AnnotatedLine Annotated(Line);
+      Annotator.annotate(Annotated);
+    }
+    UnwrappedLines.clear();
+    return TokenList(Tokens.begin(), Tokens.end());
+  }
+
  FormatToken *id(llvm::StringRef Code) {
    auto Result = uneof(lex(Code));
    assert(Result.size() == 1U && "Code must expand to 1 token.");
    return Result[0];
  }

+protected:
+  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
+    UnwrappedLines.push_back(TheLine);
+  }
+  void finishRun() override {}
+
+  FormatTokenLexer getNewLexer(StringRef Code) {
+    Buffers.push_back(
+        llvm::MemoryBuffer::getMemBufferCopy(Code, "<scratch space>"));
+    clang::FileID FID =
+        SourceMgr.get().createFileID(Buffers.back()->getMemBufferRef());
+    FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator,
+                         IdentTable);
+    return FormatTokenLexer(SourceMgr.get(), FID, 0, Style, Encoding, Allocator,
+                            IdentTable);
+  }
+
+public:
  FormatStyle Style;
  encoding::Encoding Encoding = encoding::Encoding_UTF8;
  std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
  clang::SourceManagerForFile SourceMgr;
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  IdentifierTable IdentTable;
+  SmallVector<UnwrappedLine, 16> UnwrappedLines;
 };

 } // namespace format
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@ -0,0 +1,70 @@
+//===- unittest/Format/TokenAnnotatorTest.cpp - Formatting unit tests -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "FormatTestUtils.h"
+#include "TestLexer.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class TokenAnnotatorTest : public ::testing::Test {
+protected:
+  TokenList annotate(llvm::StringRef Code,
+                     const FormatStyle &Style = getLLVMStyle()) {
+    return TestLexer(Style).annotate(Code);
+  }
+};
+
+#define EXPECT_TOKEN_KIND(FormatTok, Kind)                                     \
+  EXPECT_EQ((FormatTok)->Tok.getKind(), Kind) << *(FormatTok)
+#define EXPECT_TOKEN_TYPE(FormatTok, Type)                                     \
+  EXPECT_EQ((FormatTok)->getType(), Type) << *(FormatTok)
+#define EXPECT_TOKEN(FormatTok, Kind, Type)                                    \
+  do {                                                                         \
+    EXPECT_TOKEN_KIND(FormatTok, Kind);                                        \
+    EXPECT_TOKEN_TYPE(FormatTok, Type);                                        \
+  } while (false);
+
+TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmpInMacroDefinition) {
+  // This is a regression test for mis-parsing the & after decltype as a binary
+  // operator instead of a reference (when inside a macro definition).
+  auto Tokens = annotate("auto x = [](const decltype(x) &ptr) {};");
+  EXPECT_EQ(Tokens.size(), 18u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_decltype, TT_Unknown);
+  EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[9], tok::identifier, TT_Unknown);
+  EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[11], tok::amp, TT_PointerOrReference);
+  // Same again with * instead of &:
+  Tokens = annotate("auto x = [](const decltype(x) *ptr) {};");
+  EXPECT_EQ(Tokens.size(), 18u) << Tokens;
+  EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference);
+
+  // Also check that we parse correctly within a macro definition:
+  Tokens = annotate("#define lambda [](const decltype(x) &ptr) {}");
+  EXPECT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_decltype, TT_Unknown);
+  EXPECT_TOKEN(Tokens[8], tok::l_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[9], tok::identifier, TT_Unknown);
+  EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[11], tok::amp, TT_PointerOrReference);
+  // Same again with * instead of &:
+  Tokens = annotate("#define lambda [](const decltype(x) *ptr) {}");
+  EXPECT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference);
+}
+
+} // namespace
+} // namespace format
+} // namespace clang