[clang][Syntax] Handle macro arguments in spelledForExpanded

Reviewers: sammccall Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75446
2020-02-29 13:37:31 +01:00 · 2020-02-29 13:37:31 +01:00 · 9619c2cc9a
parent 14db82c929
commit 9619c2cc9a
3 changed files with 137 additions and 40 deletions
--- a/clang/include/clang/Tooling/Syntax/Tokens.h
+++ b/clang/include/clang/Tooling/Syntax/Tokens.h
@ -197,18 +197,20 @@ public:
  /// token range R.
  llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;

-  /// Find the subrange of spelled tokens that produced the corresponding \p
-  /// Expanded tokens.
+  /// Returns the subrange of spelled tokens corresponding to AST node spanning
+  /// \p Expanded. This is the text that should be replaced if a refactoring
+  /// were to rewrite the node. If \p Expanded is empty, the returned value is
+  /// llvm::None.
  ///
-  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
-  ///
-  /// Will fail if the expanded tokens do not correspond to a
-  /// sequence of spelled tokens. E.g. for the following example:
+  /// Will fail if the expanded tokens do not correspond to a sequence of
+  /// spelled tokens. E.g. for the following example:
  ///
  ///   #define FIRST f1 f2 f3
  ///   #define SECOND s1 s2 s3
+  ///   #define ID2(X, Y) X Y
  ///
  ///   a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c
+  ///   d ID2(e f g, h) i  // expanded tokens are: d e f g h i
  ///
  /// the results would be:
  ///   expanded   => spelled
@ -218,8 +220,10 @@ public:
  ///   a f1 f2 f3 => a FIRST
  ///         a f1 => can't map
  ///        s1 s2 => can't map
+  ///         e f  => e f
+  ///         g h  => can't map
  ///
-  /// If \p Expanded is empty, the returned value is llvm::None.
+  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
  /// Complexity is logarithmic.
  llvm::Optional<llvm::ArrayRef<syntax::Token>>
  spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@ -35,6 +35,69 @@
 using namespace clang;
 using namespace clang::syntax;

+namespace {
+// Finds the smallest consecutive subsuquence of Toks that covers R.
+llvm::ArrayRef<syntax::Token>
+getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
+                  const SourceManager &SM) {
+  if (R.isInvalid())
+    return {};
+  const syntax::Token *Begin =
+      llvm::partition_point(Toks, [&](const syntax::Token &T) {
+        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
+      });
+  const syntax::Token *End =
+      llvm::partition_point(Toks, [&](const syntax::Token &T) {
+        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
+      });
+  if (Begin > End)
+    return {};
+  return {Begin, End};
+}
+
+// Finds the smallest expansion range that contains expanded tokens First and
+// Last, e.g.:
+// #define ID(x) x
+// ID(ID(ID(a1) a2))
+//          ~~       -> a1
+//              ~~   -> a2
+//       ~~~~~~~~~   -> a1 a2
+SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
+                                        const syntax::Token &Last,
+                                        const SourceManager &SM) {
+  SourceRange Res;
+  auto FirstLoc = First.location(), LastLoc = Last.location();
+  // Keep traversing up the spelling chain as longs as tokens are part of the
+  // same expansion.
+  while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
+    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
+    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
+    // Stop if expansions have diverged.
+    if (ExpInfoFirst.getExpansionLocStart() !=
+        ExpInfoLast.getExpansionLocStart())
+      break;
+    // Do not continue into macro bodies.
+    if (!ExpInfoFirst.isMacroArgExpansion() ||
+        !ExpInfoLast.isMacroArgExpansion())
+      break;
+    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
+    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
+    // Update the result afterwards, as we want the tokens that triggered the
+    // expansion.
+    Res = {FirstLoc, LastLoc};
+  }
+  // Normally mapping back to expansion location here only changes FileID, as
+  // we've already found some tokens expanded from the same macro argument, and
+  // they should map to a consecutive subset of spelled tokens. Unfortunately
+  // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations
+  // based on their FileID in addition to offsets. So even though we are
+  // referring to same tokens, SourceManager might tell us that one is before
+  // the other if they've got different FileIDs.
+  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
+}
+
+} // namespace
+
 syntax::Token::Token(SourceLocation Location, unsigned Length,
                     tok::TokenKind Kind)
    : Location(Location), Length(Length), Kind(Kind) {
@ -121,19 +184,7 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const {
 }

 llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
-  if (R.isInvalid())
-    return {};
-  const Token *Begin =
-      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
-        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
-      });
-  const Token *End =
-      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
-        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
-      });
-  if (Begin > End)
-    return {};
-  return {Begin, End};
+  return getTokensCovering(expandedTokens(), R, *SourceMgr);
 }

 CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
@ -206,8 +257,6 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
  if (Expanded.empty())
    return llvm::None;

-  // FIXME: also allow changes uniquely mapping to macro arguments.
-
  const syntax::Token *BeginSpelled;
  const Mapping *BeginMapping;
  std::tie(BeginSpelled, BeginMapping) =
@ -225,12 +274,28 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {

  const MarkedFile &File = Files.find(FID)->second;

-  // Do not allow changes that cross macro expansion boundaries.
+  // If both tokens are coming from a macro argument expansion, try and map to
+  // smallest part of the macro argument. BeginMapping && LastMapping check is
+  // only for performance, they are a prerequisite for Expanded.front() and
+  // Expanded.back() being part of a macro arg expansion.
+  if (BeginMapping && LastMapping &&
+      SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
+      SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
+    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
+                                                   Expanded.back(), *SourceMgr);
+    // It might be the case that tokens are arguments of different macro calls,
+    // in that case we should continue with the logic below instead of returning
+    // an empty range.
+    if (CommonRange.isValid())
+      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
+  }
+
+  // Do not allow changes that doesn't cover full expansion.
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
-  if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
+  if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
    return llvm::None;
-  if (LastMapping && EndExpanded < LastMapping->EndExpanded)
+  if (LastMapping && LastMapping->EndExpanded != EndExpanded)
    return llvm::None;
  // All is good, return the result.
  return llvm::makeArrayRef(
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@ -627,6 +627,7 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {

    A split B
  )cpp");
+  // Ranges going across expansion boundaries.
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled("A split B"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
@ -647,22 +648,28 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
    ID(ID(ID(a1) a2 a3)) split ID(B)
  )cpp");

-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
-              ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
-              ValueIs(SameRange(findSpelled("ID ( B )"))));
+              ValueIs(SameRange(findSpelled("( B").drop_front())));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
              ValueIs(SameRange(findSpelled(
                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
-  // Ranges crossing macro call boundaries.
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
-            llvm::None);
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
-            llvm::None);
-  // FIXME: next two examples should map to macro arguments, but currently they
-  //        fail.
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+  // Mixed ranges with expanded and spelled tokens.
+  EXPECT_THAT(
+      Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
+      ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
+              ValueIs(SameRange(findSpelled("split ID ( B )"))));
+  // Macro arguments
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
+              ValueIs(SameRange(findSpelled("a1"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
+              ValueIs(SameRange(findSpelled("a2"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
+              ValueIs(SameRange(findSpelled("a3"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
+              ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+              ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));

  // Empty macro expansions.
  recordTokens(R"cpp(
@ -674,11 +681,11 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
    ID(7 8 9) EMPTY EMPTY
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
-              ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
+              ValueIs(SameRange(findSpelled("1 2 3"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
-              ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
+              ValueIs(SameRange(findSpelled("4 5 6"))));
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
-              ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
+              ValueIs(SameRange(findSpelled("7 8 9"))));

  // Empty mappings coming from various directives.
  recordTokens(R"cpp(
@ -689,6 +696,27 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
  )cpp");
  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
              ValueIs(SameRange(findSpelled("not_mapped"))));
+
+  // Multiple macro arguments
+  recordTokens(R"cpp(
+    #define ID(X) X
+    #define ID2(X, Y) X Y
+
+    ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
+  )cpp");
+  // Should fail, spans multiple arguments.
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
+              ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
+  EXPECT_THAT(
+      Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+      ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
+              ValueIs(SameRange(findSpelled("a5 a6"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
+              ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
+  // Should fail, spans multiple invocations.
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None);
 }

 TEST_F(TokenBufferTest, ExpandedTokensForRange) {