[syntax] Introduce a TokenManager interface.

TokenManager defines Token interfaces for the clang syntax-tree. This is the level
of abstraction that the syntax-tree should use to operate on Tokens.

It decouples the syntax-tree from a particular token implementation (previously
TokenBuffer). This enables us to use a different underlying token implementation
for the syntax Leaf node -- in the clang pseudoparser, we want to produce a
syntax-tree with its own pseudo::Token rather than syntax::Token.

Differential Revision: https://reviews.llvm.org/D128411
This commit is contained in:
Haojian Wu 2022-07-07 14:44:27 +02:00
parent 51b9e099d5
commit 263dcf452f
22 changed files with 377 additions and 239 deletions

View File

@ -16,6 +16,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Casting.h"
@ -52,8 +53,9 @@ llvm::Optional<FoldingRange> toFoldingRange(SourceRange SR,
return Range;
}
llvm::Optional<FoldingRange> extractFoldingRange(const syntax::Node *Node,
const SourceManager &SM) {
llvm::Optional<FoldingRange>
extractFoldingRange(const syntax::Node *Node,
const syntax::TokenBufferTokenManager &TM) {
if (const auto *Stmt = dyn_cast<syntax::CompoundStatement>(Node)) {
const auto *LBrace = cast_or_null<syntax::Leaf>(
Stmt->findChild(syntax::NodeRole::OpenParen));
@ -65,9 +67,12 @@ llvm::Optional<FoldingRange> extractFoldingRange(const syntax::Node *Node,
if (!LBrace || !RBrace)
return llvm::None;
// Fold the entire range within braces, including whitespace.
const SourceLocation LBraceLocInfo = LBrace->getToken()->endLocation(),
RBraceLocInfo = RBrace->getToken()->location();
auto Range = toFoldingRange(SourceRange(LBraceLocInfo, RBraceLocInfo), SM);
const SourceLocation LBraceLocInfo =
TM.getToken(LBrace->getTokenKey())->endLocation(),
RBraceLocInfo =
TM.getToken(RBrace->getTokenKey())->location();
auto Range = toFoldingRange(SourceRange(LBraceLocInfo, RBraceLocInfo),
TM.sourceManager());
// Do not generate folding range for compound statements without any
// nodes and newlines.
if (Range && Range->startLine != Range->endLine)
@ -77,15 +82,16 @@ llvm::Optional<FoldingRange> extractFoldingRange(const syntax::Node *Node,
}
// Traverse the tree and collect folding ranges along the way.
std::vector<FoldingRange> collectFoldingRanges(const syntax::Node *Root,
const SourceManager &SM) {
std::vector<FoldingRange>
collectFoldingRanges(const syntax::Node *Root,
const syntax::TokenBufferTokenManager &TM) {
std::queue<const syntax::Node *> Nodes;
Nodes.push(Root);
std::vector<FoldingRange> Result;
while (!Nodes.empty()) {
const syntax::Node *Node = Nodes.front();
Nodes.pop();
const auto Range = extractFoldingRange(Node, SM);
const auto Range = extractFoldingRange(Node, TM);
if (Range)
Result.push_back(*Range);
if (const auto *T = dyn_cast<syntax::Tree>(Node))
@ -157,9 +163,11 @@ llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos) {
// control flow statement bodies).
// Related issue: https://github.com/clangd/clangd/issues/310
llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) {
syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), AST.getTokens());
const auto *SyntaxTree = syntax::buildSyntaxTree(A, AST.getASTContext());
return collectFoldingRanges(SyntaxTree, AST.getSourceManager());
syntax::Arena A;
syntax::TokenBufferTokenManager TM(AST.getTokens(), AST.getLangOpts(),
AST.getSourceManager());
const auto *SyntaxTree = syntax::buildSyntaxTree(A, TM, AST.getASTContext());
return collectFoldingRanges(SyntaxTree, TM);
}
} // namespace clangd

View File

@ -13,6 +13,7 @@
#include "clang/AST/Decl.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tree.h"
namespace clang {
@ -21,19 +22,21 @@ namespace syntax {
/// Build a syntax tree for the main file.
/// This usually covers the whole TranslationUnitDecl, but can be restricted by
/// the ASTContext's traversal scope.
syntax::TranslationUnit *buildSyntaxTree(Arena &A, ASTContext &Context);
syntax::TranslationUnit *
buildSyntaxTree(Arena &A, TokenBufferTokenManager &TBTM, ASTContext &Context);
// Create syntax trees from subtrees not backed by the source code.
// Synthesis of Leafs
/// Create `Leaf` from token with `Spelling` and assert it has the desired
/// `TokenKind`.
syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K,
StringRef Spelling);
syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM,
tok::TokenKind K, StringRef Spelling);
/// Infer the token spelling from its `TokenKind`, then create `Leaf` from
/// this token
syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K);
syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM,
tok::TokenKind K);
// Synthesis of Trees
/// Creates the concrete syntax node according to the specified `NodeKind` `K`.
@ -44,7 +47,8 @@ createTree(syntax::Arena &A,
syntax::NodeKind K);
// Synthesis of Syntax Nodes
syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A);
syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A,
TokenBufferTokenManager &TBTM);
/// Creates a completely independent copy of `N` with its macros expanded.
///
@ -52,7 +56,9 @@ syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A);
/// * Detached, i.e. `Parent == NextSibling == nullptr` and
/// `Role == Detached`.
/// * Synthesized, i.e. `Original == false`.
syntax::Node *deepCopyExpandingMacros(syntax::Arena &A, const syntax::Node *N);
syntax::Node *deepCopyExpandingMacros(syntax::Arena &A,
TokenBufferTokenManager &TBTM,
const syntax::Node *N);
} // namespace syntax
} // namespace clang
#endif

View File

@ -13,6 +13,7 @@
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tree.h"
namespace clang {
@ -20,7 +21,7 @@ namespace syntax {
/// Computes textual replacements required to mimic the tree modifications made
/// to the syntax tree.
tooling::Replacements computeReplacements(const Arena &A,
tooling::Replacements computeReplacements(const TokenBufferTokenManager &TBTM,
const syntax::TranslationUnit &TU);
/// Removes a statement or replaces it with an empty statement where one is
@ -29,7 +30,8 @@ tooling::Replacements computeReplacements(const Arena &A,
/// One can remove `foo();` completely and to remove `bar();` we would need to
/// replace it with an empty statement.
/// EXPECTS: S->canModify() == true
void removeStatement(syntax::Arena &A, syntax::Statement *S);
void removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM,
syntax::Statement *S);
} // namespace syntax
} // namespace clang

View File

@ -21,9 +21,7 @@
#ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H
#define LLVM_CLANG_TOOLING_SYNTAX_NODES_H
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Basic/LLVM.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"

View File

@ -0,0 +1,70 @@
//===- TokenBufferTokenManager.h -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H
#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H
#include "clang/Tooling/Syntax/TokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
namespace clang {
namespace syntax {
/// TokenManager implementation backed by a syntax::TokenBuffer.
/// Besides the token buffer it keeps references to the language options and
/// the source manager, and owns the tokens of any extra buffers lexed through
/// lexBuffer().
class TokenBufferTokenManager : public TokenManager {
public:
  TokenBufferTokenManager(const TokenBuffer &Tokens,
                          const LangOptions &LangOpts, SourceManager &SourceMgr)
      : Tokens(Tokens), LangOpts(LangOpts), SM(SourceMgr) {}

  /// Reports the kind string identifying this implementation.
  llvm::StringLiteral kind() const override { return Kind; }
  /// Returns true when \p N reports the same kind string as this class.
  static bool classof(const TokenManager *N) { return N->kind() == Kind; }

  /// Interprets the key \p I as a pointer to a syntax::Token.
  const syntax::Token *getToken(Key I) const {
    return reinterpret_cast<const syntax::Token *>(I);
  }

  /// Returns the text of the token identified by \p I, or the placeholder
  /// "<eof>" for an end-of-file token.
  llvm::StringRef getText(Key I) const override {
    const syntax::Token *Tok = getToken(I);
    assert(Tok);
    // 'eof' is special-cased because calling text() on it produces an empty
    // string.
    // FIXME: this special logic is for syntax::Leaf dump, move it when we
    // have a direct way to retrieve token kind in the syntax::Leaf.
    const bool IsEof = Tok->kind() == tok::eof;
    if (!IsEof)
      return Tok->text(SM);
    return "<eof>";
  }

  const TokenBuffer &tokenBuffer() const { return Tokens; }
  const SourceManager &sourceManager() const { return SM; }
  SourceManager &sourceManager() { return SM; }

private:
  // Kind string of this manager: it is powered by the TokenBuffer.
  static constexpr llvm::StringLiteral Kind = "TokenBuffer";

  // FactoryImpl is the only intended caller of lexBuffer().
  friend class FactoryImpl;
  /// Add \p Buffer to the underlying source manager, tokenize it and store the
  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
  /// that were not written in user code.
  std::pair<FileID, ArrayRef<Token>>
  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);

  const TokenBuffer &Tokens;
  const LangOptions &LangOpts;
  /// The underlying source manager for the ExtraTokens.
  SourceManager &SM;
  /// IDs and storage for additional tokenized files.
  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
};
} // namespace syntax
} // namespace clang
#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H

View File

@ -0,0 +1,45 @@
//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines Token interfaces for the clang syntax-tree. This is the level of
// abstraction that the syntax-tree uses to operate on Token.
//
// TokenManager decouples the syntax-tree from a particular token
// implementation. For example, a TokenBuffer captured from a clang parser may
// track macro expansions and associate tokens with clang's SourceManager, while
// a clang pseudoparser would use a flat array of raw-lexed tokens in memory.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
#include "llvm/ADT/StringRef.h"
#include <cstdint>
namespace clang {
namespace syntax {
/// Abstract interface through which the clang syntax-tree operates on tokens.
/// Concrete implementations decide what a token actually is and how its text
/// is obtained.
class TokenManager {
public:
  /// This is a polymorphic base class, so the destructor is virtual to make
  /// destroying an implementation through a TokenManager pointer well-defined.
  virtual ~TokenManager() = default;

  /// Describes what the exact class kind of the TokenManager is.
  virtual llvm::StringLiteral kind() const = 0;

  /// A key to identify a specific token. The token concept depends on the
  /// underlying implementation -- it can be a spelled token from the original
  /// source file or an expanded token.
  /// The syntax-tree Leaf node holds a Key.
  using Key = uintptr_t;

  /// Returns the text of the token identified by \p K.
  virtual llvm::StringRef getText(Key K) const = 0;
};
} // namespace syntax
} // namespace clang
#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H

View File

@ -33,6 +33,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Syntax/TokenManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"

View File

@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
// - leaf nodes correspond to a token in the expanded token stream,
// - leaf nodes correspond to tokens,
// - tree nodes correspond to language grammar constructs.
//
// The tree is initially built from an AST. Each node of a newly built tree
@ -21,11 +21,8 @@
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/TokenManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/iterator.h"
@ -36,33 +33,12 @@
namespace clang {
namespace syntax {
/// A memory arena for syntax trees. Also tracks the underlying token buffers,
/// source manager, etc.
/// A memory arena for syntax trees.
// FIXME: use BumpPtrAllocator directly.
class Arena {
public:
Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
const TokenBuffer &Tokens);
const SourceManager &getSourceManager() const { return SourceMgr; }
const LangOptions &getLangOptions() const { return LangOpts; }
const TokenBuffer &getTokenBuffer() const;
llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
private:
/// Add \p Buffer to the underlying source manager, tokenize it and store the
/// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
/// that were not written in user code.
std::pair<FileID, ArrayRef<Token>>
lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
friend class FactoryImpl;
private:
SourceManager &SourceMgr;
const LangOptions &LangOpts;
const TokenBuffer &Tokens;
/// IDs and storage for additional tokenized files.
llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
/// Keeps all the allocated nodes and their intermediate data structures.
llvm::BumpPtrAllocator Allocator;
};
@ -122,9 +98,9 @@ public:
Node *getPreviousSibling() { return PreviousSibling; }
/// Dumps the structure of a subtree. For debugging and testing purposes.
std::string dump(const SourceManager &SM) const;
std::string dump(const TokenManager &SM) const;
/// Dumps the tokens forming this subtree.
std::string dumpTokens(const SourceManager &SM) const;
std::string dumpTokens(const TokenManager &SM) const;
/// Asserts invariants on this node of the tree and its immediate children.
/// Will not recurse into the subtree. No-op if NDEBUG is set.
@ -153,16 +129,17 @@ private:
unsigned CanModify : 1;
};
/// A leaf node points to a single token inside the expanded token stream.
/// A leaf node points to a single token.
// FIXME: add TokenKind field (borrow some bits from the Node::kind).
class Leaf final : public Node {
public:
Leaf(const Token *T);
Leaf(TokenManager::Key K);
static bool classof(const Node *N);
const Token *getToken() const { return Tok; }
TokenManager::Key getTokenKey() const { return K; }
private:
const Token *Tok;
TokenManager::Key K;
};
/// A node that has children and represents a syntactic language construct.

View File

@ -27,6 +27,7 @@
#include "clang/Lex/Lexer.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/ArrayRef.h"
@ -365,21 +366,24 @@ private:
/// Call finalize() to finish building the tree and consume the root node.
class syntax::TreeBuilder {
public:
TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
for (const auto &T : Arena.getTokenBuffer().expandedTokens())
TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM)
: Arena(Arena),
TBTM(TBTM),
Pending(Arena, TBTM.tokenBuffer()) {
for (const auto &T : TBTM.tokenBuffer().expandedTokens())
LocationToToken.insert({T.location(), &T});
}
llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); }
const SourceManager &sourceManager() const {
return Arena.getSourceManager();
return TBTM.sourceManager();
}
/// Populate children for \p New node, assuming it covers tokens from \p
/// Range.
void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) {
assert(New);
Pending.foldChildren(Arena, Range, New);
Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
if (From)
Mapping.add(From, New);
}
@ -392,7 +396,7 @@ public:
void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
NestedNameSpecifierLoc From) {
assert(New);
Pending.foldChildren(Arena, Range, New);
Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
if (From)
Mapping.add(From, New);
}
@ -403,7 +407,7 @@ public:
ASTPtr From) {
assert(New);
auto ListRange = Pending.shrinkToFitList(SuperRange);
Pending.foldChildren(Arena, ListRange, New);
Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New);
if (From)
Mapping.add(From, New);
}
@ -434,12 +438,12 @@ public:
/// Finish building the tree and consume the root node.
syntax::TranslationUnit *finalize() && {
auto Tokens = Arena.getTokenBuffer().expandedTokens();
auto Tokens = TBTM.tokenBuffer().expandedTokens();
assert(!Tokens.empty());
assert(Tokens.back().kind() == tok::eof);
// Build the root of the tree, consuming all the children.
Pending.foldChildren(Arena, Tokens.drop_back(),
Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(),
new (Arena.getAllocator()) syntax::TranslationUnit);
auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
@ -464,7 +468,7 @@ public:
assert(First.isValid());
assert(Last.isValid());
assert(First == Last ||
Arena.getSourceManager().isBeforeInTranslationUnit(First, Last));
TBTM.sourceManager().isBeforeInTranslationUnit(First, Last));
return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
}
@ -564,15 +568,16 @@ private:
///
/// Ensures that added nodes properly nest and cover the whole token stream.
struct Forest {
Forest(syntax::Arena &A) {
assert(!A.getTokenBuffer().expandedTokens().empty());
assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof);
Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) {
assert(!TB.expandedTokens().empty());
assert(TB.expandedTokens().back().kind() == tok::eof);
// Create all leaf nodes.
// Note that we do not have 'eof' in the tree.
for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) {
auto *L = new (A.getAllocator()) syntax::Leaf(&T);
for (const auto &T : TB.expandedTokens().drop_back()) {
auto *L = new (A.getAllocator())
syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T));
L->Original = true;
L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value();
L->CanModify = TB.spelledForExpanded(T).has_value();
Trees.insert(Trees.end(), {&T, L});
}
}
@ -620,8 +625,8 @@ private:
}
/// Add \p Node to the forest and attach child nodes based on \p Tokens.
void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens,
syntax::Tree *Node) {
void foldChildren(const syntax::TokenBuffer &TB,
ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) {
// Attach children to `Node`.
assert(Node->getFirstChild() == nullptr && "node already has children");
@ -646,7 +651,7 @@ private:
// Mark that this node came from the AST and is backed by the source code.
Node->Original = true;
Node->CanModify =
A.getTokenBuffer().spelledForExpanded(Tokens).has_value();
TB.spelledForExpanded(Tokens).has_value();
Trees.erase(BeginChildren, EndChildren);
Trees.insert({FirstToken, Node});
@ -660,18 +665,18 @@ private:
return Root;
}
std::string str(const syntax::Arena &A) const {
std::string str(const syntax::TokenBufferTokenManager &STM) const {
std::string R;
for (auto It = Trees.begin(); It != Trees.end(); ++It) {
unsigned CoveredTokens =
It != Trees.end()
? (std::next(It)->first - It->first)
: A.getTokenBuffer().expandedTokens().end() - It->first;
: STM.tokenBuffer().expandedTokens().end() - It->first;
R += std::string(
formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(),
It->first->text(A.getSourceManager()), CoveredTokens));
R += It->second->dump(A.getSourceManager());
It->first->text(STM.sourceManager()), CoveredTokens));
R += It->second->dump(STM);
}
return R;
}
@ -684,9 +689,10 @@ private:
};
/// For debugging purposes.
std::string str() { return Pending.str(Arena); }
std::string str() { return Pending.str(TBTM); }
syntax::Arena &Arena;
TokenBufferTokenManager& TBTM;
/// To quickly find tokens by their start location.
llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken;
Forest Pending;
@ -1718,7 +1724,7 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
markExprChild(ChildExpr, NodeRole::Expression);
ChildNode = new (allocator()) syntax::ExpressionStatement;
// (!) 'getStmtRange()' ensures this covers a trailing semicolon.
Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode);
} else {
ChildNode = Mapping.find(Child);
}
@ -1745,8 +1751,9 @@ const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
}
syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A,
TokenBufferTokenManager& TBTM,
ASTContext &Context) {
TreeBuilder Builder(A);
TreeBuilder Builder(A, TBTM);
BuildTreeVisitor(Context, Builder).TraverseAST(Context);
return std::move(Builder).finalize();
}

View File

@ -5,6 +5,7 @@ add_clang_library(clangToolingSyntax
ComputeReplacements.cpp
Nodes.cpp
Mutations.cpp
TokenBufferTokenManager.cpp
Synthesis.cpp
Tokens.cpp
Tree.cpp

View File

@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Mutations.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/Support/Error.h"
using namespace clang;
@ -16,10 +18,13 @@ namespace {
using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>,
bool /*IsOriginal*/)>;
/// Enumerates spans of tokens from the tree consecutively laid out in memory.
void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
void enumerateTokenSpans(const syntax::Tree *Root,
const syntax::TokenBufferTokenManager &STM,
ProcessTokensFn Callback) {
struct Enumerator {
Enumerator(ProcessTokensFn Callback)
: SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
Enumerator(const syntax::TokenBufferTokenManager &STM,
ProcessTokensFn Callback)
: STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
Callback(Callback) {}
void run(const syntax::Tree *Root) {
@ -39,7 +44,8 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
}
auto *L = cast<syntax::Leaf>(N);
if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) {
if (SpanEnd == STM.getToken(L->getTokenKey()) &&
SpanIsOriginal == L->isOriginal()) {
// Extend the current span.
++SpanEnd;
return;
@ -48,24 +54,25 @@ void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
if (SpanBegin)
Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal);
// Start recording a new span.
SpanBegin = L->getToken();
SpanBegin = STM.getToken(L->getTokenKey());
SpanEnd = SpanBegin + 1;
SpanIsOriginal = L->isOriginal();
}
const syntax::TokenBufferTokenManager &STM;
const syntax::Token *SpanBegin;
const syntax::Token *SpanEnd;
bool SpanIsOriginal;
ProcessTokensFn Callback;
};
return Enumerator(Callback).run(Root);
return Enumerator(STM, Callback).run(Root);
}
syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
syntax::FileRange rangeOfExpanded(const syntax::TokenBufferTokenManager &STM,
llvm::ArrayRef<syntax::Token> Expanded) {
const auto &Buffer = A.getTokenBuffer();
const auto &SM = A.getSourceManager();
const auto &Buffer = STM.tokenBuffer();
const auto &SM = STM.sourceManager();
// Check that \p Expanded actually points into expanded tokens.
assert(Buffer.expandedTokens().begin() <= Expanded.begin());
@ -83,10 +90,10 @@ syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
} // namespace
tooling::Replacements
syntax::computeReplacements(const syntax::Arena &A,
syntax::computeReplacements(const TokenBufferTokenManager &TBTM,
const syntax::TranslationUnit &TU) {
const auto &Buffer = A.getTokenBuffer();
const auto &SM = A.getSourceManager();
const auto &Buffer = TBTM.tokenBuffer();
const auto &SM = TBTM.sourceManager();
tooling::Replacements Replacements;
// Text inserted by the replacement we are building now.
@ -95,13 +102,13 @@ syntax::computeReplacements(const syntax::Arena &A,
if (ReplacedRange.empty() && Replacement.empty())
return;
llvm::cantFail(Replacements.add(tooling::Replacement(
SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement)));
SM, rangeOfExpanded(TBTM, ReplacedRange).toCharRange(SM),
Replacement)));
Replacement = "";
};
const syntax::Token *NextOriginal = Buffer.expandedTokens().begin();
enumerateTokenSpans(
&TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
&TU, TBTM, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
if (!IsOriginal) {
Replacement +=
syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM);

View File

@ -77,7 +77,8 @@ public:
}
};
void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) {
void syntax::removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM,
syntax::Statement *S) {
assert(S);
assert(S->canModify());
@ -90,5 +91,5 @@ void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) {
if (isa<EmptyStatement>(S))
return; // already an empty statement, nothing to do.
MutationsImpl::replace(S, createEmptyStatement(A));
MutationsImpl::replace(S, createEmptyStatement(A, TBTM));
}

View File

@ -8,6 +8,8 @@
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
using namespace clang;
@ -27,35 +29,40 @@ public:
}
static std::pair<FileID, ArrayRef<Token>>
lexBuffer(syntax::Arena &A, std::unique_ptr<llvm::MemoryBuffer> Buffer) {
return A.lexBuffer(std::move(Buffer));
lexBuffer(TokenBufferTokenManager &TBTM,
std::unique_ptr<llvm::MemoryBuffer> Buffer) {
return TBTM.lexBuffer(std::move(Buffer));
}
};
// FIXME: `createLeaf` is based on `syntax::tokenize` internally, as such it
// doesn't support digraphs or line continuations.
syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K,
StringRef Spelling) {
syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A,
TokenBufferTokenManager &TBTM,
tok::TokenKind K, StringRef Spelling) {
auto Tokens =
FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBufferCopy(Spelling))
FactoryImpl::lexBuffer(TBTM, llvm::MemoryBuffer::getMemBufferCopy(Spelling))
.second;
assert(Tokens.size() == 1);
assert(Tokens.front().kind() == K &&
"spelling is not lexed into the expected kind of token");
auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin());
auto *Leaf = new (A.getAllocator()) syntax::Leaf(
reinterpret_cast<TokenManager::Key>(Tokens.begin()));
syntax::FactoryImpl::setCanModify(Leaf);
Leaf->assertInvariants();
return Leaf;
}
syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K) {
syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A,
TokenBufferTokenManager &TBTM,
tok::TokenKind K) {
const auto *Spelling = tok::getPunctuatorSpelling(K);
if (!Spelling)
Spelling = tok::getKeywordSpelling(K);
assert(Spelling &&
"Cannot infer the spelling of the token from its token kind.");
return createLeaf(A, K, Spelling);
return createLeaf(A, TBTM, K, Spelling);
}
namespace {
@ -208,24 +215,25 @@ syntax::Tree *clang::syntax::createTree(
}
syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A,
TokenBufferTokenManager &TBTM,
const syntax::Node *N) {
if (const auto *L = dyn_cast<syntax::Leaf>(N))
// `L->getToken()` gives us the expanded token, thus we implicitly expand
// any macros here.
return createLeaf(A, L->getToken()->kind(),
L->getToken()->text(A.getSourceManager()));
return createLeaf(A, TBTM, TBTM.getToken(L->getTokenKey())->kind(),
TBTM.getText(L->getTokenKey()));
const auto *T = cast<syntax::Tree>(N);
std::vector<std::pair<syntax::Node *, syntax::NodeRole>> Children;
for (const auto *Child = T->getFirstChild(); Child;
Child = Child->getNextSibling())
Children.push_back({deepCopyExpandingMacros(A, Child), Child->getRole()});
Children.push_back({deepCopyExpandingMacros(A, TBTM, Child), Child->getRole()});
return createTree(A, Children, N->getKind());
}
syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A) {
syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM) {
return cast<EmptyStatement>(
createTree(A, {{createLeaf(A, tok::semi), NodeRole::Unknown}},
createTree(A, {{createLeaf(A, TBTM, tok::semi), NodeRole::Unknown}},
NodeKind::EmptyStatement));
}

View File

@ -0,0 +1,25 @@
//===- TokenBufferTokenManager.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
namespace clang {
namespace syntax {
// Namespace-scope definition of the static constexpr member that is declared
// and initialized inside the class. Before C++17 such a definition is required
// if the member is odr-used.
constexpr llvm::StringLiteral syntax::TokenBufferTokenManager::Kind;
/// Registers \p Input with the source manager, tokenizes the resulting file
/// and stores the tokens in ExtraTokens under the new FileID.
/// Returns the new FileID together with a view of the stored tokens.
std::pair<FileID, ArrayRef<syntax::Token>>
syntax::TokenBufferTokenManager::lexBuffer(
    std::unique_ptr<llvm::MemoryBuffer> Input) {
  const FileID NewFile = SM.createFileID(std::move(Input));
  // try_emplace reports whether the entry was newly inserted; the assert
  // below expects each FileID to be inserted only once.
  auto Inserted =
      ExtraTokens.try_emplace(NewFile, tokenize(NewFile, SM, LangOpts));
  assert(Inserted.second && "duplicate FileID");
  return {NewFile, Inserted.first->second};
}
} // namespace syntax
} // namespace clang

View File

@ -33,25 +33,7 @@ static void traverse(syntax::Node *N,
}
} // namespace
syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
const TokenBuffer &Tokens)
: SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
return Tokens;
}
std::pair<FileID, ArrayRef<syntax::Token>>
syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
auto FID = SourceMgr.createFileID(std::move(Input));
auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
assert(It.second && "duplicate FileID");
return {FID, It.first->second};
}
syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
assert(Tok != nullptr);
}
syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {}
syntax::Node::Node(NodeKind Kind)
: Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
@ -190,20 +172,8 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
}
namespace {
static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
const SourceManager &SM) {
assert(L);
const auto *Token = L->getToken();
assert(Token);
// Handle 'eof' separately, calling text() on it produces an empty string.
if (Token->kind() == tok::eof)
OS << "<eof>";
else
OS << Token->text(SM);
}
static void dumpNode(raw_ostream &OS, const syntax::Node *N,
const SourceManager &SM, llvm::BitVector IndentMask) {
const syntax::TokenManager &TM, llvm::BitVector IndentMask) {
auto DumpExtraInfo = [&OS](const syntax::Node *N) {
if (N->getRole() != syntax::NodeRole::Unknown)
OS << " " << N->getRole();
@ -216,7 +186,7 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N,
assert(N);
if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
OS << "'";
dumpLeaf(OS, L, SM);
OS << TM.getText(L->getTokenKey());
OS << "'";
DumpExtraInfo(N);
OS << "\n";
@ -242,25 +212,25 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N,
OS << "|-";
IndentMask.push_back(true);
}
dumpNode(OS, &It, SM, IndentMask);
dumpNode(OS, &It, TM, IndentMask);
IndentMask.pop_back();
}
}
} // namespace
// Returns a textual dump of the subtree rooted at this node. The text is
// produced by dumpNode writing into a string-backed stream, starting with an
// empty indent mask.
std::string syntax::Node::dump(const SourceManager &SM) const {
std::string syntax::Node::dump(const TokenManager &TM) const {
std::string Str;
llvm::raw_string_ostream OS(Str);
dumpNode(OS, this, SM, /*IndentMask=*/{});
dumpNode(OS, this, TM, /*IndentMask=*/{});
return std::move(OS.str());
}
std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
std::string syntax::Node::dumpTokens(const TokenManager &TM) const {
std::string Storage;
llvm::raw_string_ostream OS(Storage);
traverse(this, [&](const syntax::Node *N) {
if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
dumpLeaf(OS, L, SM);
OS << TM.getText(L->getTokenKey());
OS << " ";
}
});
@ -297,7 +267,8 @@ void syntax::Node::assertInvariants() const {
C.getRole() == NodeRole::ListDelimiter);
if (C.getRole() == NodeRole::ListDelimiter) {
assert(isa<Leaf>(C));
assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
// FIXME: re-enable it when there is a way to retrieve the token kind in Leaf.
// assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
}
}

View File

@ -25,6 +25,7 @@
#include "clang/StaticAnalyzer/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "clang/Tooling/Tooling.h"
@ -157,9 +158,11 @@ public:
clang::syntax::TokenBuffer TB = std::move(Collector).consume();
if (TokensDump)
llvm::outs() << TB.dumpForTests();
clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB);
llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(
AST.getSourceManager());
clang::syntax::TokenBufferTokenManager TBTM(TB, AST.getLangOpts(),
AST.getSourceManager());
clang::syntax::Arena A;
llvm::outs()
<< clang::syntax::buildSyntaxTree(A, TBTM, AST)->dump(TBTM);
}
private:

View File

@ -26,7 +26,7 @@ protected:
auto ErrorOK = errorOK(Code);
if (!ErrorOK)
return ErrorOK;
auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
auto Actual = StringRef(Root->dump(*TM)).trim().str();
// EXPECT_EQ shows the diff between the two strings if they are different.
EXPECT_EQ(Tree.trim().str(), Actual);
if (Actual != Tree.trim().str()) {
@ -59,7 +59,7 @@ protected:
auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root);
assert(AnnotatedNode);
auto AnnotatedNodeDump =
StringRef(AnnotatedNode->dump(Arena->getSourceManager()))
StringRef(AnnotatedNode->dump(*TM))
.trim()
.str();
// EXPECT_EQ shows the diff between the two strings if they are different.

View File

@ -30,7 +30,7 @@ protected:
Transform(Source, Root);
auto Replacements = syntax::computeReplacements(*Arena, *Root);
auto Replacements = syntax::computeReplacements(*TM, *Root);
auto Output = tooling::applyAllReplacements(Source.code(), Replacements);
if (!Output) {
ADD_FAILURE() << "could not apply replacements: "
@ -47,7 +47,7 @@ protected:
TranslationUnit *Root) {
auto *S = cast<syntax::Statement>(nodeByRange(Input.range(), Root));
ASSERT_TRUE(S->canModify()) << "cannot remove a statement";
syntax::removeStatement(*Arena, S);
syntax::removeStatement(*Arena, *TM, S);
EXPECT_TRUE(S->isDetached());
EXPECT_FALSE(S->isOriginal())
<< "node removed from tree cannot be marked as original";

View File

@ -27,7 +27,7 @@ protected:
return ::testing::AssertionFailure()
<< "Root was not built successfully.";
auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
auto Actual = StringRef(Root->dump(*TM)).trim().str();
auto Expected = Dump.trim().str();
// EXPECT_EQ shows the diff between the two strings if they are different.
EXPECT_EQ(Expected, Actual);
@ -44,7 +44,7 @@ INSTANTIATE_TEST_SUITE_P(SynthesisTests, SynthesisTest,
TEST_P(SynthesisTest, Leaf_Punctuation) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::comma);
auto *Leaf = createLeaf(*Arena, *TM, tok::comma);
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
',' Detached synthesized
@ -57,7 +57,7 @@ TEST_P(SynthesisTest, Leaf_Punctuation_CXX) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::coloncolon);
auto *Leaf = createLeaf(*Arena, *TM, tok::coloncolon);
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
'::' Detached synthesized
@ -67,7 +67,7 @@ TEST_P(SynthesisTest, Leaf_Punctuation_CXX) {
TEST_P(SynthesisTest, Leaf_Keyword) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::kw_if);
auto *Leaf = createLeaf(*Arena, *TM, tok::kw_if);
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
'if' Detached synthesized
@ -80,7 +80,7 @@ TEST_P(SynthesisTest, Leaf_Keyword_CXX11) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::kw_nullptr);
auto *Leaf = createLeaf(*Arena, *TM, tok::kw_nullptr);
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
'nullptr' Detached synthesized
@ -90,7 +90,7 @@ TEST_P(SynthesisTest, Leaf_Keyword_CXX11) {
TEST_P(SynthesisTest, Leaf_Identifier) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::identifier, "a");
auto *Leaf = createLeaf(*Arena, *TM, tok::identifier, "a");
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
'a' Detached synthesized
@ -100,7 +100,7 @@ TEST_P(SynthesisTest, Leaf_Identifier) {
TEST_P(SynthesisTest, Leaf_Number) {
buildTree("", GetParam());
auto *Leaf = createLeaf(*Arena, tok::numeric_constant, "1");
auto *Leaf = createLeaf(*Arena, *TM, tok::numeric_constant, "1");
EXPECT_TRUE(treeDumpEqual(Leaf, R"txt(
'1' Detached synthesized
@ -120,8 +120,8 @@ UnknownExpression Detached synthesized
TEST_P(SynthesisTest, Tree_Flat) {
buildTree("", GetParam());
auto *LeafLParen = createLeaf(*Arena, tok::l_paren);
auto *LeafRParen = createLeaf(*Arena, tok::r_paren);
auto *LeafLParen = createLeaf(*Arena, *TM, tok::l_paren);
auto *LeafRParen = createLeaf(*Arena, *TM, tok::r_paren);
auto *TreeParen = createTree(*Arena,
{{LeafLParen, NodeRole::LeftHandSide},
{LeafRParen, NodeRole::RightHandSide}},
@ -137,13 +137,13 @@ ParenExpression Detached synthesized
TEST_P(SynthesisTest, Tree_OfTree) {
buildTree("", GetParam());
auto *Leaf1 = createLeaf(*Arena, tok::numeric_constant, "1");
auto *Leaf1 = createLeaf(*Arena, *TM, tok::numeric_constant, "1");
auto *Int1 = createTree(*Arena, {{Leaf1, NodeRole::LiteralToken}},
NodeKind::IntegerLiteralExpression);
auto *LeafPlus = createLeaf(*Arena, tok::plus);
auto *LeafPlus = createLeaf(*Arena, *TM, tok::plus);
auto *Leaf2 = createLeaf(*Arena, tok::numeric_constant, "2");
auto *Leaf2 = createLeaf(*Arena, *TM, tok::numeric_constant, "2");
auto *Int2 = createTree(*Arena, {{Leaf2, NodeRole::LiteralToken}},
NodeKind::IntegerLiteralExpression);
@ -166,16 +166,15 @@ BinaryOperatorExpression Detached synthesized
// Builds a ContinueStatement tree from two synthesized leaves (`continue` and
// `;`), deep-copies it, and checks that the copy's dump equals the dump of the
// tree it was copied from.
TEST_P(SynthesisTest, DeepCopy_Synthesized) {
buildTree("", GetParam());
auto *LeafContinue = createLeaf(*Arena, tok::kw_continue);
auto *LeafSemiColon = createLeaf(*Arena, tok::semi);
auto *LeafContinue = createLeaf(*Arena, *TM, tok::kw_continue);
auto *LeafSemiColon = createLeaf(*Arena, *TM, tok::semi);
auto *StatementContinue = createTree(*Arena,
{{LeafContinue, NodeRole::LiteralToken},
{LeafSemiColon, NodeRole::Unknown}},
NodeKind::ContinueStatement);
auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue);
EXPECT_TRUE(
treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager())));
auto *Copy = deepCopyExpandingMacros(*Arena, *TM, StatementContinue);
EXPECT_TRUE(treeDumpEqual(Copy, StatementContinue->dump(*TM)));
// FIXME: Test that copy is independent of original, once the Mutations API is
// more developed.
}
@ -183,7 +182,7 @@ TEST_P(SynthesisTest, DeepCopy_Synthesized) {
TEST_P(SynthesisTest, DeepCopy_Original) {
auto *OriginalTree = buildTree("int a;", GetParam());
auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree);
auto *Copy = deepCopyExpandingMacros(*Arena, *TM, OriginalTree);
EXPECT_TRUE(treeDumpEqual(Copy, R"txt(
TranslationUnit Detached synthesized
`-SimpleDeclaration synthesized
@ -198,7 +197,8 @@ TranslationUnit Detached synthesized
TEST_P(SynthesisTest, DeepCopy_Child) {
auto *OriginalTree = buildTree("int a;", GetParam());
auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree->getFirstChild());
auto *Copy =
deepCopyExpandingMacros(*Arena, *TM, OriginalTree->getFirstChild());
EXPECT_TRUE(treeDumpEqual(Copy, R"txt(
SimpleDeclaration Detached synthesized
|-'int' synthesized
@ -218,7 +218,7 @@ void test() {
})cpp",
GetParam());
auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree);
auto *Copy = deepCopyExpandingMacros(*Arena, *TM, OriginalTree);
// The syntax tree stores already expanded Tokens, we can only see whether the
// macro was expanded when computing replacements. The dump does show that
@ -260,7 +260,7 @@ TranslationUnit Detached synthesized
TEST_P(SynthesisTest, Statement_EmptyStatement) {
buildTree("", GetParam());
auto *S = createEmptyStatement(*Arena);
auto *S = createEmptyStatement(*Arena, *TM);
EXPECT_TRUE(treeDumpEqual(S, R"txt(
EmptyStatement Detached synthesized
`-';' synthesized

View File

@ -27,7 +27,7 @@ private:
ChildrenWithRoles.reserve(Children.size());
for (const auto *Child : Children) {
ChildrenWithRoles.push_back(std::make_pair(
deepCopyExpandingMacros(*Arena, Child), NodeRole::Unknown));
deepCopyExpandingMacros(*Arena, *TM, Child), NodeRole::Unknown));
}
return clang::syntax::createTree(*Arena, ChildrenWithRoles,
NodeKind::UnknownExpression);
@ -108,29 +108,29 @@ INSTANTIATE_TEST_SUITE_P(TreeTests, TreeTest,
// For every tree that generateAllTreesWithShape produces from the leaves
// `(` and `)`, findFirstLeaf() must be non-null and its token must be `(`.
TEST_P(TreeTest, FirstLeaf) {
buildTree("", GetParam());
std::vector<const Node *> Leafs = {createLeaf(*Arena, tok::l_paren),
createLeaf(*Arena, tok::r_paren)};
std::vector<const Node *> Leafs = {createLeaf(*Arena, *TM, tok::l_paren),
createLeaf(*Arena, *TM, tok::r_paren)};
for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
// ASSERT (not EXPECT) so the dereference below is skipped on failure.
ASSERT_TRUE(Tree->findFirstLeaf() != nullptr);
EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren);
EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren);
}
}
// For every tree that generateAllTreesWithShape produces from the leaves
// `(` and `)`, findLastLeaf() must be non-null and its token must be `)`.
TEST_P(TreeTest, LastLeaf) {
buildTree("", GetParam());
std::vector<const Node *> Leafs = {createLeaf(*Arena, tok::l_paren),
createLeaf(*Arena, tok::r_paren)};
std::vector<const Node *> Leafs = {createLeaf(*Arena, *TM, tok::l_paren),
createLeaf(*Arena, *TM, tok::r_paren)};
for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
// ASSERT (not EXPECT) so the dereference below is skipped on failure.
ASSERT_TRUE(Tree->findLastLeaf() != nullptr);
EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren);
EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren);
}
}
TEST_F(TreeTest, Iterators) {
buildTree("", allTestClangConfigs().front());
std::vector<Node *> Children = {createLeaf(*Arena, tok::identifier, "a"),
createLeaf(*Arena, tok::identifier, "b"),
createLeaf(*Arena, tok::identifier, "c")};
std::vector<Node *> Children = {createLeaf(*Arena, *TM, tok::identifier, "a"),
createLeaf(*Arena, *TM, tok::identifier, "b"),
createLeaf(*Arena, *TM, tok::identifier, "c")};
auto *Tree = syntax::createTree(*Arena,
{{Children[0], NodeRole::LeftHandSide},
{Children[1], NodeRole::OperatorToken},
@ -180,7 +180,7 @@ class ListTest : public SyntaxTreeTest {
private:
std::string dumpQuotedTokensOrNull(const Node *N) {
return N ? "'" +
StringRef(N->dumpTokens(Arena->getSourceManager()))
StringRef(N->dumpTokens(*TM))
.trim()
.str() +
"'"
@ -233,11 +233,11 @@ TEST_P(ListTest, List_Separated_WellFormed) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
},
NodeKind::CallArguments));
@ -254,10 +254,10 @@ TEST_P(ListTest, List_Separated_MissingElement) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
},
NodeKind::CallArguments));
@ -274,10 +274,10 @@ TEST_P(ListTest, List_Separated_MissingDelimiter) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
},
NodeKind::CallArguments));
@ -294,10 +294,10 @@ TEST_P(ListTest, List_Separated_MissingLastElement) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter},
},
NodeKind::CallArguments));
@ -317,12 +317,12 @@ TEST_P(ListTest, List_Terminated_WellFormed) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
},
NodeKind::NestedNameSpecifier));
@ -342,11 +342,11 @@ TEST_P(ListTest, List_Terminated_MissingElement) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
},
NodeKind::NestedNameSpecifier));
@ -366,11 +366,11 @@ TEST_P(ListTest, List_Terminated_MissingDelimiter) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
},
NodeKind::NestedNameSpecifier));
@ -390,11 +390,11 @@ TEST_P(ListTest, List_Terminated_MissingLastDelimiter) {
auto *List = dyn_cast<syntax::List>(syntax::createTree(
*Arena,
{
{createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement},
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter},
{createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement},
},
NodeKind::NestedNameSpecifier));

View File

@ -35,13 +35,14 @@ using namespace clang;
using namespace clang::syntax;
namespace {
// Returns the tokens covered by N. For a Leaf this is its single token; for a
// Tree it is the range from the first leaf's token through the last leaf's
// token. N must be an original node.
ArrayRef<syntax::Token> tokens(syntax::Node *N) {
ArrayRef<syntax::Token> tokens(syntax::Node *N,
const TokenBufferTokenManager &STM) {
assert(N->isOriginal() && "tokens of modified nodes are not well-defined");
if (auto *L = dyn_cast<syntax::Leaf>(N))
return llvm::makeArrayRef(L->getToken(), 1);
return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1);
auto *T = cast<syntax::Tree>(N);
// The end pointer is one past the last leaf's token, so that token is
// included in the returned range.
return llvm::makeArrayRef(T->findFirstLeaf()->getToken(),
T->findLastLeaf()->getToken() + 1);
return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()),
STM.getToken(T->findLastLeaf()->getTokenKey()) + 1);
}
} // namespace
@ -70,23 +71,26 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) {
public:
// Binds references to the caller's Root, TB, TM and Arena slots and takes
// ownership of the token collector, which must be non-null.
BuildSyntaxTree(syntax::TranslationUnit *&Root,
std::unique_ptr<syntax::TokenBuffer> &TB,
std::unique_ptr<syntax::TokenBufferTokenManager> &TM,
std::unique_ptr<syntax::Arena> &Arena,
std::unique_ptr<syntax::TokenCollector> Tokens)
: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) {
: Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) {
// `this->` selects the member; the parameter of the same name was moved from.
assert(this->Tokens);
}
// Consumes the collected tokens into TB, then creates the token manager and
// arena from the AST context and stores the built syntax tree in Root.
void HandleTranslationUnit(ASTContext &Ctx) override {
TB = std::make_unique<syntax::TokenBuffer>(std::move(*Tokens).consume());
Tokens = nullptr; // make sure we fail if this gets called twice.
Arena = std::make_unique<syntax::Arena>(Ctx.getSourceManager(),
Ctx.getLangOpts(), *TB);
Root = syntax::buildSyntaxTree(*Arena, Ctx);
// The token manager is constructed over *TB, so TB has to be set first.
TM = std::make_unique<syntax::TokenBufferTokenManager>(
*TB, Ctx.getLangOpts(), Ctx.getSourceManager());
Arena = std::make_unique<syntax::Arena>();
Root = syntax::buildSyntaxTree(*Arena, *TM, Ctx);
}
private:
syntax::TranslationUnit *&Root;
std::unique_ptr<syntax::TokenBuffer> &TB;
std::unique_ptr<syntax::TokenBufferTokenManager> &TM;
std::unique_ptr<syntax::Arena> &Arena;
std::unique_ptr<syntax::TokenCollector> Tokens;
};
@ -94,21 +98,23 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) {
// Frontend action whose AST consumer is a BuildSyntaxTree. It holds references
// to the caller's Root, TM, TB and Arena slots and forwards them to the
// consumer it creates.
class BuildSyntaxTreeAction : public ASTFrontendAction {
public:
// NOTE(review): the parameter order here is (Root, TM, TB, Arena), while the
// BuildSyntaxTree call below passes (Root, TB, TM, Arena).
BuildSyntaxTreeAction(syntax::TranslationUnit *&Root,
std::unique_ptr<syntax::TokenBufferTokenManager> &TM,
std::unique_ptr<syntax::TokenBuffer> &TB,
std::unique_ptr<syntax::Arena> &Arena)
: Root(Root), TB(TB), Arena(Arena) {}
: Root(Root), TM(TM), TB(TB), Arena(Arena) {}
// Creates a TokenCollector on the compiler instance's preprocessor and hands
// it, together with the stored references, to a new BuildSyntaxTree consumer.
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) override {
// We start recording the tokens, ast consumer will take on the result.
auto Tokens =
std::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
return std::make_unique<BuildSyntaxTree>(Root, TB, Arena,
return std::make_unique<BuildSyntaxTree>(Root, TB, TM, Arena,
std::move(Tokens));
}
private:
// References to storage owned by the caller.
syntax::TranslationUnit *&Root;
std::unique_ptr<syntax::TokenBufferTokenManager> &TM;
std::unique_ptr<syntax::TokenBuffer> &TB;
std::unique_ptr<syntax::Arena> &Arena;
};
@ -149,7 +155,7 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) {
Compiler.setSourceManager(SourceMgr.get());
syntax::TranslationUnit *Root = nullptr;
BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena);
BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena);
// Action could not be executed but the frontend didn't identify any errors
// in the code ==> problem in setting up the action.
@ -163,7 +169,7 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) {
syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R,
syntax::Node *Root) {
ArrayRef<syntax::Token> Toks = tokens(Root);
ArrayRef<syntax::Token> Toks = tokens(Root, *TM);
if (Toks.front().location().isFileID() && Toks.back().location().isFileID() &&
syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==

View File

@ -17,6 +17,7 @@
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Testing/TestClangConfig.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/StringRef.h"
@ -51,6 +52,7 @@ protected:
std::shared_ptr<CompilerInvocation> Invocation;
// Set after calling buildTree().
std::unique_ptr<syntax::TokenBuffer> TB;
std::unique_ptr<syntax::TokenBufferTokenManager> TM;
std::unique_ptr<syntax::Arena> Arena;
};