From 7e3b22758d1b28e3215d728257843085509b299e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 9 Jul 2019 11:26:35 +0000 Subject: [PATCH] Revert rL365355 : [Syntax] Introduce syntax trees Summary: A tooling-focused alternative to the AST. This commit focuses on the memory-management strategy and the structure of the AST. More to follow later: - Operations to mutate the syntax trees and corresponding textual replacements. - Mapping between clang AST nodes and syntax tree nodes. - More node types corresponding to the language constructs. Reviewers: sammccall Reviewed By: sammccall Subscribers: llvm-commits, mgorny, cfe-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D61637 ........ Fixes buildbots which were crashing on SyntaxTests.exe llvm-svn: 365465 --- .../include/clang/Tooling/Syntax/BuildTree.h | 24 -- clang/include/clang/Tooling/Syntax/Nodes.h | 85 ------ clang/include/clang/Tooling/Syntax/Tree.h | 154 ---------- clang/lib/Tooling/Syntax/BuildTree.cpp | 271 ------------------ clang/lib/Tooling/Syntax/CMakeLists.txt | 5 - clang/lib/Tooling/Syntax/Nodes.cpp | 33 --- clang/lib/Tooling/Syntax/Tree.cpp | 145 ---------- clang/unittests/Tooling/Syntax/CMakeLists.txt | 1 - clang/unittests/Tooling/Syntax/TreeTest.cpp | 160 ----------- 9 files changed, 878 deletions(-) delete mode 100644 clang/include/clang/Tooling/Syntax/BuildTree.h delete mode 100644 clang/include/clang/Tooling/Syntax/Nodes.h delete mode 100644 clang/include/clang/Tooling/Syntax/Tree.h delete mode 100644 clang/lib/Tooling/Syntax/BuildTree.cpp delete mode 100644 clang/lib/Tooling/Syntax/Nodes.cpp delete mode 100644 clang/lib/Tooling/Syntax/Tree.cpp delete mode 100644 clang/unittests/Tooling/Syntax/TreeTest.cpp diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h deleted file mode 100644 index 055d6462eabd..000000000000 --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ /dev/null @@ -1,24 +0,0 @@ -//===- BuildTree.h - build syntax trees -----------------------*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Functions to construct a syntax tree from an AST. -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H - -#include "clang/AST/Decl.h" -#include "clang/Tooling/Syntax/Nodes.h" - -namespace clang { -namespace syntax { - -/// Build a syntax tree for the main file. -syntax::TranslationUnit *buildSyntaxTree(Arena &A, - const clang::TranslationUnitDecl &TU); -} // namespace syntax -} // namespace clang -#endif diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h deleted file mode 100644 index d4bab5cb1e70..000000000000 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ /dev/null @@ -1,85 +0,0 @@ -//===- Nodes.h - syntax nodes for C/C++ grammar constructs ----*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Syntax tree nodes for C, C++ and Objective-C grammar constructs. -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H -#define LLVM_CLANG_TOOLING_SYNTAX_NODES_H - -#include "clang/Basic/TokenKinds.h" -#include "clang/Lex/Token.h" -#include "clang/Tooling/Syntax/Tokens.h" -#include "clang/Tooling/Syntax/Tree.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang { -namespace syntax { - -/// A kind of a syntax node, used for implementing casts. -enum class NodeKind : uint16_t { - Leaf, - TranslationUnit, - TopLevelDeclaration, - CompoundStatement -}; -/// For debugging purposes. -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); - -/// A root node for a translation unit. Parent is always null. -class TranslationUnit final : public Tree { -public: - TranslationUnit() : Tree(NodeKind::TranslationUnit) {} - static bool classof(const Node *N) { - return N->kind() == NodeKind::TranslationUnit; - } -}; - -/// FIXME: this node is temporary and will be replaced with nodes for various -/// 'declarations' and 'declarators' from the C/C++ grammar -/// -/// Represents any top-level declaration. Only there to give the syntax tree a -/// bit of structure until we implement syntax nodes for declarations and -/// declarators. -class TopLevelDeclaration final : public Tree { -public: - TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {} - static bool classof(const Node *N) { - return N->kind() == NodeKind::TopLevelDeclaration; - } -}; - -/// An abstract node for C++ statements, e.g. 'while', 'if', etc. -class Statement : public Tree { -public: - Statement(NodeKind K) : Tree(K) {} - static bool classof(const Node *N) { - return NodeKind::CompoundStatement <= N->kind() && - N->kind() <= NodeKind::CompoundStatement; - } -}; - -/// { statement1; statement2; … } -class CompoundStatement final : public Statement { -public: - CompoundStatement() : Statement(NodeKind::CompoundStatement) {} - static bool classof(const Node *N) { - return N->kind() == NodeKind::CompoundStatement; - } - syntax::Leaf *lbrace(); - syntax::Leaf *rbrace(); - - struct Roles { - static constexpr NodeRole lbrace = 1; - static constexpr NodeRole rbrace = 2; - }; -}; - -} // namespace syntax -} // namespace clang -#endif diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h deleted file mode 100644 index f7ce8a66f4bb..000000000000 --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ /dev/null @@ -1,154 +0,0 @@ -//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, -// - tree nodes correspond to language grammar constructs. -// -// The tree is initially built from an AST. Each node of a newly built tree -// covers a continous subrange of expanded tokens (i.e. tokens after -// preprocessing), the specific tokens coverered are stored in the leaf nodes of -// a tree. A post-order traversal of a tree will visit leaf nodes in an order -// corresponding the original order of expanded tokens. -// -// This is still work in progress and highly experimental, we leave room for -// ourselves to completely change the design and/or implementation. -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H -#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H - -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Tokens.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/Allocator.h" -#include - -namespace clang { -namespace syntax { - -/// A memory arena for syntax trees. Also tracks the underlying token buffers, -/// source manager, etc. -class Arena { -public: - Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - TokenBuffer Tokens); - - const SourceManager &sourceManager() const { return SourceMgr; } - const LangOptions &langOptions() const { return LangOpts; } - - const TokenBuffer &tokenBuffer() const; - llvm::BumpPtrAllocator &allocator() { return Allocator; } - - /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Useful when there is a need to materialize tokens that - /// were not written in user code. - std::pair> - lexBuffer(std::unique_ptr Buffer); - -private: - SourceManager &SourceMgr; - const LangOptions &LangOpts; - TokenBuffer Tokens; - /// IDs and storage for additional tokenized files. - llvm::DenseMap> ExtraTokens; - /// Keeps all the allocated nodes and their intermediate data structures. - llvm::BumpPtrAllocator Allocator; -}; - -class Tree; -class TreeBuilder; -enum class NodeKind : uint16_t; - -/// Represents a relation of this node to its parent, e.g. 'lbrace inside a -/// compound statement'. -/// -/// Each node type defines a set of roles for its children. -using NodeRole = uint8_t; - -/// Role for detached nodes, i.e. the ones that do not have parent nodes. -constexpr NodeRole NodeRoleDetached = 0; -/// Role for children of unknown semantic nature, e.g. skipped tokens, comments. -constexpr NodeRole NodeRoleUnknown = 255; - -/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or -/// a Tree (representing language constructrs). -class Node { -public: - /// Newly created nodes are detached from a tree, parent and sibling links are - /// set when the node is added as a child to another one. - Node(NodeKind Kind) - : Parent(nullptr), NextSibling(nullptr), - Kind(static_cast(Kind)), Role(NodeRoleDetached) {} - - NodeKind kind() const { return static_cast(Kind); } - NodeRole role() const { return Role; } - - const Tree *parent() const { return Parent; } - Tree *parent() { return Parent; } - - const Node *nextSibling() const { return NextSibling; } - Node *nextSibling() { return NextSibling; } - - /// Dumps the structure of a subtree. For debugging and testing purposes. - std::string dump(const Arena &A) const; - /// Dumps the tokens forming this subtree. - std::string dumpTokens(const Arena &A) const; - -private: - // Tree is allowed to change the Parent link and Role. - friend class Tree; - - Tree *Parent; - Node *NextSibling; - unsigned Kind : 16; - unsigned Role : 8; -}; - -/// A leaf node points to a single token inside the expanded token stream. -class Leaf final : public Node { -public: - Leaf(const syntax::Token *T); - static bool classof(const Node *N); - - const syntax::Token *token() const { return Tok; } - -private: - const syntax::Token *Tok; -}; - -/// A node that has children and represents a syntactic language construct. -class Tree : public Node { -public: - using Node::Node; - static bool classof(const Node *N); - - Node *firstChild() { return FirstChild; } - const Node *firstChild() const { return FirstChild; } - -protected: - /// Find the first node with a corresponding role. - syntax::Node *findChild(NodeRole R); - -private: - /// Prepend \p Child to the list of children and and sets the parent pointer. - /// A very low-level operation that does not check any invariants, only used - /// by TreeBuilder. - /// EXPECTS: Role != NodeRoleDetached. - void prependChildLowLevel(Node *Child, NodeRole Role); - friend class TreeBuilder; - - Node *FirstChild = nullptr; -}; - -} // namespace syntax -} // namespace clang - -#endif diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp deleted file mode 100644 index dc682ff677fe..000000000000 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ /dev/null @@ -1,271 +0,0 @@ -//===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Tooling/Syntax/BuildTree.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/Stmt.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TokenKinds.h" -#include "clang/Lex/Lexer.h" -#include "clang/Tooling/Syntax/Nodes.h" -#include "clang/Tooling/Syntax/Tokens.h" -#include "clang/Tooling/Syntax/Tree.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace clang; - -/// A helper class for constructing the syntax tree while traversing a clang -/// AST. -/// -/// At each point of the traversal we maintain a list of pending nodes. -/// Initially all tokens are added as pending nodes. When processing a clang AST -/// node, the clients need to: -/// - create a corresponding syntax node, -/// - assign roles to all pending child nodes with 'markChild' and -/// 'markChildToken', -/// - replace the child nodes with the new syntax node in the pending list -/// with 'foldNode'. -/// -/// Note that all children are expected to be processed when building a node. -/// -/// Call finalize() to finish building the tree and consume the root node. -class syntax::TreeBuilder { -public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} - - llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } - - /// Populate children for \p New node, assuming it covers tokens from \p - /// Range. - void foldNode(llvm::ArrayRef Range, syntax::Tree *New); - - /// Set role for a token starting at \p Loc. - void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); - - /// Finish building the tree and consume the root node. - syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.tokenBuffer().expandedTokens(); - // Build the root of the tree, consuming all the children. - Pending.foldChildren(Tokens, - new (Arena.allocator()) syntax::TranslationUnit); - - return cast(std::move(Pending).finalize()); - } - - /// getRange() finds the syntax tokens corresponding to the passed source - /// locations. - /// \p First is the start position of the first token and \p Last is the start - /// position of the last token. - llvm::ArrayRef getRange(SourceLocation First, - SourceLocation Last) const { - assert(First.isValid()); - assert(Last.isValid()); - assert(First == Last || - Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); - return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); - } - llvm::ArrayRef getRange(const Decl *D) const { - return getRange(D->getBeginLoc(), D->getEndLoc()); - } - llvm::ArrayRef getRange(const Stmt *S) const { - return getRange(S->getBeginLoc(), S->getEndLoc()); - } - -private: - /// Finds a token starting at \p L. The token must exist. - const syntax::Token *findToken(SourceLocation L) const; - - /// A collection of trees covering the input tokens. - /// When created, each tree corresponds to a single token in the file. - /// Clients call 'foldChildren' to attach one or more subtrees to a parent - /// node and update the list of trees accordingly. - /// - /// Ensures that added nodes properly nest and cover the whole token stream. - struct Forest { - Forest(syntax::Arena &A) { - // FIXME: do not add 'eof' to the tree. - - // Create all leaf nodes. - for (auto &T : A.tokenBuffer().expandedTokens()) - Trees.insert(Trees.end(), - {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); - } - - void assignRole(llvm::ArrayRef Range, - syntax::NodeRole Role) { - assert(!Range.empty()); - auto It = Trees.lower_bound(Range.begin()); - assert(It != Trees.end() && "no node found"); - assert(It->first == Range.begin() && "no child with the specified range"); - assert((std::next(It) == Trees.end() || - std::next(It)->first == Range.end()) && - "no child with the specified range"); - It->second.Role = Role; - } - - /// Add \p Node to the forest and fill its children nodes based on the \p - /// NodeRange. - void foldChildren(llvm::ArrayRef NodeTokens, - syntax::Tree *Node) { - assert(!NodeTokens.empty()); - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = NodeTokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert(BeginChildren != Trees.end() && - BeginChildren->first == FirstToken && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(NodeTokens.end()); - assert((EndChildren == Trees.end() || - EndChildren->first == NodeTokens.end()) && - "fold crosses boundaries of existing subtrees"); - - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); - - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); - } - - // EXPECTS: all tokens were consumed and are owned by a single root node. - syntax::Node *finalize() && { - assert(Trees.size() == 1); - auto *Root = Trees.begin()->second.Node; - Trees = {}; - return Root; - } - - std::string str(const syntax::Arena &A) const { - std::string R; - for (auto It = Trees.begin(); It != Trees.end(); ++It) { - unsigned CoveredTokens = - It != Trees.end() - ? (std::next(It)->first - It->first) - : A.tokenBuffer().expandedTokens().end() - It->first; - - R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", - It->second.Node->kind(), - It->first->text(A.sourceManager()), CoveredTokens); - R += It->second.Node->dump(A); - } - return R; - } - - private: - /// A with a role that should be assigned to it when adding to a parent. - struct NodeAndRole { - explicit NodeAndRole(syntax::Node *Node) - : Node(Node), Role(NodeRoleUnknown) {} - - syntax::Node *Node; - NodeRole Role; - }; - - /// Maps from the start token to a subtree starting at that token. - /// FIXME: storing the end tokens is redundant. - /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. - std::map Trees; - }; - - /// For debugging purposes. - std::string str() { return Pending.str(Arena); } - - syntax::Arena &Arena; - Forest Pending; -}; - -namespace { -class BuildTreeVisitor : public RecursiveASTVisitor { -public: - explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) - : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} - - bool shouldTraversePostOrder() const { return true; } - - bool TraverseDecl(Decl *D) { - if (!D || isa(D)) - return RecursiveASTVisitor::TraverseDecl(D); - if (!llvm::isa(D->getDeclContext())) - return true; // Only build top-level decls for now, do not recurse. - return RecursiveASTVisitor::TraverseDecl(D); - } - - bool VisitDecl(Decl *D) { - assert(llvm::isa(D->getDeclContext()) && - "expected a top-level decl"); - assert(!D->isImplicit()); - Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::TopLevelDeclaration()); - return true; - } - - bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { - // (!) we do not want to call VisitDecl(), the declaration for translation - // unit is built by finalize(). - return true; - } - - bool WalkUpFromCompoundStmt(CompoundStmt *S) { - using Roles = syntax::CompoundStatement::Roles; - - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, Roles::lbrace); - Builder.markChildToken(S->getRBracLoc(), tok::r_brace, Roles::rbrace); - - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::CompoundStatement); - return true; - } - -private: - /// A small helper to save some typing. - llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } - - syntax::TreeBuilder &Builder; - const LangOptions &LangOpts; -}; -} // namespace - -void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, - syntax::Tree *New) { - Pending.foldChildren(Range, New); -} - -void syntax::TreeBuilder::markChildToken(SourceLocation Loc, - tok::TokenKind Kind, NodeRole Role) { - if (Loc.isInvalid()) - return; - Pending.assignRole(*findToken(Loc), Role); -} - -const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { - auto Tokens = Arena.tokenBuffer().expandedTokens(); - auto &SM = Arena.sourceManager(); - auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { - return SM.isBeforeInTranslationUnit(T.location(), L); - }); - assert(It != Tokens.end()); - assert(It->location() == L); - return &*It; -} - -syntax::TranslationUnit * -syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { - TreeBuilder Builder(A); - BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); - return std::move(Builder).finalize(); -} diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt index fee5f5b0ce56..e4c2660057b7 100644 --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -1,15 +1,10 @@ set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangToolingSyntax - BuildTree.cpp - Nodes.cpp Tokens.cpp - Tree.cpp LINK_LIBS - clangAST clangBasic clangFrontend clangLex - clangToolingCore ) diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp deleted file mode 100644 index 5dd793ccc2dd..000000000000 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===- Nodes.cpp ----------------------------------------------*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Tooling/Syntax/Nodes.h" -#include "clang/Basic/TokenKinds.h" - -using namespace clang; - -llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { - switch (K) { - case NodeKind::Leaf: - return OS << "Leaf"; - case NodeKind::TranslationUnit: - return OS << "TranslationUnit"; - case NodeKind::TopLevelDeclaration: - return OS << "TopLevelDeclaration"; - case NodeKind::CompoundStatement: - return OS << "CompoundStatement"; - } - llvm_unreachable("unknown node kind"); -} - -syntax::Leaf *syntax::CompoundStatement::lbrace() { - return llvm::cast_or_null(findChild(Roles::lbrace)); -} - -syntax::Leaf *syntax::CompoundStatement::rbrace() { - return llvm::cast_or_null(findChild(Roles::rbrace)); -} diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp deleted file mode 100644 index fb7645786c56..000000000000 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ /dev/null @@ -1,145 +0,0 @@ -//===- Tree.cpp -----------------------------------------------*- C++ -*-=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Tooling/Syntax/Tree.h" -#include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Nodes.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Casting.h" - -using namespace clang; - -syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - TokenBuffer Tokens) - : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(std::move(Tokens)) {} - -const clang::syntax::TokenBuffer &syntax::Arena::tokenBuffer() const { - return Tokens; -} - -std::pair> -syntax::Arena::lexBuffer(std::unique_ptr Input) { - auto FID = SourceMgr.createFileID(std::move(Input)); - auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); - assert(It.second && "duplicate FileID"); - return {FID, It.first->second}; -} - -syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { - assert(Tok != nullptr); -} - -bool syntax::Leaf::classof(const Node *N) { - return N->kind() == NodeKind::Leaf; -} - -bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } - -void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { - assert(Child->Parent == nullptr); - assert(Child->NextSibling == nullptr); - assert(Child->Role == NodeRoleDetached); - assert(Role != NodeRoleDetached); - - Child->Parent = this; - Child->NextSibling = this->FirstChild; - Child->Role = Role; - this->FirstChild = Child; -} - -namespace { -static void traverse(const syntax::Node *N, - llvm::function_ref Visit) { - if (auto *T = dyn_cast(N)) { - for (auto *C = T->firstChild(); C; C = C->nextSibling()) - traverse(C, Visit); - } - Visit(N); -} -static void dumpTokens(llvm::raw_ostream &OS, ArrayRef Tokens, - const SourceManager &SM) { - assert(!Tokens.empty()); - bool First = true; - for (const auto &T : Tokens) { - if (!First) - OS << " "; - else - First = false; - // Handle 'eof' separately, calling text() on it produces an empty string. - if (T.kind() == tok::eof) { - OS << ""; - continue; - } - OS << T.text(SM); - } -} - -static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, - const syntax::Arena &A, std::vector IndentMask) { - if (N->role() != syntax::NodeRoleUnknown) { - // FIXME: print the symbolic name of a role. - if (N->role() == syntax::NodeRoleDetached) - OS << "*: "; - else - OS << static_cast(N->role()) << ": "; - } - if (auto *L = llvm::dyn_cast(N)) { - dumpTokens(OS, *L->token(), A.sourceManager()); - OS << "\n"; - return; - } - - auto *T = llvm::cast(N); - OS << T->kind() << "\n"; - - for (auto It = T->firstChild(); It != nullptr; It = It->nextSibling()) { - for (bool Filled : IndentMask) { - if (Filled) - OS << "| "; - else - OS << " "; - } - if (!It->nextSibling()) { - OS << "`-"; - IndentMask.push_back(false); - } else { - OS << "|-"; - IndentMask.push_back(true); - } - dumpTree(OS, It, A, IndentMask); - IndentMask.pop_back(); - } -} -} // namespace - -std::string syntax::Node::dump(const Arena &A) const { - std::string Str; - llvm::raw_string_ostream OS(Str); - dumpTree(OS, this, A, /*IndentMask=*/{}); - return std::move(OS.str()); -} - -std::string syntax::Node::dumpTokens(const Arena &A) const { - std::string Storage; - llvm::raw_string_ostream OS(Storage); - traverse(this, [&](const syntax::Node *N) { - auto *L = llvm::dyn_cast(N); - if (!L) - return; - ::dumpTokens(OS, *L->token(), A.sourceManager()); - }); - return OS.str(); -} - -syntax::Node *syntax::Tree::findChild(NodeRole R) { - for (auto *C = FirstChild; C; C = C->nextSibling()) { - if (C->Role == R) - return C; - } - return nullptr; -} diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt index f9d079b91344..9803157dd2db 100644 --- a/clang/unittests/Tooling/Syntax/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt @@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(SyntaxTests - TreeTest.cpp TokensTest.cpp ) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp deleted file mode 100644 index da1b6cfc8d79..000000000000 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- TreeTest.cpp -------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Tooling/Syntax/Tree.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/Decl.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Tooling/Syntax/BuildTree.h" -#include "clang/Tooling/Syntax/Nodes.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include - -using namespace clang; - -namespace { -class SyntaxTreeTest : public ::testing::Test { -protected: - // Build a syntax tree for the code. - syntax::TranslationUnit *buildTree(llvm::StringRef Code) { - // FIXME: this code is almost the identical to the one in TokensTest. Share - // it. - class BuildSyntaxTree : public ASTConsumer { - public: - BuildSyntaxTree(syntax::TranslationUnit *&Root, - std::unique_ptr &Arena, - std::unique_ptr Tokens) - : Root(Root), Arena(Arena), Tokens(std::move(Tokens)) { - assert(this->Tokens); - } - - void HandleTranslationUnit(ASTContext &Ctx) override { - Arena = llvm::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), - std::move(*Tokens).consume()); - Tokens = nullptr; // make sure we fail if this gets called twice. - Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl()); - } - - private: - syntax::TranslationUnit *&Root; - std::unique_ptr &Arena; - std::unique_ptr Tokens; - }; - - class BuildSyntaxTreeAction : public ASTFrontendAction { - public: - BuildSyntaxTreeAction(syntax::TranslationUnit *&Root, - std::unique_ptr &Arena) - : Root(Root), Arena(Arena) {} - - std::unique_ptr - CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { - // We start recording the tokens, ast consumer will take on the result. - auto Tokens = - llvm::make_unique(CI.getPreprocessor()); - return llvm::make_unique(Root, Arena, - std::move(Tokens)); - } - - private: - syntax::TranslationUnit *&Root; - std::unique_ptr &Arena; - }; - - constexpr const char *FileName = "./input.cpp"; - FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy("")); - // Prepare to run a compiler. - std::vector Args = {"syntax-test", "-std=c++11", - "-fsyntax-only", FileName}; - auto CI = createInvocationFromCommandLine(Args, Diags, FS); - assert(CI); - CI->getFrontendOpts().DisableFree = false; - CI->getPreprocessorOpts().addRemappedFile( - FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); - CompilerInstance Compiler; - Compiler.setInvocation(std::move(CI)); - if (!Diags->getClient()) - Diags->setClient(new IgnoringDiagConsumer); - Compiler.setDiagnostics(Diags.get()); - Compiler.setFileManager(FileMgr.get()); - Compiler.setSourceManager(SourceMgr.get()); - - syntax::TranslationUnit *Root = nullptr; - BuildSyntaxTreeAction Recorder(Root, this->Arena); - if (!Compiler.ExecuteAction(Recorder)) { - ADD_FAILURE() << "failed to run the frontend"; - std::abort(); - } - return Root; - } - - // Adds a file to the test VFS. - void addFile(llvm::StringRef Path, llvm::StringRef Contents) { - if (!FS->addFile(Path, time_t(), - llvm::MemoryBuffer::getMemBufferCopy(Contents))) { - ADD_FAILURE() << "could not add a file to VFS: " << Path; - } - } - - // Data fields. - llvm::IntrusiveRefCntPtr Diags = - new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); - IntrusiveRefCntPtr FS = - new llvm::vfs::InMemoryFileSystem; - llvm::IntrusiveRefCntPtr FileMgr = - new FileManager(FileSystemOptions(), FS); - llvm::IntrusiveRefCntPtr SourceMgr = - new SourceManager(*Diags, *FileMgr); - // Set after calling buildTree(). - std::unique_ptr Arena; -}; - -TEST_F(SyntaxTreeTest, Basic) { - std::pair Cases[] = { - { - R"cpp( -int main() {} -void foo() {} - )cpp", - R"txt( -*: TranslationUnit -|-TopLevelDeclaration -| |-int -| |-main -| |-( -| |-) -| `-CompoundStatement -| |-1: { -| `-2: } -|-TopLevelDeclaration -| |-void -| |-foo -| |-( -| |-) -| `-CompoundStatement -| |-1: { -| `-2: } -`- -)txt"}, - }; - - for (const auto &T : Cases) { - auto *Root = buildTree(T.first); - std::string Expected = llvm::StringRef(T.second).trim().str(); - std::string Actual = llvm::StringRef(Root->dump(*Arena)).trim(); - EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual; - } -} -} // namespace