llvm-project/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h

//===--- Grammar.h - grammar used by clang pseudoparser  ---------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines base structures for parsing & modeling a grammar for a
//  programming language:
//
//    # This is a fake C++ BNF grammar
//    _ := translation-unit
//    translation-unit := declaration-seq_opt
//    declaration-seq := declaration
//    declaration-seq := declaration-seq declaration
//
//  A grammar formally describes a language, and it is constructed by a set of
//  production rules. A rule is of BNF form (AAA := BBB CCC). A symbol is either
//  nonterminal or terminal, identified by a SymbolID.
//
//  Notes about the BNF grammar:
//  - "_" is the start symbol of the augmented grammar;
//  - single-line comment is supported, starting with a #
//  - A rule describes how a nonterminal (left side of :=) is constructed, and
//    it is *per line* in the grammar file
//  - Terminals (also called tokens) correspond to the clang::TokenKind; they
//    are written in the grammar like "IDENTIFIER", "USING", "+"
//  - Nonterminals are specified with "lower-case" names in the grammar; they
//    shouldn't be nullable (i.e. must not derive an empty sequence)
//  - optional symbols are supported (specified with a _opt suffix), and they
//    will be eliminated during the grammar parsing stage
//
//===----------------------------------------------------------------------===//

#ifndef CLANG_PSEUDO_GRAMMAR_H
#define CLANG_PSEUDO_GRAMMAR_H

#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace clang {
namespace pseudo {
// A SymbolID uniquely identifies a terminal/nonterminal symbol in a grammar.
// nonterminal IDs are indexes into a table of nonterminal symbols.
// Terminal IDs correspond to the clang TokenKind enum.
using SymbolID = uint16_t;
// SymbolID is only 12 bits wide.
// There are maximum 2^11 terminals (aka tokens) and 2^11 nonterminals.
static constexpr uint16_t SymbolBits = 12;
static constexpr uint16_t NumTerminals = tok::NUM_TOKENS;
// SymbolIDs with the top bit set are tokens/terminals.
static constexpr SymbolID TokenFlag = 1 << (SymbolBits - 1);
// Tells whether ID denotes a terminal, i.e. whether its TokenFlag bit is set.
inline bool isToken(SymbolID ID) { return (ID & TokenFlag) != 0; }
// Tells whether ID denotes a nonterminal, i.e. its TokenFlag bit is clear.
inline bool isNonterminal(SymbolID ID) { return (ID & TokenFlag) == 0; }
// Converts a terminal SymbolID back to its clang token kind.
// The terminals are always the clang tok::TokenKind (not all are used).
// SID must be a terminal (see isToken()); this is only checked by assertions.
inline tok::TokenKind symbolToToken(SymbolID SID) {
  assert(isToken(SID) && "symbolToToken() requires a terminal symbol");
  // Strip the terminal tag to recover the raw TokenKind value.
  SID &= ~TokenFlag;
  assert(SID < NumTerminals && "terminal symbol is not a valid TokenKind");
  return static_cast<tok::TokenKind>(SID);
}
// Converts a clang token kind into the SymbolID of the matching terminal, by
// tagging the raw TokenKind value with TokenFlag.
inline SymbolID tokenSymbol(tok::TokenKind TK) {
  const SymbolID Raw = static_cast<SymbolID>(TK);
  return Raw | TokenFlag;
}

// A RuleID uniquely identifies a production rule in a grammar.
// It is an index into a table of rules (see GrammarTable::Rules).
using RuleID = uint16_t;
// Number of bits budgeted for a RuleID: there are at most 2^12 rules.
// NOTE(review): nothing in this header enforces the limit; presumably the
// code that packs RuleIDs into RuleBits bits checks it -- confirm.
static constexpr unsigned RuleBits = 12;

// Represent a production rule in the grammar, e.g.
//   expression := a b c
//   ^Target       ^Sequence
struct Rule {
  Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Seq);

  // We occupy 4 bits for the size of the sequence, so in theory a sequence can
  // be at most 2^4 - 1 symbols long (the largest value a 4-bit field holds).
  // However, we're stricter in order to reduce the size: we limit the max
  // length to 9 (this is the longest sequence in cxx grammar).
  static constexpr unsigned SizeBits = 4;
  static constexpr unsigned MaxElements = 9;
  // Strict comparison: a Size equal to (1 << SizeBits) would not fit in the
  // bit-field below and would silently wrap around to 0.
  static_assert(MaxElements < (1 << SizeBits), "Exceeds the maximum limit");
  static_assert(SizeBits + SymbolBits <= 16,
                "Must be able to store symbol ID + size efficiently");

  // 16 bits for target symbol and size of sequence:
  // SymbolID : 12 | Size : 4
  SymbolID Target : SymbolBits;
  uint8_t Size : SizeBits; // Size of the Sequence
  // Right-hand side symbols; only the first Size entries are exposed by seq().
  SymbolID Sequence[MaxElements];

  // Returns the right-hand side as a view over the first Size symbols.
  llvm::ArrayRef<SymbolID> seq() const {
    return llvm::ArrayRef<SymbolID>(Sequence, Size);
  }
  // Two rules are equal when they have the same target and the same sequence.
  friend bool operator==(const Rule &L, const Rule &R) {
    return L.Target == R.Target && L.seq() == R.seq();
  }
};

struct GrammarTable;

// Grammar that describes a programming language, e.g. C++. It represents the
// contents of the specified grammar.
// It is a building block for constructing a table-based parser.
class Grammar {
public:
  // Takes ownership of the table holding the grammar's underlying data.
  explicit Grammar(std::unique_ptr<GrammarTable>);

  // Parses grammar from a BNF file.
  // Diagnostics emitted during parsing are stored in Diags.
  static std::unique_ptr<Grammar> parseBNF(llvm::StringRef BNF,
                                           std::vector<std::string> &Diags);

  // Returns the SymbolID of the start symbol '_'.
  SymbolID startSymbol() const { return StartSymbol; }

  // Returns all rules of the given nonterminal symbol.
  llvm::ArrayRef<Rule> rulesFor(SymbolID SID) const;
  // Returns the rule identified by RID.
  const Rule &lookupRule(RuleID RID) const;

  // Gets symbol (terminal or nonterminal) name.
  // Terminals have names like "," (tok::comma) or "OPERATOR"
  // (tok::kw_operator).
  llvm::StringRef symbolName(SymbolID) const;

  // Dumps the whole grammar.
  std::string dump() const;
  // Dumps a particular rule.
  std::string dumpRule(RuleID) const;
  // Dumps all rules of the given nonterminal symbol.
  std::string dumpRules(SymbolID) const;

  // Exposes the underlying storage of the grammar.
  const GrammarTable &table() const { return *T; }

private:
  // Owned storage for the rules and symbol tables.
  std::unique_ptr<GrammarTable> T;
  // The start symbol '_' of the augmented grammar.
  SymbolID StartSymbol;
};
// For each nonterminal X, computes the set of terminals that begin strings
// derived from X. (Known as FIRST sets in grammar-based parsers).
// NOTE(review): the returned vector is presumably indexed by the nonterminal's
// SymbolID -- confirm against the implementation.
std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &);
// For each nonterminal X, computes the set of terminals that could immediately
// follow X. (Known as FOLLOW sets in grammar-based parsers).
// NOTE(review): the returned vector is presumably indexed by the nonterminal's
// SymbolID -- confirm against the implementation.
std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &);

// Storage for the underlying data of the Grammar.
// It can be constructed dynamically (from compiling BNF file) or statically
// (a compiled data-source).
struct GrammarTable {
  GrammarTable();

  // Describes one nonterminal symbol: its name and the rules that produce it.
  struct Nonterminal {
    // Name of the nonterminal.
    std::string Name;
    // Corresponding rules that construct the nonterminal, it is a [Start, End)
    // index range of the Rules table. Start is inclusive, End is exclusive.
    struct {
      RuleID Start;
      RuleID End;
    } RuleRange;
  };

  // The rules are sorted (and thus grouped) by target symbol.
  // RuleID is the index of the vector.
  std::vector<Rule> Rules;
  // A table of terminals (aka tokens). It corresponds to the clang::Token.
  // clang::tok::TokenKind is the index of the table.
  // This is an ArrayRef, i.e. a view over names stored elsewhere.
  // NOTE(review): presumably GrammarTable() points it at the backing storage
  // -- confirm in the implementation file.
  llvm::ArrayRef<std::string> Terminals;
  // A table of nonterminals, sorted by name.
  // SymbolID is the index of the table.
  std::vector<Nonterminal> Nonterminals;
};

} // namespace pseudo
} // namespace clang

#endif // CLANG_PSEUDO_GRAMMAR_H
[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`//===--- Grammar.h - grammar used by clang pseudoparser ---------- C++--===//`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file defines base structures for parsing & modeling a grammar for a`
			`// programming language:`
			`//`
			`// # This is a fake C++ BNF grammar`
			`// _ := translation-unit`
			`// translation-unit := declaration-seq_opt`
			`// declaration-seq := declaration`
			`// declaration-seq := declaration-seq declaration`
			`//`
			`// A grammar formally describes a language, and it is constructed by a set of`
			`// production rules. A rule is of BNF form (AAA := BBB CCC). A symbol is either`
[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`// nonterminal or terminal, identified by a SymbolID.`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`//`
			`// Notions about the BNF grammar:`
[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`// - "_" is the start symbol of the augmented grammar;`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// - single-line comment is supported, starting with a #`
			`// - A rule describes how a nonterminal (left side of :=) is constructed, and`
			`// it is per line in the grammar file`
			`// - Terminals (also called tokens) correspond to the clang::TokenKind; they`
			`// are written in the grammar like "IDENTIFIER", "USING", "+"`
			`// - Nonterminals are specified with "lower-case" names in the grammar; they`
			`// shouldn't be nullable (has an empty sequence)`
			`// - optional symbols are supported (specified with a _opt suffix), and they`
			`// will be eliminated during the grammar parsing stage`
			`//`
			`//===----------------------------------------------------------------------===//`

Reapply [pseudo] Move pseudoparser from clang to clang-tools-extra" This reverts commit 049f4e4eab19c6e468e029232e94ca71245b0f56. The problem was a stray dependency in CLANG_TEST_DEPS which caused cmake to fail if clang-pseudo wasn't built. This is now removed. 2022-03-16 08:08:02 +08:00			`#ifndef CLANG_PSEUDO_GRAMMAR_H`
			`#define CLANG_PSEUDO_GRAMMAR_H`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00
			`#include "clang/Basic/TokenKinds.h"`
			`#include "llvm/ADT/ArrayRef.h"`
[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`#include "llvm/ADT/DenseSet.h"`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`#include "llvm/ADT/StringRef.h"`
			`#include <cstdint>`
			`#include <vector>`

			`namespace clang {`
			`namespace pseudo {`
[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`// A SymbolID uniquely identifies a terminal/nonterminal symbol in a grammar.`
			`// nonterminal IDs are indexes into a table of nonterminal symbols.`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// Terminal IDs correspond to the clang TokenKind enum.`
			`using SymbolID = uint16_t;`
			`// SymbolID is only 12 bits wide.`
			`// There are maximum 2^11 terminals (aka tokens) and 2^11 nonterminals.`
			`static constexpr uint16_t SymbolBits = 12;`
			`static constexpr uint16_t NumTerminals = tok::NUM_TOKENS;`
			`// SymbolIDs with the top bit set are tokens/terminals.`
			`static constexpr SymbolID TokenFlag = 1 << (SymbolBits - 1);`
			`inline bool isToken(SymbolID ID) { return ID & TokenFlag; }`
			`inline bool isNonterminal(SymbolID ID) { return !isToken(ID); }`
			`// The terminals are always the clang tok::TokenKind (not all are used).`
			`inline tok::TokenKind symbolToToken(SymbolID SID) {`
			`assert(isToken(SID));`
			`SID &= ~TokenFlag;`
			`assert(SID < NumTerminals);`
			`return static_cast<tok::TokenKind>(SID);`
			`}`
			`inline SymbolID tokenSymbol(tok::TokenKind TK) {`
			`return TokenFlag \| static_cast<SymbolID>(TK);`
			`}`

			`// A RuleID uniquely identifies a production rule in a grammar.`
			`// It is an index into a table of rules.`
			`using RuleID = uint16_t;`
			`// There are maximum 2^12 rules.`
			`static constexpr unsigned RuleBits = 12;`

			`// Represent a production rule in the grammar, e.g.`
			`// expression := a b c`
			`// ^Target ^Sequence`
			`struct Rule {`
			`Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Seq);`

			`// We occupy 4 bits for the sequence, in theory, it can be at most 2^4 tokens`
			`// long, however, we're stricter in order to reduce the size, we limit the max`
[pseudo] NFC, fix some typos. 2022-02-11 22:34:40 +08:00			`// length to 9 (this is the longest sequence in cxx grammar).`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`static constexpr unsigned SizeBits = 4;`
			`static constexpr unsigned MaxElements = 9;`
			`static_assert(MaxElements <= (1 << SizeBits), "Exceeds the maximum limit");`
			`static_assert(SizeBits + SymbolBits <= 16,`
			`"Must be able to store symbol ID + size efficiently");`

			`// 16 bits for target symbol and size of sequence:`
			`// SymbolID : 12 \| Size : 4`
			`SymbolID Target : SymbolBits;`
			`uint8_t Size : SizeBits; // Size of the Sequence`
			`SymbolID Sequence[MaxElements];`

			`llvm::ArrayRef<SymbolID> seq() const {`
			`return llvm::ArrayRef<SymbolID>(Sequence, Size);`
			`}`
			`friend bool operator==(const Rule &L, const Rule &R) {`
			`return L.Target == R.Target && L.seq() == R.seq();`
			`}`
			`};`

			`struct GrammarTable;`

			`// Grammar that describes a programming language, e.g. C++. It represents the`
			`// contents of the specified grammar.`
			`// It is a building block for constructing a table-based parser.`
			`class Grammar {`
			`public:`
[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`explicit Grammar(std::unique_ptr<GrammarTable>);`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00
			`// Parses grammar from a BNF file.`
			`// Diagnostics emitted during parsing are stored in Diags.`
			`static std::unique_ptr<Grammar> parseBNF(llvm::StringRef BNF,`
			`std::vector<std::string> &Diags);`

[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`// Returns the SymbolID of the start symbol '_'.`
			`SymbolID startSymbol() const { return StartSymbol; };`

[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`// Returns all rules of the given nonterminal symbol.`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`llvm::ArrayRef<Rule> rulesFor(SymbolID SID) const;`
			`const Rule &lookupRule(RuleID RID) const;`

[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`// Gets symbol (terminal or nonterminal) name.`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).`
			`llvm::StringRef symbolName(SymbolID) const;`

			`// Dumps the whole grammar.`
			`std::string dump() const;`
			`// Dumps a particular rule.`
			`std::string dumpRule(RuleID) const;`
			`// Dumps all rules of the given nonterminal symbol.`
			`std::string dumpRules(SymbolID) const;`

			`const GrammarTable &table() const { return *T; }`

			`private:`
			`std::unique_ptr<GrammarTable> T;`
[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`// The start symbol '_' of the augmented grammar.`
			`SymbolID StartSymbol;`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`};`
[pseudo] Add first and follow set computation in Grammar. These will be used when building parsing table for LR parsers. Separate from https://reviews.llvm.org/D118196. Differential Revision: https://reviews.llvm.org/D118990 2022-02-04 20:54:55 +08:00			`// For each nonterminal X, computes the set of terminals that begin strings`
			`// derived from X. (Known as FIRST sets in grammar-based parsers).`
			`std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &);`
			`// For each nonterminal X, computes the set of terminals that could immediately`
			`// follow X. (Known as FOLLOW sets in grammar-based parsers).`
			`std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &);`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00
			`// Storage for the underlying data of the Grammar.`
			`// It can be constructed dynamically (from compiling BNF file) or statically`
			`// (a compiled data-source).`
			`struct GrammarTable {`
[pseudo] Implement LRTable. This patch introduces a dense implementation of the LR parsing table, which is used by LR parsers. We build a SLR(1) parsing table from the LR(0) graph. Statistics of the LR parsing table on the C++ spec grammar: - number of states: 1449 - number of actions: 83069 - size of the table (bytes): 334928 Differential Revision: https://reviews.llvm.org/D118196 2022-02-11 21:09:15 +08:00			`GrammarTable();`

[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`struct Nonterminal {`
			`std::string Name;`
[pseudo] Tweak some docs, NFC Consitently use the "nonterminal", "pseudoparser" terms. 2022-03-16 23:45:24 +08:00			`// Corresponding rules that construct the nonterminal, it is a [Start, End)`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// index range of the Rules table.`
			`struct {`
[pseudo] Fix some naming-style violations. 2022-03-17 16:08:25 +08:00			`RuleID Start;`
			`RuleID End;`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`} RuleRange;`
			`};`

			`// The rules are sorted (and thus grouped) by target symbol.`
			`// RuleID is the index of the vector.`
			`std::vector<Rule> Rules;`
[pseudo] NFC, fix some typos. 2022-02-11 22:34:40 +08:00			`// A table of terminals (aka tokens). It corresponds to the clang::Token.`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// clang::tok::TokenKind is the index of the table.`
[pseudo] Implement LRTable. This patch introduces a dense implementation of the LR parsing table, which is used by LR parsers. We build a SLR(1) parsing table from the LR(0) graph. Statistics of the LR parsing table on the C++ spec grammar: - number of states: 1449 - number of actions: 83069 - size of the table (bytes): 334928 Differential Revision: https://reviews.llvm.org/D118196 2022-02-11 21:09:15 +08:00			`llvm::ArrayRef<std::string> Terminals;`
[syntax][pseudo] Add Grammar for the clang pseudo-parser This patch introduces the Grammar class, which is a critial piece for constructing a tabled-based parser. As the first patch, the scope is limited to: - define base types (symbol, rules) of modeling the grammar - construct Grammar by parsing the BNF file (annotations are excluded for now) Differential Revision: https://reviews.llvm.org/D114790 2022-01-19 20:14:57 +08:00			`// A table of nonterminals, sorted by name.`
			`// SymbolID is the index of the table.`
			`std::vector<Nonterminal> Nonterminals;`
			`};`

			`} // namespace pseudo`
			`} // namespace clang`

[pseudo] Cleanup the leftover header guards after the movement, NFC. 2022-03-16 23:17:25 +08:00			`#endif // CLANG_PSEUDO_GRAMMAR_H`