forked from OSchip/llvm-project
[pseudo] Tweak some docs, NFC
Consistently use the "nonterminal", "pseudoparser" terms.
This commit is contained in:
parent
eb265e3ba2
commit
30de15e100
|
@ -1,4 +1,4 @@
|
||||||
//===--- Grammar.h - grammar used by clang pseudo parser --------*- C++-*-===//
|
//===--- Grammar.h - grammar used by clang pseudoparser ---------*- C++-*-===//
|
||||||
//
|
//
|
||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
// See https://llvm.org/LICENSE.txt for license information.
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
@ -17,7 +17,7 @@
|
||||||
//
|
//
|
||||||
// A grammar formally describes a language, and it is constructed by a set of
|
// A grammar formally describes a language, and it is constructed by a set of
|
||||||
// production rules. A rule is of BNF form (AAA := BBB CCC). A symbol is either
|
// production rules. A rule is of BNF form (AAA := BBB CCC). A symbol is either
|
||||||
// non-terminal or terminal, identified by a SymbolID.
|
// nonterminal or terminal, identified by a SymbolID.
|
||||||
//
|
//
|
||||||
// Notions about the BNF grammar:
|
// Notions about the BNF grammar:
|
||||||
// - "_" is the start symbol of the augmented grammar;
|
// - "_" is the start symbol of the augmented grammar;
|
||||||
|
@ -45,8 +45,8 @@
|
||||||
|
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace pseudo {
|
namespace pseudo {
|
||||||
// A SymbolID uniquely identifies a terminal/non-terminal symbol in a grammar.
|
// A SymbolID uniquely identifies a terminal/nonterminal symbol in a grammar.
|
||||||
// Non-terminal IDs are indexes into a table of non-terminal symbols.
|
// Nonterminal IDs are indexes into a table of nonterminal symbols.
|
||||||
// Terminal IDs correspond to the clang TokenKind enum.
|
// Terminal IDs correspond to the clang TokenKind enum.
|
||||||
using SymbolID = uint16_t;
|
using SymbolID = uint16_t;
|
||||||
// SymbolID is only 12 bits wide.
|
// SymbolID is only 12 bits wide.
|
||||||
|
@ -120,11 +120,11 @@ public:
|
||||||
// Returns the SymbolID of the start symbol '_'.
|
// Returns the SymbolID of the start symbol '_'.
|
||||||
SymbolID startSymbol() const { return StartSymbol; };
|
SymbolID startSymbol() const { return StartSymbol; };
|
||||||
|
|
||||||
// Returns all rules of the given non-terminal symbol.
|
// Returns all rules of the given nonterminal symbol.
|
||||||
llvm::ArrayRef<Rule> rulesFor(SymbolID SID) const;
|
llvm::ArrayRef<Rule> rulesFor(SymbolID SID) const;
|
||||||
const Rule &lookupRule(RuleID RID) const;
|
const Rule &lookupRule(RuleID RID) const;
|
||||||
|
|
||||||
// Gets symbol (terminal or non-terminal) name.
|
// Gets symbol (terminal or nonterminal) name.
|
||||||
// Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
|
// Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
|
||||||
llvm::StringRef symbolName(SymbolID) const;
|
llvm::StringRef symbolName(SymbolID) const;
|
||||||
|
|
||||||
|
@ -157,7 +157,7 @@ struct GrammarTable {
|
||||||
|
|
||||||
struct Nonterminal {
|
struct Nonterminal {
|
||||||
std::string Name;
|
std::string Name;
|
||||||
// Corresponding rules that construct the non-terminal, it is a [Start, End)
|
// Corresponding rules that construct the nonterminal, it is a [Start, End)
|
||||||
// index range of the Rules table.
|
// index range of the Rules table.
|
||||||
struct {
|
struct {
|
||||||
RuleID Start;
|
RuleID Start;
|
||||||
|
|
|
@ -18,10 +18,10 @@
|
||||||
//
|
//
|
||||||
// Typically, based on the category of the grammar symbol, the LRTable is
|
// Typically, based on the category of the grammar symbol, the LRTable is
|
||||||
// broken into two logically separate tables:
|
// broken into two logically separate tables:
|
||||||
// - ACTION table with terminals as columns -- e.g ACTION[S, a] specifies
|
// - ACTION table with terminals as columns -- e.g. ACTION[S, a] specifies
|
||||||
// next action (shift/reduce/accept/error) on state S under a lookahead
|
// next action (shift/reduce/accept/error) on state S under a lookahead
|
||||||
// terminal a
|
// terminal a
|
||||||
// - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specify
|
// - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specifies
|
||||||
// the state which we transition to from the state S with the nonterminal X
|
// the state which we transition to from the state S with the nonterminal X
|
||||||
//
|
//
|
||||||
// LRTable is *performance-critical* as it is consulted frequently during a
|
// LRTable is *performance-critical* as it is consulted frequently during a
|
||||||
|
@ -161,7 +161,7 @@ private:
|
||||||
|
|
||||||
// Index is nonterminal SymbolID, value is the offset into States/Actions
|
// Index is nonterminal SymbolID, value is the offset into States/Actions
|
||||||
// where the entries for this nonterminal begin.
|
// where the entries for this nonterminal begin.
|
||||||
// Give a non-terminal id, the corresponding half-open range of StateIdx is
|
// Given a nonterminal id, the corresponding half-open range of StateIdx is
|
||||||
// [NontermOffset[id], NontermOffset[id+1]).
|
// [NontermOffset[id], NontermOffset[id+1]).
|
||||||
std::vector<uint32_t> NontermOffset;
|
std::vector<uint32_t> NontermOffset;
|
||||||
// Similar to NontermOffset, but for terminals, index is tok::TokenKind.
|
// Similar to NontermOffset, but for terminals, index is tok::TokenKind.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//===--- Grammar.cpp - Grammar for clang pseudo parser ----------*- C++-*-===//
|
//===--- Grammar.cpp - Grammar for clang pseudoparser -----------*- C++-*-===//
|
||||||
//
|
//
|
||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
// See https://llvm.org/LICENSE.txt for license information.
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
@ -118,7 +118,7 @@ std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) {
|
||||||
auto FirstSets = firstSets(G);
|
auto FirstSets = firstSets(G);
|
||||||
std::vector<llvm::DenseSet<SymbolID>> FollowSets(
|
std::vector<llvm::DenseSet<SymbolID>> FollowSets(
|
||||||
G.table().Nonterminals.size());
|
G.table().Nonterminals.size());
|
||||||
// Expand the follow set of a non-terminal symbol Y by adding all from the
|
// Expand the follow set of a nonterminal symbol Y by adding all from the
|
||||||
// given symbol set.
|
// given symbol set.
|
||||||
auto ExpandFollowSet = [&FollowSets](SymbolID Y,
|
auto ExpandFollowSet = [&FollowSets](SymbolID Y,
|
||||||
const llvm::DenseSet<SymbolID> &ToAdd) {
|
const llvm::DenseSet<SymbolID> &ToAdd) {
|
||||||
|
|
|
@ -8,8 +8,8 @@
|
||||||
# It aims to align with the ISO C++ grammar as much as possible. We adjust it
|
# It aims to align with the ISO C++ grammar as much as possible. We adjust it
|
||||||
# to fit the need for the grammar-based parser:
|
# to fit the need for the grammar-based parser:
|
||||||
# - attributes are omitted, which will be handled as comments;
|
# - attributes are omitted, which will be handled as comments;
|
||||||
# - we don't allow nullable non-terminal symbols. There are few nullable
|
# - we don't allow nullable nonterminal symbols. There are few nullable
|
||||||
# non-terminals in the spec grammar, they are adjusted to be non-nullable;
|
# nonterminals in the spec grammar, they are adjusted to be non-nullable;
|
||||||
# - the file merely describes the core C++ grammar. Preprocessor directives and
|
# - the file merely describes the core C++ grammar. Preprocessor directives and
|
||||||
# lexical conversions are omitted as we reuse clang's lexer and run a fake
|
# lexical conversions are omitted as we reuse clang's lexer and run a fake
|
||||||
# preprocessor;
|
# preprocessor;
|
||||||
|
@ -18,7 +18,7 @@
|
||||||
# and right shift operator;
|
# and right shift operator;
|
||||||
#
|
#
|
||||||
# Guidelines:
|
# Guidelines:
|
||||||
# - non-terminals are lower_case; terminals (aka tokens) correspond to
|
# - nonterminals are lower_case; terminals (aka tokens) correspond to
|
||||||
# clang::TokenKind, written as "IDENTIFIER", "USING", "::" etc;
|
# clang::TokenKind, written as "IDENTIFIER", "USING", "::" etc;
|
||||||
# - optional symbols are supported, with a _opt suffix;
|
# - optional symbols are supported, with a _opt suffix;
|
||||||
#
|
#
|
||||||
|
@ -706,7 +706,7 @@ string-literal-chunk := UTF16_STRING_LITERAL
|
||||||
string-literal-chunk := UTF32_STRING_LITERAL
|
string-literal-chunk := UTF32_STRING_LITERAL
|
||||||
#! Technically, string concatenation happens at phase 6 which is before parsing,
|
#! Technically, string concatenation happens at phase 6 which is before parsing,
|
||||||
#! so it doesn't belong to the grammar. However, we extend the grammar to
|
#! so it doesn't belong to the grammar. However, we extend the grammar to
|
||||||
#! support it, to make the pseudo parser fully functional on practical code.
|
#! support it, to make the pseudoparser fully functional on practical code.
|
||||||
string-literal := string-literal-chunk
|
string-literal := string-literal-chunk
|
||||||
string-literal := string-literal string-literal-chunk
|
string-literal := string-literal string-literal-chunk
|
||||||
user-defined-literal := user-defined-integer-literal
|
user-defined-literal := user-defined-integer-literal
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//===-- ClangPseudo.cpp - Clang pseudo parser tool ------------------------===//
|
//===-- ClangPseudo.cpp - Clang pseudoparser tool -------------------------===//
|
||||||
//
|
//
|
||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
// See https://llvm.org/LICENSE.txt for license information.
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
|
Loading…
Reference in New Issue