forked from OSchip/llvm-project
[pseudo] Remove the explicit Accept actions.
As pointed out in the previous review section, having a dedicated accept action doesn't seem to be necessary. This patch implements the the same behavior without accept acction, which will save some code complexity. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D125677
This commit is contained in:
parent
075449da80
commit
7a05942dd0
|
@ -19,8 +19,7 @@
|
|||
// Typically, based on the category of the grammar symbol, the LRTable is
|
||||
// broken into two logically separate tables:
|
||||
// - ACTION table with terminals as columns -- e.g. ACTION[S, a] specifies
|
||||
// next action (shift/reduce/accept/error) on state S under a lookahead
|
||||
// terminal a
|
||||
// next action (shift/reduce) on state S under a lookahead terminal a
|
||||
// - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specifies
|
||||
// the state which we transist to from the state S with the nonterminal X
|
||||
//
|
||||
|
@ -76,8 +75,12 @@ public:
|
|||
Shift,
|
||||
// Reduce by a rule: pop the state stack.
|
||||
Reduce,
|
||||
// Signals that we have parsed the input successfully.
|
||||
Accept,
|
||||
|
||||
// NOTE: there are no typical accept actions in the LRtable, accept
|
||||
// actions are handled specifically in the parser -- if the parser
|
||||
// reaches to a target state (which is goto(StartState, StartSymbol)) at
|
||||
// the EOF token after a reduce, this indicates the input has been parsed
|
||||
// as the StartSymbol successfully.
|
||||
|
||||
// Nonterminal actions, corresponding to entry of GOTO table.
|
||||
|
||||
|
@ -86,7 +89,6 @@ public:
|
|||
GoTo,
|
||||
};
|
||||
|
||||
static Action accept(RuleID RID) { return Action(Accept, RID); }
|
||||
static Action goTo(StateID S) { return Action(GoTo, S); }
|
||||
static Action shift(StateID S) { return Action(Shift, S); }
|
||||
static Action reduce(RuleID RID) { return Action(Reduce, RID); }
|
||||
|
|
|
@ -39,13 +39,14 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
|
|||
|
||||
const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
|
||||
SymbolID StartSymbol) {
|
||||
assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
|
||||
llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
|
||||
auto &G = Params.G;
|
||||
(void)G;
|
||||
auto &GSS = Params.GSStack;
|
||||
|
||||
// Lists of active shift, reduce, accept actions.
|
||||
std::vector<ParseStep> PendingShift, PendingReduce, PendingAccept;
|
||||
// Lists of active shift, reduce actions.
|
||||
std::vector<ParseStep> PendingShift, PendingReduce;
|
||||
auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) {
|
||||
for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) {
|
||||
switch (Action.kind()) {
|
||||
|
@ -55,20 +56,18 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
|
|||
case LRTable::Action::Reduce:
|
||||
PendingReduce.push_back({Head, Action});
|
||||
break;
|
||||
case LRTable::Action::Accept:
|
||||
PendingAccept.push_back({Head, Action});
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unexpected action kind!");
|
||||
}
|
||||
}
|
||||
};
|
||||
StateID StartState = Params.Table.getStartState(StartSymbol);
|
||||
std::vector<const GSS::Node *> NewHeads = {
|
||||
GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol),
|
||||
GSS.addNode(/*State=*/StartState,
|
||||
/*ForestNode=*/nullptr, {})};
|
||||
auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
|
||||
assert(PendingShift.empty() && PendingReduce.empty() &&
|
||||
PendingAccept.empty() && "Running GC at the wrong time!");
|
||||
"Running GC at the wrong time!");
|
||||
|
||||
if (++I != 20) // Run periodically to balance CPU and memory usage.
|
||||
return;
|
||||
|
@ -98,24 +97,31 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
|
|||
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n"));
|
||||
for (const auto *Heads : NewHeads)
|
||||
AddSteps(Heads, tokenSymbol(tok::eof));
|
||||
glrReduce(PendingReduce, Params,
|
||||
[&](const GSS::Node * NewHead) {
|
||||
// A reduce will enable more steps.
|
||||
AddSteps(NewHead, tokenSymbol(tok::eof));
|
||||
});
|
||||
|
||||
if (!PendingAccept.empty()) {
|
||||
LLVM_DEBUG({
|
||||
llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n",
|
||||
PendingAccept.size());
|
||||
for (const auto &Accept : PendingAccept)
|
||||
llvm::dbgs() << " - " << G.symbolName(Accept.Head->Payload->symbol())
|
||||
<< "\n";
|
||||
});
|
||||
assert(PendingAccept.size() == 1);
|
||||
return *PendingAccept.front().Head->Payload;
|
||||
StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
|
||||
// Collect new heads created from the final reduce.
|
||||
std::vector<const GSS::Node*> Heads;
|
||||
glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {
|
||||
Heads.push_back(NewHead);
|
||||
// A reduce will enable more steps.
|
||||
AddSteps(NewHead, tokenSymbol(tok::eof));
|
||||
});
|
||||
|
||||
const ForestNode *Result = nullptr;
|
||||
for (const auto *Head : Heads) {
|
||||
if (Head->State == AcceptState) {
|
||||
assert(Head->Payload->symbol() == StartSymbol);
|
||||
assert(Result == nullptr && "multiple results!");
|
||||
Result = Head->Payload;
|
||||
}
|
||||
}
|
||||
if (Result)
|
||||
return *Result;
|
||||
// We failed to parse the input, returning an opaque forest node for recovery.
|
||||
//
|
||||
// FIXME: We will need to invoke our generic error-recovery handlers when we
|
||||
// reach EOF without reaching accept state, and involving the eof
|
||||
// token in the above main for-loopmay be the best way to reuse the code).
|
||||
return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,8 +25,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
|
|||
return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
|
||||
case LRTable::Action::GoTo:
|
||||
return OS << llvm::formatv("go to state {0}", A.getGoToState());
|
||||
case LRTable::Action::Accept:
|
||||
return OS << "acc";
|
||||
case LRTable::Action::Sentinel:
|
||||
llvm_unreachable("unexpected Sentinel action kind!");
|
||||
}
|
||||
|
@ -60,8 +58,6 @@ std::string LRTable::dumpForTests(const Grammar &G) const {
|
|||
OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
|
||||
G.symbolName(TokID), A.getReduceRule(),
|
||||
G.dumpRule(A.getReduceRule()));
|
||||
else if (A.kind() == LRTable::Action::Accept)
|
||||
OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID));
|
||||
}
|
||||
}
|
||||
for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
|
||||
|
|
|
@ -117,11 +117,11 @@ LRTable LRTable::buildSLR(const Grammar &G) {
|
|||
auto FollowSets = followSets(G);
|
||||
for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
|
||||
for (const Item &I : Graph.states()[SID].Items) {
|
||||
// If we've just parsed the start symbol, we can accept the input.
|
||||
if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
|
||||
Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
|
||||
// If we've just parsed the start symbol, this means we successfully parse
|
||||
// the input. We don't add the reduce action of `_ := start_symbol` in the
|
||||
// LRTable (the GLR parser handles it specifically).
|
||||
if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext())
|
||||
continue;
|
||||
}
|
||||
if (!I.hasNext()) {
|
||||
// If we've reached the end of a rule A := ..., then we can reduce if
|
||||
// the next token is in the follow set of A.
|
||||
|
|
|
@ -22,7 +22,6 @@ id := IDENTIFIER
|
|||
# TABLE-NEXT: 'expr': go to state 1
|
||||
# TABLE-NEXT: 'id': go to state 2
|
||||
# TABLE-NEXT: State 1
|
||||
# TABLE-NEXT: 'EOF': accept
|
||||
# TABLE-NEXT: State 2
|
||||
# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'
|
||||
# TABLE-NEXT: State 3
|
||||
|
|
|
@ -33,7 +33,6 @@ expr := IDENTIFIER
|
|||
# TABLE-NEXT: 'IDENTIFIER': shift state 2
|
||||
# TABLE-NEXT: 'expr': go to state 1
|
||||
# TABLE-NEXT: State 1
|
||||
# TABLE-NEXT: 'EOF': accept
|
||||
# TABLE-NEXT: '-': shift state 3
|
||||
# TABLE-NEXT: State 2
|
||||
# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'
|
||||
|
|
|
@ -393,6 +393,29 @@ TEST_F(GLRTest, GLRReduceOrder) {
|
|||
"[ 0, end) └─IDENTIFIER := tok[0]\n");
|
||||
}
|
||||
|
||||
TEST_F(GLRTest, NoExplicitAccept) {
|
||||
build(R"bnf(
|
||||
_ := test
|
||||
|
||||
test := IDENTIFIER test
|
||||
test := IDENTIFIER
|
||||
)bnf");
|
||||
clang::LangOptions LOptions;
|
||||
// Given the following input, and the grammar above, we perform two reductions
|
||||
// of the nonterminal `test` when the next token is `eof`, verify that the
|
||||
// parser stops at the right state.
|
||||
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
|
||||
auto LRTable = LRTable::buildSLR(*G);
|
||||
|
||||
const ForestNode &Parsed =
|
||||
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
|
||||
EXPECT_EQ(Parsed.dumpRecursive(*G),
|
||||
"[ 0, end) test := IDENTIFIER test\n"
|
||||
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
|
||||
"[ 1, end) └─test := IDENTIFIER\n"
|
||||
"[ 1, end) └─IDENTIFIER := tok[1]\n");
|
||||
}
|
||||
|
||||
TEST(GSSTest, GC) {
|
||||
// ┌-A-┬-AB
|
||||
// ├-B-┘
|
||||
|
|
|
@ -33,7 +33,7 @@ TEST(LRTable, Builder) {
|
|||
std::vector<LRTable::Entry> Entries = {
|
||||
{/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
|
||||
{/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
|
||||
{/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
|
||||
{/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
|
||||
{/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
|
||||
GrammarTable GT;
|
||||
LRTable T = LRTable::buildForTests(GT, Entries);
|
||||
|
@ -41,7 +41,7 @@ TEST(LRTable, Builder) {
|
|||
EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
|
||||
UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
|
||||
EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
|
||||
UnorderedElementsAre(Action::accept(2)));
|
||||
UnorderedElementsAre(Action::reduce(2)));
|
||||
EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
|
||||
EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
|
||||
UnorderedElementsAre(Action::reduce(1)));
|
||||
|
|
Loading…
Reference in New Issue