From 7a05942dd0c548cf4a0cf52d8d3f97b0a11a6509 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 8 Jun 2022 13:26:53 +0200 Subject: [PATCH] [pseudo] Remove the explicit Accept actions. As pointed out in the previous review section, having a dedicated accept action doesn't seem to be necessary. This patch implements the the same behavior without accept acction, which will save some code complexity. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D125677 --- .../pseudo/include/clang-pseudo/LRTable.h | 12 +++-- clang-tools-extra/pseudo/lib/GLR.cpp | 50 +++++++++++-------- .../pseudo/lib/grammar/LRTable.cpp | 4 -- .../pseudo/lib/grammar/LRTableBuild.cpp | 8 +-- .../pseudo/test/lr-build-basic.test | 1 - .../pseudo/test/lr-build-conflicts.test | 1 - .../pseudo/unittests/GLRTest.cpp | 23 +++++++++ .../pseudo/unittests/LRTableTest.cpp | 4 +- 8 files changed, 64 insertions(+), 39 deletions(-) diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h index 3cff9aec8c5e..52f29b7d1c0b 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h @@ -19,8 +19,7 @@ // Typically, based on the category of the grammar symbol, the LRTable is // broken into two logically separate tables: // - ACTION table with terminals as columns -- e.g. ACTION[S, a] specifies -// next action (shift/reduce/accept/error) on state S under a lookahead -// terminal a +// next action (shift/reduce) on state S under a lookahead terminal a // - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specifies // the state which we transist to from the state S with the nonterminal X // @@ -76,8 +75,12 @@ public: Shift, // Reduce by a rule: pop the state stack. Reduce, - // Signals that we have parsed the input successfully. - Accept, + + // NOTE: there are no typical accept actions in the LRtable, accept + // actions are handled specifically in the parser -- if the parser + // reaches to a target state (which is goto(StartState, StartSymbol)) at + // the EOF token after a reduce, this indicates the input has been parsed + // as the StartSymbol successfully. // Nonterminal actions, corresponding to entry of GOTO table. @@ -86,7 +89,6 @@ public: GoTo, }; - static Action accept(RuleID RID) { return Action(Accept, RID); } static Action goTo(StateID S) { return Action(GoTo, S); } static Action shift(StateID S) { return Action(Shift, S); } static Action reduce(RuleID RID) { return Action(Reduce, RID); } diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp index 1031e3dd2007..0938d8846fe0 100644 --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -39,13 +39,14 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) { const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, SymbolID StartSymbol) { + assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal"); llvm::ArrayRef Terminals = Params.Forest.createTerminals(Tokens); auto &G = Params.G; (void)G; auto &GSS = Params.GSStack; - // Lists of active shift, reduce, accept actions. - std::vector PendingShift, PendingReduce, PendingAccept; + // Lists of active shift, reduce actions. + std::vector PendingShift, PendingReduce; auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) { for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) { switch (Action.kind()) { @@ -55,20 +56,18 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, case LRTable::Action::Reduce: PendingReduce.push_back({Head, Action}); break; - case LRTable::Action::Accept: - PendingAccept.push_back({Head, Action}); - break; default: llvm_unreachable("unexpected action kind!"); } } }; + StateID StartState = Params.Table.getStartState(StartSymbol); std::vector NewHeads = { - GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol), + GSS.addNode(/*State=*/StartState, /*ForestNode=*/nullptr, {})}; auto MaybeGC = [&, Roots(std::vector{}), I(0u)]() mutable { assert(PendingShift.empty() && PendingReduce.empty() && - PendingAccept.empty() && "Running GC at the wrong time!"); + "Running GC at the wrong time!"); if (++I != 20) // Run periodically to balance CPU and memory usage. return; @@ -98,24 +97,31 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params, LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n")); for (const auto *Heads : NewHeads) AddSteps(Heads, tokenSymbol(tok::eof)); - glrReduce(PendingReduce, Params, - [&](const GSS::Node * NewHead) { - // A reduce will enable more steps. - AddSteps(NewHead, tokenSymbol(tok::eof)); - }); - if (!PendingAccept.empty()) { - LLVM_DEBUG({ - llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n", - PendingAccept.size()); - for (const auto &Accept : PendingAccept) - llvm::dbgs() << " - " << G.symbolName(Accept.Head->Payload->symbol()) - << "\n"; - }); - assert(PendingAccept.size() == 1); - return *PendingAccept.front().Head->Payload; + StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol); + // Collect new heads created from the final reduce. + std::vector Heads; + glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) { + Heads.push_back(NewHead); + // A reduce will enable more steps. + AddSteps(NewHead, tokenSymbol(tok::eof)); + }); + + const ForestNode *Result = nullptr; + for (const auto *Head : Heads) { + if (Head->State == AcceptState) { + assert(Head->Payload->symbol() == StartSymbol); + assert(Result == nullptr && "multiple results!"); + Result = Head->Payload; + } } + if (Result) + return *Result; // We failed to parse the input, returning an opaque forest node for recovery. + // + // FIXME: We will need to invoke our generic error-recovery handlers when we + // reach EOF without reaching accept state, and involving the eof + // token in the above main for-loopmay be the best way to reuse the code). return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); } diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp index 28e2bf5b136a..0c4ff6e775fd 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp @@ -25,8 +25,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule()); case LRTable::Action::GoTo: return OS << llvm::formatv("go to state {0}", A.getGoToState()); - case LRTable::Action::Accept: - return OS << "acc"; case LRTable::Action::Sentinel: llvm_unreachable("unexpected Sentinel action kind!"); } @@ -60,8 +58,6 @@ std::string LRTable::dumpForTests(const Grammar &G) const { OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n", G.symbolName(TokID), A.getReduceRule(), G.dumpRule(A.getReduceRule())); - else if (A.kind() == LRTable::Action::Accept) - OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID)); } } for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size(); diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp index f59c8736fbec..4b5e87a3ac46 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -117,11 +117,11 @@ LRTable LRTable::buildSLR(const Grammar &G) { auto FollowSets = followSets(G); for (StateID SID = 0; SID < Graph.states().size(); ++SID) { for (const Item &I : Graph.states()[SID].Items) { - // If we've just parsed the start symbol, we can accept the input. - if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) { - Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())}); + // If we've just parsed the start symbol, this means we successfully parse + // the input. We don't add the reduce action of `_ := start_symbol` in the + // LRTable (the GLR parser handles it specifically). + if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) continue; - } if (!I.hasNext()) { // If we've reached the end of a rule A := ..., then we can reduce if // the next token is in the follow set of A. diff --git a/clang-tools-extra/pseudo/test/lr-build-basic.test b/clang-tools-extra/pseudo/test/lr-build-basic.test index 5c017fa0e4da..36a86ecd9f1f 100644 --- a/clang-tools-extra/pseudo/test/lr-build-basic.test +++ b/clang-tools-extra/pseudo/test/lr-build-basic.test @@ -22,7 +22,6 @@ id := IDENTIFIER # TABLE-NEXT: 'expr': go to state 1 # TABLE-NEXT: 'id': go to state 2 # TABLE-NEXT: State 1 -# TABLE-NEXT: 'EOF': accept # TABLE-NEXT: State 2 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id' # TABLE-NEXT: State 3 diff --git a/clang-tools-extra/pseudo/test/lr-build-conflicts.test b/clang-tools-extra/pseudo/test/lr-build-conflicts.test index aefa905413bd..916589572ae4 100644 --- a/clang-tools-extra/pseudo/test/lr-build-conflicts.test +++ b/clang-tools-extra/pseudo/test/lr-build-conflicts.test @@ -33,7 +33,6 @@ expr := IDENTIFIER # TABLE-NEXT: 'IDENTIFIER': shift state 2 # TABLE-NEXT: 'expr': go to state 1 # TABLE-NEXT: State 1 -# TABLE-NEXT: 'EOF': accept # TABLE-NEXT: '-': shift state 3 # TABLE-NEXT: State 2 # TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER' diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp index ca12c53214ae..0a85deaf6a1d 100644 --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -393,6 +393,29 @@ TEST_F(GLRTest, GLRReduceOrder) { "[ 0, end) └─IDENTIFIER := tok[0]\n"); } +TEST_F(GLRTest, NoExplicitAccept) { + build(R"bnf( + _ := test + + test := IDENTIFIER test + test := IDENTIFIER + )bnf"); + clang::LangOptions LOptions; + // Given the following input, and the grammar above, we perform two reductions + // of the nonterminal `test` when the next token is `eof`, verify that the + // parser stops at the right state. + const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions); + auto LRTable = LRTable::buildSLR(*G); + + const ForestNode &Parsed = + glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test")); + EXPECT_EQ(Parsed.dumpRecursive(*G), + "[ 0, end) test := IDENTIFIER test\n" + "[ 0, 1) ├─IDENTIFIER := tok[0]\n" + "[ 1, end) └─test := IDENTIFIER\n" + "[ 1, end) └─IDENTIFIER := tok[1]\n"); +} + TEST(GSSTest, GC) { // ┌-A-┬-AB // ├-B-┘ diff --git a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp index 9823159e5517..21b46879e256 100644 --- a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp +++ b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp @@ -33,7 +33,7 @@ TEST(LRTable, Builder) { std::vector Entries = { {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)}, {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)}, - {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)}, + {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)}, {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}}; GrammarTable GT; LRTable T = LRTable::buildForTests(GT, Entries); @@ -41,7 +41,7 @@ TEST(LRTable, Builder) { EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)), UnorderedElementsAre(Action::shift(0), Action::reduce(0))); EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)), - UnorderedElementsAre(Action::accept(2))); + UnorderedElementsAre(Action::reduce(2))); EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty()); EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)), UnorderedElementsAre(Action::reduce(1)));