From 7a05942dd0c548cf4a0cf52d8d3f97b0a11a6509 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Wed, 8 Jun 2022 13:26:53 +0200
Subject: [PATCH] [pseudo] Remove the explicit Accept actions.

As pointed out in the previous review section, having a dedicated accept
action doesn't seem to be necessary. This patch implements the the same behavior
without accept acction, which will save some code complexity.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D125677
---
 .../pseudo/include/clang-pseudo/LRTable.h     | 12 +++--
 clang-tools-extra/pseudo/lib/GLR.cpp          | 50 +++++++++++--------
 .../pseudo/lib/grammar/LRTable.cpp            |  4 --
 .../pseudo/lib/grammar/LRTableBuild.cpp       |  8 +--
 .../pseudo/test/lr-build-basic.test           |  1 -
 .../pseudo/test/lr-build-conflicts.test       |  1 -
 .../pseudo/unittests/GLRTest.cpp              | 23 +++++++++
 .../pseudo/unittests/LRTableTest.cpp          |  4 +-
 8 files changed, 64 insertions(+), 39 deletions(-)
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
index 3cff9aec8c5e..52f29b7d1c0b 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -19,8 +19,7 @@
 //  Typically, based on the category of the grammar symbol, the LRTable is
 //  broken into two logically separate tables:
 //    - ACTION table with terminals as columns -- e.g. ACTION[S, a] specifies
-//      next action (shift/reduce/accept/error) on state S under a lookahead
-//      terminal a
+//      next action (shift/reduce) on state S under a lookahead terminal a
 //    - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specifies
 //      the state which we transist to from the state S with the nonterminal X
 //
@@ -76,8 +75,12 @@ public:
       Shift,
       // Reduce by a rule: pop the state stack.
       Reduce,
-      // Signals that we have parsed the input successfully.
-      Accept,
+
+      // NOTE: there are no typical accept actions in the LRtable, accept
+      // actions are handled specifically in the parser -- if the parser
+      // reaches to a target state (which is goto(StartState, StartSymbol)) at
+      // the EOF token after a reduce, this indicates the input has been parsed
+      // as the StartSymbol successfully.
 
       // Nonterminal actions, corresponding to entry of GOTO table.
 
@@ -86,7 +89,6 @@ public:
       GoTo,
     };
 
-    static Action accept(RuleID RID) { return Action(Accept, RID); }
     static Action goTo(StateID S) { return Action(GoTo, S); }
     static Action shift(StateID S) { return Action(Shift, S); }
     static Action reduce(RuleID RID) { return Action(Reduce, RID); }
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 1031e3dd2007..0938d8846fe0 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -39,13 +39,14 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
 
 const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
                            SymbolID StartSymbol) {
+  assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
   llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
   auto &G = Params.G;
   (void)G;
   auto &GSS = Params.GSStack;
 
-  // Lists of active shift, reduce, accept actions.
-  std::vector<ParseStep> PendingShift, PendingReduce, PendingAccept;
+  // Lists of active shift, reduce actions.
+  std::vector<ParseStep> PendingShift, PendingReduce;
   auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) {
     for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) {
       switch (Action.kind()) {
@@ -55,20 +56,18 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
       case LRTable::Action::Reduce:
         PendingReduce.push_back({Head, Action});
         break;
-      case LRTable::Action::Accept:
-        PendingAccept.push_back({Head, Action});
-        break;
       default:
         llvm_unreachable("unexpected action kind!");
       }
     }
   };
+  StateID StartState = Params.Table.getStartState(StartSymbol);
   std::vector<const GSS::Node *> NewHeads = {
-      GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol),
+      GSS.addNode(/*State=*/StartState,
                   /*ForestNode=*/nullptr, {})};
   auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
     assert(PendingShift.empty() && PendingReduce.empty() &&
-           PendingAccept.empty() && "Running GC at the wrong time!");
+           "Running GC at the wrong time!");
 
     if (++I != 20) // Run periodically to balance CPU and memory usage.
       return;
@@ -98,24 +97,31 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n"));
   for (const auto *Heads : NewHeads)
     AddSteps(Heads, tokenSymbol(tok::eof));
-  glrReduce(PendingReduce, Params,
-            [&](const GSS::Node * NewHead) {
-              // A reduce will enable more steps.
-              AddSteps(NewHead, tokenSymbol(tok::eof));
-            });
 
-  if (!PendingAccept.empty()) {
-    LLVM_DEBUG({
-      llvm::dbgs() << llvm::formatv("Accept: {0} accepted result:\n",
-                                             PendingAccept.size());
-      for (const auto &Accept : PendingAccept)
-        llvm::dbgs() << "  - " << G.symbolName(Accept.Head->Payload->symbol())
-                     << "\n";
-    });
-    assert(PendingAccept.size() == 1);
-    return *PendingAccept.front().Head->Payload;
+  StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+  // Collect new heads created from the final reduce.
+  std::vector<const GSS::Node*> Heads;
+  glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {
+    Heads.push_back(NewHead);
+    // A reduce will enable more steps.
+    AddSteps(NewHead, tokenSymbol(tok::eof));
+  });
+
+  const ForestNode *Result = nullptr;
+  for (const auto *Head : Heads) {
+    if (Head->State == AcceptState) {
+      assert(Head->Payload->symbol() == StartSymbol);
+      assert(Result == nullptr && "multiple results!");
+      Result = Head->Payload;
+    }
   }
+  if (Result)
+    return *Result;
   // We failed to parse the input, returning an opaque forest node for recovery.
+  //
+  // FIXME: We will need to invoke our generic error-recovery handlers when we
+  // reach EOF without reaching accept state, and involving the eof
+  // token in the above main for-loopmay be the best way to reuse the code).
   return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
 }
 
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
index 28e2bf5b136a..0c4ff6e775fd 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
@@ -25,8 +25,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
     return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
   case LRTable::Action::GoTo:
     return OS << llvm::formatv("go to state {0}", A.getGoToState());
-  case LRTable::Action::Accept:
-    return OS << "acc";
   case LRTable::Action::Sentinel:
     llvm_unreachable("unexpected Sentinel action kind!");
   }
@@ -60,8 +58,6 @@ std::string LRTable::dumpForTests(const Grammar &G) const {
           OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
                                         G.symbolName(TokID), A.getReduceRule(),
                                         G.dumpRule(A.getReduceRule()));
-        else if (A.kind() == LRTable::Action::Accept)
-          OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID));
       }
     }
     for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
index f59c8736fbec..4b5e87a3ac46 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -117,11 +117,11 @@ LRTable LRTable::buildSLR(const Grammar &G) {
   auto FollowSets = followSets(G);
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
     for (const Item &I : Graph.states()[SID].Items) {
-      // If we've just parsed the start symbol, we can accept the input.
-      if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
-        Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
+      // If we've just parsed the start symbol, this means we successfully parse
+      // the input. We don't add the reduce action of `_ := start_symbol` in the
+      // LRTable (the GLR parser handles it specifically).
+      if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext())
         continue;
-      }
       if (!I.hasNext()) {
         // If we've reached the end of a rule A := ..., then we can reduce if
         // the next token is in the follow set of A.
diff --git a/clang-tools-extra/pseudo/test/lr-build-basic.test b/clang-tools-extra/pseudo/test/lr-build-basic.test
index 5c017fa0e4da..36a86ecd9f1f 100644
--- a/clang-tools-extra/pseudo/test/lr-build-basic.test
+++ b/clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -22,7 +22,6 @@ id := IDENTIFIER
 # TABLE-NEXT:     'expr': go to state 1
 # TABLE-NEXT:     'id': go to state 2
 # TABLE-NEXT: State 1
-# TABLE-NEXT:     'EOF': accept
 # TABLE-NEXT: State 2
 # TABLE-NEXT:     'EOF': reduce by rule 1 'expr := id'
 # TABLE-NEXT: State 3
diff --git a/clang-tools-extra/pseudo/test/lr-build-conflicts.test b/clang-tools-extra/pseudo/test/lr-build-conflicts.test
index aefa905413bd..916589572ae4 100644
--- a/clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ b/clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -33,7 +33,6 @@ expr := IDENTIFIER
 # TABLE-NEXT:     'IDENTIFIER': shift state 2
 # TABLE-NEXT:     'expr': go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT:     'EOF': accept
 # TABLE-NEXT:     '-': shift state 3
 # TABLE-NEXT: State 2
 # TABLE-NEXT:     'EOF': reduce by rule 1 'expr := IDENTIFIER'
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index ca12c53214ae..0a85deaf6a1d 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -393,6 +393,29 @@ TEST_F(GLRTest, GLRReduceOrder) {
             "[  0, end)     └─IDENTIFIER := tok[0]\n");
 }
 
+TEST_F(GLRTest, NoExplicitAccept) {
+  build(R"bnf(
+    _ := test
+
+    test := IDENTIFIER test
+    test := IDENTIFIER
+  )bnf");
+  clang::LangOptions LOptions;
+  // Given the following input, and the grammar above, we perform two reductions
+  // of the nonterminal `test` when the next token is `eof`, verify that the
+  // parser stops at the right state.
+  const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
+  auto LRTable = LRTable::buildSLR(*G);
+
+  const ForestNode &Parsed =
+      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(*G),
+            "[  0, end) test := IDENTIFIER test\n"
+            "[  0,   1) ├─IDENTIFIER := tok[0]\n"
+            "[  1, end) └─test := IDENTIFIER\n"
+            "[  1, end)   └─IDENTIFIER := tok[1]\n");
+}
+
 TEST(GSSTest, GC) {
   //      ┌-A-┬-AB
   //      ├-B-┘
diff --git a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp
index 9823159e5517..21b46879e256 100644
--- a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@@ -33,7 +33,7 @@ TEST(LRTable, Builder) {
   std::vector<LRTable::Entry> Entries = {
       {/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},
       {/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},
-      {/* State */ 1, tokenSymbol(tok::eof), Action::accept(2)},
+      {/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},
       {/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};
   GrammarTable GT;
   LRTable T = LRTable::buildForTests(GT, Entries);
@@ -41,7 +41,7 @@ TEST(LRTable, Builder) {
   EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
               UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
-              UnorderedElementsAre(Action::accept(2)));
+              UnorderedElementsAre(Action::reduce(2)));
   EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
   EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
               UnorderedElementsAre(Action::reduce(1)));