[pseudo] Eliminate LRTable::Action. NFC

The last remaining uses are in tests/test builders. Replace with a builder struct. Differential Revision: https://reviews.llvm.org/D129093
2022-07-04 20:35:40 +02:00 · 2022-07-04 20:35:40 +02:00 · 9fbf1107cc
parent dc969061c6
commit 9fbf1107cc
5 changed files with 120 additions and 247 deletions
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@ -39,6 +39,7 @@
 #include "clang-pseudo/grammar/Grammar.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@ -58,71 +59,15 @@ namespace pseudo {
 // Unlike the typical LR parsing table which allows at most one available action
 // per entry, conflicted actions are allowed in LRTable. The LRTable is designed
 // to be used in nondeterministic LR parsers (e.g. GLR).
+//
+// There are no "accept" actions in the LRTable, instead the stack is inspected
+// after parsing completes: is the state goto(StartState, StartSymbol)?
 class LRTable {
 public:
  // StateID is only 13 bits wide.
  using StateID = uint16_t;
  static constexpr unsigned StateBits = 13;

-  // Action represents the terminal and nonterminal actions, it combines the
-  // entry of the ACTION and GOTO tables from the LR literature.
-  //
-  // FIXME: as we move away from a homogeneous table structure shared between
-  // action types, this class becomes less useful. Remove it.
-  class Action {
-  public:
-    enum Kind : uint8_t {
-      Sentinel = 0,
-      // Terminal actions, corresponding to entries of ACTION table.
-
-      // Shift to state n: move forward with the lookahead, and push state n
-      // onto the state stack.
-      // A shift is a forward transition, and the value n is the next state that
-      // the parser is to enter.
-      Shift,
-
-      // NOTE: there are no typical accept actions in the LRtable, accept
-      // actions are handled specifically in the parser -- if the parser
-      // reaches to a target state (which is goto(StartState, StartSymbol)) at
-      // the EOF token after a reduce, this indicates the input has been parsed
-      // as the StartSymbol successfully.
-
-      // Nonterminal actions, corresponding to entry of GOTO table.
-
-      // Go to state n: push state n onto the state stack.
-      // Similar to Shift, but it is a nonterminal forward transition.
-      GoTo,
-    };
-
-    static Action goTo(StateID S) { return Action(GoTo, S); }
-    static Action shift(StateID S) { return Action(Shift, S); }
-    static Action sentinel() { return Action(Sentinel, 0); }
-
-    StateID getShiftState() const {
-      assert(kind() == Shift);
-      return Value;
-    }
-    StateID getGoToState() const {
-      assert(kind() == GoTo);
-      return Value;
-    }
-    Kind kind() const { return static_cast<Kind>(K); }
-
-    bool operator==(const Action &L) const { return opaque() == L.opaque(); }
-    uint16_t opaque() const { return K << ValueBits | Value; };
-
-  private:
-    Action(Kind K1, unsigned Value) : K(K1), Value(Value) {}
-    static constexpr unsigned ValueBits = StateBits;
-    static constexpr unsigned KindBits = 3;
-    static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID");
-    static_assert(KindBits + ValueBits <= 16,
-                  "Must be able to store kind and value efficiently");
-    uint16_t K : KindBits;
-    // Either StateID or RuleID, depending on the Kind.
-    uint16_t Value : ValueBits;
-  };
-
  // Returns the state after we reduce a nonterminal.
  // Expected to be called by LR parsers.
  // If the nonterminal is invalid here, returns None.
@ -184,20 +129,27 @@ public:
  // Build a SLR(1) parsing table.
  static LRTable buildSLR(const Grammar &G);

-  struct Builder;
-  // Represents an entry in the table, used for building the LRTable.
-  struct Entry {
-    StateID State;
-    SymbolID Symbol;
-    Action Act;
+  // Helper for building a table with specified actions/states.
+  struct Builder {
+    Builder() = default;
+    Builder(const Grammar &G) {
+      NumNonterminals = G.table().Nonterminals.size();
+      FollowSets = followSets(G);
+    }
+
+    unsigned int NumNonterminals = 0;
+    // States representing `_ := . start` for various start symbols.
+    std::vector<std::pair<SymbolID, StateID>> StartStates;
+    // State transitions `X := ABC . D EFG` => `X := ABC D . EFG`.
+    // Key is (initial state, D), value is final state.
+    llvm::DenseMap<std::pair<StateID, SymbolID>, StateID> Transition;
+    // Reductions available in a given state.
+    llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduce;
+    // FollowSets[NT] is the set of terminals that can follow the nonterminal.
+    std::vector<llvm::DenseSet<SymbolID>> FollowSets;
+
+    LRTable build() &&;
  };
-  struct ReduceEntry {
-    StateID State;
-    RuleID Rule;
-  };
-  // Build a specifid table for testing purposes.
-  static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
-                               llvm::ArrayRef<ReduceEntry>);

 private:
  unsigned numStates() const { return ReduceOffset.size() - 1; }
@ -300,7 +252,6 @@ private:
  // as an index: Nonterminal * NUM_TOKENS + Token.
  llvm::BitVector FollowSets;
 };
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);

 } // namespace pseudo
 } // namespace clang
--- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
@ -11,25 +11,12 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"

 namespace clang {
 namespace pseudo {

-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
-  switch (A.kind()) {
-  case LRTable::Action::Shift:
-    return OS << llvm::formatv("shift state {0}", A.getShiftState());
-  case LRTable::Action::GoTo:
-    return OS << llvm::formatv("go to state {0}", A.getGoToState());
-  case LRTable::Action::Sentinel:
-    llvm_unreachable("unexpected Sentinel action kind!");
-  }
-  llvm_unreachable("unexpected action kind!");
-}
-
 std::string LRTable::dumpStatistics() const {
  return llvm::formatv(R"(
 Statistics of the LR parsing table:
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@ -13,109 +13,67 @@
 #include "llvm/ADT/SmallSet.h"
 #include <cstdint>

-namespace llvm {
-template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
-  using Entry = clang::pseudo::LRTable::Entry;
-  static inline Entry getEmptyKey() {
-    static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
-                   clang::pseudo::LRTable::Action::sentinel()};
-    return E;
-  }
-  static inline Entry getTombstoneKey() {
-    static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
-                   clang::pseudo::LRTable::Action::sentinel()};
-    return E;
-  }
-  static unsigned getHashValue(const Entry &I) {
-    return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
-  }
-  static bool isEqual(const Entry &LHS, const Entry &RHS) {
-    return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
-           LHS.Act == RHS.Act;
-  }
-};
-} // namespace llvm
-
 namespace clang {
 namespace pseudo {

-struct LRTable::Builder {
-  std::vector<std::pair<SymbolID, StateID>> StartStates;
-  llvm::DenseSet<Entry> Entries;
-  llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
-  std::vector<llvm::DenseSet<SymbolID>> FollowSets;
+LRTable LRTable::Builder::build() && {
+  assert(NumNonterminals != 0 && "Set NumNonterminals or init with grammar");
+  LRTable Table;

-  LRTable build(unsigned NumStates, unsigned NumNonterminals) && {
-    LRTable Table;
-    Table.StartStates = std::move(StartStates);
-
-    // Compile the goto and shift actions into transition tables.
-    llvm::DenseMap<unsigned, SymbolID> Gotos;
-    llvm::DenseMap<unsigned, SymbolID> Shifts;
-    for (const auto &E : Entries) {
-      if (E.Act.kind() == Action::Shift)
-        Shifts.try_emplace(shiftIndex(E.State, E.Symbol, NumStates),
-                           E.Act.getShiftState());
-      else if (E.Act.kind() == Action::GoTo)
-        Gotos.try_emplace(gotoIndex(E.State, E.Symbol, NumStates),
-                          E.Act.getGoToState());
-    }
-    Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
-    Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);
-
-    // Compile the follow sets into a bitmap.
-    Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
-    for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
-      for (SymbolID Follow : FollowSets[NT])
-        Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
-
-    // Store the reduce actions in a vector partitioned by state.
-    Table.ReduceOffset.reserve(NumStates + 1);
-    std::vector<RuleID> StateRules;
-    for (StateID S = 0; S < NumStates; ++S) {
-      Table.ReduceOffset.push_back(Table.Reduces.size());
-      auto It = Reduces.find(S);
-      if (It == Reduces.end())
-        continue;
-      Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
-                           It->second.end());
-      std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
-                Table.Reduces.end());
-    }
-    Table.ReduceOffset.push_back(Table.Reduces.size());
-
-    return Table;
-  }
-};
-
-LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
-                               llvm::ArrayRef<ReduceEntry> Reduces) {
+  // Count number of states: every state has to be reachable somehow.
  StateID MaxState = 0;
-  for (const auto &Entry : Entries) {
-    MaxState = std::max(MaxState, Entry.State);
-    if (Entry.Act.kind() == LRTable::Action::Shift)
-      MaxState = std::max(MaxState, Entry.Act.getShiftState());
-    if (Entry.Act.kind() == LRTable::Action::GoTo)
-      MaxState = std::max(MaxState, Entry.Act.getGoToState());
+  for (const auto &Entry : StartStates)
+    MaxState = std::max(MaxState, Entry.second);
+  for (const auto &Entry : Transition)
+    MaxState = std::max(MaxState, Entry.second);
+  unsigned NumStates = MaxState + 1;
+
+  Table.StartStates = std::move(StartStates);
+
+  // Compile the goto and shift actions into transition tables.
+  llvm::DenseMap<unsigned, SymbolID> Gotos;
+  llvm::DenseMap<unsigned, SymbolID> Shifts;
+  for (const auto &E : Transition) {
+    if (isToken(E.first.second))
+      Shifts.try_emplace(shiftIndex(E.first.first, E.first.second, NumStates),
+                         E.second);
+    else
+      Gotos.try_emplace(gotoIndex(E.first.first, E.first.second, NumStates),
+                        E.second);
  }
-  Builder Build;
-  Build.Entries.insert(Entries.begin(), Entries.end());
-  for (const ReduceEntry &E : Reduces)
-    Build.Reduces[E.State].insert(E.Rule);
-  Build.FollowSets = followSets(G);
-  return std::move(Build).build(/*NumStates=*/MaxState + 1,
-                                G.table().Nonterminals.size());
+  Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
+  Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);
+
+  // Compile the follow sets into a bitmap.
+  Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
+  for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
+    for (SymbolID Follow : FollowSets[NT])
+      Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
+
+  // Store the reduce actions in a vector partitioned by state.
+  Table.ReduceOffset.reserve(NumStates + 1);
+  std::vector<RuleID> StateRules;
+  for (StateID S = 0; S < NumStates; ++S) {
+    Table.ReduceOffset.push_back(Table.Reduces.size());
+    auto It = Reduce.find(S);
+    if (It == Reduce.end())
+      continue;
+    Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
+                         It->second.end());
+    std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
+              Table.Reduces.end());
+  }
+  Table.ReduceOffset.push_back(Table.Reduces.size());
+
+  return Table;
 }

 LRTable LRTable::buildSLR(const Grammar &G) {
  auto Graph = LRGraph::buildLR0(G);
-  Builder Build;
+  Builder Build(G);
  Build.StartStates = Graph.startStates();
-  for (const auto &T : Graph.edges()) {
-    Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
-    Build.Entries.insert({T.Src, T.Label, Act});
-  }
-  Build.FollowSets = followSets(G);
+  for (const auto &T : Graph.edges())
+    Build.Transition.try_emplace({T.Src, T.Label}, T.Dst);
  assert(Graph.states().size() <= (1 << StateBits) &&
         "Graph states execceds the maximum limit!");
  // Add reduce actions.
@ -129,11 +87,10 @@ LRTable LRTable::buildSLR(const Grammar &G) {
      if (!I.hasNext())
        // If we've reached the end of a rule A := ..., then we can reduce if
        // the next token is in the follow set of A.
-        Build.Reduces[SID].insert(I.rule());
+        Build.Reduce[SID].insert(I.rule());
    }
  }
-  return std::move(Build).build(Graph.states().size(),
-                                G.table().Nonterminals.size());
+  return std::move(Build).build();
 }

 } // namespace pseudo
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@ -30,7 +30,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,

 namespace {

-using Action = LRTable::Action;
+using StateID = LRTable::StateID;
 using testing::AllOf;
 using testing::ElementsAre;
 using testing::UnorderedElementsAre;
@ -122,13 +122,11 @@ TEST_F(GLRTest, ShiftMergingHeads) {
                                   /*Parents=*/{GSSNode0});

  buildGrammar({}, {}); // Create a fake empty grammar.
-  TestLang.Table =
-      LRTable::buildForTests(TestLang.G, /*Entries=*/{
-                                 {1, tokenSymbol(tok::semi), Action::shift(4)},
-                                 {2, tokenSymbol(tok::semi), Action::shift(4)},
-                                 {3, tokenSymbol(tok::semi), Action::shift(5)},
-                             },
-                             {});
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{1}, tokenSymbol(tok::semi)}] = StateID{4};
+  B.Transition[{StateID{2}, tokenSymbol(tok::semi)}] = StateID{4};
+  B.Transition[{StateID{3}, tokenSymbol(tok::semi)}] = StateID{5};
+  TestLang.Table = std::move(B).build();

  ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
  std::vector<const GSS::Node *> NewHeads;
@ -152,17 +150,12 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
  //    └--3(enum-name)     // 3 is goto(0, enum-name)
  buildGrammar({"class-name", "enum-name"},
               {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
-
-  TestLang.Table = LRTable::buildForTests(
-      TestLang.G,
-      {
-          {/*State=*/0, id("class-name"), Action::goTo(2)},
-          {/*State=*/0, id("enum-name"), Action::goTo(3)},
-      },
-      {
-          {/*State=*/1, ruleFor("class-name")},
-          {/*State=*/1, ruleFor("enum-name")},
-      });
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{0}, id("class-name")}] = StateID{2};
+  B.Transition[{StateID{0}, id("enum-name")}] = StateID{3};
+  B.Reduce[StateID{1}].insert(ruleFor("class-name"));
+  B.Reduce[StateID{1}].insert(ruleFor("enum-name"));
+  TestLang.Table = std::move(B).build();

  const auto *GSSNode0 =
      GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
@ -202,15 +195,12 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
      /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
      /*Parents=*/{GSSNode2, GSSNode3});

-  TestLang.Table = LRTable::buildForTests(
-      TestLang.G,
-      {
-          {/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
-          {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
-      },
-      {
-          {/*State=*/4, ruleFor("ptr-operator")},
-      });
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{2}, id("ptr-operator")}] = StateID{5};
+  B.Transition[{StateID{3}, id("ptr-operator")}] = StateID{6};
+  B.Reduce[StateID{4}].insert(ruleFor("ptr-operator"));
+  TestLang.Table = std::move(B).build();
+
  std::vector<const GSS::Node *> Heads = {GSSNode4};
  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});

@ -256,16 +246,13 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
                      /*Parents=*/{GSSNode2});

  // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  TestLang.Table = LRTable::buildForTests(
-      TestLang.G,
-      {
-          {/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
-          {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
-      },
-      {
-          {/*State=*/3, /* type-name := class-name */ 0},
-          {/*State=*/4, /* type-name := enum-name */ 1},
-      });
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{1}, id("type-name")}] = StateID{5};
+  B.Transition[{StateID{2}, id("type-name")}] = StateID{5};
+  B.Reduce[StateID{3}].insert(/* type-name := class-name */ RuleID{0});
+  B.Reduce[StateID{4}].insert(/* type-name := enum-name */ RuleID{1});
+  TestLang.Table = std::move(B).build();
+
  std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});

@ -314,15 +301,12 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
                      /*Parents=*/{GSSNode2});

  // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  TestLang.Table =
-      LRTable::buildForTests(TestLang.G,
-                             {
-                                 {/*State=*/0, id("pointer"), Action::goTo(5)},
-                             },
-                             {
-                                 {3, /* pointer := class-name */ 0},
-                                 {4, /* pointer := enum-name */ 1},
-                             });
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{0}, id("pointer")}] = StateID{5};
+  B.Reduce[StateID{3}].insert(/* pointer := class-name */ RuleID{0});
+  B.Reduce[StateID{4}].insert(/* pointer := enum-name */ RuleID{1});
+  TestLang.Table = std::move(B).build();
+
  std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
  glrReduce(Heads, tokenSymbol(tok::eof),
            {TestLang.G, TestLang.Table, Arena, GSStack});
@ -345,14 +329,10 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
 TEST_F(GLRTest, ReduceLookahead) {
  // A term can be followed by +, but not by -.
  buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
-  TestLang.Table =
-      LRTable::buildForTests(TestLang.G,
-                             {
-                                 {/*State=*/0, id("term"), Action::goTo(2)},
-                             },
-                             {
-                                 {/*State=*/1, 0},
-                             });
+  LRTable::Builder B(TestLang.G);
+  B.Transition[{StateID{0}, id("term")}] = StateID{2};
+  B.Reduce[StateID{1}].insert(RuleID{0});
+  TestLang.Table = std::move(B).build();

  auto *Identifier = &Arena.createTerminal(tok::identifier, /*Start=*/0);

--- a/clang-tools-extra/pseudo/unittests/LRTableTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/LRTableTest.cpp
@ -20,7 +20,7 @@ namespace {

 using llvm::ValueIs;
 using testing::ElementsAre;
-using Action = LRTable::Action;
+using StateID = LRTable::StateID;

 TEST(LRTable, Builder) {
  std::vector<std::string> GrammarDiags;
@ -38,22 +38,20 @@ TEST(LRTable, Builder) {
  SymbolID Identifier = tokenSymbol(tok::identifier);
  SymbolID Plus = tokenSymbol(tok::plus);

+  LRTable::Builder B(G);
  //           eof  IDENT   term
  // +-------+----+-------+------+
  // |state0 |    | s0    |      |
  // |state1 |    |       | g3   |
  // |state2 |    |       |      |
  // +-------+----+-------+------+-------
-  std::vector<LRTable::Entry> Entries = {
-      {/* State */ 0, Identifier, Action::shift(0)},
-      {/* State */ 1, Term, Action::goTo(3)},
-  };
-  std::vector<LRTable::ReduceEntry> ReduceEntries = {
-      {/*State=*/0, /*Rule=*/0},
-      {/*State=*/1, /*Rule=*/2},
-      {/*State=*/2, /*Rule=*/1},
-  };
-  LRTable T = LRTable::buildForTests(G, Entries, ReduceEntries);
+  B.Transition[{StateID{0}, Identifier}] = StateID{0};
+  B.Transition[{StateID{1}, Term}] = StateID{3};
+  B.Reduce[StateID{0}].insert(RuleID{0});
+  B.Reduce[StateID{1}].insert(RuleID{2});
+  B.Reduce[StateID{2}].insert(RuleID{1});
+  LRTable T = std::move(B).build();
+
  EXPECT_EQ(T.getShiftState(0, Eof), llvm::None);
  EXPECT_THAT(T.getShiftState(0, Identifier), ValueIs(0));
  EXPECT_THAT(T.getReduceRules(0), ElementsAre(0));