[pseudo] Eliminate LRTable::Action. NFC

The last remaining uses are in tests/test builders.
Replace with a builder struct.

Differential Revision: https://reviews.llvm.org/D129093
This commit is contained in:
Sam McCall 2022-07-04 20:35:40 +02:00
parent dc969061c6
commit 9fbf1107cc
5 changed files with 120 additions and 247 deletions

View File

@ -39,6 +39,7 @@
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@ -58,71 +59,15 @@ namespace pseudo {
// Unlike the typical LR parsing table which allows at most one available action
// per entry, conflicted actions are allowed in LRTable. The LRTable is designed
// to be used in nondeterministic LR parsers (e.g. GLR).
//
// There are no "accept" actions in the LRTable, instead the stack is inspected
// after parsing completes: is the state goto(StartState, StartSymbol)?
class LRTable {
public:
// StateID is only 13 bits wide.
using StateID = uint16_t;
static constexpr unsigned StateBits = 13;
// Action represents the terminal and nonterminal actions, it combines the
// entry of the ACTION and GOTO tables from the LR literature.
//
// FIXME: as we move away from a homogeneous table structure shared between
// action types, this class becomes less useful. Remove it.
class Action {
public:
enum Kind : uint8_t {
Sentinel = 0,
// Terminal actions, corresponding to entries of ACTION table.
// Shift to state n: move forward with the lookahead, and push state n
// onto the state stack.
// A shift is a forward transition, and the value n is the next state that
// the parser is to enter.
Shift,
// NOTE: there are no typical accept actions in the LRtable, accept
// actions are handled specifically in the parser -- if the parser
// reaches to a target state (which is goto(StartState, StartSymbol)) at
// the EOF token after a reduce, this indicates the input has been parsed
// as the StartSymbol successfully.
// Nonterminal actions, corresponding to entry of GOTO table.
// Go to state n: push state n onto the state stack.
// Similar to Shift, but it is a nonterminal forward transition.
GoTo,
};
static Action goTo(StateID S) { return Action(GoTo, S); }
static Action shift(StateID S) { return Action(Shift, S); }
static Action sentinel() { return Action(Sentinel, 0); }
StateID getShiftState() const {
assert(kind() == Shift);
return Value;
}
StateID getGoToState() const {
assert(kind() == GoTo);
return Value;
}
Kind kind() const { return static_cast<Kind>(K); }
bool operator==(const Action &L) const { return opaque() == L.opaque(); }
uint16_t opaque() const { return K << ValueBits | Value; };
private:
Action(Kind K1, unsigned Value) : K(K1), Value(Value) {}
static constexpr unsigned ValueBits = StateBits;
static constexpr unsigned KindBits = 3;
static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID");
static_assert(KindBits + ValueBits <= 16,
"Must be able to store kind and value efficiently");
uint16_t K : KindBits;
// Either StateID or RuleID, depending on the Kind.
uint16_t Value : ValueBits;
};
// Returns the state after we reduce a nonterminal.
// Expected to be called by LR parsers.
// If the nonterminal is invalid here, returns None.
@ -184,20 +129,27 @@ public:
// Build a SLR(1) parsing table.
static LRTable buildSLR(const Grammar &G);
struct Builder;
// Represents an entry in the table, used for building the LRTable.
struct Entry {
StateID State;
SymbolID Symbol;
Action Act;
// Helper for building a table with specified actions/states.
struct Builder {
Builder() = default;
Builder(const Grammar &G) {
NumNonterminals = G.table().Nonterminals.size();
FollowSets = followSets(G);
}
unsigned int NumNonterminals = 0;
// States representing `_ := . start` for various start symbols.
std::vector<std::pair<SymbolID, StateID>> StartStates;
// State transitions `X := ABC . D EFG` => `X := ABC D . EFG`.
// Key is (initial state, D), value is final state.
llvm::DenseMap<std::pair<StateID, SymbolID>, StateID> Transition;
// Reductions available in a given state.
llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduce;
// FollowSets[NT] is the set of terminals that can follow the nonterminal.
std::vector<llvm::DenseSet<SymbolID>> FollowSets;
LRTable build() &&;
};
struct ReduceEntry {
StateID State;
RuleID Rule;
};
// Build a specifid table for testing purposes.
static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
llvm::ArrayRef<ReduceEntry>);
private:
unsigned numStates() const { return ReduceOffset.size() - 1; }
@ -300,7 +252,6 @@ private:
// as an index: Nonterminal * NUM_TOKENS + Token.
llvm::BitVector FollowSets;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
} // namespace pseudo
} // namespace clang

View File

@ -11,25 +11,12 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace pseudo {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
switch (A.kind()) {
case LRTable::Action::Shift:
return OS << llvm::formatv("shift state {0}", A.getShiftState());
case LRTable::Action::GoTo:
return OS << llvm::formatv("go to state {0}", A.getGoToState());
case LRTable::Action::Sentinel:
llvm_unreachable("unexpected Sentinel action kind!");
}
llvm_unreachable("unexpected action kind!");
}
std::string LRTable::dumpStatistics() const {
return llvm::formatv(R"(
Statistics of the LR parsing table:

View File

@ -13,109 +13,67 @@
#include "llvm/ADT/SmallSet.h"
#include <cstdint>
namespace llvm {
template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
using Entry = clang::pseudo::LRTable::Entry;
static inline Entry getEmptyKey() {
static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
clang::pseudo::LRTable::Action::sentinel()};
return E;
}
static inline Entry getTombstoneKey() {
static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
clang::pseudo::LRTable::Action::sentinel()};
return E;
}
static unsigned getHashValue(const Entry &I) {
return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
}
static bool isEqual(const Entry &LHS, const Entry &RHS) {
return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
LHS.Act == RHS.Act;
}
};
} // namespace llvm
namespace clang {
namespace pseudo {
struct LRTable::Builder {
std::vector<std::pair<SymbolID, StateID>> StartStates;
llvm::DenseSet<Entry> Entries;
llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
std::vector<llvm::DenseSet<SymbolID>> FollowSets;
LRTable LRTable::Builder::build() && {
assert(NumNonterminals != 0 && "Set NumNonterminals or init with grammar");
LRTable Table;
LRTable build(unsigned NumStates, unsigned NumNonterminals) && {
LRTable Table;
Table.StartStates = std::move(StartStates);
// Compile the goto and shift actions into transition tables.
llvm::DenseMap<unsigned, SymbolID> Gotos;
llvm::DenseMap<unsigned, SymbolID> Shifts;
for (const auto &E : Entries) {
if (E.Act.kind() == Action::Shift)
Shifts.try_emplace(shiftIndex(E.State, E.Symbol, NumStates),
E.Act.getShiftState());
else if (E.Act.kind() == Action::GoTo)
Gotos.try_emplace(gotoIndex(E.State, E.Symbol, NumStates),
E.Act.getGoToState());
}
Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);
// Compile the follow sets into a bitmap.
Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
for (SymbolID Follow : FollowSets[NT])
Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
// Store the reduce actions in a vector partitioned by state.
Table.ReduceOffset.reserve(NumStates + 1);
std::vector<RuleID> StateRules;
for (StateID S = 0; S < NumStates; ++S) {
Table.ReduceOffset.push_back(Table.Reduces.size());
auto It = Reduces.find(S);
if (It == Reduces.end())
continue;
Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
It->second.end());
std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
Table.Reduces.end());
}
Table.ReduceOffset.push_back(Table.Reduces.size());
return Table;
}
};
LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
llvm::ArrayRef<ReduceEntry> Reduces) {
// Count number of states: every state has to be reachable somehow.
StateID MaxState = 0;
for (const auto &Entry : Entries) {
MaxState = std::max(MaxState, Entry.State);
if (Entry.Act.kind() == LRTable::Action::Shift)
MaxState = std::max(MaxState, Entry.Act.getShiftState());
if (Entry.Act.kind() == LRTable::Action::GoTo)
MaxState = std::max(MaxState, Entry.Act.getGoToState());
for (const auto &Entry : StartStates)
MaxState = std::max(MaxState, Entry.second);
for (const auto &Entry : Transition)
MaxState = std::max(MaxState, Entry.second);
unsigned NumStates = MaxState + 1;
Table.StartStates = std::move(StartStates);
// Compile the goto and shift actions into transition tables.
llvm::DenseMap<unsigned, SymbolID> Gotos;
llvm::DenseMap<unsigned, SymbolID> Shifts;
for (const auto &E : Transition) {
if (isToken(E.first.second))
Shifts.try_emplace(shiftIndex(E.first.first, E.first.second, NumStates),
E.second);
else
Gotos.try_emplace(gotoIndex(E.first.first, E.first.second, NumStates),
E.second);
}
Builder Build;
Build.Entries.insert(Entries.begin(), Entries.end());
for (const ReduceEntry &E : Reduces)
Build.Reduces[E.State].insert(E.Rule);
Build.FollowSets = followSets(G);
return std::move(Build).build(/*NumStates=*/MaxState + 1,
G.table().Nonterminals.size());
Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);
// Compile the follow sets into a bitmap.
Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
for (SymbolID Follow : FollowSets[NT])
Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
// Store the reduce actions in a vector partitioned by state.
Table.ReduceOffset.reserve(NumStates + 1);
std::vector<RuleID> StateRules;
for (StateID S = 0; S < NumStates; ++S) {
Table.ReduceOffset.push_back(Table.Reduces.size());
auto It = Reduce.find(S);
if (It == Reduce.end())
continue;
Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
It->second.end());
std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
Table.Reduces.end());
}
Table.ReduceOffset.push_back(Table.Reduces.size());
return Table;
}
LRTable LRTable::buildSLR(const Grammar &G) {
auto Graph = LRGraph::buildLR0(G);
Builder Build;
Builder Build(G);
Build.StartStates = Graph.startStates();
for (const auto &T : Graph.edges()) {
Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
Build.Entries.insert({T.Src, T.Label, Act});
}
Build.FollowSets = followSets(G);
for (const auto &T : Graph.edges())
Build.Transition.try_emplace({T.Src, T.Label}, T.Dst);
assert(Graph.states().size() <= (1 << StateBits) &&
"Graph states execceds the maximum limit!");
// Add reduce actions.
@ -129,11 +87,10 @@ LRTable LRTable::buildSLR(const Grammar &G) {
if (!I.hasNext())
// If we've reached the end of a rule A := ..., then we can reduce if
// the next token is in the follow set of A.
Build.Reduces[SID].insert(I.rule());
Build.Reduce[SID].insert(I.rule());
}
}
return std::move(Build).build(Graph.states().size(),
G.table().Nonterminals.size());
return std::move(Build).build();
}
} // namespace pseudo

View File

@ -30,7 +30,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
namespace {
using Action = LRTable::Action;
using StateID = LRTable::StateID;
using testing::AllOf;
using testing::ElementsAre;
using testing::UnorderedElementsAre;
@ -122,13 +122,11 @@ TEST_F(GLRTest, ShiftMergingHeads) {
/*Parents=*/{GSSNode0});
buildGrammar({}, {}); // Create a fake empty grammar.
TestLang.Table =
LRTable::buildForTests(TestLang.G, /*Entries=*/{
{1, tokenSymbol(tok::semi), Action::shift(4)},
{2, tokenSymbol(tok::semi), Action::shift(4)},
{3, tokenSymbol(tok::semi), Action::shift(5)},
},
{});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{1}, tokenSymbol(tok::semi)}] = StateID{4};
B.Transition[{StateID{2}, tokenSymbol(tok::semi)}] = StateID{4};
B.Transition[{StateID{3}, tokenSymbol(tok::semi)}] = StateID{5};
TestLang.Table = std::move(B).build();
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<const GSS::Node *> NewHeads;
@ -152,17 +150,12 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
// └--3(enum-name) // 3 is goto(0, enum-name)
buildGrammar({"class-name", "enum-name"},
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/0, id("class-name"), Action::goTo(2)},
{/*State=*/0, id("enum-name"), Action::goTo(3)},
},
{
{/*State=*/1, ruleFor("class-name")},
{/*State=*/1, ruleFor("enum-name")},
});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{0}, id("class-name")}] = StateID{2};
B.Transition[{StateID{0}, id("enum-name")}] = StateID{3};
B.Reduce[StateID{1}].insert(ruleFor("class-name"));
B.Reduce[StateID{1}].insert(ruleFor("enum-name"));
TestLang.Table = std::move(B).build();
const auto *GSSNode0 =
GSStack.addNode(/*State=*/0, /*ForestNode=*/nullptr, /*Parents=*/{});
@ -202,15 +195,12 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
/*Parents=*/{GSSNode2, GSSNode3});
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
},
{
{/*State=*/4, ruleFor("ptr-operator")},
});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{2}, id("ptr-operator")}] = StateID{5};
B.Transition[{StateID{3}, id("ptr-operator")}] = StateID{6};
B.Reduce[StateID{4}].insert(ruleFor("ptr-operator"));
TestLang.Table = std::move(B).build();
std::vector<const GSS::Node *> Heads = {GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
@ -256,16 +246,13 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
},
{
{/*State=*/3, /* type-name := class-name */ 0},
{/*State=*/4, /* type-name := enum-name */ 1},
});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{1}, id("type-name")}] = StateID{5};
B.Transition[{StateID{2}, id("type-name")}] = StateID{5};
B.Reduce[StateID{3}].insert(/* type-name := class-name */ RuleID{0});
B.Reduce[StateID{4}].insert(/* type-name := enum-name */ RuleID{1});
TestLang.Table = std::move(B).build();
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
@ -314,15 +301,12 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
TestLang.Table =
LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("pointer"), Action::goTo(5)},
},
{
{3, /* pointer := class-name */ 0},
{4, /* pointer := enum-name */ 1},
});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{0}, id("pointer")}] = StateID{5};
B.Reduce[StateID{3}].insert(/* pointer := class-name */ RuleID{0});
B.Reduce[StateID{4}].insert(/* pointer := enum-name */ RuleID{1});
TestLang.Table = std::move(B).build();
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof),
{TestLang.G, TestLang.Table, Arena, GSStack});
@ -345,14 +329,10 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
TEST_F(GLRTest, ReduceLookahead) {
// A term can be followed by +, but not by -.
buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
TestLang.Table =
LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("term"), Action::goTo(2)},
},
{
{/*State=*/1, 0},
});
LRTable::Builder B(TestLang.G);
B.Transition[{StateID{0}, id("term")}] = StateID{2};
B.Reduce[StateID{1}].insert(RuleID{0});
TestLang.Table = std::move(B).build();
auto *Identifier = &Arena.createTerminal(tok::identifier, /*Start=*/0);

View File

@ -20,7 +20,7 @@ namespace {
using llvm::ValueIs;
using testing::ElementsAre;
using Action = LRTable::Action;
using StateID = LRTable::StateID;
TEST(LRTable, Builder) {
std::vector<std::string> GrammarDiags;
@ -38,22 +38,20 @@ TEST(LRTable, Builder) {
SymbolID Identifier = tokenSymbol(tok::identifier);
SymbolID Plus = tokenSymbol(tok::plus);
LRTable::Builder B(G);
// eof IDENT term
// +-------+----+-------+------+
// |state0 | | s0 | |
// |state1 | | | g3 |
// |state2 | | | |
// +-------+----+-------+------+-------
std::vector<LRTable::Entry> Entries = {
{/* State */ 0, Identifier, Action::shift(0)},
{/* State */ 1, Term, Action::goTo(3)},
};
std::vector<LRTable::ReduceEntry> ReduceEntries = {
{/*State=*/0, /*Rule=*/0},
{/*State=*/1, /*Rule=*/2},
{/*State=*/2, /*Rule=*/1},
};
LRTable T = LRTable::buildForTests(G, Entries, ReduceEntries);
B.Transition[{StateID{0}, Identifier}] = StateID{0};
B.Transition[{StateID{1}, Term}] = StateID{3};
B.Reduce[StateID{0}].insert(RuleID{0});
B.Reduce[StateID{1}].insert(RuleID{2});
B.Reduce[StateID{2}].insert(RuleID{1});
LRTable T = std::move(B).build();
EXPECT_EQ(T.getShiftState(0, Eof), llvm::None);
EXPECT_THAT(T.getShiftState(0, Identifier), ValueIs(0));
EXPECT_THAT(T.getReduceRules(0), ElementsAre(0));