[pseudo] Add ForestNode descendants iterator, print ambiguous/opaque node stats.

Differential Revision: https://reviews.llvm.org/D128930
This commit is contained in:
Sam McCall 2022-06-30 19:50:40 +02:00
parent 38ca754eb0
commit 9b6bb12b85
5 changed files with 138 additions and 1 deletions

View File

@ -43,6 +43,7 @@ namespace pseudo {
// doesn't have parent pointers.
class alignas(class ForestNode *) ForestNode {
public:
class RecursiveIterator;
enum Kind {
// A Terminal node is a single terminal symbol bound to a token.
Terminal,
@ -87,6 +88,22 @@ public:
return children(Data);
}
// Returns the nodes directly below this one, regardless of its kind:
//  - Sequence:  its elements()
//  - Ambiguous: its alternatives()
//  - Terminal and Opaque: an empty list
llvm::ArrayRef<const ForestNode *> children() const {
  switch (kind()) {
  case Terminal:
  case Opaque:
    return {}; // These kinds expose no child nodes.
  case Ambiguous:
    return alternatives();
  case Sequence:
    return elements();
  }
  // Reached only if kind() yields a value not handled by any case above.
  llvm_unreachable("Bad kind");
}
// Iteration over every node reachable from this one, including this node
// itself. A node reachable through several parents is visited only once.
llvm::iterator_range<RecursiveIterator> descendants() const;
std::string dump(const Grammar &) const;
std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const;
@ -181,6 +198,25 @@ private:
uint32_t NodeCount = 0;
};
class ForestNode::RecursiveIterator
: public std::iterator<std::input_iterator_tag, const ForestNode> {
llvm::DenseSet<const ForestNode *> Seen;
struct StackFrame {
const ForestNode *Parent;
unsigned ChildIndex;
};
std::vector<StackFrame> Stack;
const ForestNode *Cur;
public:
RecursiveIterator(const ForestNode *N = nullptr) : Cur(N) {}
const ForestNode &operator*() const { return *Cur; };
void operator++();
bool operator==(const RecursiveIterator &I) const { return Cur == I.Cur; }
bool operator!=(const RecursiveIterator &I) const { return !(*this == I); }
};
} // namespace pseudo
} // namespace clang

View File

@ -16,6 +16,35 @@
namespace clang {
namespace pseudo {
// Moves to the next node of the walk. Unvisited children of the current node
// come first; failing that, the nearest unvisited later sibling of the current
// node or of one of its ancestors. When nothing is left, Cur becomes null,
// which is the end-iterator state.
void ForestNode::RecursiveIterator::operator++() {
  llvm::ArrayRef<const ForestNode *> Kids = Cur->children();

  // Step 1: descend into the first child that has not been produced yet.
  for (unsigned Pos = 0; Pos < Kids.size(); ++Pos) {
    const ForestNode *Child = Kids[Pos];
    if (!Seen.insert(Child).second)
      continue; // Already produced via another parent.
    Stack.push_back({Cur, Pos});
    Cur = Child;
    return;
  }

  // Step 2: backtrack. For each ancestor, innermost first, resume scanning its
  // children just past the one we last took, looking for an unvisited sibling.
  while (!Stack.empty()) {
    auto &Top = Stack.back();
    Kids = Top.Parent->children();
    for (++Top.ChildIndex; Top.ChildIndex < Kids.size(); ++Top.ChildIndex) {
      const ForestNode *Sibling = Kids[Top.ChildIndex];
      if (Seen.insert(Sibling).second) {
        Cur = Sibling;
        return;
      }
    }
    // This ancestor has no unvisited children left; continue with its parent.
    Stack.pop_back();
  }

  // Every reachable node has been produced.
  Cur = nullptr;
}
// Returns the range [iterator positioned at this node, end iterator), i.e. a
// walk that starts at this node itself. The end iterator is the
// default-constructed one, whose current node is null.
llvm::iterator_range<ForestNode::RecursiveIterator>
ForestNode::descendants() const {
  return llvm::iterator_range<RecursiveIterator>(RecursiveIterator(this),
                                                 RecursiveIterator());
}
std::string ForestNode::dump(const Grammar &G) const {
switch (kind()) {
case Ambiguous:

View File

@ -1,4 +1,4 @@
// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s
// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest -print-statistics | FileCheck %s
void foo() {
T* a; // a multiply expression or a pointer declaration?
@ -22,3 +22,10 @@ void foo() {
// CHECK-NEXT: │ └─ptr-declarator~id-expression =#1
// CHECK-NEXT: └─; := tok[8]
}
// CHECK: 3 Ambiguous nodes:
// CHECK-NEXT: 1 simple-type-specifier
// CHECK-NEXT: 1 statement
// CHECK-NEXT: 1 type-name
// CHECK-EMPTY:
// CHECK-NEXT: 0 Opaque nodes:

View File

@ -14,6 +14,8 @@
#include "clang-pseudo/grammar/LRGraph.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
@ -59,6 +61,34 @@ static std::string readOrDie(llvm::StringRef Path) {
return Text.get()->getBuffer().str();
}
namespace clang {
namespace pseudo {
namespace {
struct NodeStats {
unsigned Total = 0;
std::vector<std::pair<SymbolID, unsigned>> BySymbol;
NodeStats(const ForestNode &Root,
llvm::function_ref<bool(const ForestNode &)> Filter) {
llvm::DenseMap<SymbolID, unsigned> Map;
for (const ForestNode &N : Root.descendants())
if (Filter(N)) {
++Total;
++Map[N.symbol()];
}
BySymbol = {Map.begin(), Map.end()};
// Sort by count descending, then symbol ascending.
llvm::sort(BySymbol, [](const auto &L, const auto &R) {
return std::tie(R.second, L.first) < std::tie(L.second, R.first);
});
}
};
} // namespace
} // namespace pseudo
} // namespace clang
int main(int argc, char *argv[]) {
llvm::cl::ParseCommandLineOptions(argc, argv, "");
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
@ -135,6 +165,17 @@ int main(int argc, char *argv[]) {
<< " nodes: " << Arena.nodeCount() << "\n";
llvm::outs() << "GSS bytes: " << GSS.bytes()
<< " nodes: " << GSS.nodesCreated() << "\n";
// Report two node kinds in turn. Each pair holds the label to print and the
// ForestNode kind to count.
for (auto &P :
{std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous),
std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) {
// Tally the nodes under Root whose kind matches the current pair.
clang::pseudo::NodeStats Stats(
Root, [&](const auto &N) { return N.kind() == P.second; });
// Header line: a blank line, then "<total> <label> nodes:".
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
// One line per symbol: the count (formatted with width 3) and the symbol name,
// in the order NodeStats stored them in BySymbol.
for (const auto &S : Stats.BySymbol)
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
G.symbolName(S.first));
}
}
}
}

View File

@ -151,6 +151,30 @@ TEST_F(ForestTest, DumpAbbreviatedShared) {
"[ 0, end) └─A~B =#1\n");
}
TEST_F(ForestTest, Iteration) {
  // The forest under test is a small DAG:
  //   Z is ambiguous, with alternatives X and Y.
  //   X is a sequence of A and B.
  //   Y is a sequence of just B, so B has two parents.
  //   A is a terminal and B is opaque.
  ForestArena Arena;
  const auto *A = &Arena.createTerminal(tok::identifier, 0);
  const auto *B = &Arena.createOpaque(1, 0);
  const auto *X = &Arena.createSequence(2, 1, {A, B});
  const auto *Y = &Arena.createSequence(2, 2, {B});
  const auto *Z = &Arena.createAmbiguous(2, {X, Y});

  // Gathers the addresses of all nodes produced by Root->descendants().
  auto Collect = [](const ForestNode *Root) {
    std::vector<const ForestNode *> Visited;
    for (const ForestNode &N : Root->descendants())
      Visited.push_back(&N);
    return Visited;
  };

  // From the top: all five nodes, the shared node B appearing only once.
  EXPECT_THAT(Collect(Z), testing::UnorderedElementsAre(A, B, X, Y, Z));
  // From an inner node: only that node and what lies below it.
  EXPECT_THAT(Collect(X), testing::UnorderedElementsAre(X, A, B));
}
} // namespace
} // namespace pseudo
} // namespace clang