forked from OSchip/llvm-project
195 lines
6.9 KiB
C++
195 lines
6.9 KiB
C++
//===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "clang-pseudo/Forest.h"
|
|
#include "clang-pseudo/Token.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/None.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
|
|
namespace clang {
|
|
namespace pseudo {
|
|
|
|
void ForestNode::RecursiveIterator::operator++() {
|
|
auto C = Cur->children();
|
|
// Try to find a child of the current node to descend into.
|
|
for (unsigned I = 0; I < C.size(); ++I) {
|
|
if (Seen.insert(C[I]).second) {
|
|
Stack.push_back({Cur, I});
|
|
Cur = C[I];
|
|
return;
|
|
}
|
|
}
|
|
// Try to find a sibling af an ancestor to advance to.
|
|
for (; !Stack.empty(); Stack.pop_back()) {
|
|
C = Stack.back().Parent->children();
|
|
unsigned &Index = Stack.back().ChildIndex;
|
|
while (++Index < C.size()) {
|
|
if (Seen.insert(C[Index]).second) {
|
|
Cur = C[Index];
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
Cur = nullptr;
|
|
}
|
|
|
|
llvm::iterator_range<ForestNode::RecursiveIterator>
|
|
ForestNode::descendants() const {
|
|
return {RecursiveIterator(this), RecursiveIterator()};
|
|
}
|
|
|
|
std::string ForestNode::dump(const Grammar &G) const {
|
|
switch (kind()) {
|
|
case Ambiguous:
|
|
return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
|
|
case Terminal:
|
|
return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
|
|
startTokenIndex());
|
|
case Sequence:
|
|
return G.dumpRule(rule());
|
|
case Opaque:
|
|
return llvm::formatv("{0} := <opaque>", G.symbolName(symbol()));
|
|
}
|
|
llvm_unreachable("Unhandled node kind!");
|
|
}
|
|
|
|
std::string ForestNode::dumpRecursive(const Grammar &G,
|
|
bool Abbreviated) const {
|
|
using llvm::formatv;
|
|
Token::Index MaxToken = 0;
|
|
// Count visits of nodes so we can mark those seen multiple times.
|
|
llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
|
|
std::function<void(const ForestNode *)> CountVisits =
|
|
[&](const ForestNode *P) {
|
|
MaxToken = std::max(MaxToken, P->startTokenIndex());
|
|
if (VisitCounts[P]++ > 0)
|
|
return; // Don't count children as multiply visited.
|
|
if (P->kind() == Ambiguous)
|
|
llvm::for_each(P->alternatives(), CountVisits);
|
|
else if (P->kind() == Sequence)
|
|
llvm::for_each(P->elements(), CountVisits);
|
|
};
|
|
CountVisits(this);
|
|
|
|
unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size());
|
|
// e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
|
|
std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth);
|
|
|
|
// The box-drawing characters that should be added as a child is rendered.
|
|
struct LineDecoration {
|
|
std::string Prefix; // Prepended to every line.
|
|
llvm::StringRef First; // added to the child's line.
|
|
llvm::StringRef Subsequent; // added to descendants' lines.
|
|
};
|
|
|
|
// We print a "#<id>" for nonterminal forest nodes that are being dumped
|
|
// multiple times.
|
|
llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
|
|
std::string Result;
|
|
constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
|
|
std::function<void(const ForestNode *, Token::Index, llvm::Optional<SymbolID>,
|
|
LineDecoration &LineDec)>
|
|
Dump = [&](const ForestNode *P, Token::Index End,
|
|
llvm::Optional<SymbolID> ElidedParent,
|
|
LineDecoration LineDec) {
|
|
bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
|
|
llvm::ArrayRef<const ForestNode *> Children;
|
|
auto EndOfElement = [&](size_t ChildIndex) {
|
|
return ChildIndex + 1 == Children.size()
|
|
? End
|
|
: Children[ChildIndex + 1]->startTokenIndex();
|
|
};
|
|
if (P->kind() == Ambiguous) {
|
|
Children = P->alternatives();
|
|
} else if (P->kind() == Sequence) {
|
|
Children = P->elements();
|
|
if (Abbreviated) {
|
|
// Abbreviate chains of trivial sequence nodes.
|
|
// A := B, B := C, C := D, D := X Y Z
|
|
// becomes
|
|
// A~D := X Y Z
|
|
//
|
|
// We can't hide nodes that appear multiple times in the tree,
|
|
// because we need to call out their identity with IDs.
|
|
if (Children.size() == 1 && !SharedNode) {
|
|
assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
|
|
EndOfElement(0) == End);
|
|
return Dump(Children[0], End,
|
|
/*ElidedParent=*/ElidedParent.value_or(P->symbol()),
|
|
LineDec);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (End == KEnd)
|
|
Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end");
|
|
else
|
|
Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End);
|
|
Result += LineDec.Prefix;
|
|
Result += LineDec.First;
|
|
if (ElidedParent) {
|
|
Result += G.symbolName(*ElidedParent);
|
|
Result += "~";
|
|
}
|
|
|
|
if (SharedNode && P->kind() != ForestNode::Terminal) {
|
|
auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
|
|
bool First = It.second;
|
|
unsigned ID = It.first->second;
|
|
|
|
// The first time, print as #1. Later, =#1.
|
|
if (First) {
|
|
Result += formatv("{0} #{1}", P->dump(G), ID);
|
|
} else {
|
|
Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
|
|
Children = {}; // Don't walk the children again.
|
|
}
|
|
} else {
|
|
Result.append(P->dump(G));
|
|
}
|
|
Result.push_back('\n');
|
|
|
|
auto OldPrefixSize = LineDec.Prefix.size();
|
|
LineDec.Prefix += LineDec.Subsequent;
|
|
for (size_t I = 0; I < Children.size(); ++I) {
|
|
if (I == Children.size() - 1) {
|
|
LineDec.First = "└─";
|
|
LineDec.Subsequent = " ";
|
|
} else {
|
|
LineDec.First = "├─";
|
|
LineDec.Subsequent = "│ ";
|
|
}
|
|
Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
|
|
llvm::None, LineDec);
|
|
}
|
|
LineDec.Prefix.resize(OldPrefixSize);
|
|
};
|
|
LineDecoration LineDec;
|
|
Dump(this, KEnd, llvm::None, LineDec);
|
|
return Result;
|
|
}
|
|
|
|
llvm::ArrayRef<ForestNode>
|
|
ForestArena::createTerminals(const TokenStream &Code) {
|
|
ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size());
|
|
size_t Index = 0;
|
|
for (const auto &T : Code.tokens()) {
|
|
new (&Terminals[Index])
|
|
ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind),
|
|
/*Start=*/Index, /*TerminalData*/ 0);
|
|
++Index;
|
|
}
|
|
NodeCount = Index;
|
|
return llvm::makeArrayRef(Terminals, Index);
|
|
}
|
|
|
|
} // namespace pseudo
|
|
} // namespace clang
|