[pseudo] Add `clang-pseudo -html-forest=<output.html>`, an HTML forest browser

It generates a standalone HTML file with all needed JS/CSS embedded.
This allows navigating the tree both with a tree widget and in the code,
inspecting nodes, and selecting ambiguous alternatives.

Demo: https://htmlpreview.github.io/?https://gist.githubusercontent.com/sam-mccall/03882f7499d293196594e8a50599a503/raw/ASTSignals.cpp.html

Differential Revision: https://reviews.llvm.org/D130004
This commit is contained in:
Sam McCall 2022-07-18 14:52:11 +02:00
parent 5bce73ba75
commit c91ce94144
8 changed files with 645 additions and 1 deletions

View File

@ -0,0 +1,8 @@
// RUN: clang-pseudo -source %s -html-forest=%t.html
// RUN: FileCheck %s < %t.html
int main() {
}
// Sanity check for some obvious strings.
// CHECK-DAG: <body>
// CHECK-DAG: "compound-statement"
// CHECK-DAG: main

View File

@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS support)
add_clang_tool(clang-pseudo
ClangPseudo.cpp
HTMLForest.cpp
)
clang_target_link_libraries(clang-pseudo
@ -16,3 +17,13 @@ target_link_libraries(clang-pseudo
clangPseudoCLI
)
add_custom_command(OUTPUT HTMLForestResources.inc
COMMAND "${Python3_EXECUTABLE}" bundle_resources.py
${CMAKE_CURRENT_BINARY_DIR}/HTMLForestResources.inc
HTMLForest.css HTMLForest.js HTMLForest.html
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Bundling HTMLForest resources"
DEPENDS bundle_resources.py HTMLForest.css HTMLForest.js HTMLForest.html
VERBATIM)
add_custom_target(clang-pseudo-resources DEPENDS HTMLForestResources.inc)
add_dependencies(clang-pseudo clang-pseudo-resources)

View File

@ -8,6 +8,7 @@
#include "clang-pseudo/Bracket.h"
#include "clang-pseudo/DirectiveTree.h"
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Language.h"
#include "clang-pseudo/Token.h"
@ -18,7 +19,6 @@
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
@ -45,6 +45,8 @@ static opt<bool>
desc("Strip directives and select conditional sections"));
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
static opt<std::string> HTMLForest("html-forest",
desc("output file for HTML forest"));
static opt<std::string> StartSymbol("start-symbol",
desc("specify the start symbol to parse"),
init("translation-unit"));
@ -62,6 +64,9 @@ static std::string readOrDie(llvm::StringRef Path) {
namespace clang {
namespace pseudo {
// Defined in HTMLForest.cpp
void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &,
const ForestNode &Root, const TokenStream &);
namespace {
struct NodeStats {
@ -150,6 +155,17 @@ int main(int argc, char *argv[]) {
if (PrintForest)
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
if (HTMLForest.getNumOccurrences()) {
std::error_code EC;
llvm::raw_fd_ostream HTMLOut(HTMLForest, EC);
if (EC) {
llvm::errs() << "Couldn't write " << HTMLForest << ": " << EC.message()
<< "\n";
return 2;
}
clang::pseudo::writeHTMLForest(HTMLOut, Lang.G, Root, *ParseableStream);
}
if (PrintStatistics) {
llvm::outs() << "Forest bytes: " << Arena.bytes()
<< " nodes: " << Arena.nodeCount() << "\n";

View File

@ -0,0 +1,188 @@
//===-- HTMLForest.cpp - browser-based parse forest explorer
//---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The plain text forest node dump (clang-pseudo -print-forest) is useful but
// hard to reconcile with the code being examined, especially when it is large.
//
// HTMLForest produces a self-contained HTML file containing both the code and
// the forest representation, linking them interactively with javascript.
// At any given time, a single parse tree is shown (ambiguities resolved).
// The user can switch between ambiguous alternatives.
//
// +-------+---------------+
// | | +-----+|
// | #tree | #code |#info||
// | | +-----+|
// | | |
// +-------+---------------+
//
// #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest.
// (It is a simple tree, not a DAG, because ambiguities have been resolved).
// Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER).
//
// #code is the source code, annotated with <span>s marking the node ranges.
// These spans are usually invisible (exception: ambiguities are marked), but
// they are used to show and change the selection.
//
// #info is a floating box that shows details of the currently selected node:
// - rule (for sequence nodes). Abbreviated rules are also shown.
// - alternatives (for ambiguous nodes). The user can choose an alternative.
// - ancestors. The parent nodes show how this node fits in translation-unit.
//
// There are two types of 'active' node:
// - *highlight* is what the cursor is over, and is colored blue.
// Near ancestors are shaded faintly (onion-skin) to show local structure.
// - *selection* is set by clicking.
// The #info box shows the selection, and selected nodes have a dashed ring.
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace pseudo {
namespace {
// Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc
#include "HTMLForestResources.inc"
struct Writer {
llvm::raw_ostream &Out;
const Grammar &G;
const ForestNode &Root;
const TokenStream &Stream;
void write() {
Out << "<!doctype html>\n";
tag("html", [&] {
tag("head", [&] {
tag("title", [&] { Out << "HTMLForest"; });
tag("script", [&] { Out << HTMLForest_js; });
tag("style", [&] { Out << HTMLForest_css; });
tag("script", [&] {
Out << "var forest=";
writeForestJSON();
Out << ";";
});
tag("pre id='hidden-code' hidden", [&] { writeCode(); });
});
tag("body", [&] { Out << HTMLForest_html; });
});
}
void writeCode();
void writeForestJSON();
void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) {
Out << "<" << Opener << ">";
Body();
Out << "</" << Opener.split(' ').first << ">\n";
}
};
void Writer::writeCode() {
// This loop (whitespace logic) is cribbed from TokenStream::Print.
bool FirstToken = true;
unsigned LastLine = -1;
StringRef LastText;
for (const auto &T : Stream.tokens()) {
StringRef Text = T.text();
if (FirstToken) {
FirstToken = false;
} else if (T.Line == LastLine) {
if (LastText.data() + LastText.size() != Text.data())
Out << ' ';
} else {
Out << " \n"; // Extra space aids selection.
Out.indent(T.Indent);
}
Out << "<span class='token' id='t" << Stream.index(T) << "'>";
llvm::printHTMLEscaped(Text, Out);
Out << "</span>";
LastLine = T.Line;
LastText = Text;
}
if (!FirstToken)
Out << '\n';
}
// Writes a JSON array of forest nodes. Items are e.g.:
// {kind:'sequence', symbol:'compound-stmt', children:[5,8,33],
// rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'}
// {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3}
// {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'}
void Writer::writeForestJSON() {
// This is the flat array of nodes: the index into this array is the node ID.
std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence;
llvm::DenseMap<const ForestNode *, unsigned> Index;
auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned {
auto R = Index.try_emplace(N, Sequence.size());
if (R.second)
Sequence.push_back({N, End});
return R.first->second;
};
AssignID(&Root, Stream.tokens().size());
auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); };
llvm::json::OStream Out(this->Out, 2);
Out.array([&] {
for (unsigned I = 0; I < Sequence.size(); ++I) {
const ForestNode *N = Sequence[I].first;
Token::Index End = Sequence[I].second;
Out.object([&] {
Out.attribute("symbol", G.symbolName(N->symbol()));
switch (N->kind()) {
case ForestNode::Terminal:
Out.attribute("kind", "terminal");
Out.attribute("token", TokenID(N->startTokenIndex()));
break;
case ForestNode::Sequence:
Out.attribute("kind", "sequence");
Out.attribute("rule", G.dumpRule(N->rule()));
break;
case ForestNode::Ambiguous:
Out.attribute("kind", "ambiguous");
Out.attribute("selected", AssignID(N->children().front(), End));
break;
case ForestNode::Opaque:
Out.attribute("kind", "opaque");
Out.attribute("firstToken", TokenID(N->startTokenIndex()));
// [firstToken, lastToken] is a closed range.
// If empty, lastToken is omitted.
if (N->startTokenIndex() != End)
Out.attribute("lastToken", TokenID(End - 1));
break;
}
auto Children = N->children();
if (!Children.empty())
Out.attributeArray("children", [&] {
for (unsigned I = 0; I < Children.size(); ++I)
Out.value(AssignID(Children[I],
I + 1 == Children.size()
? End
: Children[I + 1]->startTokenIndex()));
});
});
}
});
}
} // namespace
// We only accept the derived stream here.
// FIXME: allow the original stream instead?
void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G,
const ForestNode &Root, const TokenStream &Stream) {
Writer{OS, G, Root, Stream}.write();
}
} // namespace pseudo
} // namespace clang

View File

@ -0,0 +1,93 @@
body {
position: absolute;
top: 0;
bottom: 0;
right: 0;
left: 0;
display: flex;
align-items: stretch;
margin: 0;
font-family: sans-serif;
white-space: nowrap;
height: 100%;
}
body > * {
overflow-y: auto; /* Scroll sections independently*/
margin: 0;
}
#code {
font-size: 18px;
line-height: 36px;
flex-grow: 1;
padding-right: 10em; /* Leave space for #info */
}
#code span {
padding: 9px 0; /* No "gaps" between lines due to line-height */
}
.node.ambiguous::before, .ancestors.ambiguous::after, .tree-node.ambiguous > header::after {
content: /*the thinking man's emoji*/'\01F914';
}
#info {
position: fixed;
right: 2em;
top: 1em;
width: 25em;
border: 1px solid black;
min-height: 20em;
background-color: whiteSmoke;
overflow-x: clip;
box-shadow: 3px 3px 5px rgba(0,0,0,0.2);
}
#info header {
background-color: black;
color: white;
font-size: larger;
padding: 0.5em;
}
#info.ambiguous header { background-color: #803; }
#info.sequence header { background-color: darkBlue; }
#info.terminal header { background-color: darkGreen; }
#info.opaque header { background-color: orangeRed; }
#i_kind {
float: right;
font-size: small;
}
#info section {
padding: 0.5em;
border-top: 1px solid lightGray;
overflow-x: auto;
}
#i_ancestors { font-size: small; }
#tree {
flex-grow: 0;
min-width: 20em;
margin-right: 1em;
border-right: 1px solid darkGray;
background-color: azure;
font-size: small;
overflow-x: auto;
resize: horizontal;
}
#tree ul {
margin: 0;
display: inline-block;
padding-left: 6px;
border-left: 1px solid rgba(0,0,0,0.2);
list-style: none;
}
#tree > ul { border-left: none; }
.tree-node.selected > header .name { font-weight: bold; }
.tree-node.terminal .name { font-family: monospace; }
.tree-node.ambiguous > header .name { color: #803; font-weight: bold; }
.tree-node.sequence > header .name { color: darkBlue; }
.tree-node.terminal > header .name { color: darkGreen; }
.tree-node.opaque > header .name { color: orangeRed; }
.selected { outline: 1px dashed black; }
.abbrev { opacity: 50%; }
.abbrev::after { content: '~'; }
.opaque { background-color: bisque; }

View File

@ -0,0 +1,15 @@
<div id="tree"><ul></ul></div>
<pre id="code"></pre>
<div id="info" hidden>
<header>
<span id="i_symbol"></span>
<span id="i_kind"></span>
</header>
<section>
<div id="i_rules"></div>
<div id="i_alternatives"></div>
</section>
<section>
<div id="i_ancestors"></div>
</section>
</div>

View File

@ -0,0 +1,290 @@
// The global map of forest node index => NodeView.
views = [];
// NodeView is a visible forest node.
// It has an entry in the navigation tree, and a span in the code itself.
// Each NodeView is associated with a forest node, but not all nodes have views:
// - nodes not reachable though current ambiguity selection
// - trivial "wrapping" sequence nodes are abbreviated away
class NodeView {
// Builds a node representing forest[index], or its target if it is a wrapper.
// Registers the node in the global map.
static make(index, parent, abbrev) {
var node = forest[index];
if (node.kind == 'sequence' && node.children.length == 1 &&
forest[node.children[0]].kind != 'ambiguous') {
abbrev ||= [];
abbrev.push(index);
return NodeView.make(node.children[0], parent, abbrev);
}
return views[index] = new NodeView(index, parent, node, abbrev);
}
constructor(index, parent, node, abbrev) {
this.abbrev = abbrev || [];
this.parent = parent;
this.children =
(node.kind == 'ambiguous' ? [ node.selected ] : node.children || [])
.map((c) => NodeView.make(c, this));
this.index = index;
this.node = node;
views[index] = this;
this.span = this.buildSpan();
this.tree = this.buildTree();
}
// Replaces the token sequence in #code with a <span class=node>.
buildSpan() {
var elt = document.createElement('span');
elt.dataset['index'] = this.index;
elt.classList.add("node");
elt.classList.add("selectable-node");
elt.classList.add(this.node.kind);
var begin = null, end = null;
if (this.children.length != 0) {
begin = this.children[0].span;
end = this.children[this.children.length - 1].span.nextSibling;
} else if (this.node.kind == 'terminal') {
begin = document.getElementById(this.node.token);
end = begin.nextSibling;
} else if (this.node.kind == 'opaque') {
begin = document.getElementById(this.node.firstToken);
end = (this.node.lastToken == null)
? begin
: document.getElementById(this.node.lastToken).nextSibling;
}
var parent = begin.parentNode;
splice(begin, end, elt);
parent.insertBefore(elt, end);
return elt;
}
// Returns a (detached) <li class=tree-node> suitable for use in #tree.
buildTree() {
var elt = document.createElement('li');
elt.dataset['index'] = this.index;
elt.classList.add('tree-node');
elt.classList.add('selectable-node');
elt.classList.add(this.node.kind);
var header = document.createElement('header');
elt.appendChild(header);
if (this.abbrev.length > 0) {
var abbrev = document.createElement('span');
abbrev.classList.add('abbrev');
abbrev.innerText = forest[this.abbrev[0]].symbol;
header.appendChild(abbrev);
}
var name = document.createElement('span');
name.classList.add('name');
name.innerText = this.node.symbol;
header.appendChild(name);
if (this.children.length != 0) {
var sublist = document.createElement('ul');
this.children.forEach((c) => sublist.appendChild(c.tree));
elt.appendChild(sublist);
}
return elt;
}
// Make this view visible on the screen by scrolling if needed.
scrollVisible() {
scrollIntoViewV(document.getElementById('tree'), this.tree.firstChild);
scrollIntoViewV(document.getElementById('code'), this.span);
}
// Fill #info with details of this node.
renderInfo() {
document.getElementById('info').classList = this.node.kind;
document.getElementById('i_symbol').innerText = this.node.symbol;
document.getElementById('i_kind').innerText = this.node.kind;
// For sequence nodes, add LHS := RHS rule.
// If this node abbreviates trivial sequences, we want those rules too.
var rules = document.getElementById('i_rules');
rules.textContent = '';
function addRule(i) {
var ruleText = forest[i].rule;
if (ruleText == null)
return;
var rule = document.createElement('div');
rule.classList.add('rule');
rule.innerText = ruleText;
rules.insertBefore(rule, rules.firstChild);
}
this.abbrev.forEach(addRule);
addRule(this.index);
// For ambiguous nodes, show a selectable list of alternatives.
var alternatives = document.getElementById('i_alternatives');
alternatives.textContent = '';
var that = this;
function addAlternative(i) {
var altNode = forest[i];
var text = altNode.rule || altNode.kind;
var alt = document.createElement('div');
alt.classList.add('alternative');
alt.innerText = text;
alt.dataset['index'] = i;
alt.dataset['parent'] = that.index;
if (i == that.node.selected)
alt.classList.add('selected');
alternatives.appendChild(alt);
}
if (this.node.kind == 'ambiguous')
this.node.children.forEach(addAlternative);
// Show the stack of ancestor nodes.
// The part of each rule that leads to the current node is bolded.
var ancestors = document.getElementById('i_ancestors');
ancestors.textContent = '';
var child = this;
for (var view = this.parent; view != null;
child = view, view = view.parent) {
var indexInParent = view.children.indexOf(child);
var ctx = document.createElement('div');
ctx.classList.add('ancestors');
ctx.classList.add('selectable-node');
ctx.classList.add(view.node.kind);
if (view.node.rule) {
// Rule syntax is LHS := RHS1 [annotation] RHS2.
// We walk through the chunks and bold the one at parentInIndex.
var chunkCount = 0;
ctx.innerHTML = view.node.rule.replaceAll(/[^ ]+/g, function(match) {
if (!(match.startsWith('[') && match.endsWith(']')) /*annotations*/
&& chunkCount++ == indexInParent + 2 /*skip LHS :=*/)
return '<b>' + match + '</b>';
return match;
});
} else /*ambiguous*/ {
ctx.innerHTML = '<b>' + view.node.symbol + '</b>';
}
ctx.dataset['index'] = view.index;
if (view.abbrev.length > 0) {
var abbrev = document.createElement('span');
abbrev.classList.add('abbrev');
abbrev.innerText = forest[view.abbrev[0]].symbol;
ctx.insertBefore(abbrev, ctx.firstChild);
}
ctx.dataset['index'] = view.index;
ancestors.appendChild(ctx, ancestors.firstChild);
}
}
remove() {
this.children.forEach((c) => c.remove());
splice(this.span.firstChild, null, this.span.parentNode,
this.span.nextSibling);
detach(this.span);
delete views[this.index];
}
};
var selection = null;
function selectView(view) {
var old = selection;
selection = view;
if (view == old)
return;
if (old) {
old.tree.classList.remove('selected');
old.span.classList.remove('selected');
}
document.getElementById('info').hidden = (view == null);
if (!view)
return;
view.tree.classList.add('selected');
view.span.classList.add('selected');
view.renderInfo();
view.scrollVisible();
}
// To highlight nodes on hover, we create dynamic CSS rules of the form
// .selectable-node[data-index="42"] { background-color: blue; }
// This avoids needing to find all the related nodes and update their classes.
var highlightSheet = new CSSStyleSheet();
document.adoptedStyleSheets.push(highlightSheet);
function highlightView(view) {
var text = '';
for (const color of ['#6af', '#bbb', '#ddd', '#eee']) {
if (view == null)
break;
text += '.selectable-node[data-index="' + view.index + '"] '
text += '{ background-color: ' + color + '; }\n';
view = view.parent;
}
highlightSheet.replace(text);
}
// Select which branch of an ambiguous node is taken.
function chooseAlternative(parent, index) {
var parentView = views[parent];
parentView.node.selected = index;
var oldChild = parentView.children[0];
oldChild.remove();
var newChild = NodeView.make(index, parentView);
parentView.children[0] = newChild;
parentView.tree.lastChild.replaceChild(newChild.tree, oldChild.tree);
highlightView(null);
// Force redraw of the info box.
selectView(null);
selectView(parentView);
}
// Attach event listeners and build content once the document is ready.
document.addEventListener("DOMContentLoaded", function() {
var code = document.getElementById('code');
var tree = document.getElementById('tree');
var ancestors = document.getElementById('i_ancestors');
var alternatives = document.getElementById('i_alternatives');
[code, tree, ancestors].forEach(function(container) {
container.addEventListener('click', function(e) {
var nodeElt = e.target.closest('.selectable-node');
selectView(nodeElt && views[Number(nodeElt.dataset['index'])]);
});
container.addEventListener('mousemove', function(e) {
var nodeElt = e.target.closest('.selectable-node');
highlightView(nodeElt && views[Number(nodeElt.dataset['index'])]);
});
});
alternatives.addEventListener('click', function(e) {
var altElt = e.target.closest('.alternative');
if (altElt)
chooseAlternative(Number(altElt.dataset['parent']),
Number(altElt.dataset['index']));
});
// The HTML provides #code content in a hidden DOM element, move it.
var hiddenCode = document.getElementById('hidden-code');
splice(hiddenCode.firstChild, hiddenCode.lastChild, code);
detach(hiddenCode);
// Build the tree of NodeViews and attach to #tree.
tree.firstChild.appendChild(NodeView.make(0).tree);
});
// Helper DOM functions //
// Moves the sibling range [first, until) into newParent.
function splice(first, until, newParent, before) {
for (var next = first; next != until;) {
var elt = next;
next = next.nextSibling;
newParent.insertBefore(elt, before);
}
}
function detach(node) { node.parentNode.removeChild(node); }
// Like scrollIntoView, but vertical only!
function scrollIntoViewV(container, elt) {
if (container.scrollTop > elt.offsetTop + elt.offsetHeight ||
container.scrollTop + container.clientHeight < elt.offsetTop)
container.scrollTo({top : elt.offsetTop, behavior : 'smooth'});
}

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python3
# Simple bundler of files into string constants.
#
# Usage: bundle_resources.py foo.inc a.js path/b.css ...
# Produces foo.inc containing:
# const char a_js[] = "...";
# const char b_css[] = "...";
import os
import sys
outfile = sys.argv[1]
infiles = sys.argv[2:]
with open(outfile, 'w') as out:
for filename in infiles:
varname = os.path.basename(filename).replace('.', '_')
out.write("const char " + varname + "[] = \n");
# MSVC limits each chunk of string to 2k.
# Not quite enough for the JS file, so split by lines.
# The overall limit is 64k, which ought to be enough for anyone.
for line in open(filename).read().split('\n'):
out.write(' R"x(' + line + ')x" "\\n"\n' )
out.write(' ;\n');