[libc++] Simpler Python script for generating a graph of libc++'s header dependencies

My attempts to play around with the old graph_header_deps.py were mostly fruitless;
I needed to modify it in various ways to make it work, and then even when I got it
working, it generated pretty ugly graphs.

Old graph_header_deps.py (after my local changes to simplify the usage)
(producing https://i.imgur.com/zATrsaP.jpg )

    mkdir foo
    time ./graph_header_deps.py --libcxx-only -o foo --clang-command ~/llvm-project/build/bin/clang++
    dot -Tpng < foo/all_headers.dot > old.png
    file old.png

    real    0m37.453s
    old.png: PNG image data, 25882 x 3035, 8-bit/color RGBA, non-interlaced

New graph_header_deps.py
(producing https://i.imgur.com/ZU0G52U.png )

    time ./graph_header_deps.py | dot -Tpng > new.png
    file new.png

    real    0m1.063s
    new.png: PNG image data, 6162 x 1344, 8-bit/color RGBA, non-interlaced

Differential Revision: https://reviews.llvm.org/D99124
This commit is contained in:
Arthur O'Dwyer 2021-03-22 17:38:28 -04:00
parent fdf97bc738
commit a644920a02
2 changed files with 190 additions and 485 deletions

View File

@ -7,202 +7,205 @@
#
#===----------------------------------------------------------------------===##
from argparse import ArgumentParser
import argparse
import os
import shutil
import sys
import shlex
import json
import re
import libcxx.graph as dot
import libcxx.util
def print_and_exit(msg):
sys.stderr.write(msg + '\n')
sys.exit(1)
def libcxx_include_path():
curr_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
include_dir = os.path.join(curr_dir, 'include')
return include_dir
def get_libcxx_headers():
headers = []
include_dir = libcxx_include_path()
for fname in os.listdir(include_dir):
f = os.path.join(include_dir, fname)
if not os.path.isfile(f):
continue
base, ext = os.path.splitext(fname)
if (ext == '' or ext == '.h') and (not fname.startswith('__') or fname == '__config'):
headers += [f]
return headers
def rename_headers_and_remove_test_root(graph):
inc_root = libcxx_include_path()
to_remove = set()
for n in graph.nodes:
assert 'label' in n.attributes
l = n.attributes['label']
if not l.startswith('/') and os.path.exists(os.path.join('/', l)):
l = '/' + l
if l.endswith('.tmp.cpp'):
to_remove.add(n)
if l.startswith(inc_root):
l = l[len(inc_root):]
if l.startswith('/'):
l = l[1:]
n.attributes['label'] = l
for n in to_remove:
graph.removeNode(n)
def is_config_header(h):
return os.path.basename(h) in ['__config', '__libcpp_version']
def remove_non_std_headers(graph):
inc_root = libcxx_include_path()
to_remove = set()
for n in graph.nodes:
test_file = os.path.join(inc_root, n.attributes['label'])
if not test_file.startswith(inc_root):
to_remove.add(n)
for xn in to_remove:
graph.removeNode(xn)
class DependencyCommand(object):
def __init__(self, compile_commands, output_dir, new_std=None):
output_dir = os.path.abspath(output_dir)
if not os.path.isdir(output_dir):
print_and_exit('"%s" must point to a directory' % output_dir)
self.output_dir = output_dir
self.new_std = new_std
cwd,bcmd = self._get_base_command(compile_commands)
self.cwd = cwd
self.base_cmd = bcmd
def is_experimental_header(h):
return ('experimental/' in h) or ('ext/' in h)
def run_for_headers(self, header_list):
outputs = []
for header in header_list:
header_name = os.path.basename(header)
out = os.path.join(self.output_dir, ('%s.dot' % header_name))
outputs += [out]
cmd = self.base_cmd + ["-fsyntax-only", "-Xclang", "-dependency-dot", "-Xclang", "%s" % out, '-xc++', '-']
libcxx.util.executeCommandOrDie(cmd, cwd=self.cwd, input='#include <%s>\n\n' % header_name)
return outputs
def _get_base_command(self, command_file):
commands = None
with open(command_file, 'r') as f:
commands = json.load(f)
for compile_cmd in commands:
file = compile_cmd['file']
if not file.endswith('src/algorithm.cpp'):
def is_support_header(h):
return '__support/' in h
class FileEntry:
def __init__(self, includes, individual_linecount):
self.includes = includes
self.individual_linecount = individual_linecount
self.cumulative_linecount = None # documentation: this gets filled in later
self.is_graph_root = None # documentation: this gets filled in later
def list_all_roots_under(root):
result = []
for root, _, files in os.walk(root):
for fname in files:
if '__support' in root:
pass
elif ('.' in fname and not fname.endswith('.h')):
pass
else:
result.append(root + '/' + fname)
return result
def build_file_entry(fname, options):
assert os.path.exists(fname)
def locate_header_file(h, paths):
for p in paths:
fullname = p + '/' + h
if os.path.exists(fullname):
return fullname
if options.error_on_file_not_found:
raise RuntimeError('Header not found: %s, included by %s' % (h, fname))
return None
local_includes = []
system_includes = []
linecount = 0
with open(fname, 'r') as f:
for line in f.readlines():
linecount += 1
m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
if m is not None:
local_includes.append(m.group(1))
m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
if m is not None:
system_includes.append(m.group(1))
fully_qualified_includes = [
locate_header_file(h, options.search_dirs)
for h in system_includes
] + [
locate_header_file(h, os.path.dirname(fname))
for h in local_includes
]
return FileEntry(
# If file-not-found wasn't an error, then skip non-found files
includes = [h for h in fully_qualified_includes if h is not None],
individual_linecount = linecount,
)
def transitive_closure_of_includes(graph, h1):
visited = set()
def explore(graph, h1):
if h1 not in visited:
visited.add(h1)
for h2 in graph[h1].includes:
explore(graph, h2)
explore(graph, h1)
return visited
def transitively_includes(graph, h1, h2):
return (h1 != h2) and (h2 in transitive_closure_of_includes(graph, h1))
def build_graph(roots, options):
original_roots = list(roots)
graph = {}
while roots:
frontier = roots
roots = []
for fname in frontier:
if fname not in graph:
graph[fname] = build_file_entry(fname, options)
graph[fname].is_graph_root = (fname in original_roots)
roots += graph[fname].includes
for fname, entry in graph.items():
entry.cumulative_linecount = sum(graph[h].individual_linecount for h in transitive_closure_of_includes(graph, fname))
return graph
def get_graphviz(graph, options):
def get_friendly_id(fname):
i = fname.index('include/')
assert(i >= 0)
result = fname[i+8:]
return result
def get_decorators(fname, entry):
result = ''
if entry.is_graph_root:
result += ' [style=bold]'
if options.show_individual_line_counts and options.show_cumulative_line_counts:
result += ' [label="%s\\n%d indiv, %d cumul"]' % (
get_friendly_id(fname), entry.individual_linecount, entry.cumulative_linecount
)
elif options.show_individual_line_counts:
result += ' [label="%s\\n%d indiv"]' % (get_friendly_id(fname), entry.individual_linecount)
elif options.show_cumulative_line_counts:
result += ' [label="%s\\n%d cumul"]' % (get_friendly_id(fname), entry.cumulative_linecount)
return result
result = ''
result += 'strict digraph {\n'
result += ' rankdir=LR;\n'
result += ' layout=dot;\n\n'
for fname, entry in graph.items():
result += ' "%s"%s;\n' % (get_friendly_id(fname), get_decorators(fname, entry))
for h in entry.includes:
if any(transitively_includes(graph, i, h) for i in entry.includes) and not options.show_transitive_edges:
continue
wd = compile_cmd['directory']
cmd_str = compile_cmd['command']
cmd = shlex.split(cmd_str)
out_arg = cmd.index('-o')
del cmd[out_arg]
del cmd[out_arg]
in_arg = cmd.index('-c')
del cmd[in_arg]
del cmd[in_arg]
if self.new_std is not None:
for f in cmd:
if f.startswith('-std='):
del cmd[cmd.index(f)]
cmd += [self.new_std]
break
return wd, cmd
print_and_exit("failed to find command to build algorithm.cpp")
def post_process_outputs(outputs, libcxx_only):
graphs = []
for dot_file in outputs:
g = dot.DirectedGraph.fromDotFile(dot_file)
rename_headers_and_remove_test_root(g)
if libcxx_only:
remove_non_std_headers(g)
graphs += [g]
g.toDotFile(dot_file)
return graphs
def build_canonical_names(graphs):
canonical_names = {}
next_idx = 0
for g in graphs:
for n in g.nodes:
if n.attributes['label'] not in canonical_names:
name = 'header_%d' % next_idx
next_idx += 1
canonical_names[n.attributes['label']] = name
return canonical_names
class CanonicalGraphBuilder(object):
def __init__(self, graphs):
self.graphs = list(graphs)
self.canonical_names = build_canonical_names(graphs)
def build(self):
self.canonical = dot.DirectedGraph('all_headers')
for k,v in self.canonical_names.iteritems():
n = dot.Node(v, edges=[], attributes={'shape': 'box', 'label': k})
self.canonical.addNode(n)
for g in self.graphs:
self._merge_graph(g)
return self.canonical
def _merge_graph(self, g):
for n in g.nodes:
new_name = self.canonical.getNodeByLabel(n.attributes['label']).id
for e in n.edges:
to_node = self.canonical.getNodeByLabel(e.attributes['label']).id
self.canonical.addEdge(new_name, to_node)
def main():
parser = ArgumentParser(
description="Generate a graph of libc++ header dependencies")
parser.add_argument(
'-v', '--verbose', dest='verbose', action='store_true', default=False)
parser.add_argument(
'-o', '--output', dest='output', required=True,
help='The output file. stdout is used if not given',
type=str, action='store')
parser.add_argument(
'--no-compile', dest='no_compile', action='store_true', default=False)
parser.add_argument(
'--libcxx-only', dest='libcxx_only', action='store_true', default=False)
parser.add_argument(
'compile_commands', metavar='compile-commands-file',
help='the compile commands database')
args = parser.parse_args()
builder = DependencyCommand(args.compile_commands, args.output, new_std='-std=c++2a')
if not args.no_compile:
outputs = builder.run_for_headers(get_libcxx_headers())
graphs = post_process_outputs(outputs, args.libcxx_only)
else:
outputs = [os.path.join(args.output, l) for l in os.listdir(args.output) if not l.endswith('all_headers.dot')]
graphs = [dot.DirectedGraph.fromDotFile(o) for o in outputs]
canon = CanonicalGraphBuilder(graphs).build()
canon.toDotFile(os.path.join(args.output, 'all_headers.dot'))
all_graphs = graphs + [canon]
found_cycles = False
for g in all_graphs:
cycle_finder = dot.CycleFinder(g)
all_cycles = cycle_finder.findCyclesInGraph()
if len(all_cycles):
found_cycles = True
print("cycle in graph %s" % g.name)
for start, path in all_cycles:
print("Cycle for %s = %s" % (start, path))
if not found_cycles:
print("No cycles found")
result += ' "%s" -> "%s";\n' % (get_friendly_id(fname), get_friendly_id(h))
result += '}\n'
return result
if __name__ == '__main__':
main()
parser = argparse.ArgumentParser(description='Produce a dependency graph of libc++ headers, in GraphViz dot format.')
parser.add_argument('--root', default=None, metavar='FILE', help='File or directory to be the root of the dependency graph')
parser.add_argument('-I', dest='search_dirs', default=[], action='append', metavar='DIR', help='Path(s) to search for local includes')
parser.add_argument('--show-transitive-edges', action='store_true', help='Show edges to headers that are transitively included anyway')
parser.add_argument('--show-config-headers', action='store_true', help='Show headers named __config')
parser.add_argument('--show-experimental-headers', action='store_true', help='Show headers in the experimental/ and ext/ directories')
parser.add_argument('--show-support-headers', action='store_true', help='Show headers in the __support/ directory')
parser.add_argument('--show-individual-line-counts', action='store_true', help='Include an individual line count in each node')
parser.add_argument('--show-cumulative-line-counts', action='store_true', help='Include a total line count in each node')
parser.add_argument('--error-on-file-not-found', action='store_true', help="Don't ignore failure to open an #included file")
options = parser.parse_args()
if options.root is None:
curr_dir = os.path.dirname(os.path.abspath(__file__))
options.root = os.path.join(curr_dir, '../include')
if options.search_dirs == [] and os.path.isdir(options.root):
options.search_dirs = [options.root]
options.root = os.path.abspath(options.root)
options.search_dirs = [os.path.abspath(p) for p in options.search_dirs]
if os.path.isdir(options.root):
roots = list_all_roots_under(options.root)
elif os.path.isfile(options.root):
roots = [options.root]
else:
raise RuntimeError('--root seems to be invalid')
graph = build_graph(roots, options)
# Eliminate certain kinds of "visual noise" headers, if asked for.
def should_keep(fname):
return all([
options.show_config_headers or not is_config_header(fname),
options.show_experimental_headers or not is_experimental_header(fname),
options.show_support_headers or not is_support_header(fname),
])
for fname in list(graph.keys()):
if should_keep(fname):
graph[fname].includes = [h for h in graph[fname].includes if should_keep(h)]
else:
del graph[fname]
# Look for cycles.
no_cycles_detected = True
for fname, entry in graph.items():
for h in entry.includes:
if transitively_includes(graph, h, fname):
print('Cycle detected between %s and %s' % (fname, h))
no_cycles_detected = False
assert no_cycles_detected
print(get_graphviz(graph, options))

View File

@ -1,298 +0,0 @@
#===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===----------------------------------------------------------------------===##
import platform
import os
from collections import defaultdict
import re
import libcxx.util
class DotEmitter(object):
def __init__(self, name):
self.name = name
self.node_strings = {}
self.edge_strings = []
def addNode(self, node):
res = str(node.id)
if len(node.attributes):
attr_strs = []
for k,v in node.attributes.iteritems():
attr_strs += ['%s="%s"' % (k, v)]
res += ' [ %s ]' % (', '.join(attr_strs))
res += ';'
assert node.id not in self.node_strings
self.node_strings[node.id] = res
def addEdge(self, n1, n2):
res = '%s -> %s;' % (n1.id, n2.id)
self.edge_strings += [res]
def node_key(self, n):
id = n.id
assert id.startswith('\w*\d+')
def emit(self):
node_definitions_list = []
sorted_keys = self.node_strings.keys()
sorted_keys.sort()
for k in sorted_keys:
node_definitions_list += [self.node_strings[k]]
node_definitions = '\n '.join(node_definitions_list)
edge_list = '\n '.join(self.edge_strings)
return '''
digraph "{name}" {{
{node_definitions}
{edge_list}
}}
'''.format(name=self.name, node_definitions=node_definitions, edge_list=edge_list).strip()
class DotReader(object):
def __init__(self):
self.graph = DirectedGraph(None)
def abortParse(self, msg="bad input"):
raise Exception(msg)
def parse(self, data):
lines = [l.strip() for l in data.splitlines() if l.strip()]
maxIdx = len(lines)
idx = 0
if not self.parseIntroducer(lines[idx]):
self.abortParse('failed to parse introducer')
idx += 1
while idx < maxIdx:
if self.parseNodeDefinition(lines[idx]) or self.parseEdgeDefinition(lines[idx]):
idx += 1
continue
else:
break
if idx == maxIdx or not self.parseCloser(lines[idx]):
self.abortParse("no closing } found")
return self.graph
def parseEdgeDefinition(self, l):
edge_re = re.compile('^\s*(\w+)\s+->\s+(\w+);\s*$')
m = edge_re.match(l)
if not m:
return False
n1 = m.group(1)
n2 = m.group(2)
self.graph.addEdge(n1, n2)
return True
def parseAttributes(self, raw_str):
attribute_re = re.compile('^\s*(\w+)="([^"]+)"')
parts = [l.strip() for l in raw_str.split(',') if l.strip()]
attribute_dict = {}
for a in parts:
m = attribute_re.match(a)
if not m:
self.abortParse('Bad attribute "%s"' % a)
attribute_dict[m.group(1)] = m.group(2)
return attribute_dict
def parseNodeDefinition(self, l):
node_definition_re = re.compile('^\s*(\w+)\s+\[([^\]]+)\]\s*;\s*$')
m = node_definition_re.match(l)
if not m:
return False
id = m.group(1)
attributes = self.parseAttributes(m.group(2))
n = Node(id, edges=[], attributes=attributes)
self.graph.addNode(n)
return True
def parseIntroducer(self, l):
introducer_re = re.compile('^\s*digraph "([^"]+)"\s+{\s*$')
m = introducer_re.match(l)
if not m:
return False
self.graph.setName(m.group(1))
return True
def parseCloser(self, l):
closer_re = re.compile('^\s*}\s*$')
m = closer_re.match(l)
if not m:
return False
return True
class Node(object):
def __init__(self, id, edges=[], attributes={}):
self.id = id
self.edges = set(edges)
self.attributes = dict(attributes)
def addEdge(self, dest):
self.edges.add(dest)
def __eq__(self, another):
if isinstance(another, str):
return another == self.id
return hasattr(another, 'id') and self.id == another.id
def __hash__(self):
return hash(self.id)
def __str__(self):
return self.attributes["label"]
def __repr__(self):
return self.__str__()
res = self.id
if len(self.attributes):
attr = []
for k,v in self.attributes.iteritems():
attr += ['%s="%s"' % (k, v)]
res += ' [%s ]' % (', '.join(attr))
return res
class DirectedGraph(object):
def __init__(self, name=None, nodes=None):
self.name = name
self.nodes = set() if nodes is None else set(nodes)
def setName(self, n):
self.name = n
def _getNode(self, n_or_id):
if isinstance(n_or_id, Node):
return n_or_id
return self.getNode(n_or_id)
def getNode(self, str_id):
assert isinstance(str_id, str) or isinstance(str_id, Node)
for s in self.nodes:
if s == str_id:
return s
return None
def getNodeByLabel(self, l):
found = None
for s in self.nodes:
if s.attributes['label'] == l:
assert found is None
found = s
return found
def addEdge(self, n1, n2):
n1 = self._getNode(n1)
n2 = self._getNode(n2)
assert n1 in self.nodes
assert n2 in self.nodes
n1.addEdge(n2)
def addNode(self, n):
self.nodes.add(n)
def removeNode(self, n):
n = self._getNode(n)
for other_n in self.nodes:
if other_n == n:
continue
new_edges = set()
for e in other_n.edges:
if e != n:
new_edges.add(e)
other_n.edges = new_edges
self.nodes.remove(n)
def toDot(self):
dot = DotEmitter(self.name)
for n in self.nodes:
dot.addNode(n)
for ndest in n.edges:
dot.addEdge(n, ndest)
return dot.emit()
@staticmethod
def fromDot(str):
reader = DotReader()
graph = reader.parse(str)
return graph
@staticmethod
def fromDotFile(fname):
with open(fname, 'r') as f:
return DirectedGraph.fromDot(f.read())
def toDotFile(self, fname):
with open(fname, 'w') as f:
f.write(self.toDot())
def __repr__(self):
return self.toDot()
class BFS(object):
def __init__(self, start):
self.visited = set()
self.to_visit = []
self.start = start
def __nonzero__(self):
return len(self.to_visit) != 0
def empty(self):
return len(self.to_visit) == 0
def push_back(self, node):
assert node not in self.visited
self.visited.add(node)
self.to_visit += [node]
def maybe_push_back(self, node):
if node in self.visited:
return
self.push_back(node)
def pop_front(self):
assert len(self.to_visit)
elem = self.to_visit[0]
del self.to_visit[0]
return elem
def seen(self, n):
return n in self.visited
class CycleFinder(object):
def __init__(self, graph):
self.graph = graph
def findCycleForNode(self, n):
assert n in self.graph.nodes
all_paths = {}
all_cycles = []
bfs = BFS(n)
bfs.push_back(n)
all_paths[n] = [n]
while bfs:
n = bfs.pop_front()
assert n in all_paths
for e in n.edges:
en = self.graph.getNode(e)
if not bfs.seen(en):
new_path = list(all_paths[n])
new_path.extend([en])
all_paths[en] = new_path
bfs.push_back(en)
if en == bfs.start:
all_cycles += [all_paths[n]]
return all_cycles
def findCyclesInGraph(self):
all_cycles = []
for n in self.graph.nodes:
cycle = self.findCycleForNode(n)
if cycle:
all_cycles += [(n, cycle)]
return all_cycles