[libc++] Add utility to generate and display libc++'s header dependency graph.
2020-02-15 18:26:29 -05:00 · 2020-02-15 18:26:29 -05:00 · 99382e450f
parent 5d22b6a87f
commit 99382e450f
3 changed files with 519 additions and 0 deletions
--- a/libcxx/utils/graph_header_deps.py
+++ b/libcxx/utils/graph_header_deps.py
@ -0,0 +1,208 @@
+#!/usr/bin/env python
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+from argparse import ArgumentParser
+import os
+import shutil
+import sys
+import shlex
+import json
+import re
+import libcxx.graph as dot
+import libcxx.util
+
+def print_and_exit(msg):
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
+
+def libcxx_include_path():
+    curr_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    include_dir = os.path.join(curr_dir, 'include')
+    return include_dir
+
+def get_libcxx_headers():
+    headers = []
+    include_dir = libcxx_include_path()
+    for fname in os.listdir(include_dir):
+        f = os.path.join(include_dir, fname)
+        if not os.path.isfile(f):
+            continue
+        base, ext = os.path.splitext(fname)
+        if (ext == '' or ext == '.h') and (not fname.startswith('__') or fname == '__config'):
+            headers += [f]
+    return headers
+
+
+def rename_headers_and_remove_test_root(graph):
+    inc_root = libcxx_include_path()
+    to_remove = set()
+    for n in graph.nodes:
+        assert 'label' in n.attributes
+        l = n.attributes['label']
+        if not l.startswith('/') and os.path.exists(os.path.join('/', l)):
+            l = '/' + l
+        if l.endswith('.tmp.cpp'):
+            to_remove.add(n)
+        if l.startswith(inc_root):
+            l = l[len(inc_root):]
+            if l.startswith('/'):
+                l = l[1:]
+        n.attributes['label'] = l
+    for n in to_remove:
+        graph.removeNode(n)
+
+def remove_non_std_headers(graph):
+    inc_root = libcxx_include_path()
+    to_remove = set()
+    for n in graph.nodes:
+        test_file = os.path.join(inc_root, n.attributes['label'])
+        if not test_file.startswith(inc_root):
+            to_remove.add(n)
+    for xn in to_remove:
+        graph.removeNode(xn)
+
+class DependencyCommand(object):
+    def __init__(self, compile_commands, output_dir, new_std=None):
+        output_dir = os.path.abspath(output_dir)
+        if not os.path.isdir(output_dir):
+            print_and_exit('"%s" must point to a directory' % output_dir)
+        self.output_dir = output_dir
+        self.new_std = new_std
+        cwd,bcmd =  self._get_base_command(compile_commands)
+        self.cwd = cwd
+        self.base_cmd = bcmd
+
+    def run_for_headers(self, header_list):
+        outputs = []
+        for header in header_list:
+            header_name = os.path.basename(header)
+            out = os.path.join(self.output_dir, ('%s.dot' % header_name))
+            outputs += [out]
+            cmd =  self.base_cmd + ["-fsyntax-only", "-Xclang", "-dependency-dot", "-Xclang", "%s" % out, '-xc++', '-']
+            libcxx.util.executeCommandOrDie(cmd, cwd=self.cwd, input='#include <%s>\n\n' % header_name)
+        return outputs
+
+    def _get_base_command(self, command_file):
+        commands = None
+        with open(command_file, 'r') as f:
+            commands = json.load(f)
+        for compile_cmd in commands:
+            file = compile_cmd['file']
+            if not file.endswith('src/algorithm.cpp'):
+                continue
+            wd = compile_cmd['directory']
+            cmd_str = compile_cmd['command']
+            cmd = shlex.split(cmd_str)
+            out_arg = cmd.index('-o')
+            del cmd[out_arg]
+            del cmd[out_arg]
+            in_arg = cmd.index('-c')
+            del cmd[in_arg]
+            del cmd[in_arg]
+            if self.new_std is not None:
+                for f in cmd:
+                    if f.startswith('-std='):
+                        del cmd[cmd.index(f)]
+                        cmd += [self.new_std]
+                        break
+            return wd, cmd
+        print_and_exit("failed to find command to build algorithm.cpp")
+
+def post_process_outputs(outputs, libcxx_only):
+    graphs = []
+    for dot_file in outputs:
+        g = dot.DirectedGraph.fromDotFile(dot_file)
+        rename_headers_and_remove_test_root(g)
+        if libcxx_only:
+            remove_non_std_headers(g)
+        graphs += [g]
+        g.toDotFile(dot_file)
+    return graphs
+
+def build_canonical_names(graphs):
+    canonical_names = {}
+    next_idx = 0
+    for g in graphs:
+        for n in g.nodes:
+            if n.attributes['label'] not in canonical_names:
+                name = 'header_%d' % next_idx
+                next_idx += 1
+                canonical_names[n.attributes['label']] = name
+    return canonical_names
+
+
+
+class CanonicalGraphBuilder(object):
+    def __init__(self, graphs):
+        self.graphs = list(graphs)
+        self.canonical_names = build_canonical_names(graphs)
+
+    def build(self):
+        self.canonical = dot.DirectedGraph('all_headers')
+        for k,v in self.canonical_names.iteritems():
+            n = dot.Node(v, edges=[], attributes={'shape': 'box', 'label': k})
+            self.canonical.addNode(n)
+        for g in self.graphs:
+            self._merge_graph(g)
+        return self.canonical
+
+    def _merge_graph(self, g):
+        for n in g.nodes:
+            new_name = self.canonical.getNodeByLabel(n.attributes['label']).id
+            for e in n.edges:
+                to_node = self.canonical.getNodeByLabel(e.attributes['label']).id
+                self.canonical.addEdge(new_name, to_node)
+
+
+def main():
+    parser = ArgumentParser(
+        description="Generate a graph of libc++ header dependencies")
+    parser.add_argument(
+        '-v', '--verbose', dest='verbose', action='store_true', default=False)
+    parser.add_argument(
+        '-o', '--output', dest='output', required=True,
+        help='The output file. stdout is used if not given',
+        type=str, action='store')
+    parser.add_argument(
+        '--no-compile', dest='no_compile', action='store_true', default=False)
+    parser.add_argument(
+        '--libcxx-only', dest='libcxx_only', action='store_true', default=False)
+    parser.add_argument(
+        'compile_commands', metavar='compile-commands-file',
+        help='the compile commands database')
+
+    args = parser.parse_args()
+    builder = DependencyCommand(args.compile_commands, args.output, new_std='-std=c++2a')
+    if not args.no_compile:
+        outputs = builder.run_for_headers(get_libcxx_headers())
+        graphs = post_process_outputs(outputs, args.libcxx_only)
+    else:
+        outputs = [os.path.join(args.output, l) for l in os.listdir(args.output) if not l.endswith('all_headers.dot')]
+        graphs = [dot.DirectedGraph.fromDotFile(o) for o in outputs]
+
+    canon = CanonicalGraphBuilder(graphs).build()
+    canon.toDotFile(os.path.join(args.output, 'all_headers.dot'))
+    all_graphs = graphs + [canon]
+
+    found_cycles = False
+    for g in all_graphs:
+        cycle_finder = dot.CycleFinder(g)
+        all_cycles = cycle_finder.findCyclesInGraph()
+        if len(all_cycles):
+            found_cycles = True
+            print("cycle in graph %s" % g.name)
+            for start, path in all_cycles:
+                print("Cycle for %s = %s" % (start, path))
+    if not found_cycles:
+        print("No cycles found")
+
+
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()
--- a/libcxx/utils/libcxx/graph.py
+++ b/libcxx/utils/libcxx/graph.py
@ -0,0 +1,298 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+import platform
+import os
+from collections import defaultdict
+import re
+import libcxx.util
+
+
+class DotEmitter(object):
+  def __init__(self, name):
+    self.name = name
+    self.node_strings = {}
+    self.edge_strings = []
+
+  def addNode(self, node):
+    res = str(node.id)
+    if len(node.attributes):
+      attr_strs = []
+      for k,v in node.attributes.iteritems():
+        attr_strs += ['%s="%s"' % (k, v)]
+      res += ' [ %s ]' % (', '.join(attr_strs))
+    res += ';'
+    assert node.id not in self.node_strings
+    self.node_strings[node.id] = res
+
+  def addEdge(self, n1, n2):
+    res = '%s -> %s;' % (n1.id, n2.id)
+    self.edge_strings += [res]
+
+  def node_key(self, n):
+    id = n.id
+    assert id.startswith('\w*\d+')
+
+  def emit(self):
+    node_definitions_list = []
+    sorted_keys = self.node_strings.keys()
+    sorted_keys.sort()
+    for k in sorted_keys:
+      node_definitions_list += [self.node_strings[k]]
+    node_definitions = '\n  '.join(node_definitions_list)
+    edge_list = '\n  '.join(self.edge_strings)
+    return '''
+digraph "{name}" {{
+  {node_definitions}
+  {edge_list}
+}}    
+'''.format(name=self.name, node_definitions=node_definitions, edge_list=edge_list).strip()
+
+
+class DotReader(object):
+  def __init__(self):
+    self.graph = DirectedGraph(None)
+
+  def abortParse(self, msg="bad input"):
+    raise Exception(msg)
+
+  def parse(self, data):
+    lines = [l.strip() for l in data.splitlines() if l.strip()]
+    maxIdx = len(lines)
+    idx = 0
+    if not self.parseIntroducer(lines[idx]):
+      self.abortParse('failed to parse introducer')
+    idx += 1
+    while idx < maxIdx:
+      if self.parseNodeDefinition(lines[idx]) or self.parseEdgeDefinition(lines[idx]):
+        idx += 1
+        continue
+      else:
+        break
+    if idx == maxIdx or not self.parseCloser(lines[idx]):
+      self.abortParse("no closing } found")
+    return self.graph
+
+  def parseEdgeDefinition(self, l):
+    edge_re = re.compile('^\s*(\w+)\s+->\s+(\w+);\s*$')
+    m = edge_re.match(l)
+    if not m:
+      return False
+    n1 = m.group(1)
+    n2 = m.group(2)
+    self.graph.addEdge(n1, n2)
+    return True
+
+  def parseAttributes(self, raw_str):
+    attribute_re = re.compile('^\s*(\w+)="([^"]+)"')
+    parts = [l.strip() for l in raw_str.split(',') if l.strip()]
+    attribute_dict = {}
+    for a in parts:
+      m = attribute_re.match(a)
+      if not m:
+        self.abortParse('Bad attribute "%s"' % a)
+      attribute_dict[m.group(1)] = m.group(2)
+    return attribute_dict
+
+  def parseNodeDefinition(self, l):
+    node_definition_re = re.compile('^\s*(\w+)\s+\[([^\]]+)\]\s*;\s*$')
+    m = node_definition_re.match(l)
+    if not m:
+      return False
+    id = m.group(1)
+    attributes = self.parseAttributes(m.group(2))
+    n = Node(id, edges=[], attributes=attributes)
+    self.graph.addNode(n)
+    return True
+
+  def parseIntroducer(self, l):
+    introducer_re = re.compile('^\s*digraph "([^"]+)"\s+{\s*$')
+    m = introducer_re.match(l)
+    if not m:
+      return False
+    self.graph.setName(m.group(1))
+    return True
+
+  def parseCloser(self, l):
+    closer_re = re.compile('^\s*}\s*$')
+    m = closer_re.match(l)
+    if not m:
+      return False
+    return True
+
+class Node(object):
+  def __init__(self, id, edges=[], attributes={}):
+    self.id = id
+    self.edges = set(edges)
+    self.attributes = dict(attributes)
+
+  def addEdge(self, dest):
+    self.edges.add(dest)
+
+  def __eq__(self, another):
+    if isinstance(another, str):
+      return another == self.id
+    return hasattr(another, 'id') and self.id == another.id
+
+  def __hash__(self):
+    return hash(self.id)
+
+  def __str__(self):
+    return self.attributes["label"]
+
+  def __repr__(self):
+    return self.__str__()
+    res = self.id
+    if len(self.attributes):
+      attr = []
+      for k,v in self.attributes.iteritems():
+        attr += ['%s="%s"' % (k, v)]
+      res += ' [%s ]' % (', '.join(attr))
+    return res
+
+class DirectedGraph(object):
+  def __init__(self, name=None, nodes=None):
+    self.name = name
+    self.nodes = set() if nodes is None else set(nodes)
+
+  def setName(self, n):
+    self.name = n
+
+  def _getNode(self, n_or_id):
+    if isinstance(n_or_id, Node):
+      return n_or_id
+    return self.getNode(n_or_id)
+
+  def getNode(self, str_id):
+    assert isinstance(str_id, str) or isinstance(str_id, Node)
+    for s in self.nodes:
+      if s == str_id:
+        return s
+    return None
+
+  def getNodeByLabel(self, l):
+    found = None
+    for s in self.nodes:
+      if s.attributes['label'] == l:
+        assert found is None
+        found = s
+    return found
+
+  def addEdge(self, n1, n2):
+    n1 = self._getNode(n1)
+    n2 = self._getNode(n2)
+    assert n1 in self.nodes
+    assert n2 in self.nodes
+    n1.addEdge(n2)
+
+  def addNode(self, n):
+    self.nodes.add(n)
+
+  def removeNode(self, n):
+    n = self._getNode(n)
+    for other_n in self.nodes:
+      if other_n == n:
+        continue
+      new_edges = set()
+      for e in other_n.edges:
+        if e != n:
+          new_edges.add(e)
+      other_n.edges = new_edges
+    self.nodes.remove(n)
+
+  def toDot(self):
+    dot = DotEmitter(self.name)
+    for n in self.nodes:
+      dot.addNode(n)
+      for ndest in n.edges:
+        dot.addEdge(n, ndest)
+    return dot.emit()
+
+  @staticmethod
+  def fromDot(str):
+    reader = DotReader()
+    graph = reader.parse(str)
+    return graph
+
+  @staticmethod
+  def fromDotFile(fname):
+    with open(fname, 'r') as f:
+      return DirectedGraph.fromDot(f.read())
+
+  def toDotFile(self, fname):
+    with open(fname, 'w') as f:
+      f.write(self.toDot())
+
+  def __repr__(self):
+    return self.toDot()
+
+class BFS(object):
+  def __init__(self, start):
+    self.visited = set()
+    self.to_visit = []
+    self.start = start
+
+  def __nonzero__(self):
+    return len(self.to_visit) != 0
+
+  def empty(self):
+    return len(self.to_visit) == 0
+
+  def push_back(self, node):
+    assert node not in self.visited
+    self.visited.add(node)
+    self.to_visit += [node]
+
+  def maybe_push_back(self, node):
+    if node in self.visited:
+      return
+    self.push_back(node)
+
+  def pop_front(self):
+    assert len(self.to_visit)
+    elem = self.to_visit[0]
+    del self.to_visit[0]
+    return elem
+
+  def seen(self, n):
+    return n in self.visited
+
+
+
+class CycleFinder(object):
+  def __init__(self, graph):
+    self.graph = graph
+
+  def findCycleForNode(self, n):
+    assert n in self.graph.nodes
+    all_paths = {}
+    all_cycles = []
+    bfs = BFS(n)
+    bfs.push_back(n)
+    all_paths[n] = [n]
+    while bfs:
+      n = bfs.pop_front()
+      assert n in all_paths
+      for e in n.edges:
+        en = self.graph.getNode(e)
+        if not bfs.seen(en):
+          new_path = list(all_paths[n])
+          new_path.extend([en])
+          all_paths[en] = new_path
+          bfs.push_back(en)
+        if en == bfs.start:
+          all_cycles += [all_paths[n]]
+    return all_cycles
+
+  def findCyclesInGraph(self):
+    all_cycles = []
+    for n in self.graph.nodes:
+      cycle = self.findCycleForNode(n)
+      if cycle:
+        all_cycles += [(n, cycle)]
+    return all_cycles
--- a/libcxx/utils/libcxx/util.py
+++ b/libcxx/utils/libcxx/util.py
@ -286,3 +286,16 @@ def executeCommandVerbose(cmd, *args, **kwargs):
        report += "\n\nFailed!"
        sys.stderr.write('%s\n' % report)
    return out, err, exitCode
+
+
+def executeCommandOrDie(cmd, *args, **kwargs):
+    """
+    Execute a command and print its output on failure.
+    """
+    out, err, exitCode = executeCommand(cmd, *args, **kwargs)
+    if exitCode != 0:
+        report = makeReport(cmd, out, err, exitCode)
+        report += "\n\nFailed!"
+        sys.stderr.write('%s\n' % report)
+        sys.exit(exitCode)
+    return out, err, exitCode