diff --git a/libcxx/utils/graph_header_deps.py b/libcxx/utils/graph_header_deps.py
new file mode 100755
index 000000000000..b6f0a250ccef
--- /dev/null
+++ b/libcxx/utils/graph_header_deps.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+"""Graph the include dependencies of the libc++ headers and report cycles.
+
+Every top-level libc++ header is run through the compiler with
+``-Xclang -dependency-dot`` to produce one DOT file per header.  The graphs
+are normalized, merged into ``all_headers.dot`` and searched for cycles.
+"""
+
+from argparse import ArgumentParser
+import os
+import shutil
+import sys
+import shlex
+import json
+import re
+import libcxx.graph as dot
+import libcxx.util
+
+
+def print_and_exit(msg):
+    """Write ``msg`` to stderr and exit the process with status 1."""
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
+
+
+def libcxx_include_path():
+    """Return the ``include`` directory that is a sibling of this script's directory.
+
+    The path is computed from ``__file__``: two ``dirname`` steps up from the
+    script file, then ``include``.
+    """
+    libcxx_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    return os.path.join(libcxx_root, 'include')
+
+
+def get_libcxx_headers():
+    """Return the paths of the top-level headers in the include directory.
+
+    A regular file is selected when it has no extension or a ``.h`` extension
+    and its name does not start with ``__`` (``__config`` is the one
+    exception).  The listing is sorted so the result does not depend on the
+    order in which the OS returns directory entries.
+    """
+    include_dir = libcxx_include_path()
+    headers = []
+    for fname in sorted(os.listdir(include_dir)):
+        path = os.path.join(include_dir, fname)
+        if not os.path.isfile(path):
+            continue
+        ext = os.path.splitext(fname)[1]
+        is_header_name = ext == '' or ext == '.h'
+        is_public = not fname.startswith('__') or fname == '__config'
+        if is_header_name and is_public:
+            headers.append(path)
+    return headers
+
+
+def rename_headers_and_remove_test_root(graph):
+    """Normalize node labels in ``graph`` and drop the temporary test source.
+
+    For every node the label is rewritten as follows:
+      * a label without a leading ``/`` that names an existing path once
+        ``/`` is prepended gets that ``/`` prepended;
+      * a label under the libc++ include directory is made relative to it.
+    Nodes whose label ends in ``.tmp.cpp`` are removed from the graph.
+    """
+    inc_root = libcxx_include_path()
+    to_remove = set()
+    for node in graph.nodes:
+        assert 'label' in node.attributes
+        label = node.attributes['label']
+        if not label.startswith('/') and os.path.exists(os.path.join('/', label)):
+            label = '/' + label
+        if label.endswith('.tmp.cpp'):
+            to_remove.add(node)
+        if label.startswith(inc_root):
+            label = label[len(inc_root):]
+            if label.startswith('/'):
+                label = label[1:]
+        node.attributes['label'] = label
+    # Removal is deferred: graph.nodes must not change while it is iterated.
+    for node in to_remove:
+        graph.removeNode(node)
+
+
+def remove_non_std_headers(graph):
+    """Remove every node whose label does not resolve inside the include dir.
+
+    ``os.path.join`` discards ``inc_root`` when the label is absolute, so
+    absolute labels outside the include directory fail the prefix test and
+    are removed, while relative labels are always kept.
+    """
+    inc_root = libcxx_include_path()
+    to_remove = set()
+    for node in graph.nodes:
+        candidate = os.path.join(inc_root, node.attributes['label'])
+        if not candidate.startswith(inc_root):
+            to_remove.add(node)
+    for node in to_remove:
+        graph.removeNode(node)
+
+
+class DependencyCommand(object):
+    """Run the compiler to produce one dependency DOT file per header.
+
+    The base command line is taken from the ``src/algorithm.cpp`` entry of a
+    JSON compilation database, with its ``-o <file>`` and ``-c <file>``
+    arguments removed.
+    """
+
+    def __init__(self, compile_commands, output_dir, new_std=None):
+        """Load the base command from the compilation database.
+
+        compile_commands -- path to the JSON compilation database.
+        output_dir -- existing directory that receives the DOT files; the
+            process exits with an error if it is not a directory.
+        new_std -- optional ``-std=...`` flag that replaces any ``-std=``
+            flag found in the base command.
+        """
+        output_dir = os.path.abspath(output_dir)
+        if not os.path.isdir(output_dir):
+            print_and_exit('"%s" must point to a directory' % output_dir)
+        self.output_dir = output_dir
+        self.new_std = new_std
+        self.cwd, self.base_cmd = self._get_base_command(compile_commands)
+
+    def run_for_headers(self, header_list):
+        """Generate ``<header>.dot`` for each header and return the paths.
+
+        Each header is compiled from a one-line ``#include`` fed on stdin;
+        the process exits if a compiler invocation fails.
+        """
+        outputs = []
+        for header in header_list:
+            header_name = os.path.basename(header)
+            out = os.path.join(self.output_dir, '%s.dot' % header_name)
+            outputs.append(out)
+            cmd = self.base_cmd + [
+                "-fsyntax-only", "-Xclang", "-dependency-dot",
+                "-Xclang", "%s" % out, '-xc++', '-']
+            libcxx.util.executeCommandOrDie(
+                cmd, cwd=self.cwd, input='#include <%s>\n\n' % header_name)
+        return outputs
+
+    def _get_base_command(self, command_file):
+        """Return ``(directory, argv)`` for building ``src/algorithm.cpp``.
+
+        The entry's ``-o`` and ``-c`` flags are removed together with the
+        argument following each, and ``self.new_std`` (when set) replaces
+        every ``-std=`` flag.  Exits the process if no entry matches.
+        """
+        with open(command_file, 'r') as f:
+            commands = json.load(f)
+        for compile_cmd in commands:
+            if not compile_cmd['file'].endswith('src/algorithm.cpp'):
+                continue
+            wd = compile_cmd['directory']
+            # A database entry carries either an "arguments" list or a
+            # shell-quoted "command" string.
+            if 'arguments' in compile_cmd:
+                cmd = list(compile_cmd['arguments'])
+            else:
+                cmd = shlex.split(compile_cmd['command'])
+            for flag in ('-o', '-c'):
+                pos = cmd.index(flag)
+                # Drop the flag and the file name that follows it.
+                del cmd[pos:pos + 2]
+            if self.new_std is not None:
+                # Build a new list instead of deleting while iterating, and
+                # add the flag even when the command had no -std= at all.
+                cmd = [arg for arg in cmd if not arg.startswith('-std=')]
+                cmd.append(self.new_std)
+            return wd, cmd
+        print_and_exit("failed to find command to build algorithm.cpp")
+
+
+def post_process_outputs(outputs, libcxx_only):
+    """Normalize each DOT file in place and return the parsed graphs.
+
+    When ``libcxx_only`` is true, nodes for non-libc++ headers are removed
+    before the file is rewritten.
+    """
+    graphs = []
+    for dot_file in outputs:
+        g = dot.DirectedGraph.fromDotFile(dot_file)
+        rename_headers_and_remove_test_root(g)
+        if libcxx_only:
+            remove_non_std_headers(g)
+        graphs.append(g)
+        g.toDotFile(dot_file)
+    return graphs
+
+
+def build_canonical_names(graphs):
+    """Map every distinct node label in ``graphs`` to a ``header_<N>`` id.
+
+    Ids are numbered from 0 in the order the labels are first encountered.
+    """
+    canonical_names = {}
+    for g in graphs:
+        for n in g.nodes:
+            label = n.attributes['label']
+            if label not in canonical_names:
+                canonical_names[label] = 'header_%d' % len(canonical_names)
+    return canonical_names
+
+
+class CanonicalGraphBuilder(object):
+    """Merge several per-header graphs into one graph keyed by node label."""
+
+    def __init__(self, graphs):
+        # Materialize first so a one-shot iterable is not consumed twice.
+        self.graphs = list(graphs)
+        self.canonical_names = build_canonical_names(self.graphs)
+
+    def build(self):
+        """Create and return the merged ``all_headers`` graph."""
+        self.canonical = dot.DirectedGraph('all_headers')
+        # items() works on Python 2 and 3; iteritems() is Python 2 only.
+        for label, name in self.canonical_names.items():
+            n = dot.Node(name, edges=[], attributes={'shape': 'box', 'label': label})
+            self.canonical.addNode(n)
+        for g in self.graphs:
+            self._merge_graph(g)
+        return self.canonical
+
+    def _merge_graph(self, g):
+        """Copy every edge of ``g`` into the merged graph, matched by label."""
+        for n in g.nodes:
+            # The merged node's id is the canonical name of its label.
+            new_name = self.canonical_names[n.attributes['label']]
+            for e in n.edges:
+                to_node = self.canonical_names[e.attributes['label']]
+                self.canonical.addEdge(new_name, to_node)
+
+
+def main():
+    """Parse the command line, build the graphs and print any cycles."""
+    parser = ArgumentParser(
+        description="Generate a graph of libc++ header dependencies")
+    parser.add_argument(
+        '-v', '--verbose', dest='verbose', action='store_true', default=False)
+    parser.add_argument(
+        '-o', '--output', dest='output', required=True,
+        help='The existing directory the .dot files are written to',
+        type=str, action='store')
+    parser.add_argument(
+        '--no-compile', dest='no_compile', action='store_true', default=False)
+    parser.add_argument(
+        '--libcxx-only', dest='libcxx_only', action='store_true', default=False)
+    parser.add_argument(
+        'compile_commands', metavar='compile-commands-file',
+        help='the compile commands database')
+
+    args = parser.parse_args()
+    builder = DependencyCommand(args.compile_commands, args.output, new_std='-std=c++2a')
+    if not args.no_compile:
+        outputs = builder.run_for_headers(get_libcxx_headers())
+        graphs = post_process_outputs(outputs, args.libcxx_only)
+    else:
+        # Reuse DOT files from an earlier run; the merged graph is skipped
+        # because it is regenerated below.
+        outputs = [os.path.join(args.output, l)
+                   for l in sorted(os.listdir(args.output))
+                   if l.endswith('.dot') and not l.endswith('all_headers.dot')]
+        graphs = [dot.DirectedGraph.fromDotFile(o) for o in outputs]
+
+    canon = CanonicalGraphBuilder(graphs).build()
+    canon.toDotFile(os.path.join(args.output, 'all_headers.dot'))
+    all_graphs = graphs + [canon]
+
+    found_cycles = False
+    for g in all_graphs:
+        cycle_finder = dot.CycleFinder(g)
+        all_cycles = cycle_finder.findCyclesInGraph()
+        if len(all_cycles):
+            found_cycles = True
+            print("cycle in graph %s" % g.name)
+            for start, path in all_cycles:
+                print("Cycle for %s = %s" % (start, path))
+    if not found_cycles:
+        print("No cycles found")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/libcxx/utils/libcxx/graph.py b/libcxx/utils/libcxx/graph.py
new file mode 100644
index 000000000000..681d3ad2568f
--- /dev/null
+++ b/libcxx/utils/libcxx/graph.py
@@ -0,0 +1,349 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+"""Small directed-graph model with DOT reading/writing and cycle search."""
+
+import platform
+import os
+from collections import defaultdict
+from collections import deque
+import re
+import libcxx.util
+
+
+class DotEmitter(object):
+    """Collect node and edge statements and render them as a DOT digraph."""
+
+    def __init__(self, name):
+        self.name = name
+        self.node_strings = {}  # node id -> DOT node statement
+        self.edge_strings = []  # DOT edge statements
+
+    def addNode(self, node):
+        """Record the statement for ``node``; an id may be added only once."""
+        res = str(node.id)
+        if len(node.attributes):
+            # Sorted by key so the emitted text is the same on every run.
+            attr_strs = ['%s="%s"' % (k, v)
+                         for k, v in sorted(node.attributes.items())]
+            res += ' [ %s ]' % (', '.join(attr_strs))
+        res += ';'
+        assert node.id not in self.node_strings
+        self.node_strings[node.id] = res
+
+    def addEdge(self, n1, n2):
+        """Record a directed edge from node ``n1`` to node ``n2``."""
+        res = '%s -> %s;' % (n1.id, n2.id)
+        self.edge_strings += [res]
+
+    def node_key(self, n):
+        """Return the id of ``n`` after checking its expected shape.
+
+        The id must start with optional word characters followed by at least
+        one digit.  A regex match is used because ``str.startswith`` compares
+        its argument literally.
+        """
+        node_id = n.id
+        assert re.match(r'\w*\d+', node_id)
+        return node_id
+
+    def emit(self):
+        """Return the DOT text, with nodes ordered by id and edges sorted."""
+        node_definitions = '\n '.join(
+            self.node_strings[k] for k in sorted(self.node_strings))
+        edge_list = '\n '.join(sorted(self.edge_strings))
+        return '''
+digraph "{name}" {{
+ {node_definitions}
+ {edge_list}
+}}
+'''.format(name=self.name, node_definitions=node_definitions, edge_list=edge_list).strip()
+
+
+class DotReader(object):
+    """Parse a line-oriented subset of DOT into a ``DirectedGraph``.
+
+    Expected layout: a ``digraph "name" {`` line, then node definition and
+    edge lines, then a closing ``}`` line.  Blank lines are ignored.
+    """
+
+    # Compiled once instead of on every parsed line; raw strings keep the
+    # backslashes from being read as (invalid) string escapes.
+    _INTRODUCER_RE = re.compile(r'^\s*digraph "([^"]+)"\s+{\s*$')
+    _NODE_DEFINITION_RE = re.compile(r'^\s*(\w+)\s+\[([^\]]+)\]\s*;\s*$')
+    _EDGE_RE = re.compile(r'^\s*(\w+)\s+->\s+(\w+);\s*$')
+    _ATTRIBUTE_RE = re.compile(r'^\s*(\w+)="([^"]+)"')
+    _CLOSER_RE = re.compile(r'^\s*}\s*$')
+
+    def __init__(self):
+        self.graph = DirectedGraph(None)
+
+    def abortParse(self, msg="bad input"):
+        """Raise ``Exception(msg)`` to signal malformed input."""
+        raise Exception(msg)
+
+    def parse(self, data):
+        """Parse the DOT text ``data`` and return the populated graph."""
+        lines = [l.strip() for l in data.splitlines() if l.strip()]
+        maxIdx = len(lines)
+        # Checked first so empty input is a parse error, not an IndexError.
+        if not lines or not self.parseIntroducer(lines[0]):
+            self.abortParse('failed to parse introducer')
+        idx = 1
+        while idx < maxIdx:
+            line = lines[idx]
+            if not (self.parseNodeDefinition(line) or self.parseEdgeDefinition(line)):
+                break
+            idx += 1
+        if idx == maxIdx or not self.parseCloser(lines[idx]):
+            self.abortParse("no closing } found")
+        return self.graph
+
+    def parseEdgeDefinition(self, l):
+        """Add the edge described by ``l``; return False if ``l`` is no edge."""
+        m = self._EDGE_RE.match(l)
+        if not m:
+            return False
+        self.graph.addEdge(m.group(1), m.group(2))
+        return True
+
+    def parseAttributes(self, raw_str):
+        """Return a dict parsed from a comma-separated ``key="value"`` list.
+
+        NOTE: the list is split on every comma, so a quoted value that itself
+        contains a comma is rejected as a bad attribute.
+        """
+        parts = [l.strip() for l in raw_str.split(',') if l.strip()]
+        attribute_dict = {}
+        for a in parts:
+            m = self._ATTRIBUTE_RE.match(a)
+            if not m:
+                self.abortParse('Bad attribute "%s"' % a)
+            attribute_dict[m.group(1)] = m.group(2)
+        return attribute_dict
+
+    def parseNodeDefinition(self, l):
+        """Add the node described by ``l``; return False if ``l`` is no node."""
+        m = self._NODE_DEFINITION_RE.match(l)
+        if not m:
+            return False
+        attributes = self.parseAttributes(m.group(2))
+        self.graph.addNode(Node(m.group(1), edges=[], attributes=attributes))
+        return True
+
+    def parseIntroducer(self, l):
+        """Set the graph name from the ``digraph`` line; False if no match."""
+        m = self._INTRODUCER_RE.match(l)
+        if not m:
+            return False
+        self.graph.setName(m.group(1))
+        return True
+
+    def parseCloser(self, l):
+        """Return True when ``l`` is the closing ``}`` line."""
+        return self._CLOSER_RE.match(l) is not None
+
+
+class Node(object):
+    """A graph vertex identified by ``id``, with outgoing edges and attributes.
+
+    Equality and hashing use ``id`` only; a node also compares equal to a
+    ``str`` holding its id.
+    """
+
+    def __init__(self, id, edges=None, attributes=None):
+        self.id = id
+        # Always copied, so the caller's containers are never shared.
+        self.edges = set(edges) if edges is not None else set()
+        self.attributes = dict(attributes) if attributes is not None else {}
+
+    def addEdge(self, dest):
+        """Add an outgoing edge to the node ``dest``."""
+        self.edges.add(dest)
+
+    def __eq__(self, another):
+        if isinstance(another, str):
+            return another == self.id
+        return hasattr(another, 'id') and self.id == another.id
+
+    def __ne__(self, another):
+        # Python 2 does not derive != from __eq__, so define it explicitly.
+        return not self.__eq__(another)
+
+    def __hash__(self):
+        return hash(self.id)
+
+    def __str__(self):
+        # Fall back to the id so printing a node without a label cannot raise.
+        return str(self.attributes.get("label", self.id))
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class DirectedGraph(object):
+    """A named set of ``Node`` objects connected by directed edges."""
+
+    def __init__(self, name=None, nodes=None):
+        self.name = name
+        self.nodes = set() if nodes is None else set(nodes)
+
+    def setName(self, n):
+        self.name = n
+
+    def _getNode(self, n_or_id):
+        """Return ``n_or_id`` if it is a ``Node``, else look it up by id."""
+        if isinstance(n_or_id, Node):
+            return n_or_id
+        return self.getNode(n_or_id)
+
+    def getNode(self, str_id):
+        """Return the node equal to ``str_id`` (an id or a node), or None."""
+        assert isinstance(str_id, (str, Node))
+        for s in self.nodes:
+            if s == str_id:
+                return s
+        return None
+
+    def getNodeByLabel(self, l):
+        """Return the one node whose ``label`` attribute is ``l``, or None."""
+        found = None
+        for s in self.nodes:
+            if s.attributes['label'] == l:
+                assert found is None
+                found = s
+        return found
+
+    def addEdge(self, n1, n2):
+        """Add an edge from ``n1`` to ``n2``; both must be in the graph."""
+        n1 = self._getNode(n1)
+        n2 = self._getNode(n2)
+        assert n1 in self.nodes
+        assert n2 in self.nodes
+        n1.addEdge(n2)
+
+    def addNode(self, n):
+        self.nodes.add(n)
+
+    def removeNode(self, n):
+        """Remove ``n`` (a node or an id) and every edge pointing at it."""
+        n = self._getNode(n)
+        for other_n in self.nodes:
+            if other_n == n:
+                continue
+            other_n.edges.discard(n)
+        self.nodes.remove(n)
+
+    def toDot(self):
+        """Return the graph rendered as DOT text."""
+        dot = DotEmitter(self.name)
+        for n in self.nodes:
+            dot.addNode(n)
+            for ndest in n.edges:
+                dot.addEdge(n, ndest)
+        return dot.emit()
+
+    @staticmethod
+    def fromDot(str):
+        """Parse the DOT text ``str`` and return the resulting graph."""
+        reader = DotReader()
+        graph = reader.parse(str)
+        return graph
+
+    @staticmethod
+    def fromDotFile(fname):
+        """Parse the DOT file ``fname`` and return the resulting graph."""
+        with open(fname, 'r') as f:
+            return DirectedGraph.fromDot(f.read())
+
+    def toDotFile(self, fname):
+        """Write the DOT rendering of the graph to the file ``fname``."""
+        with open(fname, 'w') as f:
+            f.write(self.toDot())
+
+    def __repr__(self):
+        return self.toDot()
+
+
+class BFS(object):
+    """FIFO work list plus visited set for a breadth-first traversal."""
+
+    def __init__(self, start):
+        self.visited = set()
+        self.to_visit = deque()  # popping from the front is O(1)
+        self.start = start
+
+    def __nonzero__(self):
+        return len(self.to_visit) != 0
+
+    # Python 3 consults __bool__, not __nonzero__.  Without this alias every
+    # instance is truthy and ``while bfs:`` runs on into an empty queue.
+    __bool__ = __nonzero__
+
+    def empty(self):
+        return len(self.to_visit) == 0
+
+    def push_back(self, node):
+        """Mark ``node`` visited and queue it; it must not be visited yet."""
+        assert node not in self.visited
+        self.visited.add(node)
+        self.to_visit.append(node)
+
+    def maybe_push_back(self, node):
+        """Queue ``node`` unless it has already been visited."""
+        if node in self.visited:
+            return
+        self.push_back(node)
+
+    def pop_front(self):
+        """Remove and return the oldest queued node."""
+        assert len(self.to_visit)
+        return self.to_visit.popleft()
+
+    def seen(self, n):
+        return n in self.visited
+
+
+class CycleFinder(object):
+    """Find paths that lead from a node back to itself."""
+
+    def __init__(self, graph):
+        self.graph = graph
+
+    def findCycleForNode(self, n):
+        """Return the cycles through ``n`` found by a breadth-first search.
+
+        Each entry is the list of nodes on the recorded path from ``n`` to a
+        node that has an edge back to ``n``; the result is empty if no such
+        node is reachable.
+        """
+        assert n in self.graph.nodes
+        all_paths = {n: [n]}  # node -> path from ``n`` when first reached
+        all_cycles = []
+        bfs = BFS(n)
+        bfs.push_back(n)
+        while bfs:
+            current = bfs.pop_front()
+            assert current in all_paths
+            for e in current.edges:
+                en = self.graph.getNode(e)
+                if not bfs.seen(en):
+                    all_paths[en] = all_paths[current] + [en]
+                    bfs.push_back(en)
+                if en == bfs.start:
+                    all_cycles += [all_paths[current]]
+        return all_cycles
+
+    def findCyclesInGraph(self):
+        """Return ``(node, cycles)`` for every node that lies on a cycle."""
+        all_cycles = []
+        for n in self.graph.nodes:
+            cycle = self.findCycleForNode(n)
+            if cycle:
+                all_cycles += [(n, cycle)]
+        return all_cycles
diff --git a/libcxx/utils/libcxx/util.py b/libcxx/utils/libcxx/util.py
index 2fd95232abb4..8c93f392ed32 100644
--- a/libcxx/utils/libcxx/util.py
+++ b/libcxx/utils/libcxx/util.py
@@ -286,3 +286,17 @@ def executeCommandVerbose(cmd, *args, **kwargs):
         report += "\n\nFailed!"
         sys.stderr.write('%s\n' % report)
     return out, err, exitCode
+
+
+def executeCommandOrDie(cmd, *args, **kwargs):
+    """
+    Execute a command; if it exits with a non-zero code, write a report of
+    the failure to stderr and exit the process with that same code.
+    """
+    out, err, exitCode = executeCommand(cmd, *args, **kwargs)
+    if exitCode != 0:
+        report = makeReport(cmd, out, err, exitCode)
+        report += "\n\nFailed!"
+        sys.stderr.write('%s\n' % report)
+        sys.exit(exitCode)
+    return out, err, exitCode