2017-06-29 21:02:15 +08:00
|
|
|
#! /usr/bin/env python
|
|
|
|
|
2017-03-07 01:41:00 +08:00
|
|
|
import argparse
|
2017-03-22 06:46:46 +08:00
|
|
|
import itertools
|
2017-03-07 01:40:36 +08:00
|
|
|
import os
|
|
|
|
import re
|
2017-03-22 06:46:46 +08:00
|
|
|
import sys
|
2017-03-23 02:23:14 +08:00
|
|
|
from collections import defaultdict
|
2017-03-07 01:40:36 +08:00
|
|
|
|
|
|
|
from use_lldb_suite import lldb_root
|
|
|
|
|
2017-03-07 01:41:00 +08:00
|
|
|
# Command-line interface.  Both options are boolean switches that are off
# unless given on the command line.
parser = argparse.ArgumentParser(
    description='Analyze LLDB project #include dependencies.')
parser.add_argument('--show-counts', default=False, action='store_true',
    help='When true, show the number of dependencies from each subproject')
# The two adjacent literals below are joined by the parser, so the first one
# has to end with a space to keep "Note, this" readable in --help output.
parser.add_argument('--discover-cycles', default=False, action='store_true',
    help='When true, find and display all project dependency cycles. Note, '
         'this option is very slow')
|
|
|
|
|
2017-03-07 01:41:00 +08:00
|
|
|
# Parse the command line once; later code reads the switches from `args`.
args = parser.parse_args()

# The two trees that get scanned, both located directly under lldb_root.
src_dir, inc_dir = (os.path.join(lldb_root, leaf) for leaf in ("source", "include"))

# Filled in by scan_deps(): maps a normalized directory name to a dict of
# {included directory: number of #include lines that refer to it}.
src_map = dict()

# Matches a quoted #include whose path starts with lldb, Plugins or clang and
# contains at least one '/'.  Group 1 is the directory part of the path,
# including its trailing slash.
include_regex = re.compile(r'#include "((lldb|Plugins|clang)(.*/)+).*"')
|
|
|
|
|
2017-03-21 07:54:26 +08:00
|
|
|
def is_sublist(small, big):
    """Return True if the items of `small` appear in `big` in the same order.

    The items do not have to be adjacent in `big`; other items may sit
    between them.  An empty `small` is always contained.
    """
    remaining = iter(big)
    for wanted in small:
        # A membership test on an iterator consumes it up to and including
        # the first match, so every later item is only searched for in the
        # part of `big` that follows the previous match.
        if wanted not in remaining:
            return False
    return True
|
|
|
|
|
2017-03-07 01:41:00 +08:00
|
|
|
def normalize_host(str):
    """Map a directory name taken from a path or #include to its canonical form.

    - anything starting with "lldb/Host" collapses to "lldb/Host"
    - names starting with "Plugins" get an "lldb/" prefix
    - names starting with "lldb/../../source" have that text replaced by "lldb"
    - every other name is returned unchanged
    """
    result = str
    if str.startswith("lldb/Host"):
        result = "lldb/Host"
    elif str.startswith("Plugins"):
        result = "lldb/" + str
    elif str.startswith("lldb/../../source"):
        result = str.replace("lldb/../../source", "lldb")
    return result
|
2017-03-07 01:40:36 +08:00
|
|
|
|
|
|
|
def scan_deps(this_dir, file):
    """Add the project directories that `file` includes from to src_map.

    `this_dir` is the directory the file is attributed to; it is normalized
    with normalize_host() before use.  Every line of `file` that matches
    include_regex contributes one count to the (normalized) directory of the
    included header, unless that directory is `this_dir` itself.

    The counts are kept in src_map[this_dir].  An entry is only created once
    at least one dependency has been found, so directories without
    dependencies never appear as keys of src_map.
    """
    this_dir = normalize_host(this_dir)
    # Continue the tally of an earlier file from the same directory if there
    # is one; otherwise start a fresh dict that is stored further down.
    deps = src_map.get(this_dir, {})

    with open(file) as f:
        # Iterate the file object directly instead of reading every line
        # into a list first.
        for line in f:
            m = include_regex.match(line)
            if m is None:
                continue
            relative = m.group(1).rstrip("/")
            if relative == this_dir:
                continue
            # Normalizing can fold a different spelling onto this_dir
            # (e.g. a sub-directory of lldb/Host), so check again afterwards.
            relative = normalize_host(relative)
            if relative == this_dir:
                continue
            deps[relative] = deps.get(relative, 0) + 1

    if deps and this_dir not in src_map:
        src_map[this_dir] = deps
|
2017-03-07 01:40:36 +08:00
|
|
|
|
|
|
|
# Scan every ".h" file below inc_dir.  Each file is attributed to its
# directory relative to inc_dir, written with forward slashes so that the
# name has the same form as the paths captured by include_regex.
for (base, dirs, files) in os.walk(inc_dir):
    relative = os.path.relpath(base, inc_dir).replace("\\", "/")
    for inc in files:
        if os.path.splitext(inc)[1] != ".h":
            continue
        scan_deps(relative, os.path.join(base, inc))
|
2017-03-07 01:40:36 +08:00
|
|
|
|
|
|
|
# Scan every ".cpp", ".h" and ".mm" file below src_dir.  The directory
# relative to src_dir is prefixed with "lldb" and written with forward
# slashes before it is handed to scan_deps().
for (base, dirs, files) in os.walk(src_dir):
    relative = os.path.relpath(base, src_dir)
    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
    norm_base_path = norm_base_path.replace("\\", "/")
    for src in files:
        if os.path.splitext(src)[1] in (".cpp", ".h", ".mm"):
            scan_deps(norm_base_path, os.path.join(base, src))
|
|
|
|
|
2017-03-21 07:54:26 +08:00
|
|
|
def is_existing_cycle(path, cycles):
    """Return True if some rotation of `path` contains an already-known cycle.

    A cycle A -> B -> C (with an implicit -> A at the end) is the same cycle
    as B -> C -> A or C -> A -> B.  Looking for the known cycles in `path` as
    written is therefore not enough: every rotation of `path` is tried, and
    for each rotation every entry of `cycles` is tested with is_sublist().
    Rejecting such paths early shrinks the search space by multiple orders
    of magnitude.
    """
    size = len(path)
    for shift in range(size):
        # Move the last `shift` elements to the front; shift == 0 leaves the
        # path as it is.
        rotated = path[size - shift:] + path[:size - shift]
        for known in cycles:
            if is_sublist(known, rotated):
                return True
    return False
|
|
|
|
|
|
|
|
def expand(path_queue, path_lengths, cycles, src_map):
    """Search the dependency graph for cycles and append them to `cycles`.

    path_queue   -- list of paths (lists of directory names) still to be
                    extended; consumed from the front, extended at the back
    path_lengths -- list parallel to path_queue holding each path's length
    cycles       -- list that receives every newly discovered cycle, stored
                    without the closing repetition of its first element
    src_map      -- mapping of directory name to a dict keyed by the
                    directories it depends on

    The search is breadth first so that paths are visited in order of
    ascending length.  Short cycles are therefore discovered first, which
    allows longer paths containing them to be discarded before they grow
    the search space any further.
    """
    while path_queue:
        cur_path = path_queue.pop(0)
        # Take the length off its queue together with the path, before any
        # early exit, so that the two queues always stay aligned.
        next_len = path_lengths.pop(0) + 1

        if is_existing_cycle(cur_path, cycles):
            continue

        last_component = cur_path[-1]

        # A directory that only ever shows up as a dependency has no entry
        # of its own in src_map; it simply has no outgoing edges.
        for item in src_map.get(last_component, ()):
            if item.startswith("clang"):
                continue

            if item in cur_path:
                # This is a cycle.  Reduce it to the part of the path that
                # actually loops and record it unless an equivalent cycle
                # is already known.
                new_index = cur_path.index(item)
                cycle = cur_path[new_index:]
                if not is_existing_cycle(cycle, cycles):
                    cycles.append(cycle)
                continue

            path_lengths.append(next_len)
            path_queue.append(cur_path + [item])
|
|
|
|
|
|
|
|
# Cycles found by expand(); stays empty unless --discover-cycles is given.
cycles = list()

# Starting points of the cycle search: one single-element path for every
# directory recorded in src_map, plus a parallel list with each path's length.
path_queue = [[name] for name in src_map]
path_lens = [1 for _ in path_queue]
|
|
|
|
|
2018-12-06 18:27:38 +08:00
|
|
|
# Dependency report: every directory in name order, each followed by a
# tab-indented list of the directories it includes from.
items = sorted(src_map.items(), key=lambda entry: entry[0])

for (path, deps) in items:
    print(path + ":")
    if args.show_counts:
        # With counts: lowest count first, ties broken by name.
        by_count = sorted(deps.items(), key=lambda entry: (entry[1], entry[0]))
        for (name, count) in by_count:
            print("\t{} [{}]".format(name, count))
    else:
        # Without counts: plain alphabetical listing.
        for name in sorted(deps):
            print("\t{}".format(name))
|
2017-03-21 07:54:26 +08:00
|
|
|
|
2017-03-22 06:46:46 +08:00
|
|
|
def iter_cycles(cycles, dep_counts=None):
    """Yield (total, smallest, edges) for every cycle in `cycles`.

    cycles     -- iterable of cycles, each a non-empty sequence of directory
                  names without the closing repetition of the first element
    dep_counts -- mapping such that dep_counts[a][b] is the number of
                  includes from a to b; defaults to the module-level src_map

    For each cycle:
      edges    -- list of (source, count, target) tuples, one per edge,
                  including the edge from the last element back to the first
      total    -- sum of all edge counts
      smallest -- the lowest edge count

    The cycles passed in are left unmodified; the closing element is added
    to a private copy only.
    """
    if dep_counts is None:
        dep_counts = src_map
    for cycle in cycles:
        closed = list(cycle) + [cycle[0]]
        # Pair every element with its successor to obtain the edges.
        result = [(x, dep_counts[x][y], y) for (x, y) in zip(closed, closed[1:])]
        counts = [value for (_, value, _) in result]
        yield (sum(counts), min(counts), result)
|
|
|
|
|
2017-03-21 07:54:26 +08:00
|
|
|
# Optional second phase: find dependency cycles, list them, and group
# cycles that share at least one directory into "islands".
if args.discover_cycles:
    print("Analyzing cycles...")

    expand(path_queue, path_lens, cycles, src_map)

    # Cycles are stored without their closing element, hence the +1.  The
    # guard keeps a project without any cycle from dividing by zero.
    average = sum(len(x) + 1 for x in cycles) / len(cycles) if cycles else 0

    print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average))

    # Pass copies so the entries of `cycles` keep their open (unclosed) form
    # no matter what iter_cycles() does with its input; the plain listing
    # below then closes every cycle exactly once.
    counted = list(iter_cycles([list(c) for c in cycles]))
    if args.show_counts:
        counted.sort(key=lambda A: A[0])
        for (total, smallest, cycle) in counted:
            pieces = ["{} deps to break: ".format(total), cycle[0][0]]
            for (first, count, last) in cycle:
                pieces.append(" [{}->] {}".format(count, last))
            pieces.append("\n")
            sys.stdout.write("".join(pieces))
    else:
        for cycle in cycles:
            print(" -> ".join(cycle + [cycle[0]]))

    print("Analyzing islands...")
    islands = []
    # Per directory: summed counts of the cycle edges leaving / entering it.
    outgoing_counts = defaultdict(int)
    incoming_counts = defaultdict(int)
    for (total, smallest, cycle) in counted:
        for (first, count, last) in cycle:
            outgoing_counts[first] += count
            incoming_counts[last] += count
    for cycle in cycles:
        this_cycle = set(cycle)
        # Merge this cycle with every island it touches; islands it does not
        # touch are carried over unchanged.
        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
        islands = disjoints + [set.union(this_cycle, *overlaps)]
    print("Found {} disjoint cycle islands...".format(len(islands)))
    for island in islands:
        print("Island ({} elements)".format(len(island)))
        # Named `ranked` rather than `sorted` so the builtin is not rebound
        # at module level.
        ranked = [(node, incoming_counts[node], outgoing_counts[node])
                  for node in island]
        ranked.sort(key=lambda x: x[1] + x[2])
        for (node, inc, outg) in ranked:
            print(" {} [{} in, {} out]".format(node, inc, outg))
    sys.stdout.flush()
|