[libc++] Rewrites graph_header_deps.py.

The new version is a lot simpler and has less option which were not
used. This uses the CSV files as generated by D133127 as input data.

The current Python script has more features but uses a simple "grep"
making the output less accurate:
- Conditionally included header are always included. This is an issue
  since part of our includes are unneeded transitive includes. Based on
  the language version they may be omitted. The script however always
  includes them.
- Includes in comments are processed as-if they are includes. This is an
  issue when comments explain how certain data is generated; of course
  there are digraphs which the script omits.

This implementation uses Clang's --trace-includes to generate the includes
per header. This means the input of the generation script always has the
real list of includes.

Libc++ is moving from large monolithic Standard headers to more fine
grained headers. For example, algorithm includes every header in
`__algorithm`. Adding all these detail headers in the graph makes the
output unusable. Instead it only shows the Standard headers. The
transitive includes of the detail headers are parsed and "attributed" to
the Standard header including them. This gives an accurate include graph
without the unneeded clutter. Note that this graph is still big.

This changes fixes the cyclic dependency issue with the previous version
of the tool so the markers and its documentation is removed.

Since the input has no cycles the CI test is removed.

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D134188
This commit is contained in:
Mark de Wever 2022-09-19 17:37:35 +02:00
parent dcc756d03e
commit ddb6c2b301
9 changed files with 34 additions and 235 deletions

View File

@ -49,19 +49,6 @@ include transitive headers, regardless of the language version. This can be
useful for users to aid the transition to a newer language version, or by users
who simply want to make sure they include what they use in their code.
One of the issues for libc++ with transitive includes is that these includes
may create dependency cycles and cause the validation script
``libcxx/utils/graph_header_deps.py`` to have false positives. To ignore an
include from the validation script, add a comment containing ``IGNORE-CYCLE``.
This should only be used when there is a cycle and it has been verified it is a
false positive.
.. code-block:: cpp
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
#endif
Rationale
---------

View File

@ -1910,7 +1910,7 @@ template <class BidirectionalIterator, class Compare>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20

View File

@ -695,7 +695,7 @@ any_cast(any * __any) _NOEXCEPT
_LIBCPP_END_NAMESPACE_STD
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20

View File

@ -2656,7 +2656,7 @@ typedef atomic<__libcpp_unsigned_lock_free> atomic_unsigned_lock_free;
_LIBCPP_END_NAMESPACE_STD
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20

View File

@ -2433,7 +2433,7 @@ future<void>::share() _NOEXCEPT
_LIBCPP_END_NAMESPACE_STD
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#endif // _LIBCPP_FUTURE

View File

@ -1573,7 +1573,7 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER > 14
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20

View File

@ -409,7 +409,7 @@ _LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <chrono> // IGNORE-CYCLE due to <format>
# include <chrono>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20

View File

@ -198,9 +198,6 @@ check-generated-output)
# Reject code with trailing whitespace
! grep -rn '[[:blank:]]$' libcxx/include libcxx/src libcxx/test libcxx/benchmarks || false
# Reject patches that introduce dependency cycles in the headers.
python3 libcxx/utils/graph_header_deps.py >/dev/null
;;
generic-cxx03)
clean

View File

@ -1,232 +1,47 @@
#!/usr/bin/env python
#===----------------------------------------------------------------------===##
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===----------------------------------------------------------------------===##
# ===----------------------------------------------------------------------===##
import argparse
import os
import re
import sys
if __name__ == "__main__":
"""Converts a header dependency CSV file to Graphviz dot file.
def is_config_header(h):
return os.path.basename(h) in ['__config', '__undef_macros', 'version']
The header dependency CSV files are found on the directory
libcxx/test/libcxx/transitive_includes
"""
def is_experimental_header(h):
return ('experimental/' in h) or ('ext/' in h)
def is_support_header(h):
return '__support/' in h
class FileEntry:
def __init__(self, includes, individual_linecount):
self.includes = includes
self.individual_linecount = individual_linecount
self.cumulative_linecount = None # documentation: this gets filled in later
self.is_graph_root = None # documentation: this gets filled in later
def list_all_roots_under(root):
result = []
for root, _, files in os.walk(root):
for fname in files:
if os.path.basename(root).startswith('__') or fname.startswith('__'):
pass
elif ('.' in fname and not fname.endswith('.h')):
pass
else:
result.append(root + '/' + fname)
return result
def build_file_entry(fname, options):
assert os.path.exists(fname)
def locate_header_file(h, paths):
for p in paths:
fullname = p + '/' + h
if os.path.exists(fullname):
return fullname
if options.error_on_file_not_found:
raise RuntimeError('Header not found: %s, included by %s' % (h, fname))
return None
local_includes = []
system_includes = []
linecount = 0
with open(fname, 'r', encoding='utf-8') as f:
for line in f.readlines():
linecount += 1
m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
if m is not None:
local_includes.append(m.group(1))
m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
if m is not None:
# Since libc++ keeps transitive includes guarded by the
# language version some cycles can be ignored. For example
# before C++20 several headers included <chrono> without using
# it. In C++20 <chrono> conditionally includes <format> in
# C++20. This causes multiple cycles in this script that can't
# happen in practice. Since the script uses a regex instead of
# a parser use a magic word.
if re.search(r'IGNORE-CYCLE', line) is None:
system_includes.append(m.group(1))
fully_qualified_includes = [
locate_header_file(h, options.search_dirs)
for h in system_includes
] + [
locate_header_file(h, os.path.dirname(fname))
for h in local_includes
]
return FileEntry(
# If file-not-found wasn't an error, then skip non-found files
includes = [h for h in fully_qualified_includes if h is not None],
individual_linecount = linecount,
)
def transitive_closure_of_includes(graph, h1):
visited = set()
def explore(graph, h1):
if h1 not in visited:
visited.add(h1)
for h2 in graph[h1].includes:
explore(graph, h2)
explore(graph, h1)
return visited
def transitively_includes(graph, h1, h2):
return (h1 != h2) and (h2 in transitive_closure_of_includes(graph, h1))
def build_graph(roots, options):
original_roots = list(roots)
graph = {}
while roots:
frontier = roots
roots = []
for fname in frontier:
if fname not in graph:
graph[fname] = build_file_entry(fname, options)
graph[fname].is_graph_root = (fname in original_roots)
roots += graph[fname].includes
for fname, entry in graph.items():
entry.cumulative_linecount = sum(graph[h].individual_linecount for h in transitive_closure_of_includes(graph, fname))
return graph
def get_friendly_id(fname):
i = fname.index('include/')
assert(i >= 0)
result = fname[i+8:]
return result
def get_graphviz(graph, options):
def get_decorators(fname, entry):
result = ''
if entry.is_graph_root:
result += ' [style=bold]'
if options.show_individual_line_counts and options.show_cumulative_line_counts:
result += ' [label="%s\\n%d indiv, %d cumul"]' % (
get_friendly_id(fname), entry.individual_linecount, entry.cumulative_linecount
)
elif options.show_individual_line_counts:
result += ' [label="%s\\n%d indiv"]' % (get_friendly_id(fname), entry.individual_linecount)
elif options.show_cumulative_line_counts:
result += ' [label="%s\\n%d cumul"]' % (get_friendly_id(fname), entry.cumulative_linecount)
return result
result = ''
result += 'strict digraph {\n'
result += ' rankdir=LR;\n'
result += ' layout=dot;\n\n'
for fname, entry in graph.items():
result += ' "%s"%s;\n' % (get_friendly_id(fname), get_decorators(fname, entry))
for h in entry.includes:
if any(transitively_includes(graph, i, h) for i in entry.includes) and not options.show_transitive_edges:
continue
result += ' "%s" -> "%s";\n' % (get_friendly_id(fname), get_friendly_id(h))
result += '}\n'
return result
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Produce a dependency graph of libc++ headers, in GraphViz dot format.\n' +
'For example, ./graph_header_deps.py | dot -Tpng > graph.png',
description="""Converts a libc++ dependency CSV file to a Graphviz dot file.
For example:
libcxx/utils/graph_header_deps.py libcxx/test/libcxx/transitive_includes/cxx20.csv | dot -Tsvg > graph.svg
""",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument('--root', default=None, metavar='FILE', help='File or directory to be the root of the dependency graph')
parser.add_argument('-I', dest='search_dirs', default=[], action='append', metavar='DIR', help='Path(s) to search for local includes')
parser.add_argument('--show-transitive-edges', action='store_true', help='Show edges to headers that are transitively included anyway')
parser.add_argument('--show-config-headers', action='store_true', help='Show universally included headers, such as __config')
parser.add_argument('--show-experimental-headers', action='store_true', help='Show headers in the experimental/ and ext/ directories')
parser.add_argument('--show-support-headers', action='store_true', help='Show headers in the __support/ directory')
parser.add_argument('--show-individual-line-counts', action='store_true', help='Include an individual line count in each node')
parser.add_argument('--show-cumulative-line-counts', action='store_true', help='Include a total line count in each node')
parser.add_argument('--error-on-file-not-found', action='store_true', help="Don't ignore failure to open an #included file")
parser.add_argument(
"input",
default=None,
metavar="FILE",
help="The header dependency CSV file.",
)
options = parser.parse_args()
if options.root is None:
curr_dir = os.path.dirname(os.path.abspath(__file__))
options.root = os.path.join(curr_dir, '../include')
print(
"""digraph includes {
graph [nodesep=0.5, ranksep=1];
node [shape=box, width=4];"""
)
with open(options.input, "r") as f:
for line in f.readlines():
elements = line.rstrip().split(" ")
assert len(elements) == 2
if options.search_dirs == [] and os.path.isdir(options.root):
options.search_dirs = [options.root]
print(f'\t"{elements[0]}" -> "{elements[1]}"')
options.root = os.path.abspath(options.root)
options.search_dirs = [os.path.abspath(p) for p in options.search_dirs]
if os.path.isdir(options.root):
roots = list_all_roots_under(options.root)
elif os.path.isfile(options.root):
roots = [options.root]
else:
raise RuntimeError('--root seems to be invalid')
graph = build_graph(roots, options)
# Eliminate certain kinds of "visual noise" headers, if asked for.
def should_keep(fname):
return all([
options.show_config_headers or not is_config_header(fname),
options.show_experimental_headers or not is_experimental_header(fname),
options.show_support_headers or not is_support_header(fname),
])
for fname in list(graph.keys()):
if should_keep(fname):
graph[fname].includes = [h for h in graph[fname].includes if should_keep(h)]
else:
del graph[fname]
# Look for cycles.
no_cycles_detected = True
for fname, entry in graph.items():
for h in entry.includes:
if h == fname:
sys.stderr.write('Cycle detected: %s includes itself\n' % (
get_friendly_id(fname)
))
no_cycles_detected = False
elif transitively_includes(graph, h, fname):
sys.stderr.write('Cycle detected between %s and %s\n' % (
get_friendly_id(fname), get_friendly_id(h)
))
no_cycles_detected = False
assert no_cycles_detected
print(get_graphviz(graph, options))
print("}")