New tool: opt-stats.py

I am planning to use this tool to find too noisy (missed) optimization remarks. Long term it may actually be better to just have another tool that exports the remarks into an sqlite database and perform queries like this in SQL.

This splits out the YAML parsing from opt-viewer.py into a new Python module, optrecord.py.

This is the result of the script on the LLVM testsuite:

Total number of remarks 714433

Top 10 remarks by pass:
  inline 52%
  gvn 24%
  licm 13%
  loop-vectorize 5%
  asm-printer 3%
  loop-unroll 1%
  regalloc 1%
  inline-cost 0%
  slp-vectorizer 0%
  loop-delete 0%

Top 10 remarks:
  gvn/LoadClobbered 20%
  inline/Inlined 19%
  inline/CanBeInlined 18%
  inline/NoDefinition 9%
  licm/LoadWithLoopInvariantAddressInvalidated 6%
  licm/Hoisted 6%
  asm-printer/InstructionCount 3%
  inline/TooCostly 3%
  gvn/LoadElim 3%
  loop-vectorize/MissedDetails 2%

Besides some refactoring, I also changed optrecord not to use context to access global data (max_hotness). Because of the separate module this would have required splitting context into two. However, it's not possible to access the optrecord context from the SourceFileRenderer when calling back to Remark.RelativeHotness.

llvm-svn: 296682
2017-03-01 21:35:00 +00:00 · 2017-03-01 21:35:00 +00:00 · b7278af54b
parent 7329569a05
commit b7278af54b
3 changed files with 246 additions and 188 deletions
--- a/llvm/utils/opt-viewer/opt-stats.py
+++ b/llvm/utils/opt-viewer/opt-stats.py
@ -0,0 +1,56 @@
+#!/usr/bin/env python2.7
+
+from __future__ import print_function
+
+desc = '''Generate statistics about optimization records from the YAML files
+generated with -fsave-optimization-record and -fdiagnostics-show-hotness.
+
+The tools requires PyYAML and Pygments Python packages.'''
+
+import optrecord
+import argparse
+import operator
+from collections import defaultdict
+from multiprocessing import cpu_count, Pool
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('yaml_files', nargs='+')
+    parser.add_argument(
+        '--jobs',
+        '-j',
+        default=cpu_count(),
+        type=int,
+        help='Max job count (defaults to current CPU count)')
+    args = parser.parse_args()
+
+    if len(args.yaml_files) == 0:
+        parser.print_help()
+        sys.exit(1)
+
+    if args.jobs == 1:
+        pmap = map
+    else:
+        pool = Pool(processes=args.jobs)
+        pmap = pool.map
+
+    all_remarks, file_remarks, _ = optrecord.gather_results(pmap, args.yaml_files)
+
+    bypass = defaultdict(int)
+    byname = defaultdict(int)
+    for r in all_remarks.itervalues():
+        bypass[r.Pass] += 1
+        byname[r.Pass + "/" + r.Name] += 1
+
+    total = len(all_remarks)
+    print("{:24s} {:10d}\n".format("Total number of remarks", total))
+
+    print("Top 10 remarks by pass:")
+    for (passname, count) in sorted(bypass.items(), key=operator.itemgetter(1),
+                                    reverse=True)[:10]:
+        print("  {:30s} {:2.0f}%". format(passname, count * 100. / total))
+
+    print("\nTop 10 remarks:")
+    for (name, count) in sorted(byname.items(), key=operator.itemgetter(1),
+                                reverse=True)[:10]:
+        print("  {:30s} {:2.0f}%". format(name, count * 100. / total))
--- a/llvm/utils/opt-viewer/opt-viewer.py
+++ b/llvm/utils/opt-viewer/opt-viewer.py
@ -7,160 +7,28 @@ generated with -fsave-optimization-record and -fdiagnostics-show-hotness.

 The tools requires PyYAML and Pygments Python packages.'''

-import yaml
-# Try to use the C parser.
-try:
-    from yaml import CLoader as Loader
-except ImportError:
-    print("For faster parsing, you may want to install libYAML for PyYAML")
-    from yaml import Loader
-
+import optrecord
 import functools
-from collections import defaultdict
-import itertools
 from multiprocessing import Pool
 from multiprocessing import Lock, cpu_count
 import errno
 import argparse
 import os.path
 import re
-import subprocess
 import shutil
 from pygments import highlight
 from pygments.lexers.c_cpp import CppLexer
 from pygments.formatters import HtmlFormatter
 import cgi

-p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-p_lock = Lock()
-
-
-def demangle(name):
-    with p_lock:
-        p.stdin.write(name + '\n')
-        return p.stdout.readline().rstrip()
-
 # This allows passing the global context to the child processes.
 class Context:
-    def __init__(self, max_hotness = 0, caller_loc = dict()):
-       self.max_hotness = max_hotness
-
+    def __init__(self, caller_loc = dict()):
       # Map function names to their source location for function where inlining happened
       self.caller_loc = caller_loc

-    def should_display_hotness(self):
-        # If max_hotness is 0 at the end, we assume hotness information is
-        # missing and no relative hotness information is displayed
-        return self.max_hotness != 0
-
 context = Context()

-class Remark(yaml.YAMLObject):
-    # Work-around for http://pyyaml.org/ticket/154.
-    yaml_loader = Loader
-
-    def __getattr__(self, name):
-        # If hotness is missing, assume 0
-        if name == 'Hotness':
-            return 0
-        raise AttributeError
-
-    @property
-    def File(self):
-        return self.DebugLoc['File']
-
-    @property
-    def Line(self):
-        return int(self.DebugLoc['Line'])
-
-    @property
-    def Column(self):
-        return self.DebugLoc['Column']
-
-    @property
-    def DebugLocString(self):
-        return "{}:{}:{}".format(self.File, self.Line, self.Column)
-
-    @property
-    def DemangledFunctionName(self):
-        return demangle(self.Function)
-
-    @classmethod
-    def make_link(cls, File, Line):
-        return "{}#L{}".format(SourceFileRenderer.html_file_name(File), Line)
-
-    @property
-    def Link(self):
-        return Remark.make_link(self.File, self.Line)
-
-    def getArgString(self, mapping):
-        mapping = mapping.copy()
-        dl = mapping.get('DebugLoc')
-        if dl:
-            del mapping['DebugLoc']
-
-        assert(len(mapping) == 1)
-        (key, value) = mapping.items()[0]
-
-        if key == 'Caller' or key == 'Callee':
-            value = cgi.escape(demangle(value))
-
-        if dl and key != 'Caller':
-            return "<a href={}>{}</a>".format(
-                Remark.make_link(dl['File'], dl['Line']), value)
-        else:
-            return value
-
-    @property
-    def message(self):
-        # Args is a list of mappings (dictionaries)
-        values = [self.getArgString(mapping) for mapping in self.Args]
-        return "".join(values)
-
-    @property
-    def RelativeHotness(self):
-        if context.should_display_hotness():
-            return "{}%".format(int(round(self.Hotness * 100 / context.max_hotness)))
-        else:
-            return ''
-
-    @property
-    def key(self):
-        return (self.__class__, self.Pass, self.Name, self.File, self.Line, self.Column, self.Function)
-
-
-class Analysis(Remark):
-    yaml_tag = '!Analysis'
-
-    @property
-    def color(self):
-        return "white"
-
-
-class AnalysisFPCommute(Analysis):
-    yaml_tag = '!AnalysisFPCommute'
-
-
-class AnalysisAliasing(Analysis):
-    yaml_tag = '!AnalysisAliasing'
-
-
-class Passed(Remark):
-    yaml_tag = '!Passed'
-
-    @property
-    def color(self):
-        return "green"
-
-
-class Missed(Remark):
-    yaml_tag = '!Missed'
-
-    @property
-    def color(self):
-        return "red"
-
-
 class SourceFileRenderer:
    def __init__(self, source_dir, output_dir, filename):
        existing_filename = None
@ -171,7 +39,7 @@ class SourceFileRenderer:
            if os.path.exists(fn):
                existing_filename = fn

-        self.stream = open(os.path.join(output_dir, SourceFileRenderer.html_file_name(filename)), 'w')
+        self.stream = open(os.path.join(output_dir, optrecord.html_file_name(filename)), 'w')
        if existing_filename:
            self.source_stream = open(existing_filename)
        else:
@ -208,10 +76,9 @@ class SourceFileRenderer:

    def render_inline_remarks(self, r, line):
        inlining_context = r.DemangledFunctionName
-        print
        dl = context.caller_loc.get(r.Function)
        if dl:
-            link = Remark.make_link(dl['File'], dl['Line'] - 2)
+            link = optrecord.make_link(dl['File'], dl['Line'] - 2)
            inlining_context = "<a href={link}>{r.DemangledFunctionName}</a>".format(**locals())

        # Column is the number of characters *including* tabs, keep those and
@ -254,10 +121,6 @@ class SourceFileRenderer:
 </body>
 </html>''', file=self.stream)

-    @classmethod
-    def html_file_name(cls, filename):
-        return filename.replace('/', '_') + ".html"
-

 class IndexRenderer:
    def __init__(self, output_dir):
@ -296,27 +159,6 @@ class IndexRenderer:
 </html>''', file=self.stream)


-def get_remarks(input_file):
-    max_hotness = 0
-    all_remarks = dict()
-    file_remarks = defaultdict(functools.partial(defaultdict, list))
-
-    with open(input_file) as f:
-        docs = yaml.load_all(f, Loader=Loader)
-
-        for remark in docs:
-            # Avoid remarks withoug debug location or if they are duplicated
-            if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks:
-                continue
-            all_remarks[remark.key] = remark
-
-            file_remarks[remark.File][remark.Line].append(remark)
-
-            max_hotness = max(max_hotness, remark.Hotness)
-
-    return max_hotness, all_remarks, file_remarks
-
-
 def _render_file(source_dir, output_dir, ctx, entry):
    global context
    context = ctx
@ -324,39 +166,18 @@ def _render_file(source_dir, output_dir, ctx, entry):
    SourceFileRenderer(source_dir, output_dir, filename).render(remarks)


-def gather_results(pmap, filenames):
-    remarks = pmap(get_remarks, filenames)
-
-    def merge_file_remarks(file_remarks_job, all_remarks, merged):
-        for filename, d in file_remarks_job.iteritems():
-            for line, remarks in d.iteritems():
-                for remark in remarks:
-                    if remark.key not in all_remarks:
-                        merged[filename][line].append(remark)
-
-    all_remarks = dict()
-    file_remarks = defaultdict(functools.partial(defaultdict, list))
-    for _, all_remarks_job, file_remarks_job in remarks:
-        merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
-        all_remarks.update(all_remarks_job)
-
-    context.max_hotness = max(entry[0] for entry in remarks)
-
-    return all_remarks, file_remarks
-
-
 def map_remarks(all_remarks):
    # Set up a map between function names and their source location for
    # function where inlining happened
    for remark in all_remarks.itervalues():
-        if isinstance(remark, Passed) and remark.Pass == "inline" and remark.Name == "Inlined":
+        if isinstance(remark, optrecord.Passed) and remark.Pass == "inline" and remark.Name == "Inlined":
            for arg in remark.Args:
                caller = arg.get('Caller')
                if caller:
                    context.caller_loc[caller] = arg['DebugLoc']


-def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir):
+def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, should_display_hotness):
    try:
        os.makedirs(output_dir)
    except OSError as e:
@ -368,7 +189,7 @@ def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir):
    _render_file_bound = functools.partial(_render_file, source_dir, output_dir, context)
    pmap(_render_file_bound, file_remarks.items())

-    if context.should_display_hotness():
+    if should_display_hotness:
        sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.Hotness, r.__dict__), reverse=True)
    else:
        sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.File, r.Line, r.Column, r.__dict__))
@ -405,8 +226,8 @@ if __name__ == '__main__':
        pool = Pool(processes=args.jobs)
        pmap = pool.map

-    all_remarks, file_remarks = gather_results(pmap, args.yaml_files)
+    all_remarks, file_remarks, should_display_hotness = optrecord.gather_results(pmap, args.yaml_files)

    map_remarks(all_remarks)

-    generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir)
+    generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir, should_display_hotness)
--- a/llvm/utils/opt-viewer/optrecord.py
+++ b/llvm/utils/opt-viewer/optrecord.py
@ -0,0 +1,181 @@
+#!/usr/bin/env python2.7
+
+from __future__ import print_function
+
+import yaml
+# Try to use the C parser.
+try:
+    from yaml import CLoader as Loader
+except ImportError:
+    print("For faster parsing, you may want to install libYAML for PyYAML")
+    from yaml import Loader
+
+import functools
+from collections import defaultdict
+import itertools
+from multiprocessing import Pool
+from multiprocessing import Lock, cpu_count
+import cgi
+import subprocess
+
+import traceback
+
+p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+p_lock = Lock()
+
+
+def demangle(name):
+    with p_lock:
+        p.stdin.write(name + '\n')
+        return p.stdout.readline().rstrip()
+
+def html_file_name(filename):
+    return filename.replace('/', '_') + ".html"
+
+def make_link(File, Line):
+    return "{}#L{}".format(html_file_name(File), Line)
+
+
+class Remark(yaml.YAMLObject):
+    # Work-around for http://pyyaml.org/ticket/154.
+    yaml_loader = Loader
+
+    def __getattr__(self, name):
+        # If hotness is missing, assume 0
+        if name == 'Hotness':
+            return 0
+        raise AttributeError(name)
+
+    @property
+    def File(self):
+        return self.DebugLoc['File']
+
+    @property
+    def Line(self):
+        return int(self.DebugLoc['Line'])
+
+    @property
+    def Column(self):
+        return self.DebugLoc['Column']
+
+    @property
+    def DebugLocString(self):
+        return "{}:{}:{}".format(self.File, self.Line, self.Column)
+
+    @property
+    def DemangledFunctionName(self):
+        return demangle(self.Function)
+
+    @property
+    def Link(self):
+        return make_link(self.File, self.Line)
+
+    def getArgString(self, mapping):
+        mapping = mapping.copy()
+        dl = mapping.get('DebugLoc')
+        if dl:
+            del mapping['DebugLoc']
+
+        assert(len(mapping) == 1)
+        (key, value) = mapping.items()[0]
+
+        if key == 'Caller' or key == 'Callee':
+            value = cgi.escape(demangle(value))
+
+        if dl and key != 'Caller':
+            return "<a href={}>{}</a>".format(
+                make_link(dl['File'], dl['Line']), value)
+        else:
+            return value
+
+    @property
+    def message(self):
+        # Args is a list of mappings (dictionaries)
+        values = [self.getArgString(mapping) for mapping in self.Args]
+        return "".join(values)
+
+    @property
+    def RelativeHotness(self):
+        if self.max_hotness:
+            return "{}%".format(int(round(self.Hotness * 100 / self.max_hotness)))
+        else:
+            return ''
+
+    @property
+    def key(self):
+        return (self.__class__, self.Pass, self.Name, self.File, self.Line, self.Column, self.Function)
+
+
+class Analysis(Remark):
+    """Remark type for YAML documents tagged '!Analysis'."""
+    yaml_tag = '!Analysis'
+
+    @property
+    def color(self):
+        """Color name associated with analysis remarks."""
+        return "white"
+
+
+class AnalysisFPCommute(Analysis):
+    """Analysis remark type for YAML documents tagged '!AnalysisFPCommute'."""
+    yaml_tag = '!AnalysisFPCommute'
+
+
+class AnalysisAliasing(Analysis):
+    """Analysis remark type for YAML documents tagged '!AnalysisAliasing'."""
+    yaml_tag = '!AnalysisAliasing'
+
+
+class Passed(Remark):
+    """Remark type for YAML documents tagged '!Passed'."""
+    yaml_tag = '!Passed'
+
+    @property
+    def color(self):
+        """Color name associated with passed remarks."""
+        return "green"
+
+
+class Missed(Remark):
+    """Remark type for YAML documents tagged '!Missed'."""
+    yaml_tag = '!Missed'
+
+    @property
+    def color(self):
+        """Color name associated with missed remarks."""
+        return "red"
+
+
+def get_remarks(input_file):
+    max_hotness = 0
+    all_remarks = dict()
+    file_remarks = defaultdict(functools.partial(defaultdict, list))
+
+    with open(input_file) as f:
+        docs = yaml.load_all(f, Loader=Loader)
+        for remark in docs:
+            # Avoid remarks withoug debug location or if they are duplicated
+            if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks:
+                continue
+            all_remarks[remark.key] = remark
+
+            file_remarks[remark.File][remark.Line].append(remark)
+
+            max_hotness = max(max_hotness, remark.Hotness)
+
+    return max_hotness, all_remarks, file_remarks
+
+
+def gather_results(pmap, filenames):
+    remarks = pmap(get_remarks, filenames)
+    max_hotness = max(entry[0] for entry in remarks)
+
+    def merge_file_remarks(file_remarks_job, all_remarks, merged):
+        for filename, d in file_remarks_job.iteritems():
+            for line, remarks in d.iteritems():
+                for remark in remarks:
+                    # Bring max_hotness into the remarks so that
+                    # RelativeHotness does not depend on an external global.
+                    remark.max_hotness = max_hotness
+                    if remark.key not in all_remarks:
+                        merged[filename][line].append(remark)
+
+    all_remarks = dict()
+    file_remarks = defaultdict(functools.partial(defaultdict, list))
+    for _, all_remarks_job, file_remarks_job in remarks:
+        merge_file_remarks(file_remarks_job, all_remarks, file_remarks)
+        all_remarks.update(all_remarks_job)
+
+    return all_remarks, file_remarks, max_hotness != 0