collect and report on code probe information

Markus Pilman 2022-08-20 20:01:09 -06:00
parent 90863d07ad
commit 4808307354
4 changed files with 158 additions and 24 deletions


@@ -119,8 +119,11 @@ class Config:
         self.print_coverage_args = {'action': 'store_true'}
         self.binary = Path('bin') / ('fdbserver.exe' if os.name == 'nt' else 'fdbserver')
         self.binary_args = {'help': 'Path to executable'}
+        self.hit_per_runs_ratio: int = 20000
+        self.hit_per_runs_ratio_args = {'help': 'How many test runs should hit each code probe at least once'}
         self.output_format: str = 'xml'
-        self.output_format_args = {'short_name': 'O', 'choices': ['json', 'xml']}
+        self.output_format_args = {'short_name': 'O', 'choices': ['json', 'xml'],
+                                   'help': 'What format TestHarness should produce'}
         self.include_test_files: str = r'.*'
         self.include_test_files_args = {'help': 'Only consider test files whose path match against the given regex'}
         self.exclude_test_files: str = r'.^'
@@ -129,6 +132,12 @@ class Config:
         self.include_test_names_args = {'help': 'Only consider tests whose names match against the given regex'}
         self.exclude_test_names: str = r'.^'
         self.exclude_test_names_args = {'help': 'Don\'t consider tests whose names match against the given regex'}
+        self.details: bool = False
+        self.details_args = {'help': 'Print detailed results', 'short_name': 'c'}
+        self.cov_include_files: str = r'.*'
+        self.cov_include_files_args = {'help': 'Only consider coverage traces that originated in files matching regex'}
+        self.cov_exclude_files: str = r'.^'
+        self.cov_exclude_files_args = {'help': 'Ignore coverage traces that originated in files matching regex'}
         self.max_stderr_bytes: int = 1000
         self.write_stats: bool = True
         self.read_stats: bool = True
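Aside: the paired name / name_args attributes above are the existing Config convention for driving argparse via config.build_arguments (used in results.py below). The exact attribute-to-flag mapping is not part of this diff, so the flag spellings in this sketch are assumptions; it only illustrates the kind of parser entry the new output_format_args dict feeds:

    import argparse

    # Assumed mapping: attribute 'output_format' plus its '<name>_args' dict
    # surface as '--output-format'; 'short_name': 'O' supplies the short flag.
    parser = argparse.ArgumentParser('TestHarness')
    parser.add_argument('-O', '--output-format', default='xml',
                        choices=['json', 'xml'],
                        help='What format TestHarness should produce')
    args = parser.parse_args(['-O', 'json'])
    print(args.output_format)  # -> json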


@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import OrderedDict, Tuple
+from typing import OrderedDict, Tuple, List
 
 import collections
 import fdb
@@ -8,7 +8,7 @@ import struct
 
 from test_harness.run import StatFetcher, TestDescription
 from test_harness.config import config
-from test_harness.summarize import SummaryTree
+from test_harness.summarize import SummaryTree, Coverage
 
 
 fdb.api_version(630)
@@ -20,6 +20,59 @@ def str_to_tuple(s: str | None):
     return tuple(res)
 
 
+fdb_db = None
+
+
+def open_db(cluster_file: str | None):
+    global fdb_db
+    if fdb_db is None:
+        fdb_db = fdb.open(cluster_file)
+    return fdb_db
+
+
+def chunkify(iterable, sz: int):
+    count = 0
+    res = []
+    for item in iterable:
+        res.append(item)
+        count += 1
+        if count >= sz:
+            yield res
+            res = []
+            count = 0
+    if len(res) > 0:
+        yield res
+
+
+@fdb.transactional
+def write_coverage_chunk(tr, path: Tuple[str, ...], coverage: List[Tuple[Coverage, bool]]):
+    cov_dir = fdb.directory.create_or_open(tr, path)
+    for cov, covered in coverage:
+        tr.add(cov_dir.pack((cov.file, cov.line, cov.comment)), struct.pack('<I', 1 if covered else 0))
+
+
+def write_coverage(cluster_file: str | None, cov_path: Tuple[str, ...], coverage: OrderedDict[Coverage, bool]):
+    db = open_db(cluster_file)
+    assert config.joshua_dir is not None
+    for chunk in chunkify(coverage.items(), 100):
+        write_coverage_chunk(db, cov_path, chunk)
+
+
+@fdb.transactional
+def _read_coverage(tr, cov_path: Tuple[str, ...]) -> OrderedDict[Coverage, int]:
+    res = collections.OrderedDict()
+    cov_dir = fdb.directory.create_or_open(tr, cov_path)
+    for k, v in tr[cov_dir.range()]:
+        file, line, comment = cov_dir.unpack(k)
+        count = struct.unpack('<I', v)[0]
+        res[Coverage(file, line, comment)] = count
+    return res
+
+
+def read_coverage(cluster_file: str | None, cov_path: Tuple[str, ...]) -> OrderedDict[Coverage, int]:
+    db = open_db(cluster_file)
+    return _read_coverage(db, cov_path)
+
+
 class TestStatistics:
     def __init__(self, runtime: int, run_count: int):
         self.runtime: int = runtime
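write_coverage batches probe results into transactions of 100 entries each, presumably to stay well below FoundationDB's transaction size and time limits, and tr.add is FDB's atomic ADD mutation, so concurrent ensembles can bump hit counters without transaction conflicts. chunkify itself has no fdb dependency; a quick standalone check of its behavior with made-up input (a compact variant of the helper above):

    def chunkify(iterable, sz: int):
        # Yield lists of at most sz items from iterable.
        res = []
        for item in iterable:
            res.append(item)
            if len(res) >= sz:
                yield res
                res = []
        if res:
            yield res

    # 7 made-up items in chunks of 3 -> [[0, 1, 2], [3, 4, 5], [6]]
    print(list(chunkify(range(7), 3)))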
@@ -28,12 +81,12 @@ class TestStatistics:
 
 class Statistics:
     def __init__(self, cluster_file: str | None, joshua_dir: Tuple[str, ...]):
-        self.db: fdb.Database = fdb.open(cluster_file)
-        self.stats_dir: fdb.DirectorySubspace = self.open_stats_dir(self.db, joshua_dir)
+        self.db = open_db(cluster_file)
+        self.stats_dir = self.open_stats_dir(self.db, joshua_dir)
         self.stats: OrderedDict[str, TestStatistics] = self.read_stats_from_db(self.db)
 
     @fdb.transactional
-    def open_stats_dir(self, tr, app_dir: Tuple[str]) -> fdb.DirectorySubspace:
+    def open_stats_dir(self, tr, app_dir: Tuple[str]):
         stats_dir = app_dir + ('runtime_stats',)
         return fdb.directory.create_or_open(tr, stats_dir)
 
@@ -47,11 +100,12 @@ class Statistics:
         return result
 
     @fdb.transactional
-    def _write_runtime(self, tr: fdb.Transaction, test_name: str, time: int) -> None:
+    def _write_runtime(self, tr, test_name: str, time: int) -> None:
         key = self.stats_dir.pack((test_name,))
         tr.add(key, struct.pack('<II', time, 1))
 
     def write_runtime(self, test_name: str, time: int) -> None:
         assert self.db is not None
         self._write_runtime(self.db, test_name, time)
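_write_runtime leans on the same ADD mutation in a neat way: FDB treats the value as one little-endian integer, so adding struct.pack('<II', time, 1) accumulates total runtime in the low 32 bits and a run counter in the high 32 bits with a single conflict-free write (until the runtime sum overflows 32 bits). A standalone sketch of that arithmetic, no database needed and with made-up runtimes:

    import struct

    v = 0
    for time in (120, 80):  # two simulated write_runtime calls
        # int.from_bytes(...) mirrors what FDB's little-endian ADD does.
        v += int.from_bytes(struct.pack('<II', time, 1), 'little')
    total_runtime, run_count = struct.unpack('<II', v.to_bytes(8, 'little'))
    print(total_runtime, run_count)  # -> 200 2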


@@ -1,39 +1,99 @@
 from __future__ import annotations
 
+import re
 import sys
-from typing import List, Tuple
+from typing import List, Tuple, OrderedDict
 
-from test_harness.summarize import SummaryTree
+from test_harness.summarize import SummaryTree, Coverage
 from test_harness.config import config
 import argparse
 import test_harness.fdb
 
 
+class GlobalStatistics:
+    def __init__(self):
+        self.total_probes_hit: int = 0
+        self.total_cpu_time: int = 0
+        self.total_test_runs: int = 0
+        self.total_missed_probes: int = 0
+
+
 class EnsembleResults:
     def __init__(self, cluster_file: str | None, ensemble_id: str):
+        self.global_statistics = GlobalStatistics()
         self.fdb_path = ('joshua', 'ensembles', 'results', 'application', ensemble_id)
+        self.coverage_path = self.fdb_path + ('coverage',)
         self.statistics = test_harness.fdb.Statistics(cluster_file, self.fdb_path)
-        self.out = SummaryTree('EnsembleResults')
-        stats: List[Tuple[str, int, int]] = []
+        coverage_dict: OrderedDict[Coverage, int] = test_harness.fdb.read_coverage(cluster_file, self.coverage_path)
+        self.coverage: List[Tuple[Coverage, int]] = []
+        self.min_coverage_hit: int | None = None
+        self.ratio = self.global_statistics.total_test_runs / config.hit_per_runs_ratio
+        for cov, count in coverage_dict.items():
+            if re.search(config.cov_include_files, cov.file) is None:
+                continue
+            if re.search(config.cov_exclude_files, cov.file) is not None:
+                continue
+            self.global_statistics.total_probes_hit += count
+            self.coverage.append((cov, count))
+            if count <= self.ratio:
+                self.global_statistics.total_missed_probes += 1
+            if self.min_coverage_hit is None or self.min_coverage_hit > count:
+                self.min_coverage_hit = count
+        self.coverage.sort(key=lambda x: (x[1], x[0].file, x[0].line))
+        self.stats: List[Tuple[str, int, int]] = []
         for k, v in self.statistics.stats.items():
-            stats.append((k, v.runtime, v.run_count))
-        stats.sort(key=lambda x: x[1], reverse=True)
-        for k, runtime, run_count in stats:
-            child = SummaryTree('Test')
-            child.attributes['Name'] = k
-            child.attributes['Runtime'] = str(runtime)
-            child.attributes['RunCount'] = str(run_count)
-            self.out.append(child)
+            self.global_statistics.total_test_runs += v.run_count
+            self.global_statistics.total_cpu_time += v.runtime
+            self.stats.append((k, v.runtime, v.run_count))
+        self.stats.sort(key=lambda x: x[1], reverse=True)
+        self.coverage_ok: bool = self.min_coverage_hit is not None
+        if self.coverage_ok:
+            self.coverage_ok = self.min_coverage_hit > self.ratio
+
+    def dump(self):
+        errors = 0
+        out = SummaryTree('EnsembleResults')
+        out.attributes['TotalRunTime'] = str(self.global_statistics.total_cpu_time)
+        out.attributes['TotalTestRuns'] = str(self.global_statistics.total_test_runs)
+        out.attributes['TotalProbesHit'] = str(self.global_statistics.total_probes_hit)
+        out.attributes['MinProbeHit'] = str(self.min_coverage_hit)
+        out.attributes['TotalProbes'] = str(len(self.coverage))
+        out.attributes['MissedProbes'] = str(self.global_statistics.total_missed_probes)
+        for cov, count in self.coverage:
+            severity = 10 if count > self.ratio else 40
+            if severity == 40:
+                errors += 1
+            if (severity == 40 and errors <= config.max_errors) or config.details:
+                child = SummaryTree('CodeProbe')
+                child.attributes['Severity'] = str(severity)
+                child.attributes['File'] = cov.file
+                child.attributes['Line'] = str(cov.line)
+                child.attributes['Comment'] = cov.comment
+                child.attributes['HitCount'] = str(count)
+                out.append(child)
+        if config.details:
+            for k, runtime, run_count in self.stats:
+                child = SummaryTree('Test')
+                child.attributes['Name'] = k
+                child.attributes['Runtime'] = str(runtime)
+                child.attributes['RunCount'] = str(run_count)
+                out.append(child)
+        if errors > 0:
+            out.attributes['Errors'] = str(errors)
+        out.dump(sys.stdout)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser('TestHarness Results', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument('-C', '--cluster-file', required=False, help='Path to cluster file')
-    parser.add_argument('-o', '--output-format', default='json', choices=['json', 'xml'], help='Format of the output')
+    config.build_arguments(parser)
     parser.add_argument('ensemble_id', type=str, help='The ensemble to fetch the result for')
     args = parser.parse_args()
+    config.extract_args(args)
     config.pretty_print = True
     config.output_format = args.output_format
-    results = EnsembleResults(args.cluster_file, args.ensemble_id)
-    results.out.dump(sys.stdout)
+    results = EnsembleResults(config.cluster_file, args.ensemble_id)
+    results.dump()
+    exit(0 if results.coverage_ok else 1)
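The new exit status makes an ensemble's coverage gate consumable by CI: coverage_ok holds only when the least-hit probe that survives the include/exclude filters was hit more than total_test_runs / hit_per_runs_ratio times. A worked example of that rule with made-up numbers (the names mirror the fields above):

    # Hypothetical ensemble: 100000 runs with the default ratio knob of 20000,
    # so every probe must be hit more than 5 times to pass.
    total_test_runs = 100_000
    ratio = total_test_runs / 20_000                    # -> 5.0
    probe_hits = {'recovery path': 12, 'slow path': 3}  # made-up probes
    min_coverage_hit = min(probe_hits.values())         # -> 3
    coverage_ok = min_coverage_hit > ratio              # -> False
    print(0 if coverage_ok else 1)                      # -> 1, i.e. CI failure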


@@ -3,6 +3,7 @@ from __future__ import annotations
 import collections
 import inspect
 import json
+import os
 import re
 import sys
 import traceback
@@ -44,6 +45,11 @@ class SummaryTree:
         # minidom doesn't support omitting the xml declaration which is a problem for joshua
         # However, our xml is very simple and therefore serializing manually is easy enough
         attrs = []
+        print_width = 120
+        try:
+            print_width, _ = os.get_terminal_size()
+        except OSError:
+            pass
         for k, v in self.attributes.items():
             attrs.append('{}={}'.format(k, xml.sax.saxutils.quoteattr(v)))
         elem = '{}<{}{}'.format(prefix, self.name, ('' if len(attrs) == 0 else ' '))
@@ -52,7 +58,7 @@ class SummaryTree:
         curr_line_len = len(elem)
         for i in range(len(attrs)):
             attr_len = len(attrs[i])
-            if i == 0 or attr_len + curr_line_len + 1 <= 120:
+            if i == 0 or attr_len + curr_line_len + 1 <= print_width:
                 if i != 0:
                     out.write(' ')
                 out.write(attrs[i])
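os.get_terminal_size raises OSError whenever stdout is not a terminal (output piped into joshua or redirected to a file), which is why the code keeps 120 as a fallback wrap column; only interactive runs pick up the real width. The same probe can be exercised standalone:

    import os

    try:
        width, _ = os.get_terminal_size()
    except OSError:  # not a tty, e.g. piped or redirected output
        width = 120  # the previously hard-coded wrap column
    print(width)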
@@ -381,6 +387,11 @@ class Summary:
         for f in trace_files[0]:
             self.parse_file(f)
         self.done()
+        if config.joshua_dir is not None:
+            import test_harness.fdb
+            test_harness.fdb.write_coverage(config.cluster_file,
+                                            test_harness.fdb.str_to_tuple(config.joshua_dir) + ('coverage',),
+                                            self.coverage)
 
     def ok(self):
         return not self.error
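Summary.coverage, as handed to write_coverage above, is evidently an ordered mapping from Coverage keys (file, line, comment) to a bool recording whether the probe fired during the run. A hedged stand-in for that shape; the real Coverage class lives in test_harness.summarize and may carry more behavior, and the probe entries here are invented:

    import collections
    from typing import NamedTuple

    class Coverage(NamedTuple):
        # Stand-in for test_harness.summarize.Coverage; field names from the diff.
        file: str
        line: int
        comment: str

    coverage = collections.OrderedDict()
    coverage[Coverage('fdbserver/Recovery.actor.cpp', 100, 'master recovered')] = True
    coverage[Coverage('fdbserver/Resolver.actor.cpp', 50, 'resolver overloaded')] = False
    # write_coverage stores each key under the ('coverage',) directory as
    # (file, line, comment) -> packed 1 or 0, batched 100 probes per transaction.
    for cov, covered in coverage.items():
        print(cov.file, cov.line, covered)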