[analyzer][tests] Introduce analyzer benchmarking framework

Summary:
This commit includes the following changes:
  * Benchmark selected projects by analyzing them multiple times
  * Compare two benchmarking results and visualize them on one chart
  * Organize project build logging so that the same code can be
    reused in benchmarks

Differential Revision: https://reviews.llvm.org/D83539
Author: Valeriy Savchenko
Date:   2020-07-10 10:52:25 +03:00
Parent: faa7e306e4
Commit: 5b4f143564
5 changed files with 281 additions and 58 deletions
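At a high level, the new pieces compose as follows. This is a minimal sketch, not part of the patch: it assumes the analyzer testing scripts (ProjectMap.py, SATestBenchmark.py) are importable and that a project map file is already configured; the file names old.csv, new.csv, and comparison.png are illustrative.

from ProjectMap import ProjectMap
from SATestBenchmark import Benchmark, compare

# All projects listed in the project map file.
projects = ProjectMap().projects

# Analyze every enabled project 20 times with the baseline analyzer and store
# per-iteration time and peak memory in old.csv.
Benchmark(projects, iterations=20, output_path="old.csv").run()

# ... rebuild the analyzer with the change under test, then measure again ...
Benchmark(projects, iterations=20, output_path="new.csv").run()

# Plot both result sets side by side on one chart.
compare(old_path="old.csv", new_path="new.csv", plot_file="comparison.png")

The same flow is exposed on the command line through the new benchmark and benchmark compare subcommands added to the driver below.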

clang/utils/analyzer/SATest.py

@@ -34,29 +34,10 @@ def add(parser, args):
def build(parser, args):
import SATestBuild
from ProjectMap import ProjectMap
SATestBuild.VERBOSE = args.verbose
project_map = ProjectMap()
projects = project_map.projects
if args.projects:
projects_arg = args.projects.split(",")
available_projects = [project.name
for project in projects]
# validate that given projects are present in the project map file
for manual_project in projects_arg:
if manual_project not in available_projects:
parser.error("Project '{project}' is not found in "
"the project map file. Available projects are "
"{all}.".format(project=manual_project,
all=available_projects))
projects = [project.with_fields(enabled=project.name in projects_arg)
for project in projects]
projects = get_projects(parser, args.projects)
tester = SATestBuild.RegressionTester(args.jobs,
projects,
args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
SATestUpdateDiffs.update_reference_results(project)
def benchmark(parser, args):
from SATestBenchmark import Benchmark
projects = get_projects(parser, args.projects)
benchmark = Benchmark(projects, args.iterations, args.output)
benchmark.run()
def benchmark_compare(parser, args):
import SATestBenchmark
SATestBenchmark.compare(args.old, args.new, args.output)
def get_projects(parser, projects_str):
from ProjectMap import ProjectMap
project_map = ProjectMap()
projects = project_map.projects
if projects_str:
projects_arg = projects_str.split(",")
available_projects = [project.name
for project in projects]
# validate that given projects are present in the project map file
for manual_project in projects_arg:
if manual_project not in available_projects:
parser.error("Project '{project}' is not found in "
"the project map file. Available projects are "
"{all}.".format(project=manual_project,
all=available_projects))
projects = [project.with_fields(enabled=project.name in projects_arg)
for project in projects]
return projects
def docker(parser, args):
if len(args.rest) > 0:
if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
"to the docker's entrypoint.")
dock_parser.set_defaults(func=docker)
# benchmark subcommand
bench_parser = subparsers.add_parser(
"benchmark",
help="Run benchmarks by building a set of projects multiple times.")
bench_parser.add_argument("-i", "--iterations", action="store",
type=int, default=20,
help="Number of iterations for building each "
"project.")
bench_parser.add_argument("-o", "--output", action="store",
default="benchmark.csv",
help="Output csv file for the benchmark results")
bench_parser.add_argument("--projects", action="store", default="",
help="Comma-separated list of projects to test")
bench_parser.set_defaults(func=benchmark)
bench_subparsers = bench_parser.add_subparsers()
bench_compare_parser = bench_subparsers.add_parser(
"compare",
help="Compare benchmark runs.")
bench_compare_parser.add_argument("--old", action="store", required=True,
help="Benchmark reference results to "
"compare agains.")
bench_compare_parser.add_argument("--new", action="store", required=True,
help="New benchmark results to check.")
bench_compare_parser.add_argument("-o", "--output",
action="store", required=True,
help="Output file for plots.")
bench_compare_parser.set_defaults(func=benchmark_compare)
args = parser.parse_args()
args.func(parser, args)
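For readers less familiar with argparse, the driver's dispatch relies on the standard set_defaults(func=...) idiom seen above: each subparser stores its handler, and main() simply calls args.func(parser, args). A stripped-down, self-contained sketch of that idiom (everything here is illustrative, not code from the patch):

import argparse

def benchmark(parser, args):
    # Stand-in handler; the real one builds a Benchmark object and runs it.
    print(f"would benchmark with {args.iterations} iterations")

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()

bench_parser = subparsers.add_parser("benchmark")
bench_parser.add_argument("-i", "--iterations", type=int, default=20)
bench_parser.set_defaults(func=benchmark)

args = parser.parse_args(["benchmark", "-i", "5"])
args.func(parser, args)  # dispatches to whichever handler the subcommand set

Nesting another add_subparsers() under bench_parser is what lets "benchmark compare" override func with benchmark_compare in the patch.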

clang/utils/analyzer/SATestBenchmark.py (new file)

@@ -0,0 +1,158 @@
"""
Static Analyzer qualification infrastructure.
This source file contains all the functionality related to benchmarking
the analyzer on a set of projects. Right now, this includes measuring
execution time and peak memory usage. Benchmark runs analysis on every
project multiple times to get a better picture of the distribution
of measured values.
Additionally, this file includes a comparison routine for two benchmarking
results that plots the result together on one chart.
"""
import SATestUtils as utils
from SATestBuild import ProjectTester, stdout, TestInfo
from ProjectMap import ProjectInfo
import pandas as pd
from typing import List, Tuple
INDEX_COLUMN = "index"
def _save(data: pd.DataFrame, file_path: str):
data.to_csv(file_path, index_label=INDEX_COLUMN)
def _load(file_path: str) -> pd.DataFrame:
return pd.read_csv(file_path, index_col=INDEX_COLUMN)
class Benchmark:
"""
Benchmark class encapsulates one functionality: it runs the analysis
multiple times for the given set of projects and stores the results in the
specified file.
"""
def __init__(self, projects: List[ProjectInfo], iterations: int,
output_path: str):
self.projects = projects
self.iterations = iterations
self.out = output_path
def run(self):
results = [self._benchmark_project(project)
for project in self.projects]
data = pd.concat(results, ignore_index=True)
_save(data, self.out)
def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
if not project.enabled:
stdout(f" \n\n--- Skipping disabled project {project.name}\n")
return
stdout(f" \n\n--- Benchmarking project {project.name}\n")
test_info = TestInfo(project)
tester = ProjectTester(test_info, silent=True)
project_dir = tester.get_project_dir()
output_dir = tester.get_output_dir()
raw_data = []
for i in range(self.iterations):
stdout(f"Iteration #{i + 1}")
time, mem = tester.build(project_dir, output_dir)
raw_data.append({"time": time, "memory": mem,
"iteration": i, "project": project.name})
stdout(f"time: {utils.time_to_str(time)}, "
f"peak memory: {utils.memory_to_str(mem)}")
return pd.DataFrame(raw_data)
def compare(old_path: str, new_path: str, plot_file: str):
"""
Compare two benchmarking results stored as .csv files
and produce a plot in the specified file.
"""
old = _load(old_path)
new = _load(new_path)
old_projects = set(old["project"])
new_projects = set(new["project"])
common_projects = old_projects & new_projects
# Leave only rows for projects common to both dataframes.
old = old[old["project"].isin(common_projects)]
new = new[new["project"].isin(common_projects)]
old, new = _normalize(old, new)
# Seaborn prefers all the data to be in one dataframe.
old["kind"] = "old"
new["kind"] = "new"
data = pd.concat([old, new], ignore_index=True)
# TODO: compare data in old and new dataframes using statistical tests
# to check if they belong to the same distribution
_plot(data, plot_file)
def _normalize(old: pd.DataFrame,
new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
# This creates a dataframe with all numerical data averaged.
means = old.groupby("project").mean()
return _normalize_impl(old, means), _normalize_impl(new, means)
def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
# Right now 'means' has one row corresponding to one project,
# while 'data' has N rows for each project (one for each iteration).
#
# To make this data easier to work with, we duplicate
# 'means' data to match the size of the 'data' dataframe.
#
# All the columns from 'data' will maintain their names, while
# new columns coming from 'means' will have "_mean" suffix.
joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
_normalize_key(joined_data, "time")
_normalize_key(joined_data, "memory")
return joined_data
def _normalize_key(data: pd.DataFrame, key: str):
norm_key = _normalized_name(key)
mean_key = f"{key}_mean"
data[norm_key] = data[key] / data[mean_key]
def _normalized_name(name: str) -> str:
return f"normalized {name}"
def _plot(data: pd.DataFrame, plot_file: str):
import matplotlib
import seaborn as sns
from matplotlib import pyplot as plt
sns.set_style("whitegrid")
# We want to have time and memory charts one above the other.
figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
def _subplot(key: str, ax: matplotlib.axes.Axes):
sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
data=data, palette=sns.color_palette("BrBG", 2), ax=ax)
_subplot("time", ax1)
# No need to have xlabels on both top and bottom charts.
ax1.set_xlabel("")
_subplot("memory", ax2)
# The legend on the top chart is enough.
ax2.get_legend().remove()
figure.savefig(plot_file)
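The merge-with-suffixes trick that _normalize_impl relies on is easiest to see on a toy example. This is a self-contained illustration, not part of the patch, and the numbers are made up:

import pandas as pd

data = pd.DataFrame({"project": ["a", "a", "b", "b"],
                     "iteration": [0, 1, 0, 1],
                     "time": [1.0, 3.0, 10.0, 14.0],
                     "memory": [100, 120, 300, 340]})

# One row per project with every numeric column averaged; "project" becomes the index.
means = data.groupby("project").mean()

# Duplicate the per-project means next to every iteration row; columns coming
# from 'means' get the "_mean" suffix, so "time" and "time_mean" coexist.
joined = data.merge(means, on="project", suffixes=("", "_mean"))
joined["normalized time"] = joined["time"] / joined["time_mean"]
print(joined[["project", "iteration", "time", "time_mean", "normalized time"]])

In compare() both the old and the new run are normalized by the old run's per-project means, so the box plots drawn by _plot stay comparable even when projects differ widely in absolute analysis time.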

clang/utils/analyzer/SATestBuild.py

@@ -87,10 +87,18 @@ class StreamToLogger:
return 0
Logger = logging.getLogger("main")
LOCAL = threading.local()
LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
def init_logger(name: str):
# TODO: use debug levels for VERBOSE messages
logger = logging.getLogger(name)
logger.setLevel(logging.DEBUG)
LOCAL.stdout = StreamToLogger(logger, logging.INFO)
LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
init_logger("main")
def stderr(message: str):
@@ -102,7 +110,6 @@ def stdout(message: str):
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
@@ -298,12 +305,13 @@ class ProjectTester:
"""
A component aggregating testing for one project.
"""
def __init__(self, test_info: TestInfo):
def __init__(self, test_info: TestInfo, silent: bool = False):
self.project = test_info.project
self.override_compiler = test_info.override_compiler
self.extra_analyzer_config = test_info.extra_analyzer_config
self.is_reference_build = test_info.is_reference_build
self.strictness = test_info.strictness
self.silent = silent
def test(self) -> bool:
"""
@@ -312,20 +320,19 @@ class ProjectTester:
to the :param strictness: criteria.
"""
if not self.project.enabled:
stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
self.out(
f" \n\n--- Skipping disabled project {self.project.name}\n")
return True
stdout(f" \n\n--- Building project {self.project.name}\n")
self.out(f" \n\n--- Building project {self.project.name}\n")
start_time = time.time()
project_dir = self.get_project_dir()
if VERBOSE >= 1:
stdout(f" Build directory: {project_dir}.\n")
self.vout(f" Build directory: {project_dir}.\n")
# Set the build results directory.
output_dir = self.get_output_dir()
output_dir = os.path.join(project_dir, output_dir)
self.build(project_dir, output_dir)
check_build(output_dir)
@@ -336,8 +343,8 @@ class ProjectTester:
else:
passed = run_cmp_results(project_dir, self.strictness)
stdout(f"Completed tests for project {self.project.name} "
f"(time: {time.time() - start_time:.2f}).\n")
self.out(f"Completed tests for project {self.project.name} "
f"(time: {time.time() - start_time:.2f}).\n")
return passed
@@ -346,22 +353,23 @@ class ProjectTester:
def get_output_dir(self) -> str:
if self.is_reference_build:
return REF_PREFIX + OUTPUT_DIR_NAME
dirname = REF_PREFIX + OUTPUT_DIR_NAME
else:
return OUTPUT_DIR_NAME
dirname = OUTPUT_DIR_NAME
def build(self, directory: str, output_dir: str):
return os.path.join(self.get_project_dir(), dirname)
def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
build_log_path = get_build_log_path(output_dir)
stdout(f"Log file: {build_log_path}\n")
stdout(f"Output directory: {output_dir}\n")
self.out(f"Log file: {build_log_path}\n")
self.out(f"Output directory: {output_dir}\n")
remove_log_file(output_dir)
# Clean up scan build results.
if os.path.exists(output_dir):
if VERBOSE >= 1:
stdout(f" Removing old results: {output_dir}\n")
self.vout(f" Removing old results: {output_dir}\n")
shutil.rmtree(output_dir)
@@ -374,7 +382,7 @@ class ProjectTester:
self._download_and_patch(directory, build_log_file)
run_cleanup_script(directory, build_log_file)
build_time, memory = self.scan_build(directory, output_dir,
build_log_file)
build_log_file)
else:
build_time, memory = self.analyze_preprocessed(directory,
output_dir)
@@ -384,9 +392,11 @@ class ProjectTester:
normalize_reference_results(directory, output_dir,
self.project.mode)
stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
f"peak memory: {utils.memory_to_str(memory)}). "
f"See the log for more details: {build_log_path}\n")
self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
f"peak memory: {utils.memory_to_str(memory)}). "
f"See the log for more details: {build_log_path}\n")
return build_time, memory
def scan_build(self, directory: str, output_dir: str,
build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@ class ProjectTester:
command_to_run = command_prefix + command
if VERBOSE >= 1:
stdout(f" Executing: {command_to_run}\n")
self.vout(f" Executing: {command_to_run}\n")
time, mem = utils.check_and_measure_call(
command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@ class ProjectTester:
log_path = os.path.join(fail_path, file_name + ".stderr.txt")
with open(log_path, "w+") as log_file:
try:
if VERBOSE >= 1:
stdout(f" Executing: {command}\n")
self.vout(f" Executing: {command}\n")
time, mem = utils.check_and_measure_call(
command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@ class ProjectTester:
f"for the '{self.project.name}' project")
def _download_from_git(self, directory: str, build_log_file: IO):
repo = self.project.origin
cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
check_call(f"git clone --recursive {self.project.origin} {cached_source}",
check_call(f"git clone --recursive {repo} {cached_source}",
cwd=directory, stderr=build_log_file,
stdout=build_log_file, shell=True)
check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@ class ProjectTester:
out=LOCAL.stdout, err=LOCAL.stderr,
verbose=VERBOSE)
@staticmethod
def _apply_patch(directory: str, build_log_file: IO):
def _apply_patch(self, directory: str, build_log_file: IO):
patchfile_path = os.path.join(directory, PATCHFILE_NAME)
patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
if not os.path.exists(patchfile_path):
stdout(" No local patches.\n")
self.out(" No local patches.\n")
return
stdout(" Applying patch.\n")
self.out(" Applying patch.\n")
try:
check_call(f"patch -p1 < '{patchfile_path}'",
cwd=patched_source,
@@ -646,6 +655,14 @@ class ProjectTester:
f"See {build_log_file.name} for details.\n")
sys.exit(1)
def out(self, what: str):
if not self.silent:
stdout(what)
def vout(self, what: str):
if VERBOSE >= 1:
self.out(what)
class TestProjectThread(threading.Thread):
def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@ class TestProjectThread(threading.Thread):
while not self.tasks_queue.empty():
try:
test_info = self.tasks_queue.get()
Logger = logging.getLogger(test_info.project.name)
LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
init_logger(test_info.project.name)
tester = ProjectTester(test_info)
if not tester.test():

clang/utils/analyzer/SATestUpdateDiffs.py

@@ -21,10 +21,10 @@ def update_reference_results(project: ProjectInfo):
project_dir = tester.get_project_dir()
tester.is_reference_build = True
ref_results_path = os.path.join(project_dir, tester.get_output_dir())
ref_results_path = tester.get_output_dir()
tester.is_reference_build = False
created_results_path = os.path.join(project_dir, tester.get_output_dir())
created_results_path = tester.get_output_dir()
if not os.path.exists(created_results_path):
print("New results not found, was SATestBuild.py previously run?",

clang/utils/analyzer/requirements.txt

@@ -1,4 +1,6 @@
graphviz
humanize
matplotlib
pandas
psutil
seaborn