[analyzer][tests] Introduce analyzer benchmarking framework
Summary: This commit includes several changes:
* Benchmark selected projects by analyzing them multiple times
* Compare two benchmarking results and visualize them on one chart
* Organize project build logging, so we can use the same code in benchmarks

Differential Revision: https://reviews.llvm.org/D83539
parent faa7e306e4
commit 5b4f143564
clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@ def add(parser, args):
 def build(parser, args):
     import SATestBuild
-    from ProjectMap import ProjectMap
 
     SATestBuild.VERBOSE = args.verbose
 
-    project_map = ProjectMap()
-    projects = project_map.projects
-
-    if args.projects:
-        projects_arg = args.projects.split(",")
-        available_projects = [project.name
-                              for project in projects]
-
-        # validate that given projects are present in the project map file
-        for manual_project in projects_arg:
-            if manual_project not in available_projects:
-                parser.error("Project '{project}' is not found in "
-                             "the project map file. Available projects are "
-                             "{all}.".format(project=manual_project,
-                                             all=available_projects))
-
-        projects = [project.with_fields(enabled=project.name in projects_arg)
-                    for project in projects]
-
+    projects = get_projects(parser, args.projects)
     tester = SATestBuild.RegressionTester(args.jobs,
                                           projects,
                                           args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
         SATestUpdateDiffs.update_reference_results(project)
 
 
+def benchmark(parser, args):
+    from SATestBenchmark import Benchmark
+
+    projects = get_projects(parser, args.projects)
+    benchmark = Benchmark(projects, args.iterations, args.output)
+    benchmark.run()
+
+
+def benchmark_compare(parser, args):
+    import SATestBenchmark
+    SATestBenchmark.compare(args.old, args.new, args.output)
+
+
+def get_projects(parser, projects_str):
+    from ProjectMap import ProjectMap
+
+    project_map = ProjectMap()
+    projects = project_map.projects
+
+    if projects_str:
+        projects_arg = projects_str.split(",")
+        available_projects = [project.name
+                              for project in projects]
+
+        # validate that given projects are present in the project map file
+        for manual_project in projects_arg:
+            if manual_project not in available_projects:
+                parser.error("Project '{project}' is not found in "
+                             "the project map file. Available projects are "
+                             "{all}.".format(project=manual_project,
+                                             all=available_projects))
+
+        projects = [project.with_fields(enabled=project.name in projects_arg)
+                    for project in projects]
+
+    return projects
+
+
 def docker(parser, args):
     if len(args.rest) > 0:
         if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
                              "to the docker's entrypoint.")
     dock_parser.set_defaults(func=docker)
 
+    # benchmark subcommand
+    bench_parser = subparsers.add_parser(
+        "benchmark",
+        help="Run benchmarks by building a set of projects multiple times.")
+
+    bench_parser.add_argument("-i", "--iterations", action="store",
+                              type=int, default=20,
+                              help="Number of iterations for building each "
+                                   "project.")
+    bench_parser.add_argument("-o", "--output", action="store",
+                              default="benchmark.csv",
+                              help="Output csv file for the benchmark results")
+    bench_parser.add_argument("--projects", action="store", default="",
+                              help="Comma-separated list of projects to test")
+    bench_parser.set_defaults(func=benchmark)
+
+    bench_subparsers = bench_parser.add_subparsers()
+    bench_compare_parser = bench_subparsers.add_parser(
+        "compare",
+        help="Compare benchmark runs.")
+    bench_compare_parser.add_argument("--old", action="store", required=True,
+                                      help="Benchmark reference results to "
+                                           "compare against.")
+    bench_compare_parser.add_argument("--new", action="store", required=True,
+                                      help="New benchmark results to check.")
+    bench_compare_parser.add_argument("-o", "--output",
+                                      action="store", required=True,
+                                      help="Output file for plots.")
+    bench_compare_parser.set_defaults(func=benchmark_compare)
+
     args = parser.parse_args()
     args.func(parser, args)
 
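For reference, the new subcommands would be driven roughly like this (the driver script is clang/utils/analyzer/SATest.py; the project names, iteration counts, and file names below are only illustrative):

    ./SATest.py benchmark --projects=tmux -i 20 -o old.csv
    ./SATest.py benchmark compare --old old.csv --new new.csv -o comparison.png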
clang/utils/analyzer/SATestBenchmark.py (new file)
@@ -0,0 +1,158 @@
+"""
+Static Analyzer qualification infrastructure.
+
+This source file contains all the functionality related to benchmarking
+the analyzer on a set of projects. Right now, this includes measuring
+execution time and peak memory usage. Benchmark runs analysis on every
+project multiple times to get a better picture about the distribution
+of measured values.
+
+Additionally, this file includes a comparison routine for two benchmarking
+results that plots the results together on one chart.
+"""
+
+import SATestUtils as utils
+from SATestBuild import ProjectTester, stdout, TestInfo
+from ProjectMap import ProjectInfo
+
+import pandas as pd
+from typing import List, Tuple
+
+
+INDEX_COLUMN = "index"
+
+
+def _save(data: pd.DataFrame, file_path: str):
+    data.to_csv(file_path, index_label=INDEX_COLUMN)
+
+
+def _load(file_path: str) -> pd.DataFrame:
+    return pd.read_csv(file_path, index_col=INDEX_COLUMN)
+
+
+class Benchmark:
+    """
+    Benchmark class encapsulates one functionality: it runs the analysis
+    multiple times for the given set of projects and stores results in the
+    specified file.
+    """
+    def __init__(self, projects: List[ProjectInfo], iterations: int,
+                 output_path: str):
+        self.projects = projects
+        self.iterations = iterations
+        self.out = output_path
+
+    def run(self):
+        results = [self._benchmark_project(project)
+                   for project in self.projects]
+
+        data = pd.concat(results, ignore_index=True)
+        _save(data, self.out)
+
+    def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
+        if not project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {project.name}\n")
+            return
+
+        stdout(f" \n\n--- Benchmarking project {project.name}\n")
+
+        test_info = TestInfo(project)
+        tester = ProjectTester(test_info, silent=True)
+        project_dir = tester.get_project_dir()
+        output_dir = tester.get_output_dir()
+
+        raw_data = []
+
+        for i in range(self.iterations):
+            stdout(f"Iteration #{i + 1}")
+            time, mem = tester.build(project_dir, output_dir)
+            raw_data.append({"time": time, "memory": mem,
+                             "iteration": i, "project": project.name})
+            stdout(f"time: {utils.time_to_str(time)}, "
+                   f"peak memory: {utils.memory_to_str(mem)}")
+
+        return pd.DataFrame(raw_data)
+
+
+def compare(old_path: str, new_path: str, plot_file: str):
+    """
+    Compare two benchmarking results stored as .csv files
+    and produce a plot in the specified file.
+    """
+    old = _load(old_path)
+    new = _load(new_path)
+
+    old_projects = set(old["project"])
+    new_projects = set(new["project"])
+    common_projects = old_projects & new_projects
+
+    # Leave only rows for projects common to both dataframes.
+    old = old[old["project"].isin(common_projects)]
+    new = new[new["project"].isin(common_projects)]
+
+    old, new = _normalize(old, new)
+
+    # Seaborn prefers all the data to be in one dataframe.
+    old["kind"] = "old"
+    new["kind"] = "new"
+    data = pd.concat([old, new], ignore_index=True)
+
+    # TODO: compare data in old and new dataframes using statistical tests
+    #       to check if they belong to the same distribution
+    _plot(data, plot_file)
+
+
+def _normalize(old: pd.DataFrame,
+               new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    # This creates a dataframe with all numerical data averaged.
+    means = old.groupby("project").mean()
+    return _normalize_impl(old, means), _normalize_impl(new, means)
+
+
+def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
+    # Right now 'means' has one row corresponding to one project,
+    # while 'data' has N rows for each project (one for each iteration).
+    #
+    # In order for us to work easier with this data, we duplicate
+    # 'means' data to match the size of the 'data' dataframe.
+    #
+    # All the columns from 'data' will maintain their names, while
+    # new columns coming from 'means' will have "_mean" suffix.
+    joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
+    _normalize_key(joined_data, "time")
+    _normalize_key(joined_data, "memory")
+    return joined_data
+
+
+def _normalize_key(data: pd.DataFrame, key: str):
+    norm_key = _normalized_name(key)
+    mean_key = f"{key}_mean"
+    data[norm_key] = data[key] / data[mean_key]
+
+
+def _normalized_name(name: str) -> str:
+    return f"normalized {name}"
+
+
+def _plot(data: pd.DataFrame, plot_file: str):
+    import matplotlib
+    import seaborn as sns
+    from matplotlib import pyplot as plt
+
+    sns.set_style("whitegrid")
+    # We want to have time and memory charts one above the other.
+    figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
+
+    def _subplot(key: str, ax: matplotlib.axes.Axes):
+        sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
+                    data=data, palette=sns.color_palette("BrBG", 2), ax=ax)
+
+    _subplot("time", ax1)
+    # No need to have xlabels on both top and bottom charts.
+    ax1.set_xlabel("")
+
+    _subplot("memory", ax2)
+    # The legend on the top chart is enough.
+    ax2.get_legend().remove()
+
+    figure.savefig(plot_file)
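To make the benchmarking and comparison logic above concrete, here is an illustrative, self-contained sketch (toy numbers, not from a real run). It mimics the dataframe shape _benchmark_project produces (one row per iteration with time, memory, iteration, and project columns) and applies the same normalization compare() uses: both runs are divided by the old run's per-project means, so the old run clusters around 1.0 and the new run shows its relative change.

    import pandas as pd

    # Toy stand-ins for two benchmark CSVs ("tmux" is just an example name).
    old = pd.DataFrame({"project": ["tmux"] * 2, "iteration": [0, 1],
                        "time": [100.0, 110.0], "memory": [500, 520]})
    new = pd.DataFrame({"project": ["tmux"] * 2, "iteration": [0, 1],
                        "time": [90.0, 95.0], "memory": [505, 515]})

    means = old.groupby("project").mean()  # old run averaged per project
    joined = new.merge(means, on="project", suffixes=("", "_mean"))
    joined["normalized time"] = joined["time"] / joined["time_mean"]
    print(joined["normalized time"])  # ~0.86 and ~0.90: new run is ~10% faster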
clang/utils/analyzer/SATestBuild.py
@@ -87,10 +87,18 @@ class StreamToLogger:
         return 0
 
 
-Logger = logging.getLogger("main")
 LOCAL = threading.local()
-LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+
+
+def init_logger(name: str):
+    # TODO: use debug levels for VERBOSE messages
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+    LOCAL.stdout = StreamToLogger(logger, logging.INFO)
+    LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
+
+
+init_logger("main")
 
 
 def stderr(message: str):
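The module-level logger and stream globals become a helper that each worker thread can re-point at its own logger. A minimal sketch of the intended call pattern (the project name is hypothetical):

    from SATestBuild import init_logger, stdout

    init_logger("tmux")      # route this thread's output to the "tmux" logger
    stdout("Building...\n")  # goes through LOCAL.stdout at INFO level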
@@ -102,7 +110,6 @@ def stdout(message: str):
 
 
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
 
 
@@ -298,12 +305,13 @@ class ProjectTester:
     """
     A component aggregating testing for one project.
     """
-    def __init__(self, test_info: TestInfo):
+    def __init__(self, test_info: TestInfo, silent: bool = False):
         self.project = test_info.project
         self.override_compiler = test_info.override_compiler
         self.extra_analyzer_config = test_info.extra_analyzer_config
         self.is_reference_build = test_info.is_reference_build
         self.strictness = test_info.strictness
+        self.silent = silent
 
     def test(self) -> bool:
         """
@@ -312,20 +320,19 @@
         to the :param strictness: criteria.
         """
         if not self.project.enabled:
-            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            self.out(
+                f" \n\n--- Skipping disabled project {self.project.name}\n")
             return True
 
-        stdout(f" \n\n--- Building project {self.project.name}\n")
+        self.out(f" \n\n--- Building project {self.project.name}\n")
 
         start_time = time.time()
 
         project_dir = self.get_project_dir()
-        if VERBOSE >= 1:
-            stdout(f" Build directory: {project_dir}.\n")
+        self.vout(f" Build directory: {project_dir}.\n")
 
         # Set the build results directory.
         output_dir = self.get_output_dir()
-        output_dir = os.path.join(project_dir, output_dir)
 
         self.build(project_dir, output_dir)
         check_build(output_dir)
@@ -336,8 +343,8 @@ class ProjectTester:
         else:
             passed = run_cmp_results(project_dir, self.strictness)
 
-        stdout(f"Completed tests for project {self.project.name} "
-               f"(time: {time.time() - start_time:.2f}).\n")
+        self.out(f"Completed tests for project {self.project.name} "
+                 f"(time: {time.time() - start_time:.2f}).\n")
 
         return passed
 
@@ -346,22 +353,23 @@ class ProjectTester:
 
     def get_output_dir(self) -> str:
         if self.is_reference_build:
-            return REF_PREFIX + OUTPUT_DIR_NAME
+            dirname = REF_PREFIX + OUTPUT_DIR_NAME
         else:
-            return OUTPUT_DIR_NAME
+            dirname = OUTPUT_DIR_NAME
 
-    def build(self, directory: str, output_dir: str):
+        return os.path.join(self.get_project_dir(), dirname)
+
+    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
         build_log_path = get_build_log_path(output_dir)
 
-        stdout(f"Log file: {build_log_path}\n")
-        stdout(f"Output directory: {output_dir}\n")
+        self.out(f"Log file: {build_log_path}\n")
+        self.out(f"Output directory: {output_dir}\n")
 
         remove_log_file(output_dir)
 
         # Clean up scan build results.
         if os.path.exists(output_dir):
-            if VERBOSE >= 1:
-                stdout(f" Removing old results: {output_dir}\n")
+            self.vout(f" Removing old results: {output_dir}\n")
 
             shutil.rmtree(output_dir)
 
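With get_output_dir() now returning the full results path and build() reporting its cost, the benchmark can reuse the regular build entry point directly. A rough sketch of that call site, assuming `project` is a ProjectInfo loaded from the project map:

    from SATestBuild import ProjectTester, TestInfo

    # 'project' is assumed to be a ProjectInfo instance (not defined here).
    tester = ProjectTester(TestInfo(project), silent=True)
    elapsed_sec, peak_mem = tester.build(tester.get_project_dir(),
                                         tester.get_output_dir())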
@@ -374,7 +382,7 @@ class ProjectTester:
             self._download_and_patch(directory, build_log_file)
             run_cleanup_script(directory, build_log_file)
             build_time, memory = self.scan_build(directory, output_dir,
-                                                 build_log_file)
+                                                 build_log_file)
         else:
             build_time, memory = self.analyze_preprocessed(directory,
                                                            output_dir)
@@ -384,9 +392,11 @@ class ProjectTester:
             normalize_reference_results(directory, output_dir,
                                         self.project.mode)
 
-        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
-               f"peak memory: {utils.memory_to_str(memory)}). "
-               f"See the log for more details: {build_log_path}\n")
+        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
+                 f"peak memory: {utils.memory_to_str(memory)}). "
+                 f"See the log for more details: {build_log_path}\n")
+
+        return build_time, memory
 
     def scan_build(self, directory: str, output_dir: str,
                    build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@ class ProjectTester:
 
         command_to_run = command_prefix + command
 
-        if VERBOSE >= 1:
-            stdout(f" Executing: {command_to_run}\n")
+        self.vout(f" Executing: {command_to_run}\n")
 
         time, mem = utils.check_and_measure_call(
             command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@ class ProjectTester:
         log_path = os.path.join(fail_path, file_name + ".stderr.txt")
         with open(log_path, "w+") as log_file:
             try:
-                if VERBOSE >= 1:
-                    stdout(f" Executing: {command}\n")
+                self.vout(f" Executing: {command}\n")
 
                 time, mem = utils.check_and_measure_call(
                     command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@ class ProjectTester:
                              f"for the '{self.project.name}' project")
 
     def _download_from_git(self, directory: str, build_log_file: IO):
+        repo = self.project.origin
         cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
-        check_call(f"git clone --recursive {self.project.origin} {cached_source}",
+
+        check_call(f"git clone --recursive {repo} {cached_source}",
                    cwd=directory, stderr=build_log_file,
                    stdout=build_log_file, shell=True)
         check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@ class ProjectTester:
                             out=LOCAL.stdout, err=LOCAL.stderr,
                             verbose=VERBOSE)
 
-    @staticmethod
-    def _apply_patch(directory: str, build_log_file: IO):
+    def _apply_patch(self, directory: str, build_log_file: IO):
         patchfile_path = os.path.join(directory, PATCHFILE_NAME)
         patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
 
         if not os.path.exists(patchfile_path):
-            stdout(" No local patches.\n")
+            self.out(" No local patches.\n")
             return
 
-        stdout(" Applying patch.\n")
+        self.out(" Applying patch.\n")
         try:
             check_call(f"patch -p1 < '{patchfile_path}'",
                        cwd=patched_source,
@@ -646,6 +655,14 @@ class ProjectTester:
                    f"See {build_log_file.name} for details.\n")
             sys.exit(1)
 
+    def out(self, what: str):
+        if not self.silent:
+            stdout(what)
+
+    def vout(self, what: str):
+        if VERBOSE >= 1:
+            self.out(what)
+
 
 class TestProjectThread(threading.Thread):
     def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@ class TestProjectThread(threading.Thread):
         while not self.tasks_queue.empty():
             try:
                 test_info = self.tasks_queue.get()
-
-                Logger = logging.getLogger(test_info.project.name)
-                LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-                LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+                init_logger(test_info.project.name)
 
                 tester = ProjectTester(test_info)
                 if not tester.test():
clang/utils/analyzer/SATestUpdateDiffs.py
@@ -21,10 +21,10 @@ def update_reference_results(project: ProjectInfo):
     project_dir = tester.get_project_dir()
 
     tester.is_reference_build = True
-    ref_results_path = os.path.join(project_dir, tester.get_output_dir())
+    ref_results_path = tester.get_output_dir()
 
     tester.is_reference_build = False
-    created_results_path = os.path.join(project_dir, tester.get_output_dir())
+    created_results_path = tester.get_output_dir()
 
     if not os.path.exists(created_results_path):
         print("New results not found, was SATestBuild.py previously run?",
clang/utils/analyzer/requirements.txt
@@ -1,4 +1,6 @@
 graphviz
 humanize
 matplotlib
+pandas
 psutil
+seaborn
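pandas and seaborn are new dependencies of SATestBenchmark.py; installing the updated requirements file pulls them in, for example:

    pip install -r clang/utils/analyzer/requirements.txt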