From 242962aed3a1741f03e698b4dc08325e34dac2ee Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 22 Feb 2023 13:15:14 -0800 Subject: [PATCH 1/6] Provide a tool that allows downloading logs when simulating RocksDB failures A script, rocksdb_logtool.py, is available to upload/download generated XML/JSON log files when test harness 2 detects that the test has failed, and the script detects that the test is using RocksDB. To upload, no action is needed; use the regular ``` joshua start --tarball correctness.tgz ``` The build system will automatically pack the rocksdb_logtool.py into the tarball and test harness 2 will call the script if it thinks the test has failed. To download, simply provide the ensemble id and test uid, e.g. ``` python3 rocksdb_logtool.py download --ensemble-id 20230222-204240-xiaogesu-cb6ea277a898f134 --test-uid ab6fb792-088f-49d6-92d2-43bc4fb81668 ``` Note the test UID can be retrieved from the ``` joshua tail ensemble_id ``` output; it is in the `TestUID` field of the element in the test harness 2 generated XML. For convenience, it is possible to do a ``` python3 rocksdb_logtool.py list --ensemble-id ensemble-id ``` to generate all possible download commands for failed tests. However, the list subcommand will *NOT* verify if the test failure is coming from RocksDB, i.e. other test failures may be included and it is the caller's responsibility to verify. If the test is not RocksDB related, the download would fail as nothing is uploaded. 
--- cmake/AddFdbTest.cmake | 8 +- contrib/TestHarness2/test_harness/run.py | 27 +++- contrib/rocksdb_logtool.py | 198 +++++++++++++++++++++++ 3 files changed, 230 insertions(+), 3 deletions(-) create mode 100755 contrib/rocksdb_logtool.py diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index 8465e76fe5..e8ad2dcfe8 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -270,7 +270,13 @@ function(stage_correctness_package) list(APPEND package_files "${out_file}") endforeach() - list(APPEND package_files ${test_files} ${external_files}) + add_custom_command( + OUTPUT "${STAGE_OUT_DIR}/rocksdb_logtool.py" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/contrib/rocksdb_logtool.py" "${STAGE_OUT_DIR}/rocksdb_logtool.py" + DEPENDS "${CMAKE_SOURCE_DIR}/contrib/rocksdb_logtool.py" + ) + + list(APPEND package_files ${test_files} ${external_files} "${STAGE_OUT_DIR}/rocksdb_logtool.py") if(STAGE_OUT_FILES) set(${STAGE_OUT_FILES} ${package_files} PARENT_SCOPE) endif() diff --git a/contrib/TestHarness2/test_harness/run.py b/contrib/TestHarness2/test_harness/run.py index 33108784ed..aeff64a88c 100644 --- a/contrib/TestHarness2/test_harness/run.py +++ b/contrib/TestHarness2/test_harness/run.py @@ -88,7 +88,8 @@ class TestPicker: if not self.tests: raise Exception( - "No tests to run! Please check if tests are included/excluded incorrectly or old binaries are missing for restarting tests") + "No tests to run! Please check if tests are included/excluded incorrectly or old binaries are missing for restarting tests" + ) def add_time(self, test_file: Path, run_time: int, out: SummaryTree) -> None: # getting the test name is fairly inefficient. 
But since we only have 100s of tests, I won't bother @@ -144,7 +145,11 @@ class TestPicker: candidates: List[Path] = [] dirs = path.parent.parts version_expr = dirs[-1].split("_") - if (version_expr[0] == "from" or version_expr[0] == "to") and len(version_expr) == 4 and version_expr[2] == "until": + if ( + (version_expr[0] == "from" or version_expr[0] == "to") + and len(version_expr) == 4 + and version_expr[2] == "until" + ): max_version = Version.parse(version_expr[3]) min_version = Version.parse(version_expr[1]) for ver, binary in self.old_binaries.items(): @@ -384,6 +389,21 @@ class TestRun: def delete_simdir(self): shutil.rmtree(self.temp_path / Path("simfdb")) + def _run_rocksdb_logtool(self): + """Calls RocksDB LogTool to upload the test logs if 1) test failed 2) test is RocksDB related""" + if not os.path.exists("rocksdb_logtool.py"): + raise RuntimeError("rocksdb_logtool.py missing") + command = [ + "python3", + "rocksdb_logtool.py", + "report", + "--test-uid", + str(self.uid), + "--log-directory", + str(self.temp_path), + ] + subprocess.run(command, check=True) + def run(self): command: List[str] = [] env: Dict[str, str] = os.environ.copy() @@ -473,6 +493,9 @@ class TestRun: self.summary.valgrind_out_file = valgrind_file self.summary.error_out = err_out self.summary.summarize(self.temp_path, " ".join(command)) + + if not self.summary.ok(): + self._run_rocksdb_logtool() return self.summary.ok() diff --git a/contrib/rocksdb_logtool.py b/contrib/rocksdb_logtool.py new file mode 100755 index 0000000000..2f6625014b --- /dev/null +++ b/contrib/rocksdb_logtool.py @@ -0,0 +1,198 @@ +#! /usr/bin/env python3 + +"""rocksdb_logtool.py + +Provides uploading/downloading FoundationDB XML log files to Joshua cluster. + +This is a temporary solution for Joshua tests with RocksDB as the storage server +of FoundationDB. Since RocksDB is *not* deterministic, FoundationDB simulation +failures in Joshua may not be reproducible locally. 
With this script, it is +possible to upload the XML logs on the Joshua side and download them to local +directories. +""" + +import argparse +import logging +import os +import os.path +import re +import pathlib +import subprocess +import tempfile + +import fdb +import joshua.joshua_model as joshua + +from typing import List + +# Defined in SimulatedCluster.actor.cpp:SimulationConfig::setStorageEngine +ROCKSDB_TRACEEVENT_STRING = ["RocksDBNonDeterminism", "ShardedRocksDBNonDeterminism"] + +# e.g. /var/joshua/ensembles/20230221-051349-xiaogesu-c9fc5b230dcd91cf +ENSEMBLE_ID_REGEXP = re.compile(r"ensembles\/(?P[0-9A-Za-z\-_]+)$") + +# e.g. [0-9a-fA-F\-]+)\"") + +logger = logging.getLogger(__name__) + + +def _execute_grep(string: str, paths: List[pathlib.Path]) -> bool: + command = ["grep", "-F", string] + [str(path) for path in paths] + result = subprocess.run(command, stdout=subprocess.DEVNULL) + return result.returncode == 0 + + +def _is_rocksdb_test(log_files: List[pathlib.Path]) -> bool: + for event_str in ROCKSDB_TRACEEVENT_STRING: + if _execute_grep(event_str, log_files): + return True + return False + + +def _extract_ensemble_id(work_directory: str) -> str: + match = ENSEMBLE_ID_REGEXP.search(work_directory) + if not match: + return None + return match.groupdict()["ensemble_id"] + + +def _get_log_subspace(ensemble_id: str, test_uid: str): + ensemble_space = joshua.dir_ensembles + rocksdb_log_space = ensemble_space.create_or_open(joshua.db, "rocksdb_logs") + return rocksdb_log_space[bytes(ensemble_id, "utf-8")][bytes(test_uid, "utf-8")] + + +def _tar_xmls(xml_files: List[pathlib.Path], output_file_name: pathlib.Path): + command = ["tar", "-c", "-f", str(output_file_name), "--xz"] + [ + str(xml_file) for xml_file in xml_files + ] + logger.debug(f"Execute tar: {command}") + subprocess.run(command, check=True, stdout=subprocess.DEVNULL) + + +def _tar_extract(path_to_archive: pathlib.Path): + command = ["tar", "xf", str(path_to_archive)] + subprocess.run(command, 
check=True, stdout=subprocess.DEVNULL) + + +def report_error( + work_directory: str, log_directory: str, ensemble_id: str, test_uid: str +): + log_files = list(pathlib.Path(log_directory).glob("**/trace*.xml")) + if len(log_files) == 0: + logger.debug(f"No XML file found in directory {log_directory}") + log_files += list(pathlib.Path(log_directory).glob("**/trace*.json")) + if len(log_files) == 0: + logger.debug(f"No JSON file found in directory {log_directory}") + return + logger.debug(f"Total {len(log_files)} files found") + + if not _is_rocksdb_test(log_files): + logger.debug("Not a RocksDB test") + return + + ensemble_id = ensemble_id or _extract_ensemble_id(work_directory) + if not ensemble_id: + logger.debug(f"Ensemble ID missing in work directory {work_directory}") + raise RuntimeError(f"Ensemble ID missing in work directory {work_directory}") + logger.debug(f"Ensemble ID: {ensemble_id}") + + with tempfile.NamedTemporaryFile() as archive: + logger.debug(f"Tarfile: {archive.name}") + _tar_xmls(log_files, archive.name) + logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") + subspace = _get_log_subspace(ensemble_id, test_uid) + joshua._insert_blob(joshua.db, subspace, archive, offset=0) + + +def download_logs(ensemble_id: str, test_uid: str): + with tempfile.NamedTemporaryFile() as archive: + subspace = _get_log_subspace(ensemble_id, test_uid) + logger.debug( + f"Downloading the archive to {archive.name} at subspace {subspace}" + ) + joshua._read_blob(joshua.db, subspace, archive) + logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") + _tar_extract(archive.name) + + +def list_commands(ensemble_id: str): + for item in joshua.tail_results(ensemble_id, errors_only=True): + test_harness_output = item[4] + match = TEST_UID_REGEXP.search(test_harness_output) + if not match: + logger.warning(f"Test UID not found in {test_harness_output}") + continue + test_uid = match.groupdict()["uid"] + print(f"python3 {__file__} download --ensemble-id 
{ensemble_id} --test-uid {test_uid}") + + +def _setup_args(): + parser = argparse.ArgumentParser(prog="rocksdb_logtool.py") + + parser.add_argument( + "--cluster-file", type=str, default=None, help="Joshua FDB cluster file" + ) + + subparsers = parser.add_subparsers(help="Possible actions", dest="action") + + report_parser = subparsers.add_parser( + "report", help="Check the log file, upload them to Joshua cluster if necessary" + ) + report_parser.add_argument( + "--work-directory", type=str, default=os.getcwd(), help="Work directory" + ) + report_parser.add_argument( + "--log-directory", + type=str, + required=True, + help="Directory contains XML/JSON logs", + ) + report_parser.add_argument( + "--ensemble-id", type=str, default=None, required=False, help="Ensemble ID" + ) + report_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") + + download_parser = subparsers.add_parser( + "download", help="Download the log file from Joshua to local directory" + ) + download_parser.add_argument( + "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" + ) + download_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") + + list_parser = subparsers.add_parser( + "list", + help="List the possible download commands for failed tests in a given ensemble. NOTE: It is possible that the test is not relevant to RocksDB and no log file is available. 
It is the user's responsibility to verify if this happens.", + ) + list_parser.add_argument( + "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" + ) + + return parser.parse_args() + + +def _main(): + args = _setup_args() + + logging.basicConfig(level=logging.DEBUG) + + logger.debug(f"Using cluster file {args.cluster_file}") + joshua.open(args.cluster_file) + + if args.action == "report": + report_error( + work_directory=args.work_directory, + log_directory=args.log_directory, + ensemble_id=args.ensemble_id, + test_uid=args.test_uid, + ) + elif args.action == "download": + download_logs(ensemble_id=args.ensemble_id, test_uid=args.test_uid) + elif args.action == "list": + list_commands(ensemble_id=args.ensemble_id) + + +if __name__ == "__main__": + _main() From ce54bca2bdfbe72bc85a49073dbbe9a356c29c1d Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 22 Feb 2023 13:16:52 -0800 Subject: [PATCH 2/6] fixup! Escalate log level from DEBUG to INFO for less pollution --- contrib/rocksdb_logtool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/rocksdb_logtool.py b/contrib/rocksdb_logtool.py index 2f6625014b..0c78e25521 100755 --- a/contrib/rocksdb_logtool.py +++ b/contrib/rocksdb_logtool.py @@ -176,7 +176,7 @@ def _setup_args(): def _main(): args = _setup_args() - logging.basicConfig(level=logging.DEBUG) + logging.basicConfig(level=logging.INFO) logger.debug(f"Using cluster file {args.cluster_file}") joshua.open(args.cluster_file) From 282f681d13f95b922bec0827492d6928e406919b Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 22 Feb 2023 18:11:07 -0800 Subject: [PATCH 3/6] fixup! 
Address comments --- cmake/AddFdbTest.cmake | 8 +- contrib/TestHarness2/test_harness/run.py | 11 +- contrib/rocksdb_logtool.py | 198 ----------------------- 3 files changed, 10 insertions(+), 207 deletions(-) delete mode 100755 contrib/rocksdb_logtool.py diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index e8ad2dcfe8..6d0af130b4 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -271,12 +271,12 @@ function(stage_correctness_package) endforeach() add_custom_command( - OUTPUT "${STAGE_OUT_DIR}/rocksdb_logtool.py" - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/contrib/rocksdb_logtool.py" "${STAGE_OUT_DIR}/rocksdb_logtool.py" - DEPENDS "${CMAKE_SOURCE_DIR}/contrib/rocksdb_logtool.py" + OUTPUT "${STAGE_OUT_DIR}/joshua_logtool.py" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/contrib/joshua_logtool.py" "${STAGE_OUT_DIR}/joshua_logtool.py" + DEPENDS "${CMAKE_SOURCE_DIR}/contrib/joshua_logtool.py" ) - list(APPEND package_files ${test_files} ${external_files} "${STAGE_OUT_DIR}/rocksdb_logtool.py") + list(APPEND package_files ${test_files} ${external_files} "${STAGE_OUT_DIR}/joshua_logtool.py") if(STAGE_OUT_FILES) set(${STAGE_OUT_FILES} ${package_files} PARENT_SCOPE) endif() diff --git a/contrib/TestHarness2/test_harness/run.py b/contrib/TestHarness2/test_harness/run.py index aeff64a88c..f922a1273f 100644 --- a/contrib/TestHarness2/test_harness/run.py +++ b/contrib/TestHarness2/test_harness/run.py @@ -390,17 +390,18 @@ class TestRun: shutil.rmtree(self.temp_path / Path("simfdb")) def _run_rocksdb_logtool(self): - """Calls RocksDB LogTool to upload the test logs if 1) test failed 2) test is RocksDB related""" - if not os.path.exists("rocksdb_logtool.py"): - raise RuntimeError("rocksdb_logtool.py missing") + """Calls Joshua LogTool to upload the test logs if 1) test failed 2) test is RocksDB related""" + if not os.path.exists("joshua_logtool.py"): + raise RuntimeError("joshua_logtool.py missing") command = [ "python3", - 
"rocksdb_logtool.py", - "report", + "joshua_logtool.py", + "upload", "--test-uid", str(self.uid), "--log-directory", str(self.temp_path), + "--check-rocksdb" ] subprocess.run(command, check=True) diff --git a/contrib/rocksdb_logtool.py b/contrib/rocksdb_logtool.py deleted file mode 100755 index 0c78e25521..0000000000 --- a/contrib/rocksdb_logtool.py +++ /dev/null @@ -1,198 +0,0 @@ -#! /usr/bin/env python3 - -"""rocksdb_logtool.py - -Provides uploading/downloading FoundationDB XML log files to Joshua cluster. - -This is a temporary solution for Joshua tests with RocksDB as the storage server -of FoundationDB. Since RocksDB is *not* deterministic, FoundationDB simulation -failures in Joshua may not be reproducible locally. With this script, it is -possible to upload the XML logs on the Joshua side and download them to local -directories. -""" - -import argparse -import logging -import os -import os.path -import re -import pathlib -import subprocess -import tempfile - -import fdb -import joshua.joshua_model as joshua - -from typing import List - -# Defined in SimulatedCluster.actor.cpp:SimulationConfig::setStorageEngine -ROCKSDB_TRACEEVENT_STRING = ["RocksDBNonDeterminism", "ShardedRocksDBNonDeterminism"] - -# e.g. /var/joshua/ensembles/20230221-051349-xiaogesu-c9fc5b230dcd91cf -ENSEMBLE_ID_REGEXP = re.compile(r"ensembles\/(?P[0-9A-Za-z\-_]+)$") - -# e.g. 
[0-9a-fA-F\-]+)\"") - -logger = logging.getLogger(__name__) - - -def _execute_grep(string: str, paths: List[pathlib.Path]) -> bool: - command = ["grep", "-F", string] + [str(path) for path in paths] - result = subprocess.run(command, stdout=subprocess.DEVNULL) - return result.returncode == 0 - - -def _is_rocksdb_test(log_files: List[pathlib.Path]) -> bool: - for event_str in ROCKSDB_TRACEEVENT_STRING: - if _execute_grep(event_str, log_files): - return True - return False - - -def _extract_ensemble_id(work_directory: str) -> str: - match = ENSEMBLE_ID_REGEXP.search(work_directory) - if not match: - return None - return match.groupdict()["ensemble_id"] - - -def _get_log_subspace(ensemble_id: str, test_uid: str): - ensemble_space = joshua.dir_ensembles - rocksdb_log_space = ensemble_space.create_or_open(joshua.db, "rocksdb_logs") - return rocksdb_log_space[bytes(ensemble_id, "utf-8")][bytes(test_uid, "utf-8")] - - -def _tar_xmls(xml_files: List[pathlib.Path], output_file_name: pathlib.Path): - command = ["tar", "-c", "-f", str(output_file_name), "--xz"] + [ - str(xml_file) for xml_file in xml_files - ] - logger.debug(f"Execute tar: {command}") - subprocess.run(command, check=True, stdout=subprocess.DEVNULL) - - -def _tar_extract(path_to_archive: pathlib.Path): - command = ["tar", "xf", str(path_to_archive)] - subprocess.run(command, check=True, stdout=subprocess.DEVNULL) - - -def report_error( - work_directory: str, log_directory: str, ensemble_id: str, test_uid: str -): - log_files = list(pathlib.Path(log_directory).glob("**/trace*.xml")) - if len(log_files) == 0: - logger.debug(f"No XML file found in directory {log_directory}") - log_files += list(pathlib.Path(log_directory).glob("**/trace*.json")) - if len(log_files) == 0: - logger.debug(f"No JSON file found in directory {log_directory}") - return - logger.debug(f"Total {len(log_files)} files found") - - if not _is_rocksdb_test(log_files): - logger.debug("Not a RocksDB test") - return - - ensemble_id = ensemble_id 
or _extract_ensemble_id(work_directory) - if not ensemble_id: - logger.debug(f"Ensemble ID missing in work directory {work_directory}") - raise RuntimeError(f"Ensemble ID missing in work directory {work_directory}") - logger.debug(f"Ensemble ID: {ensemble_id}") - - with tempfile.NamedTemporaryFile() as archive: - logger.debug(f"Tarfile: {archive.name}") - _tar_xmls(log_files, archive.name) - logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") - subspace = _get_log_subspace(ensemble_id, test_uid) - joshua._insert_blob(joshua.db, subspace, archive, offset=0) - - -def download_logs(ensemble_id: str, test_uid: str): - with tempfile.NamedTemporaryFile() as archive: - subspace = _get_log_subspace(ensemble_id, test_uid) - logger.debug( - f"Downloading the archive to {archive.name} at subspace {subspace}" - ) - joshua._read_blob(joshua.db, subspace, archive) - logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") - _tar_extract(archive.name) - - -def list_commands(ensemble_id: str): - for item in joshua.tail_results(ensemble_id, errors_only=True): - test_harness_output = item[4] - match = TEST_UID_REGEXP.search(test_harness_output) - if not match: - logger.warning(f"Test UID not found in {test_harness_output}") - continue - test_uid = match.groupdict()["uid"] - print(f"python3 {__file__} download --ensemble-id {ensemble_id} --test-uid {test_uid}") - - -def _setup_args(): - parser = argparse.ArgumentParser(prog="rocksdb_logtool.py") - - parser.add_argument( - "--cluster-file", type=str, default=None, help="Joshua FDB cluster file" - ) - - subparsers = parser.add_subparsers(help="Possible actions", dest="action") - - report_parser = subparsers.add_parser( - "report", help="Check the log file, upload them to Joshua cluster if necessary" - ) - report_parser.add_argument( - "--work-directory", type=str, default=os.getcwd(), help="Work directory" - ) - report_parser.add_argument( - "--log-directory", - type=str, - required=True, - help="Directory contains 
XML/JSON logs", - ) - report_parser.add_argument( - "--ensemble-id", type=str, default=None, required=False, help="Ensemble ID" - ) - report_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") - - download_parser = subparsers.add_parser( - "download", help="Download the log file from Joshua to local directory" - ) - download_parser.add_argument( - "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" - ) - download_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") - - list_parser = subparsers.add_parser( - "list", - help="List the possible download commands for failed tests in a given ensemble. NOTE: It is possible that the test is not relevant to RocksDB and no log file is available. It is the user's responsibility to verify if this happens.", - ) - list_parser.add_argument( - "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" - ) - - return parser.parse_args() - - -def _main(): - args = _setup_args() - - logging.basicConfig(level=logging.INFO) - - logger.debug(f"Using cluster file {args.cluster_file}") - joshua.open(args.cluster_file) - - if args.action == "report": - report_error( - work_directory=args.work_directory, - log_directory=args.log_directory, - ensemble_id=args.ensemble_id, - test_uid=args.test_uid, - ) - elif args.action == "download": - download_logs(ensemble_id=args.ensemble_id, test_uid=args.test_uid) - elif args.action == "list": - list_commands(ensemble_id=args.ensemble_id) - - -if __name__ == "__main__": - _main() From 28598a3100dfc7524a3e444d18afe9b4d527ecc8 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 22 Feb 2023 18:11:36 -0800 Subject: [PATCH 4/6] fixup! 
Add the joshua_logtool.py --- contrib/joshua_logtool.py | 204 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100755 contrib/joshua_logtool.py diff --git a/contrib/joshua_logtool.py b/contrib/joshua_logtool.py new file mode 100755 index 0000000000..e5b7cd7c0a --- /dev/null +++ b/contrib/joshua_logtool.py @@ -0,0 +1,204 @@ +#! /usr/bin/env python3 + +"""rocksdb_logtool.py + +Provides uploading/downloading FoundationDB log files to Joshua cluster. +""" + +import argparse +import logging +import os +import os.path +import re +import pathlib +import subprocess +import tempfile + +import fdb +import joshua.joshua_model as joshua + +from typing import List + +# Defined in SimulatedCluster.actor.cpp:SimulationConfig::setStorageEngine +ROCKSDB_TRACEEVENT_STRING = ["RocksDBNonDeterminism", "ShardedRocksDBNonDeterminism"] + +# e.g. /var/joshua/ensembles/20230221-051349-xiaogesu-c9fc5b230dcd91cf +ENSEMBLE_ID_REGEXP = re.compile(r"ensembles\/(?P[0-9A-Za-z\-_]+)$") + +# e.g. 
[0-9a-fA-F\-]+)\"") + +logger = logging.getLogger(__name__) + + +def _execute_grep(string: str, paths: List[pathlib.Path]) -> bool: + command = ["grep", "-F", string] + [str(path) for path in paths] + result = subprocess.run(command, stdout=subprocess.DEVNULL) + return result.returncode == 0 + + +def _is_rocksdb_test(log_files: List[pathlib.Path]) -> bool: + for event_str in ROCKSDB_TRACEEVENT_STRING: + if _execute_grep(event_str, log_files): + return True + return False + + +def _extract_ensemble_id(work_directory: str) -> str: + match = ENSEMBLE_ID_REGEXP.search(work_directory) + if not match: + return None + return match.groupdict()["ensemble_id"] + + +def _get_log_subspace(ensemble_id: str, test_uid: str): + ensemble_space = joshua.dir_ensembles + log_space = ensemble_space.create_or_open(joshua.db, "simulation_logs") + return log_space[bytes(ensemble_id, "utf-8")][bytes(test_uid, "utf-8")] + + +def _tar_logs(log_files: List[pathlib.Path], output_file_name: pathlib.Path): + command = ["tar", "-c", "-f", str(output_file_name), "--xz"] + [ + str(log_file) for log_file in log_files + ] + logger.debug(f"Execute tar: {command}") + subprocess.run(command, check=True, stdout=subprocess.DEVNULL) + + +def _tar_extract(path_to_archive: pathlib.Path): + command = ["tar", "xf", str(path_to_archive)] + subprocess.run(command, check=True, stdout=subprocess.DEVNULL) + + +def report_error( + work_directory: str, + log_directory: str, + ensemble_id: str, + test_uid: str, + check_rocksdb: bool, +): + log_files = list(pathlib.Path(log_directory).glob("**/trace*.xml")) + if len(log_files) == 0: + logger.debug(f"No XML file found in directory {log_directory}") + log_files += list(pathlib.Path(log_directory).glob("**/trace*.json")) + if len(log_files) == 0: + logger.debug(f"No JSON file found in directory {log_directory}") + return + logger.debug(f"Total {len(log_files)} files found") + + if check_rocksdb and not _is_rocksdb_test(log_files): + logger.debug("Not a RocksDB test") + 
return + + ensemble_id = ensemble_id or _extract_ensemble_id(work_directory) + if not ensemble_id: + logger.debug(f"Ensemble ID missing in work directory {work_directory}") + raise RuntimeError(f"Ensemble ID missing in work directory {work_directory}") + logger.debug(f"Ensemble ID: {ensemble_id}") + + with tempfile.NamedTemporaryFile() as archive: + logger.debug(f"Tarfile: {archive.name}") + _tar_logs(log_files, archive.name) + logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") + subspace = _get_log_subspace(ensemble_id, test_uid) + joshua._insert_blob(joshua.db, subspace, archive, offset=0) + + +def download_logs(ensemble_id: str, test_uid: str): + with tempfile.NamedTemporaryFile() as archive: + subspace = _get_log_subspace(ensemble_id, test_uid) + logger.debug( + f"Downloading the archive to {archive.name} at subspace {subspace}" + ) + joshua._read_blob(joshua.db, subspace, archive) + logger.debug(f"Tarfile size: {os.path.getsize(archive.name)}") + _tar_extract(archive.name) + + +def list_commands(ensemble_id: str): + for item in joshua.tail_results(ensemble_id, errors_only=True): + test_harness_output = item[4] + match = TEST_UID_REGEXP.search(test_harness_output) + if not match: + logger.warning(f"Test UID not found in {test_harness_output}") + continue + test_uid = match.groupdict()["uid"] + print( + f"python3 {__file__} download --ensemble-id {ensemble_id} --test-uid {test_uid}" + ) + + +def _setup_args(): + parser = argparse.ArgumentParser(prog="rocksdb_logtool.py") + + parser.add_argument( + "--cluster-file", type=str, default=None, help="Joshua FDB cluster file" + ) + + subparsers = parser.add_subparsers(help="Possible actions", dest="action") + + upload_parser = subparsers.add_parser( + "upload", help="Check the log file, upload them to Joshua cluster if necessary" + ) + upload_parser.add_argument( + "--work-directory", type=str, default=os.getcwd(), help="Work directory" + ) + upload_parser.add_argument( + "--log-directory", + type=str, + 
required=True, + help="Directory contains XML/JSON logs", + ) + upload_parser.add_argument( + "--ensemble-id", type=str, default=None, required=False, help="Ensemble ID" + ) + upload_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") + upload_parser.add_argument( + "--check-rocksdb", + action="store_true", + help="Only upload logs when RocksDB is involved", + ) + + download_parser = subparsers.add_parser( + "download", help="Download the log file from Joshua to local directory" + ) + download_parser.add_argument( + "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" + ) + download_parser.add_argument("--test-uid", type=str, required=True, help="Test UID") + + list_parser = subparsers.add_parser( + "list", + help="List the possible download commands for failed tests in a given ensemble. NOTE: It is possible that the test is not relevant to RocksDB and no log file is available. It is the user's responsibility to verify if this happens.", + ) + list_parser.add_argument( + "--ensemble-id", type=str, required=True, help="Joshua ensemble ID" + ) + + return parser.parse_args() + + +def _main(): + args = _setup_args() + + logging.basicConfig(level=logging.INFO) + + logger.debug(f"Using cluster file {args.cluster_file}") + joshua.open(args.cluster_file) + + if args.action == "upload": + report_error( + work_directory=args.work_directory, + log_directory=args.log_directory, + ensemble_id=args.ensemble_id, + test_uid=args.test_uid, + check_rocksdb=args.check_rocksdb, + ) + elif args.action == "download": + download_logs(ensemble_id=args.ensemble_id, test_uid=args.test_uid) + elif args.action == "list": + list_commands(ensemble_id=args.ensemble_id) + + +if __name__ == "__main__": + _main() From 6408208c382027b055e7580736af48a8f74b101e Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Thu, 23 Feb 2023 10:49:11 -0800 Subject: [PATCH 5/6] Update contrib/joshua_logtool.py Co-authored-by: Jingyu Zhou --- contrib/joshua_logtool.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/joshua_logtool.py b/contrib/joshua_logtool.py index e5b7cd7c0a..b6d69fdbfe 100755 --- a/contrib/joshua_logtool.py +++ b/contrib/joshua_logtool.py @@ -156,7 +156,7 @@ def _setup_args(): upload_parser.add_argument( "--check-rocksdb", action="store_true", - help="Only upload logs when RocksDB is involved", + help="If true, only upload logs when RocksDB is involved; otherwise, always upload logs.", ) download_parser = subparsers.add_parser( From 4c9c357d2cf85ee9be75f32a56227046b731c595 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Thu, 23 Feb 2023 13:03:33 -0800 Subject: [PATCH 6/6] Change the storage directory to joshua/ensembles/results/applications/simulation_logs --- contrib/joshua_logtool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/joshua_logtool.py b/contrib/joshua_logtool.py index b6d69fdbfe..479542a22a 100755 --- a/contrib/joshua_logtool.py +++ b/contrib/joshua_logtool.py @@ -52,8 +52,8 @@ def _extract_ensemble_id(work_directory: str) -> str: def _get_log_subspace(ensemble_id: str, test_uid: str): - ensemble_space = joshua.dir_ensembles - log_space = ensemble_space.create_or_open(joshua.db, "simulation_logs") + subspace = joshua.dir_ensemble_results_application + log_space = subspace.create_or_open(joshua.db, "simulation_logs") return log_space[bytes(ensemble_id, "utf-8")][bytes(test_uid, "utf-8")]