From 367e5ed1dd1694feb560af49ee17fbf3c6274c9f Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Wed, 1 Dec 2021 19:42:56 -0500 Subject: [PATCH] MD UT: Use pytest.mark.forked for profiling tests --- tests/ut/python/dataset/test_profiling.py | 58 ++++++++-------- .../dataset/test_profiling_startstop.py | 13 ++-- .../profiler/parser/test_minddata_analyzer.py | 68 ++++++++++--------- 3 files changed, 72 insertions(+), 67 deletions(-) diff --git a/tests/ut/python/dataset/test_profiling.py b/tests/ut/python/dataset/test_profiling.py index 4e26cd4a662..9c623efdd8c 100644 --- a/tests/ut/python/dataset/test_profiling.py +++ b/tests/ut/python/dataset/test_profiling.py @@ -18,6 +18,7 @@ Testing profiling support in DE import json import os import numpy as np +import pytest import mindspore.common.dtype as mstype import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C @@ -41,6 +42,7 @@ file_name_map_rank_id = {"test_profiling_simple_pipeline": "0", "test_profiling_seq_pipelines_repeat": "9"} +@pytest.mark.forked class TestMinddataProfilingManager: """ Test MinddataProfilingManager @@ -53,9 +55,9 @@ class TestMinddataProfilingManager: # Get instance pointer for MindData profiling manager self.md_profiler = cde.GlobalContext.profiling_manager() - self._PIPELINE_FILE = "./pipeline_profiling" - self._CPU_UTIL_FILE = "./minddata_cpu_utilization" - self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" + self._pipeline_file = "./pipeline_profiling" + self._cpu_util_file = "./minddata_cpu_utilization" + self._dataset_iterator_file = "./dataset_iterator_profiling" def setup_method(self): """ @@ -64,9 +66,9 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" # Confirm MindData Profiling files do not yet exist assert os.path.exists(pipeline_file) is False @@ -91,9 +93,9 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" # Delete MindData profiling files generated from the test. os.remove(pipeline_file) @@ -132,9 +134,9 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" source = [(np.array([x]),) for x in range(1024)] data1 = ds.GeneratorDataset(source, ["data"]) @@ -172,8 +174,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" source = [(np.array([x]),) for x in range(1024)] data1 = ds.GeneratorDataset(source, ["gen"]) @@ -217,8 +219,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" # In source1 dataset: Number of rows is 3; its values are 0, 1, 2 def source1(): @@ -273,8 +275,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9 def source1(): @@ -342,8 +344,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" def source1(): for i in range(8000): @@ -393,8 +395,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" # Create this common pipeline # Cifar10 -> Map -> Map -> Batch -> Repeat @@ -446,8 +448,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" source = [(np.array([x]),) for x in range(64)] data1 = ds.GeneratorDataset(source, ["data"]) @@ -498,8 +500,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" source = [(np.array([x]),) for x in range(64)] data2 = ds.GeneratorDataset(source, ["data"]) @@ -550,8 +552,8 @@ class TestMinddataProfilingManager: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" source = [(np.array([x]),) for x in range(64)] data2 = ds.GeneratorDataset(source, ["data"]) diff --git a/tests/ut/python/dataset/test_profiling_startstop.py b/tests/ut/python/dataset/test_profiling_startstop.py index 74dda652911..649f55b9303 100644 --- a/tests/ut/python/dataset/test_profiling_startstop.py +++ b/tests/ut/python/dataset/test_profiling_startstop.py @@ -37,6 +37,7 @@ file_name_map_rank_id = {"test_profiling_early_stop": "0", "test_profiling_stop_nostart": "4"} +@pytest.mark.forked class TestMindDataProfilingStartStop: """ Test MindData Profiling Manager Start-Stop Support @@ -46,9 +47,9 @@ class TestMindDataProfilingStartStop: """ Run once for the class """ - self._PIPELINE_FILE = "./pipeline_profiling" - self._CPU_UTIL_FILE = "./minddata_cpu_utilization" - self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" + self._pipeline_file = "./pipeline_profiling" + self._cpu_util_file = "./minddata_cpu_utilization" + self._dataset_iterator_file = "./dataset_iterator_profiling" def setup_method(self): """ @@ -57,9 +58,9 @@ class TestMindDataProfilingStartStop: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - self.pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - self.cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - self.dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + self.pipeline_file = self._pipeline_file + "_" + file_id + ".json" + self.cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + self.dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" # Confirm MindData Profiling files do not yet exist assert os.path.exists(self.pipeline_file) is False diff --git a/tests/ut/python/profiler/parser/test_minddata_analyzer.py b/tests/ut/python/profiler/parser/test_minddata_analyzer.py index dea9e33141f..22dff05a6ef 100644 --- a/tests/ut/python/profiler/parser/test_minddata_analyzer.py +++ b/tests/ut/python/profiler/parser/test_minddata_analyzer.py @@ -19,6 +19,7 @@ import csv import json import os import numpy as np +import pytest import mindspore.common.dtype as mstype import mindspore.dataset as ds import mindspore.dataset.transforms.c_transforms as C @@ -30,6 +31,7 @@ file_name_map_rank_id = {"test_analyze_basic": "0", "test_analyze_sequential_pipelines_invalid": "1"} +@pytest.mark.forked class TestMinddataProfilingAnalyzer: """ Test the MinddataProfilingAnalyzer class @@ -42,15 +44,15 @@ class TestMinddataProfilingAnalyzer: # Get instance pointer for MindData profiling manager self.md_profiler = cde.GlobalContext.profiling_manager() - self._PIPELINE_FILE = "./pipeline_profiling" - self._CPU_UTIL_FILE = "./minddata_cpu_utilization" - self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling" - self._SUMMARY_JSON_FILE = "./minddata_pipeline_summary" - self._SUMMARY_CSV_FILE = "./minddata_pipeline_summary" - self._ANALYZE_FILE_PATH = "./" + self._pipeline_file = "./pipeline_profiling" + self._cpu_util_file = "./minddata_cpu_utilization" + self._dataset_iterator_file = "./dataset_iterator_profiling" + self._summary_json_file = "./minddata_pipeline_summary" + self._summary_csv_file = "./minddata_pipeline_summary" + self._analyze_file_path = "./" # This is the set of keys for success case - self._EXPECTED_SUMMARY_KEYS_SUCCESS = \ + self._expected_summary_keys_success = \ ['avg_cpu_pct', 'avg_cpu_pct_per_worker', 'children_ids', 'num_workers', 'op_ids', 'op_names', 'parent_id', 'per_batch_time', 'per_pipeline_time', 'per_push_queue_time', 'pipeline_ops', 'queue_average_size', 'queue_empty_freq_pct', 'queue_utilization_pct'] @@ -62,11 +64,11 @@ class TestMinddataProfilingAnalyzer: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" - summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" - summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" + summary_json_file = self._summary_json_file + "_" + file_id + ".json" + summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" # Confirm MindData Profiling files do not yet exist assert os.path.exists(pipeline_file) is False @@ -94,11 +96,11 @@ class TestMinddataProfilingAnalyzer: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" - summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" - summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" + summary_json_file = self._summary_json_file + "_" + file_id + ".json" + summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" # Delete MindData profiling files generated from the test. os.remove(pipeline_file) @@ -130,15 +132,15 @@ class TestMinddataProfilingAnalyzer: result.append(row) return result - def verify_md_summary(self, md_summary_dict, EXPECTED_SUMMARY_KEYS): + def verify_md_summary(self, md_summary_dict, expected_summary_keys): """ Verify the content of the 3 variations of the MindData Profiling analyze summary output. """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json" - summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv" + summary_json_file = self._summary_json_file + "_" + file_id + ".json" + summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv" # Confirm MindData Profiling analyze summary files are created assert os.path.exists(summary_json_file) is True @@ -149,7 +151,7 @@ class TestMinddataProfilingAnalyzer: summary_returned_keys.sort() # 1. Confirm expected keys are in returned keys - for k in EXPECTED_SUMMARY_KEYS: + for k in expected_summary_keys: assert k in summary_returned_keys # Read summary JSON file @@ -160,7 +162,7 @@ class TestMinddataProfilingAnalyzer: summary_json_keys.sort() # 2a. Confirm expected keys are in JSON file keys - for k in EXPECTED_SUMMARY_KEYS: + for k in expected_summary_keys: assert k in summary_json_keys # 2b. Confirm returned dictionary keys are identical to JSON file keys @@ -175,7 +177,7 @@ class TestMinddataProfilingAnalyzer: summary_csv_keys.sort() # 3a. Confirm expected keys are in the first column of the CSV file - for k in EXPECTED_SUMMARY_KEYS: + for k in expected_summary_keys: assert k in summary_csv_keys # 3b. Confirm returned dictionary keys are identical to CSV file first column keys @@ -195,9 +197,9 @@ class TestMinddataProfilingAnalyzer: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" # Create this basic and common linear pipeline # Generator -> Map -> Batch -> Repeat -> EpochCtrl @@ -225,7 +227,7 @@ class TestMinddataProfilingAnalyzer: assert os.path.exists(dataset_iterator_file) is True # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result - md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH) + md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path) md_summary_dict = md_analyzer.analyze() # Verify MindData Profiling Analyze Summary output @@ -233,7 +235,7 @@ class TestMinddataProfilingAnalyzer: # 1. returned dictionary # 2. JSON file # 3. CSV file - self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS) + self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success) # 4. Verify non-variant values or number of values in the tested pipeline for certain keys # of the returned dictionary @@ -258,9 +260,9 @@ class TestMinddataProfilingAnalyzer: file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_id = file_name_map_rank_id[file_name] - pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json" - cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json" - dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt" + pipeline_file = self._pipeline_file + "_" + file_id + ".json" + cpu_util_file = self._cpu_util_file + "_" + file_id + ".json" + dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt" # Create the pipeline # Generator -> Map -> Batch -> EpochCtrl @@ -315,11 +317,11 @@ class TestMinddataProfilingAnalyzer: assert os.path.exists(dataset_iterator_file) is True # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result - md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH) + md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path) md_summary_dict = md_analyzer.analyze() # Verify MindData Profiling Analyze Summary output - self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS) + self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success) # Confirm pipeline data contains info for 3 ops assert md_summary_dict["pipeline_ops"] == ["Batch(id=0)", "Map(id=1)", "Generator(id=2)"]