From 367e5ed1dd1694feb560af49ee17fbf3c6274c9f Mon Sep 17 00:00:00 2001
From: Cathy Wong <cathy.wong@huawei.com>
Date: Wed, 1 Dec 2021 19:42:56 -0500
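Subject: [PATCH] MD UT: Use pytest.mark.forked for profiling tests

Decorate the MindData profiling test classes (TestMinddataProfilingManager,
TestMindDataProfilingStartStop, TestMinddataProfilingAnalyzer) with
@pytest.mark.forked.

Also rename the upper-case instance attributes in these classes
(e.g. _PIPELINE_FILE -> _pipeline_file) and the EXPECTED_SUMMARY_KEYS
parameter of verify_md_summary to snake_case. No test logic is changed.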

---
 tests/ut/python/dataset/test_profiling.py     | 58 ++++++++--------
 .../dataset/test_profiling_startstop.py       | 13 ++--
 .../profiler/parser/test_minddata_analyzer.py | 68 ++++++++++---------
 3 files changed, 72 insertions(+), 67 deletions(-)

diff --git a/tests/ut/python/dataset/test_profiling.py b/tests/ut/python/dataset/test_profiling.py
index 4e26cd4a662..9c623efdd8c 100644
--- a/tests/ut/python/dataset/test_profiling.py
+++ b/tests/ut/python/dataset/test_profiling.py
@@ -18,6 +18,7 @@ Testing profiling support in DE
 import json
 import os
 import numpy as np
+import pytest
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as C
@@ -41,6 +42,7 @@ file_name_map_rank_id = {"test_profiling_simple_pipeline": "0",
                          "test_profiling_seq_pipelines_repeat": "9"}
 
 
+@pytest.mark.forked
 class TestMinddataProfilingManager:
     """
     Test MinddataProfilingManager
@@ -53,9 +55,9 @@ class TestMinddataProfilingManager:
         # Get instance pointer for MindData profiling manager
         self.md_profiler = cde.GlobalContext.profiling_manager()
 
-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"
 
     def setup_method(self):
         """
@@ -64,9 +66,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(pipeline_file) is False
@@ -91,9 +93,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         # Delete MindData profiling files generated from the test.
         os.remove(pipeline_file)
@@ -132,9 +134,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["data"])
@@ -172,8 +174,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["gen"])
@@ -217,8 +219,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
         def source1():
@@ -273,8 +275,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
         def source1():
@@ -342,8 +344,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         def source1():
             for i in range(8000):
@@ -393,8 +395,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         # Create this common pipeline
         # Cifar10 -> Map -> Map -> Batch -> Repeat
@@ -446,8 +448,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         source = [(np.array([x]),) for x in range(64)]
         data1 = ds.GeneratorDataset(source, ["data"])
@@ -498,8 +500,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
@@ -550,8 +552,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
 
         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
diff --git a/tests/ut/python/dataset/test_profiling_startstop.py b/tests/ut/python/dataset/test_profiling_startstop.py
index 74dda652911..649f55b9303 100644
--- a/tests/ut/python/dataset/test_profiling_startstop.py
+++ b/tests/ut/python/dataset/test_profiling_startstop.py
@@ -37,6 +37,7 @@ file_name_map_rank_id = {"test_profiling_early_stop": "0",
                          "test_profiling_stop_nostart": "4"}
 
 
+@pytest.mark.forked
 class TestMindDataProfilingStartStop:
     """
     Test MindData Profiling Manager Start-Stop Support
@@ -46,9 +47,9 @@ class TestMindDataProfilingStartStop:
         """
         Run once for the class
         """
-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"
 
     def setup_method(self):
         """
@@ -57,9 +58,9 @@ class TestMindDataProfilingStartStop:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        self.pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        self.cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        self.dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        self.pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        self.cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        self.dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(self.pipeline_file) is False
diff --git a/tests/ut/python/profiler/parser/test_minddata_analyzer.py b/tests/ut/python/profiler/parser/test_minddata_analyzer.py
index dea9e33141f..22dff05a6ef 100644
--- a/tests/ut/python/profiler/parser/test_minddata_analyzer.py
+++ b/tests/ut/python/profiler/parser/test_minddata_analyzer.py
@@ -19,6 +19,7 @@ import csv
 import json
 import os
 import numpy as np
+import pytest
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as C
@@ -30,6 +31,7 @@ file_name_map_rank_id = {"test_analyze_basic": "0",
                          "test_analyze_sequential_pipelines_invalid": "1"}
 
 
+@pytest.mark.forked
 class TestMinddataProfilingAnalyzer:
     """
     Test the MinddataProfilingAnalyzer class
@@ -42,15 +44,15 @@ class TestMinddataProfilingAnalyzer:
         # Get instance pointer for MindData profiling manager
         self.md_profiler = cde.GlobalContext.profiling_manager()
 
-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
-        self._SUMMARY_JSON_FILE = "./minddata_pipeline_summary"
-        self._SUMMARY_CSV_FILE = "./minddata_pipeline_summary"
-        self._ANALYZE_FILE_PATH = "./"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"
+        self._summary_json_file = "./minddata_pipeline_summary"
+        self._summary_csv_file = "./minddata_pipeline_summary"
+        self._analyze_file_path = "./"
 
         # This is the set of keys for success case
-        self._EXPECTED_SUMMARY_KEYS_SUCCESS = \
+        self._expected_summary_keys_success = \
             ['avg_cpu_pct', 'avg_cpu_pct_per_worker', 'children_ids', 'num_workers', 'op_ids', 'op_names',
              'parent_id', 'per_batch_time', 'per_pipeline_time', 'per_push_queue_time', 'pipeline_ops',
              'queue_average_size', 'queue_empty_freq_pct', 'queue_utilization_pct']
@@ -62,11 +64,11 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"
 
         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(pipeline_file) is False
@@ -94,11 +96,11 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"
 
         # Delete MindData profiling files generated from the test.
         os.remove(pipeline_file)
@@ -130,15 +132,15 @@ class TestMinddataProfilingAnalyzer:
                 result.append(row)
         return result
 
-    def verify_md_summary(self, md_summary_dict, EXPECTED_SUMMARY_KEYS):
+    def verify_md_summary(self, md_summary_dict, expected_summary_keys):
         """
         Verify the content of the 3 variations of the MindData Profiling analyze summary output.
         """
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"
 
         # Confirm MindData Profiling analyze summary files are created
         assert os.path.exists(summary_json_file) is True
@@ -149,7 +151,7 @@ class TestMinddataProfilingAnalyzer:
         summary_returned_keys.sort()
 
         # 1. Confirm expected keys are in returned keys
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_returned_keys
 
         # Read summary JSON file
@@ -160,7 +162,7 @@ class TestMinddataProfilingAnalyzer:
         summary_json_keys.sort()
 
         # 2a. Confirm expected keys are in JSON file keys
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_json_keys
 
         # 2b. Confirm returned dictionary keys are identical to JSON file keys
@@ -175,7 +177,7 @@ class TestMinddataProfilingAnalyzer:
         summary_csv_keys.sort()
 
         # 3a. Confirm expected keys are in the first column of the CSV file
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_csv_keys
 
         # 3b. Confirm returned dictionary keys are identical to CSV file first column keys
@@ -195,9 +197,9 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         # Create this basic and common linear pipeline
         # Generator -> Map -> Batch -> Repeat -> EpochCtrl
@@ -225,7 +227,7 @@ class TestMinddataProfilingAnalyzer:
         assert os.path.exists(dataset_iterator_file) is True
 
         # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
-        md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH)
+        md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path)
         md_summary_dict = md_analyzer.analyze()
 
         # Verify MindData Profiling Analyze Summary output
@@ -233,7 +235,7 @@ class TestMinddataProfilingAnalyzer:
         # 1. returned dictionary
         # 2. JSON file
         # 3. CSV file
-        self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS)
+        self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success)
 
         # 4. Verify non-variant values or number of values in the tested pipeline for certain keys
         # of the returned dictionary
@@ -258,9 +260,9 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
 
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
 
         # Create the pipeline
         # Generator -> Map -> Batch -> EpochCtrl
@@ -315,11 +317,11 @@ class TestMinddataProfilingAnalyzer:
         assert os.path.exists(dataset_iterator_file) is True
 
         # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
-        md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH)
+        md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path)
         md_summary_dict = md_analyzer.analyze()
 
         # Verify MindData Profiling Analyze Summary output
-        self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS)
+        self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success)
 
         # Confirm pipeline data contains info for 3 ops
         assert md_summary_dict["pipeline_ops"] == ["Batch(id=0)", "Map(id=1)", "Generator(id=2)"]