!27085 [MD][UT]: Use pytest.mark.forked for profiling tests

Merge pull request !27085 from cathwong/ckw_ut_prof_forked
i-robot 2021-12-02 17:00:28 +00:00 committed by Gitee
commit 9ddd3fb66f
3 changed files with 72 additions and 67 deletions
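
The @pytest.mark.forked marker added throughout this change comes from the pytest-forked plugin: each marked test runs in a forked child process, so the global MindData profiling manager state and the profiling files it writes cannot leak from one test into the next. A minimal sketch of the mechanism, assuming pytest-forked is installed (pip install pytest-forked) and a POSIX platform; the class and test names are illustrative:

import pytest


@pytest.mark.forked
class TestIsolatedProfiling:
    """Marking the class forks one child process per test method."""

    def test_first(self):
        # Global state mutated here dies with this child process.
        assert True

    def test_second(self):
        # Starts from the parent's unmodified state, not test_first's leftovers.
        assert True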

File 1 of 3

@@ -18,6 +18,7 @@ Testing profiling support in DE
 import json
 import os
 import numpy as np
+import pytest
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as C
@@ -41,6 +42,7 @@ file_name_map_rank_id = {"test_profiling_simple_pipeline": "0",
                          "test_profiling_seq_pipelines_repeat": "9"}


+@pytest.mark.forked
 class TestMinddataProfilingManager:
     """
     Test MinddataProfilingManager
@@ -53,9 +55,9 @@ class TestMinddataProfilingManager:
         # Get instance pointer for MindData profiling manager
         self.md_profiler = cde.GlobalContext.profiling_manager()

-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"

     def setup_method(self):
         """
@@ -64,9 +66,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(pipeline_file) is False
@@ -91,9 +93,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         # Delete MindData profiling files generated from the test.
         os.remove(pipeline_file)
@@ -132,9 +134,9 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["data"])
@@ -172,8 +174,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["gen"])
@@ -217,8 +219,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
         def source1():
@@ -273,8 +275,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
         def source1():
@@ -342,8 +344,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         def source1():
             for i in range(8000):
@@ -393,8 +395,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         # Create this common pipeline
         # Cifar10 -> Map -> Map -> Batch -> Repeat
@@ -446,8 +448,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         source = [(np.array([x]),) for x in range(64)]
         data1 = ds.GeneratorDataset(source, ["data"])
@@ -498,8 +500,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
@@ -550,8 +552,8 @@ class TestMinddataProfilingManager:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"

         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
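
A note on the file_name and file_id lookup repeated in the hunks above: pytest exposes the currently running test through the PYTEST_CURRENT_TEST environment variable as "<nodeid> (<stage>)", so splitting on the last ':' and then the first ' ' recovers the bare test name, which file_name_map_rank_id maps to a per-test file suffix. A small self-contained sketch; the module and class path in the simulated value is illustrative:

import os

# Simulate the value pytest sets while a test runs.
os.environ['PYTEST_CURRENT_TEST'] = \
    "test_md_profiling.py::TestMinddataProfilingManager::test_profiling_simple_pipeline (call)"

file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
assert file_name == "test_profiling_simple_pipeline"

# Each test then writes to its own suffixed files, so tests never collide on disk.
file_id = {"test_profiling_simple_pipeline": "0"}[file_name]
assert "./pipeline_profiling" + "_" + file_id + ".json" == "./pipeline_profiling_0.json"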

File 2 of 3

@@ -37,6 +37,7 @@ file_name_map_rank_id = {"test_profiling_early_stop": "0",
                          "test_profiling_stop_nostart": "4"}


+@pytest.mark.forked
 class TestMindDataProfilingStartStop:
     """
     Test MindData Profiling Manager Start-Stop Support
@@ -46,9 +47,9 @@ class TestMindDataProfilingStartStop:
         """
         Run once for the class
         """
-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"

     def setup_method(self):
         """
@@ -57,9 +58,9 @@ class TestMindDataProfilingStartStop:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        self.pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        self.cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        self.dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        self.pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        self.cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        self.dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(self.pipeline_file) is False
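
The setup_method and teardown_method hunks in these files follow one lifecycle: assert that the profiling artifacts are absent before the test runs, let the test produce them, then delete them so the next forked test starts clean. A generic sketch of that pattern; the helper names and file names are illustrative, not part of the diff:

import os

ARTIFACTS = ["./pipeline_profiling_0.json",
             "./minddata_cpu_utilization_0.json",
             "./dataset_iterator_profiling_0.txt"]


def assert_artifacts_absent(paths):
    for path in paths:
        # A leftover file means an earlier test leaked state into this one.
        assert not os.path.exists(path)


def remove_artifacts(paths):
    for path in paths:
        # Raises FileNotFoundError if the test failed to produce its artifact.
        os.remove(path)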

File 3 of 3

@@ -19,6 +19,7 @@ import csv
 import json
 import os
 import numpy as np
+import pytest
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as C
@@ -30,6 +31,7 @@ file_name_map_rank_id = {"test_analyze_basic": "0",
                          "test_analyze_sequential_pipelines_invalid": "1"}


+@pytest.mark.forked
 class TestMinddataProfilingAnalyzer:
     """
     Test the MinddataProfilingAnalyzer class
@@ -42,15 +44,15 @@ class TestMinddataProfilingAnalyzer:
         # Get instance pointer for MindData profiling manager
         self.md_profiler = cde.GlobalContext.profiling_manager()

-        self._PIPELINE_FILE = "./pipeline_profiling"
-        self._CPU_UTIL_FILE = "./minddata_cpu_utilization"
-        self._DATASET_ITERATOR_FILE = "./dataset_iterator_profiling"
-        self._SUMMARY_JSON_FILE = "./minddata_pipeline_summary"
-        self._SUMMARY_CSV_FILE = "./minddata_pipeline_summary"
-        self._ANALYZE_FILE_PATH = "./"
+        self._pipeline_file = "./pipeline_profiling"
+        self._cpu_util_file = "./minddata_cpu_utilization"
+        self._dataset_iterator_file = "./dataset_iterator_profiling"
+        self._summary_json_file = "./minddata_pipeline_summary"
+        self._summary_csv_file = "./minddata_pipeline_summary"
+        self._analyze_file_path = "./"

         # This is the set of keys for success case
-        self._EXPECTED_SUMMARY_KEYS_SUCCESS = \
+        self._expected_summary_keys_success = \
             ['avg_cpu_pct', 'avg_cpu_pct_per_worker', 'children_ids', 'num_workers', 'op_ids', 'op_names',
              'parent_id', 'per_batch_time', 'per_pipeline_time', 'per_push_queue_time', 'pipeline_ops',
              'queue_average_size', 'queue_empty_freq_pct', 'queue_utilization_pct']
@@ -62,11 +64,11 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"

         # Confirm MindData Profiling files do not yet exist
         assert os.path.exists(pipeline_file) is False
@@ -94,11 +96,11 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"

         # Delete MindData profiling files generated from the test.
         os.remove(pipeline_file)
@@ -130,15 +132,15 @@ class TestMinddataProfilingAnalyzer:
                 result.append(row)
         return result

-    def verify_md_summary(self, md_summary_dict, EXPECTED_SUMMARY_KEYS):
+    def verify_md_summary(self, md_summary_dict, expected_summary_keys):
         """
         Verify the content of the 3 variations of the MindData Profiling analyze summary output.
         """
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        summary_json_file = self._SUMMARY_JSON_FILE + "_" + file_id + ".json"
-        summary_csv_file = self._SUMMARY_CSV_FILE + "_" + file_id + ".csv"
+        summary_json_file = self._summary_json_file + "_" + file_id + ".json"
+        summary_csv_file = self._summary_csv_file + "_" + file_id + ".csv"

         # Confirm MindData Profiling analyze summary files are created
         assert os.path.exists(summary_json_file) is True
@@ -149,7 +151,7 @@ class TestMinddataProfilingAnalyzer:
         summary_returned_keys.sort()

         # 1. Confirm expected keys are in returned keys
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_returned_keys

         # Read summary JSON file
@@ -160,7 +162,7 @@ class TestMinddataProfilingAnalyzer:
         summary_json_keys.sort()

         # 2a. Confirm expected keys are in JSON file keys
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_json_keys

         # 2b. Confirm returned dictionary keys are identical to JSON file keys
@@ -175,7 +177,7 @@ class TestMinddataProfilingAnalyzer:
         summary_csv_keys.sort()

         # 3a. Confirm expected keys are in the first column of the CSV file
-        for k in EXPECTED_SUMMARY_KEYS:
+        for k in expected_summary_keys:
             assert k in summary_csv_keys

         # 3b. Confirm returned dictionary keys are identical to CSV file first column keys
@@ -195,9 +197,9 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         # Create this basic and common linear pipeline
         # Generator -> Map -> Batch -> Repeat -> EpochCtrl
@@ -225,7 +227,7 @@ class TestMinddataProfilingAnalyzer:
         assert os.path.exists(dataset_iterator_file) is True

         # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
-        md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH)
+        md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path)
         md_summary_dict = md_analyzer.analyze()

         # Verify MindData Profiling Analyze Summary output
@@ -233,7 +235,7 @@ class TestMinddataProfilingAnalyzer:
         # 1. returned dictionary
         # 2. JSON file
         # 3. CSV file
-        self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS)
+        self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success)

         # 4. Verify non-variant values or number of values in the tested pipeline for certain keys
         #    of the returned dictionary
@@ -258,9 +260,9 @@ class TestMinddataProfilingAnalyzer:
         file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
         file_id = file_name_map_rank_id[file_name]
-        pipeline_file = self._PIPELINE_FILE + "_" + file_id + ".json"
-        cpu_util_file = self._CPU_UTIL_FILE + "_" + file_id + ".json"
-        dataset_iterator_file = self._DATASET_ITERATOR_FILE + "_" + file_id + ".txt"
+        pipeline_file = self._pipeline_file + "_" + file_id + ".json"
+        cpu_util_file = self._cpu_util_file + "_" + file_id + ".json"
+        dataset_iterator_file = self._dataset_iterator_file + "_" + file_id + ".txt"

         # Create the pipeline
         # Generator -> Map -> Batch -> EpochCtrl
@@ -315,11 +317,11 @@ class TestMinddataProfilingAnalyzer:
         assert os.path.exists(dataset_iterator_file) is True

         # Call MindData Analyzer for generated MindData profiling files to generate MindData pipeline summary result
-        md_analyzer = MinddataProfilingAnalyzer(self._ANALYZE_FILE_PATH, file_id, self._ANALYZE_FILE_PATH)
+        md_analyzer = MinddataProfilingAnalyzer(self._analyze_file_path, file_id, self._analyze_file_path)
         md_summary_dict = md_analyzer.analyze()

         # Verify MindData Profiling Analyze Summary output
-        self.verify_md_summary(md_summary_dict, self._EXPECTED_SUMMARY_KEYS_SUCCESS)
+        self.verify_md_summary(md_summary_dict, self._expected_summary_keys_success)

         # Confirm pipeline data contains info for 3 ops
         assert md_summary_dict["pipeline_ops"] == ["Batch(id=0)", "Map(id=1)", "Generator(id=2)"]