!49541 Profiler算子耗时百分比占比使用total_time

Merge pull request !49541 from liuchuting/percent
This commit is contained in:
i-robot 2023-03-03 08:33:35 +00:00 committed by Gitee
commit 195cd9dcb6
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 23 additions and 24 deletions

View File

@ -8,11 +8,11 @@ mindspore.Profiler
参数:
- **output_path** (str, 可选) - 表示输出数据的路径。默认值:"./data"。
- **op_time** (bool, 可选) -(Ascend/GPU)表示是否收集算子性能数据,默认值:True。
- **profile_communication** (bool, 可选) -仅限Ascend表示是否在多设备训练中收集通信性能数据。当值为True时收集这些数据。在单台设备训练中该参数的设置无效。使用此参数时op_time必须设置成True。默认值False。
- **profile_memory** (bool, 可选) -仅限Ascend表示是否收集Tensor内存数据。当值为True时收集这些数据。使用此参数时op_time必须设置成True。默认值False。
- **profile_communication** (bool, 可选) -(仅限Ascend)表示是否在多设备训练中收集通信性能数据。当值为True时,收集这些数据。在单台设备训练中,该参数的设置无效。使用此参数时, `op_time` 必须设置成True。默认值:False。
- **profile_memory** (bool, 可选) -(仅限Ascend)表示是否收集Tensor内存数据。当值为True时,收集这些数据。使用此参数时, `op_time` 必须设置成True。默认值:False。
- **parallel_strategy** (bool, 可选) -(仅限Ascend)表示是否收集并行策略性能数据,默认值:True。
- **start_profile** (bool, 可选) - 该参数控制是否在Profiler初始化的时候开启数据采集。默认值True。
- **aicore_metrics** (int, 可选) -仅限Ascend收集的AICORE性能数据类型值必须包含在[-1, 0, 1, 2, 3, 4, 5]默认值0每种类型包含的数据项如下
- **aicore_metrics** (int, 可选) -(仅限Ascend)收集的AICORE性能数据类型。使用此参数时, `op_time` 必须设置成True,且值必须包含在[-1, 0, 1, 2, 3, 4, 5]中。默认值:0。每种类型包含的数据项如下:
- -1: 不收集任何AICORE数据。
- 0: ArithmeticUtilization包含mac_fp16/int8_ratio、vec_fp32/fp16/int32_ratio、vec_misc_ratio等。
@ -29,7 +29,7 @@ mindspore.Profiler
- False: 异步方式算子耗时为从CPU发送到GPU的耗时。这种方式能减少因增加Profiler对训练时间的影响。
- **data_process** (bool, 可选) -(Ascend/GPU)表示是否收集数据准备性能数据,默认值:True。
- **timeline_limit** (int, 可选) - 设置限制timeline文件存储上限大小单位M使用此参数时op_time必须设置成True。默认值500。
- **timeline_limit** (int, 可选) - 设置限制timeline文件存储上限大小(单位M)。使用此参数时, `op_time` 必须设置成True。默认值:500。
异常:
- **RuntimeError** - 当CANN的版本与MindSpore版本不匹配时,生成的ascend_job_id目录结构MindSpore无法解析。

View File

@ -37,9 +37,8 @@ class Integrator:
_file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
_file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
_file_name_framework = 'framework_raw_{}.csv'
_header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
'percent']
_header_aicore_detail = ['full_op_name', 'execution_time']
_header_aicore_type = ['op_type', 'total_time', 'execution_frequency', 'percent']
_header_aicore_detail = ['full_op_name', 'execution_time', 'execution_frequency']
_header_aicpu = ['serial_number', 'op_type', 'total_time', 'dispatch_time',
'execution_time', 'run_start', 'run_end']
@ -141,16 +140,16 @@ class Integrator:
op_name_type_cache[row[3]] = row[5]
op_type_time_cache = {}
for full_op_name, op_time in self._op_time_cache.items():
for full_op_name, op_info in self._op_time_cache.items():
self._total_time += op_info[0] * op_info[1]
op_type = op_name_type_cache.get(full_op_name)
op_type_time = op_type_time_cache.get(op_type)
if not op_type_time:
op_type_time = [op_time, 1]
op_type_time = [op_info[0] * op_info[1], op_info[1]]
op_type_time_cache[op_type] = op_type_time
else:
op_type_time[0] += op_time
op_type_time[1] += 1
op_type_time[0] += op_info[0] * op_info[1]
op_type_time[1] += op_info[1]
op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
with open(op_type_file_path, 'w') as type_file:
@ -201,8 +200,8 @@ class Integrator:
if op_infos[0] == 'total':
self._total_time = Decimal(op_infos[2])
continue
self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
csv_writer.writerow([op_infos[0], op_infos[1]])
self._op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]
csv_writer.writerow([op_infos[0], op_infos[1], op_infos[3]])
def _parse_aicpu_time(self):
"""Parse the parsed AICPU operator time file."""

View File

@ -79,7 +79,7 @@ class MinddataProfilingAnalyzer:
validated_dir = validate_and_normalize_path(dir_name)
except RuntimeError as path_error:
logger.warning('<%s> is invalid.', dir_type)
raise ProfilerPathErrorException(dir_type + 'is invalid.') from path_error
raise ProfilerPathErrorException(dir_type + ' is invalid.') from path_error
if not os.path.isdir(validated_dir):
logger.warning('<%s> <%s> not found.', dir_type, validated_dir)

View File

@ -36,7 +36,7 @@ class OPComputeTimeParser:
"""
_dst_file_title = 'title:op compute time'
_dst_file_column_title = 'op_name compute_time(ms) stream_id'
_dst_file_column_title = 'op_name compute_time(ms) stream_id execution_times'
_dst_file_column_title += '\n------------ --------------- ---------'
def __init__(self, hwts_output_file, output_filename, op_task_info,
@ -77,7 +77,6 @@ class OPComputeTimeParser:
op_duration_str = str(item.duration / factor)
if op_name in op_name_time_dict.keys():
op_name_time_dict[op_name] += op_duration
if item.task_id == op_name_task_dict[op_name]:
op_name_count_dict[op_name] += 1
op_name_start_time[op_name].append(
(op_start_time_str, op_duration_str)
@ -120,7 +119,7 @@ class OPComputeTimeParser:
raise ValueError("The number of operations can not be 0.")
avg_time = time / op_name_count_dict.get(op_name)
total_time += avg_time
result_data += ("%s %s %s\n" % (op_name, str(avg_time), stream_id))
result_data += ("%s %s %s %s\n" % (op_name, str(avg_time), stream_id, op_name_count_dict.get(op_name)))
result_data += ("total op %s 0" % (str(total_time)))
timeline_data = []

View File

@ -93,15 +93,16 @@ class Profiler:
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True.
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
multi-device training; collected when True. Setting this parameter has no effect during single-device
training. When using this parameter, op_time must be set to True. Default: False.
training. When using this parameter, `op_time` must be set to True. Default: False.
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
When using this parameter, op_time must be set to True. Default: False.
When using this parameter, `op_time` must be set to True. Default: False.
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
Default value: True.
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
data collection based on conditions. Default: True.
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected. The value must be in
[-1, 0, 1, 2, 3, 4, 5], Default: 0, the data items contained in each metric are as follows:
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected. When using this
parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5]. Default: 0. The
data items contained in each metric are as follows:
- -1: Does not collect AICORE data.
- 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
@ -124,7 +125,7 @@ class Profiler:
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
Default value: True.
timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M). When using this
parameter, op_time must be set to True. Default value: 500.
parameter, `op_time` must be set to True. Default value: 500.
Raises:
RuntimeError: When the version of CANN does not match the version of MindSpore,