!49541 Profiler: use total_time for the operator time percentage
Merge pull request !49541 from liuchuting/percent
commit 195cd9dcb6
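Read this way: previously the per-type percentage was based on each operator's average execution_time alone; after this change it is based on total_time, i.e. the average time weighted by the execution count. A rough illustrative sketch (Python, not part of the patch; the numbers are made up):

    from decimal import Decimal

    # Hypothetical data: operator type -> (average time in ms, number of executions).
    ops = {"Conv2D": (Decimal("0.5"), 4), "MatMul": (Decimal("1.2"), 2)}

    old_total = sum(avg for avg, _ in ops.values())              # percentage basis before: 1.7 ms
    new_total = sum(avg * calls for avg, calls in ops.values())  # percentage basis now: 4.4 ms

    for name, (avg, calls) in ops.items():
        print(name,
              "old percent:", format(avg / old_total, ".1%"),
              "new percent:", format(avg * calls / new_total, ".1%"))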
@@ -8,11 +8,11 @@ mindspore.Profiler
 Parameters:
 - **output_path** (str, optional) - Path of the output data. Default: "./data".
 - **op_time** (bool, optional) - (Ascend/GPU) Whether to collect operator performance data. Default: True.
-- **profile_communication** (bool, optional) - (Ascend only) Whether to collect communication performance data in multi-device training. The data is collected when the value is True. This parameter has no effect in single-device training. When using this parameter, op_time must be set to True. Default: False.
+- **profile_communication** (bool, optional) - (Ascend only) Whether to collect communication performance data in multi-device training. The data is collected when the value is True. This parameter has no effect in single-device training. When using this parameter, `op_time` must be set to True. Default: False.
-- **profile_memory** (bool, optional) - (Ascend only) Whether to collect Tensor memory data. The data is collected when the value is True. When using this parameter, op_time must be set to True. Default: False.
+- **profile_memory** (bool, optional) - (Ascend only) Whether to collect Tensor memory data. The data is collected when the value is True. When using this parameter, `op_time` must be set to True. Default: False.
 - **parallel_strategy** (bool, optional) - (Ascend only) Whether to collect parallel strategy performance data. Default: True.
 - **start_profile** (bool, optional) - Whether to start data collection when the Profiler is initialized. Default: True.
-- **aicore_metrics** (int, optional) - (Ascend only) Type of AICORE performance data to collect. The value must be in [-1, 0, 1, 2, 3, 4, 5]. Default: 0. The data items contained in each type are as follows:
+- **aicore_metrics** (int, optional) - (Ascend only) Type of AICORE performance data to collect. When using this parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5]. Default: 0. The data items contained in each type are as follows:

   - -1: Does not collect any AICORE data.
   - 0: ArithmeticUtilization, including mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio, etc.
@@ -29,7 +29,7 @@ mindspore.Profiler
   - False: asynchronous mode. The operator time is the time it takes to dispatch the operator from the CPU to the GPU. This mode reduces the impact of the Profiler on training time.

 - **data_process** (bool, optional) - (Ascend/GPU) Whether to collect data preparation performance data. Default: True.
-- **timeline_limit** (int, optional) - Upper limit on the storage size of the timeline file (unit: MB). When using this parameter, op_time must be set to True. Default: 500.
+- **timeline_limit** (int, optional) - Upper limit on the storage size of the timeline file (unit: MB). When using this parameter, `op_time` must be set to True. Default: 500.

 Raises:
 - **RuntimeError** - When the CANN version does not match the MindSpore version, MindSpore cannot parse the generated ascend_job_id directory structure.
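A hedged usage sketch of the constraint these parameters share: `profile_memory`, `profile_communication`, `aicore_metrics` and `timeline_limit` only take effect when `op_time` is True. The surrounding workflow (set_context, analyse()) follows the usual Profiler pattern and is not shown in this diff, so treat it as an assumption:

    import mindspore as ms
    from mindspore import Profiler

    ms.set_context(device_target="Ascend")

    # op_time=True is required for profile_memory, aicore_metrics and timeline_limit to apply.
    profiler = Profiler(output_path="./data",
                        op_time=True,
                        profile_memory=True,
                        aicore_metrics=0,    # ArithmeticUtilization data items
                        timeline_limit=500)  # cap the timeline file at 500 MB

    # ... run training or evaluation here ...

    profiler.analyse()  # parse the collected data under output_path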
@@ -37,9 +37,8 @@ class Integrator:
     _file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
     _file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
     _file_name_framework = 'framework_raw_{}.csv'
-    _header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
-                           'percent']
-    _header_aicore_detail = ['full_op_name', 'execution_time']
+    _header_aicore_type = ['op_type', 'total_time', 'execution_frequency', 'percent']
+    _header_aicore_detail = ['full_op_name', 'execution_time', 'execution_frequency']
     _header_aicpu = ['serial_number', 'op_type', 'total_time', 'dispatch_time',
                      'execution_time', 'run_start', 'run_end']

@@ -141,16 +140,16 @@ class Integrator:
                 op_name_type_cache[row[3]] = row[5]

         op_type_time_cache = {}
-        for full_op_name, op_time in self._op_time_cache.items():
+        for full_op_name, op_info in self._op_time_cache.items():
+            self._total_time += op_info[0] * op_info[1]
             op_type = op_name_type_cache.get(full_op_name)
             op_type_time = op_type_time_cache.get(op_type)
             if not op_type_time:
-                op_type_time = [op_time, 1]
+                op_type_time = [op_info[0] * op_info[1], op_info[1]]
                 op_type_time_cache[op_type] = op_type_time
             else:
-                op_type_time[0] += op_time
-                op_type_time[1] += 1
+                op_type_time[0] += op_info[0] * op_info[1]
+                op_type_time[1] += op_info[1]

         op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
         op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
         with open(op_type_file_path, 'w') as type_file:
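To make the new aggregation concrete, a standalone sketch (not the library code) of what the loop above computes: each cache entry is assumed to hold [average execution time, execution count], the total accumulates avg * count, and each output row matches the new _header_aicore_type columns ['op_type', 'total_time', 'execution_frequency', 'percent']:

    from decimal import Decimal

    def aggregate_op_types(op_time_cache, op_name_type_cache):
        """Aggregate per-operator [avg_time, count] entries into per-type rows."""
        total_time = Decimal(0)
        op_type_time_cache = {}  # op_type -> [total_time, execution_frequency]
        for full_op_name, (avg_time, count) in op_time_cache.items():
            total_time += avg_time * count
            op_type = op_name_type_cache.get(full_op_name)
            entry = op_type_time_cache.setdefault(op_type, [Decimal(0), 0])
            entry[0] += avg_time * count
            entry[1] += count

        rows = []
        for op_type, (type_time, freq) in op_type_time_cache.items():
            percent = float(type_time / total_time * 100) if total_time else 0.0
            rows.append([op_type, type_time, freq, percent])
        return rows

    # Hypothetical values:
    print(aggregate_op_types(
        {"Conv2D-op1": [Decimal("0.5"), 4], "Conv2D-op2": [Decimal("0.3"), 2],
         "MatMul-op3": [Decimal("1.2"), 2]},
        {"Conv2D-op1": "Conv2D", "Conv2D-op2": "Conv2D", "MatMul-op3": "MatMul"}))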
@@ -201,8 +200,8 @@ class Integrator:
                 if op_infos[0] == 'total':
                     self._total_time = Decimal(op_infos[2])
                     continue
-                self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
-                csv_writer.writerow([op_infos[0], op_infos[1]])
+                self._op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]
+                csv_writer.writerow([op_infos[0], op_infos[1], op_infos[3]])

     def _parse_aicpu_time(self):
         """Parse the parsed AICPU operator time file."""
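A small sketch of the input this parsing step consumes, assuming (from the OPComputeTimeParser change below) that each row of output_op_compute_time_{device}.txt is 'op_name compute_time(ms) stream_id execution_times' and that the trailing 'total op <time> 0' row carries the overall total. The sample lines are invented for illustration:

    from decimal import Decimal

    sample_lines = [
        "Default/Conv2D-op1 0.5 2 4",   # op_name avg_time(ms) stream_id execution_times
        "Default/MatMul-op3 1.2 2 2",
        "total op 1.7 0",               # summary row: op_infos[2] is the total time
    ]

    op_time_cache = {}
    total_time = Decimal(0)
    for line in sample_lines:
        op_infos = line.split()
        if op_infos[0] == 'total':
            total_time = Decimal(op_infos[2])
            continue
        # New cache shape: [average execution time, execution count].
        op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]

    print(total_time, op_time_cache)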
@@ -79,7 +79,7 @@ class MinddataProfilingAnalyzer:
             validated_dir = validate_and_normalize_path(dir_name)
         except RuntimeError as path_error:
             logger.warning('<%s> is invalid.', dir_type)
-            raise ProfilerPathErrorException(dir_type + 'is invalid.') from path_error
+            raise ProfilerPathErrorException(dir_type + ' is invalid.') from path_error

         if not os.path.isdir(validated_dir):
             logger.warning('<%s> <%s> not found.', dir_type, validated_dir)
@@ -36,7 +36,7 @@ class OPComputeTimeParser:
     """

     _dst_file_title = 'title:op compute time'
-    _dst_file_column_title = 'op_name compute_time(ms) stream_id'
+    _dst_file_column_title = 'op_name compute_time(ms) stream_id execution_times'
     _dst_file_column_title += '\n------------ --------------- ---------'

     def __init__(self, hwts_output_file, output_filename, op_task_info,
@@ -77,7 +77,6 @@ class OPComputeTimeParser:
             op_duration_str = str(item.duration / factor)
             if op_name in op_name_time_dict.keys():
                 op_name_time_dict[op_name] += op_duration
-                if item.task_id == op_name_task_dict[op_name]:
-                    op_name_count_dict[op_name] += 1
+                op_name_count_dict[op_name] += 1
                 op_name_start_time[op_name].append(
                     (op_start_time_str, op_duration_str)
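With the task_id guard removed, every record of an operator now increments its execution count, so the count reflects all executions rather than only the ones whose task_id matched the stored one. A toy re-creation of the accumulation with made-up records (the record fields are assumptions, not the HWTS format):

    from collections import namedtuple

    Record = namedtuple("Record", ["op_name", "duration"])  # illustrative stand-in for an HWTS log record

    records = [
        Record("Default/Conv2D-op1", 500),
        Record("Default/Conv2D-op1", 520),   # second execution of the same operator
        Record("Default/MatMul-op3", 1200),
    ]

    op_name_time_dict = {}
    op_name_count_dict = {}
    for item in records:
        op_name_time_dict[item.op_name] = op_name_time_dict.get(item.op_name, 0) + item.duration
        # No task_id filter any more: every record bumps the count.
        op_name_count_dict[item.op_name] = op_name_count_dict.get(item.op_name, 0) + 1

    for name, total in op_name_time_dict.items():
        print(name, "avg duration:", total / op_name_count_dict[name],
              "executions:", op_name_count_dict[name])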
@@ -120,7 +119,7 @@ class OPComputeTimeParser:
                 raise ValueError("The number of operations can not be 0.")
             avg_time = time / op_name_count_dict.get(op_name)
             total_time += avg_time
-            result_data += ("%s %s %s\n" % (op_name, str(avg_time), stream_id))
+            result_data += ("%s %s %s %s\n" % (op_name, str(avg_time), stream_id, op_name_count_dict.get(op_name)))
         result_data += ("total op %s 0" % (str(total_time)))

         timeline_data = []
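For reference, a sketch of the text this writer now emits, with invented inputs; the per-op line layout matches the updated _dst_file_column_title ('op_name compute_time(ms) stream_id execution_times') and the final 'total op' row sums the averages:

    # Illustrative inputs; stream ids and times are made up.
    op_name_time_dict = {"Default/Conv2D-op1": 1.0, "Default/MatMul-op3": 1.2}
    op_name_count_dict = {"Default/Conv2D-op1": 2, "Default/MatMul-op3": 1}
    op_name_stream_dict = {"Default/Conv2D-op1": 2, "Default/MatMul-op3": 2}

    result_data = ""
    total_time = 0
    for op_name, time in op_name_time_dict.items():
        count = op_name_count_dict[op_name]
        if count == 0:
            raise ValueError("The number of operations can not be 0.")
        avg_time = time / count
        total_time += avg_time
        result_data += ("%s %s %s %s\n" % (op_name, str(avg_time), op_name_stream_dict[op_name], count))
    result_data += ("total op %s 0" % (str(total_time)))

    print("op_name compute_time(ms) stream_id execution_times")
    print(result_data)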
@@ -93,15 +93,16 @@ class Profiler:
         op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True.
         profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
             a multi devices training, collect when True. Setting this parameter has no effect during single device
-            training. When using this parameter, op_time must be set to True. Default: False.
+            training. When using this parameter, `op_time` must be set to True. Default: False.
         profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
-            When using this parameter, op_time must be set to True. Default: False.
+            When using this parameter, `op_time` must be set to True. Default: False.
         parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
             Default value: True.
         start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
             data collection based on conditions. Default: True.
-        aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected. The value must be in
-            [-1, 0, 1, 2, 3, 4, 5], Default: 0, the data items contained in each metric are as follows:
+        aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
+            parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5], Default: 0, the
+            data items contained in each metric are as follows:

             - -1: Does not collect AICORE data.
             - 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
@@ -124,7 +125,7 @@ class Profiler:
         data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
             Default value: True.
         timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M). When using this
-            parameter, op_time must be set to True. Default value: 500.
+            parameter, `op_time` must be set to True. Default value: 500.

     Raises:
         RuntimeError: When the version of CANN does not match the version of MindSpore,
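A hedged end-to-end sketch of the constraint the docstring repeats: aicore_metrics (like profile_memory and timeline_limit) only applies when op_time=True, and start_profile=False defers collection until an explicit start. The start()/stop()/analyse() calls follow the usual Profiler workflow and are assumptions here, since this diff does not show them:

    from mindspore import Profiler

    # Deferred collection: construct first, start around the region of interest.
    profiler = Profiler(start_profile=False,
                        op_time=True,       # required for aicore_metrics to take effect
                        aicore_metrics=0,   # a value from the documented set [-1, 0, 1, 2, 3, 4, 5]
                        data_process=True)

    # ... build the network, warm up a few steps ...

    profiler.start()
    # ... run the steps to be profiled ...
    profiler.stop()
    profiler.analyse()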