!49541 Profiler算子耗时百分比占比使用total_time

Merge pull request !49541 from liuchuting/percent
2023-03-03 08:33:35 +00:00 · 2023-03-03 08:33:35 +00:00 · 195cd9dcb6
parent fe36a9fa85 25958b4eb8
commit 195cd9dcb6
5 changed files with 23 additions and 24 deletions
--- a/docs/api/api_python/mindspore/mindspore.Profiler.rst
+++ b/docs/api/api_python/mindspore/mindspore.Profiler.rst
@ -8,11 +8,11 @@ mindspore.Profiler
    参数：
        - **output_path** (str, 可选) - 表示输出数据的路径。默认值："./data"。
        - **op_time** (bool, 可选) -（Ascend/GPU）表示是否收集算子性能数据，默认值：True。
-        - **profile_communication** (bool, 可选) -（仅限Ascend）表示是否在多设备训练中收集通信性能数据。当值为True时，收集这些数据。在单台设备训练中，该参数的设置无效。使用此参数时，op_time必须设置成True。默认值：False。
-        - **profile_memory** (bool, 可选) -（仅限Ascend）表示是否收集Tensor内存数据。当值为True时，收集这些数据。使用此参数时，op_time必须设置成True。默认值：False。
+        - **profile_communication** (bool, 可选) -（仅限Ascend）表示是否在多设备训练中收集通信性能数据。当值为True时，收集这些数据。在单台设备训练中，该参数的设置无效。使用此参数时， `op_time` 必须设置成True。默认值：False。
+        - **profile_memory** (bool, 可选) -（仅限Ascend）表示是否收集Tensor内存数据。当值为True时，收集这些数据。使用此参数时， `op_time` 必须设置成True。默认值：False。
        - **parallel_strategy** (bool, 可选) -（仅限Ascend）表示是否收集并行策略性能数据， 默认值：True。
        - **start_profile** (bool, 可选) - 该参数控制是否在Profiler初始化的时候开启数据采集。默认值：True。
-        - **aicore_metrics** (int, 可选) -（仅限Ascend）收集的AICORE性能数据类型。值必须包含在[-1, 0, 1, 2, 3, 4, 5]，默认值：0，每种类型包含的数据项如下：
+        - **aicore_metrics** (int, 可选) -（仅限Ascend）收集的AICORE性能数据类型，使用此参数时， `op_time` 必须设置成True，且值必须包含在[-1, 0, 1, 2, 3, 4, 5]，默认值：0，每种类型包含的数据项如下：

          - -1: 不收集任何AICORE数据。
          - 0: ArithmeticUtilization，包含mac_fp16/int8_ratio、vec_fp32/fp16/int32_ratio、vec_misc_ratio等。
@ -29,7 +29,7 @@ mindspore.Profiler
          - False: 异步方式，算子耗时为从CPU发送到GPU的耗时。这种方式能减少因增加Profiler对训练时间的影响。

        - **data_process** (bool, 可选) -（Ascend/GPU）表示是否收集数据准备性能数据，默认值：True。
-        - **timeline_limit** (int, 可选) - 设置限制timeline文件存储上限大小（单位M），使用此参数时，op_time必须设置成True。默认值：500。
+        - **timeline_limit** (int, 可选) - 设置限制timeline文件存储上限大小（单位M），使用此参数时， `op_time` 必须设置成True。默认值：500。

    异常：
        - **RuntimeError** - 当CANN的版本与MindSpore版本不匹配时，生成的ascend_job_id目录结构MindSpore无法解析。
--- a/mindspore/python/mindspore/profiler/parser/integrator.py
+++ b/mindspore/python/mindspore/profiler/parser/integrator.py
@ -37,9 +37,8 @@ class Integrator:
    _file_name_aicore_detail_time = 'output_op_compute_time_{}.txt'
    _file_name_aicpu_time = 'output_data_preprocess_aicpu_{}.txt'
    _file_name_framework = 'framework_raw_{}.csv'
-    _header_aicore_type = ['op_type', 'execution_time', 'execution_frequency',
-                           'percent']
-    _header_aicore_detail = ['full_op_name', 'execution_time']
+    _header_aicore_type = ['op_type', 'total_time', 'execution_frequency', 'percent']
+    _header_aicore_detail = ['full_op_name', 'execution_time', 'execution_frequency']
    _header_aicpu = ['serial_number', 'op_type', 'total_time', 'dispatch_time',
                     'execution_time', 'run_start', 'run_end']

@ -141,16 +140,16 @@ class Integrator:
                op_name_type_cache[row[3]] = row[5]

        op_type_time_cache = {}
-        for full_op_name, op_time in self._op_time_cache.items():
+        for full_op_name, op_info in self._op_time_cache.items():
+            self._total_time += op_info[0] * op_info[1]
            op_type = op_name_type_cache.get(full_op_name)
            op_type_time = op_type_time_cache.get(op_type)
            if not op_type_time:
-                op_type_time = [op_time, 1]
+                op_type_time = [op_info[0] * op_info[1], op_info[1]]
                op_type_time_cache[op_type] = op_type_time
            else:
-                op_type_time[0] += op_time
-                op_type_time[1] += 1
-
+                op_type_time[0] += op_info[0] * op_info[1]
+                op_type_time[1] += op_info[1]
        op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
        op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
        with open(op_type_file_path, 'w') as type_file:
@ -201,8 +200,8 @@ class Integrator:
                    if op_infos[0] == 'total':
                        self._total_time = Decimal(op_infos[2])
                        continue
-                    self._op_time_cache[op_infos[0]] = Decimal(op_infos[1])
-                    csv_writer.writerow([op_infos[0], op_infos[1]])
+                    self._op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]
+                    csv_writer.writerow([op_infos[0], op_infos[1], op_infos[3]])

    def _parse_aicpu_time(self):
        """Parse the parsed AICPU operator time file."""
--- a/mindspore/python/mindspore/profiler/parser/minddata_analyzer.py
+++ b/mindspore/python/mindspore/profiler/parser/minddata_analyzer.py
@ -79,7 +79,7 @@ class MinddataProfilingAnalyzer:
            validated_dir = validate_and_normalize_path(dir_name)
        except RuntimeError as path_error:
            logger.warning('<%s> is invalid.', dir_type)
-            raise ProfilerPathErrorException(dir_type + 'is invalid.') from path_error
+            raise ProfilerPathErrorException(dir_type + ' is invalid.') from path_error

        if not os.path.isdir(validated_dir):
            logger.warning('<%s> <%s> not found.', dir_type, validated_dir)
--- a/mindspore/python/mindspore/profiler/parser/optime_parser.py
+++ b/mindspore/python/mindspore/profiler/parser/optime_parser.py
@ -36,7 +36,7 @@ class OPComputeTimeParser:
    """

    _dst_file_title = 'title:op compute time'
-    _dst_file_column_title = 'op_name       compute_time(ms) stream_id'
+    _dst_file_column_title = 'op_name       compute_time(ms) stream_id execution_times'
    _dst_file_column_title += '\n------------  ---------------  ---------'

    def __init__(self, hwts_output_file, output_filename, op_task_info,
@ -77,8 +77,7 @@ class OPComputeTimeParser:
            op_duration_str = str(item.duration / factor)
            if op_name in op_name_time_dict.keys():
                op_name_time_dict[op_name] += op_duration
-                if item.task_id == op_name_task_dict[op_name]:
-                    op_name_count_dict[op_name] += 1
+                op_name_count_dict[op_name] += 1
                op_name_start_time[op_name].append(
                    (op_start_time_str, op_duration_str)
                )
@ -120,7 +119,7 @@ class OPComputeTimeParser:
                    raise ValueError("The number of operations can not be 0.")
                avg_time = time / op_name_count_dict.get(op_name)
                total_time += avg_time
-                result_data += ("%s %s  %s\n" % (op_name, str(avg_time), stream_id))
+                result_data += ("%s %s %s %s\n" % (op_name, str(avg_time), stream_id, op_name_count_dict.get(op_name)))
        result_data += ("total op  %s 0" % (str(total_time)))

        timeline_data = []
--- a/mindspore/python/mindspore/profiler/profiling.py
+++ b/mindspore/python/mindspore/profiler/profiling.py
@ -93,15 +93,16 @@ class Profiler:
        op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True.
        profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
            a multi-device training, collect when True. Setting this parameter has no effect during single device
-            training. When using this parameter, op_time must be set to True. Default: False.
+            training. When using this parameter, `op_time` must be set to True. Default: False.
        profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
-            When using this parameter, op_time must be set to True. Default: False.
+            When using this parameter, `op_time` must be set to True. Default: False.
        parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
            Default value: True.
        start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
            data collection based on conditions. Default: True.
-        aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected. The value must be in
-            [-1, 0, 1, 2, 3, 4, 5], Default: 0, the data items contained in each metric are as follows:
+        aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected. When using this
+            parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5]. Default: 0. The
+            data items contained in each metric are as follows:

            - -1: Does not collect AICORE data.
            - 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
@ -124,7 +125,7 @@ class Profiler:
        data_process (bool, optional): (Ascend/GPU) Whether to collect data preparation performance data.
            Default value: True.
        timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M). When using this
-            parameter, op_time must be set to True. Default value: 500.
+            parameter, `op_time` must be set to True. Default value: 500.

    Raises:
        RuntimeError: When the version of CANN does not match the version of MindSpore,