!42312 [profiler] pynative 模式profiler优化；提供生成PMU&l2_cache性能数据的接口

Merge pull request !42312 from zangqx/m_Q3
2022-09-20 06:32:13 +00:00 · 2022-09-20 06:32:13 +00:00 · b3adb8f823
parent ee14b16a3f 034d6a416e
commit b3adb8f823
5 changed files with 51 additions and 53 deletions
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc
@ -38,15 +38,11 @@
 #include "debug/data_dump/e2e_dump.h"
 #include "debug/debugger/debugger_utils.h"
 #include "plugin/device/ascend/hal/profiler/memory_profiling.h"
-#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
 #include "utils/anf_utils.h"
-#include "plugin/device/ascend/hal/profiler/pynative_profiling.h"
 #include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
 #include "plugin/device/ascend/hal/device/dump/ascend_dump.h"

 using Adx::AdxRegDumpProcessCallBack;
-using mindspore::device::ascend::ProfilingManager;
-using mindspore::profiler::ProfilerManager;
 using mindspore::profiler::ascend::MemoryProfiling;
 #endif

@ -416,31 +412,17 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
  if (nop_op_to_memcpy_.find(kernel) != nop_op_to_memcpy_.end()) {
    (void)MemoryCopyAsync(kernel, real_inputs, outputs);
  } else {
-#ifndef ENABLE_SECURITY
-    auto profiler_inst = profiler::ascend::PynativeProfiler::GetInstance();
-    MS_EXCEPTION_IF_NULL(profiler_inst);
-    std::thread::id t_id = std::this_thread::get_id();
-    profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id, kernel->fullname_with_scope(),
-                                       is_dynamic_shape);
-#endif
    MS_LOG(DEBUG) << "Begin launch kernel: " << kernel->fullname_with_scope();
    ret = kernel_mod->Launch(real_inputs, workspace, outputs, stream);
    MS_LOG(DEBUG) << "End launch kernel: " << kernel->fullname_with_scope();
-#ifndef ENABLE_SECURITY
-    profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
-#endif
    if (!ret) {
      MS_LOG(ERROR) << "Launch kernel failed, kernel full name: " << kernel->fullname_with_scope();
      return false;
    }
  }
  auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance();
-  auto profiler_manage_instance = profiler::ProfilerManager::GetInstance();
  MS_EXCEPTION_IF_NULL(ascend_instance);
-  MS_EXCEPTION_IF_NULL(profiler_manage_instance);
-  if ((profiler_manage_instance->GetNetDynamicShapeStatus() ||
-       ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) &&
-      ascend_instance->GetEnableFlag()) {
+  if (ascend_instance->GetEnableFlag()) {
    ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, UintToInt(device_id), kernel_type);
  }

--- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc
@ -41,13 +41,13 @@ namespace {
 PROFILER_REG(kAscendDevice, AscendProfiler);
 }  // namespace

-std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
-  {"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
-  {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
-  {"Memory", ACL_AICORE_MEMORY_BANDWIDTH},
-  {"MemoryLO", ACL_AICORE_L0B_AND_WIDTH},
-  {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
-};
+std::map<std::string, aclprofAicoreMetrics> kAicMetrics{{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
+                                                        {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
+                                                        {"Memory", ACL_AICORE_MEMORY_BANDWIDTH},
+                                                        {"MemoryL0", ACL_AICORE_L0B_AND_WIDTH},  // L0: digit zero
+                                                        {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
+                                                        {"MemoryUB", ACL_AICORE_MEMORY_UB},
+                                                        {"None", ACL_AICORE_NONE}};

 std::shared_ptr<AscendProfiler> AscendProfiler::GetInstance() {
  auto instance = Profiler::GetInstance(kAscendDevice);
@ -120,7 +120,9 @@ uint64_t AscendProfiler::GetOptionsMask() const {
  if (options_json["hccl"] == "on") {
    mask |= ACL_PROF_HCCL_TRACE;
  }
-
+  if (options_json["l2_cache"] == "on") {
+    mask |= ACL_PROF_L2CACHE;
+  }
  return mask;
 }

--- a/mindspore/python/mindspore/profiler/parser/flops_parser.py
+++ b/mindspore/python/mindspore/profiler/parser/flops_parser.py
@ -472,7 +472,7 @@ class FlopsParser:

        if not os.path.exists(_step_trace_file_path):
            logger.critical(f'The {_step_trace_file_path} file does not exist.')
-            raise ProfilerFileNotFoundException(_step_trace_file_path)
+            return op_all_step_time, op_all_step_comp
        try:
            with open(_step_trace_file_path, 'r') as f:
                lines = f.readlines()
--- a/mindspore/python/mindspore/profiler/parser/hwts_log_parser.py
+++ b/mindspore/python/mindspore/profiler/parser/hwts_log_parser.py
@ -30,6 +30,7 @@ class HWTSLogParser:
         output_filename (str): The output data path and name. Such as: './output_format_data_hwts_0.txt'.
    """

+    GRAPH_MODE_MAX_TASKID = 65000
    _source_file_target_old = 'hwts.log.data.45.dev.profiler_default_tag'
    _source_file_target = 'hwts.data'
    _dst_file_title = 'title:45 HWTS data'
@ -107,10 +108,10 @@ class HWTSLogParser:
                    logger.info("Profiling: invalid hwts log record type %s", ms_type)
                    continue

-                if task_id < task_id_threshold:
-                    if last_task_stream_map.get(stream_id, task_id) > task_id and self._dynamic_status:
-                        flip_times += 1
-                    task_id_str = str(stream_id) + "_" + str(task_id + flip_times * task_id_threshold)
+                if HWTSLogParser.GRAPH_MODE_MAX_TASKID < last_task_stream_map.get(stream_id, task_id)\
+                        and task_id < last_task_stream_map.get(stream_id, task_id):
+                    flip_times += 1
+                task_id_str = str(stream_id) + "_" + str(task_id + flip_times * task_id_threshold)
                result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id,
                                                                          blk_id, task_id_str, syscnt, stream_id))
                last_task_stream_map[stream_id] = task_id
--- a/mindspore/python/mindspore/profiler/profiling.py
+++ b/mindspore/python/mindspore/profiler/profiling.py
@ -130,6 +130,15 @@ class Profiler:
    _has_initialized = False
    _ascend_profiling_options = ""
    _ascend_job_id = ""
+    _aicore_metrics_dict = {
+        0: "ArithmeticUtilization",
+        1: "PipeUtilization",
+        2: "Memory",
+        3: "MemoryL0",
+        4: "ResourceConflictRatio",
+        5: "MemoryUB",
+        -1: "None"
+    }

    def __init__(self, **kwargs):
        if Profiler._has_initialized:
@ -150,6 +159,10 @@ class Profiler:
        self._rank_size = 0
        self._ascend_profiler = None
        _environment_check()
+        # default aicore_metrics type is ArithmeticUtilization
+        self._aicore_metrics_id = 0
+        self._l2_cache = "off"
+        self._parser_kwargs(kwargs)
        # get device_id and device_target
        self._get_devid_rankid_and_devtarget()
        self._get_output_path(kwargs)
@ -262,8 +275,6 @@ class Profiler:
        # Setup and start MindData Profiling
        self._md_profiler = cde.GlobalContext.profiling_manager()
        self._md_profiler.init()
-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            raise RuntimeError("Pynative mode is not supported on GPU currently.")
        self._parse_parameter_for_gpu(kwargs)

        gpu_profiler = c_expression.Profiler
@ -320,11 +331,12 @@ class Profiler:
            "bp_point": bp_point,
            "training_trace": "on",
            "task_trace": "on",
-            "aic_metrics": "ArithmeticUtilization",
+            "aic_metrics": Profiler._aicore_metrics_dict.get(self._aicore_metrics_id, "ArithmeticUtilization"),
            "aicpu": "on",
            "profile_memory": profile_memory,
            "hccl": profiler_communication,
-            "parallel_strategy": "on"
+            "l2_cache": self._l2_cache,
+            "parallel_strategy": "on",
        }

        return profiling_options
@ -454,10 +466,7 @@ class Profiler:
        else:
            logger.info("No need to stop profiler because profiler has been stopped.")

-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            self._ascend_pynative_analyse()
-        else:
-            self._ascend_graph_analyse()
+        self._ascend_graph_analyse()

        # Call MSAdvisor function
        try:
@ -701,17 +710,7 @@ class Profiler:
            self._gpu_profiler.step_profiling_enable(True)
        elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
            self._md_profiler.start()
-            if context.get_context("mode") == context.PYNATIVE_MODE:
-                self._ascend_pynative_start()
-            else:
-                self._ascend_graph_start()
-
-    def _ascend_pynative_start(self):
-        """Ascend pynative mode start profiling."""
-        pynative_profiler = c_expression.Profiler
-        self._pynative_profiler = pynative_profiler.get_instance("PyNative")
-        self._pynative_profiler.init(self._output_path)
-        self._ascend_profiler.start()
+            self._ascend_graph_start()

    def _ascend_graph_start(self):
        """Ascend graph mode start profiling."""
@ -763,8 +762,6 @@ class Profiler:
        if self._device_target and self._device_target == DeviceTarget.GPU.value:
            self._gpu_profiler.stop()
        elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
-            if context.get_context("mode") == context.PYNATIVE_MODE:
-                self._pynative_profiler.stop()
            self._ascend_profiler.stop()

            self._stop_time = int(time.time() * 10000000)
@ -1182,6 +1179,22 @@ class Profiler:
            logger.warning("The target dir already exists. "
                           "There may be some old profiling data, and they will be rewritten in the end.")

+    def _parser_kwargs(self, kwargs):
+        """Pop the aicore_metrics and l2_cache options from kwargs, falling back to defaults if invalid."""
+        self._aicore_metrics_id = kwargs.pop("aicore_metrics", 0)
+        if not isinstance(self._aicore_metrics_id, int) or self._aicore_metrics_id not in self._aicore_metrics_dict:
+            logger.warning("aicore_metrics is an invalid value, it will be set to 0.")
+            self._aicore_metrics_id = 0
+
+        l2_cache_enable = kwargs.pop("l2_cache", False)
+        if not isinstance(l2_cache_enable, bool):
+            logger.warning("l2_cache is an invalid value, it will be set to False.")
+            # Apply the fallback announced above; a truthy non-bool must not switch l2_cache on.
+            l2_cache_enable = False
+
+        # The Ascend profiling options carry this switch as the strings "on" / "off".
+        self._l2_cache = "on" if l2_cache_enable else "off"
+
    def _analyse_hccl_info(self):
        """Analyse hccl info."""
        hccl_path = os.path.join(self._output_path, "hccl_info_{}".format(self._rank_id))