!42312 [profiler] pynative 模式profiler优化;提供生成PMU&l2_cache性能数据的接口

Merge pull request !42312 from zangqx/m_Q3
This commit is contained in:
i-robot 2022-09-20 06:32:13 +00:00 committed by Gitee
commit b3adb8f823
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 51 additions and 53 deletions

View File

@ -38,15 +38,11 @@
#include "debug/data_dump/e2e_dump.h"
#include "debug/debugger/debugger_utils.h"
#include "plugin/device/ascend/hal/profiler/memory_profiling.h"
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
#include "utils/anf_utils.h"
#include "plugin/device/ascend/hal/profiler/pynative_profiling.h"
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
#include "plugin/device/ascend/hal/device/dump/ascend_dump.h"
using Adx::AdxRegDumpProcessCallBack;
using mindspore::device::ascend::ProfilingManager;
using mindspore::profiler::ProfilerManager;
using mindspore::profiler::ascend::MemoryProfiling;
#endif
@ -416,31 +412,17 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
if (nop_op_to_memcpy_.find(kernel) != nop_op_to_memcpy_.end()) {
(void)MemoryCopyAsync(kernel, real_inputs, outputs);
} else {
#ifndef ENABLE_SECURITY
auto profiler_inst = profiler::ascend::PynativeProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
std::thread::id t_id = std::this_thread::get_id();
profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id, kernel->fullname_with_scope(),
is_dynamic_shape);
#endif
MS_LOG(DEBUG) << "Begin launch kernel: " << kernel->fullname_with_scope();
ret = kernel_mod->Launch(real_inputs, workspace, outputs, stream);
MS_LOG(DEBUG) << "End launch kernel: " << kernel->fullname_with_scope();
#ifndef ENABLE_SECURITY
profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
#endif
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed, kernel full name: " << kernel->fullname_with_scope();
return false;
}
}
auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance();
auto profiler_manage_instance = profiler::ProfilerManager::GetInstance();
MS_EXCEPTION_IF_NULL(ascend_instance);
MS_EXCEPTION_IF_NULL(profiler_manage_instance);
if ((profiler_manage_instance->GetNetDynamicShapeStatus() ||
ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) &&
ascend_instance->GetEnableFlag()) {
if (ascend_instance->GetEnableFlag()) {
ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, UintToInt(device_id), kernel_type);
}

View File

@ -41,13 +41,13 @@ namespace {
PROFILER_REG(kAscendDevice, AscendProfiler);
} // namespace
// Lookup table from an AI Core metrics name to its aclprofAicoreMetrics value.
// NOTE(review): the "MemoryLO" key is spelled with a letter 'O'; confirm that callers
// do not send "MemoryL0" (digit zero), which would not be found in this table.
std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
{"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
{"Memory", ACL_AICORE_MEMORY_BANDWIDTH},
{"MemoryLO", ACL_AICORE_L0B_AND_WIDTH},
{"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
};
// Lookup table from an AI Core metrics name to its aclprofAicoreMetrics value.
// The Python Profiler front end spells the L0 entry "MemoryL0" (digit zero), so that key
// must be present here. The earlier "MemoryLO" (letter 'O') spelling is kept as a legacy
// alias so option strings written against the old table still resolve.
std::map<std::string, aclprofAicoreMetrics> kAicMetrics{{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
                                                        {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
                                                        {"Memory", ACL_AICORE_MEMORY_BANDWIDTH},
                                                        {"MemoryL0", ACL_AICORE_L0B_AND_WIDTH},
                                                        {"MemoryLO", ACL_AICORE_L0B_AND_WIDTH},
                                                        {"ResourceConflictRatio", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
                                                        {"MemoryUB", ACL_AICORE_MEMORY_UB},
                                                        {"None", ACL_AICORE_NONE}};
std::shared_ptr<AscendProfiler> AscendProfiler::GetInstance() {
auto instance = Profiler::GetInstance(kAscendDevice);
@ -120,7 +120,9 @@ uint64_t AscendProfiler::GetOptionsMask() const {
if (options_json["hccl"] == "on") {
mask |= ACL_PROF_HCCL_TRACE;
}
if (options_json["l2_cache"] == "on") {
mask |= ACL_PROF_L2CACHE;
}
return mask;
}

View File

@ -472,7 +472,7 @@ class FlopsParser:
if not os.path.exists(_step_trace_file_path):
logger.critical(f'The {_step_trace_file_path} file does not exist.')
raise ProfilerFileNotFoundException(_step_trace_file_path)
return op_all_step_time, op_all_step_comp
try:
with open(_step_trace_file_path, 'r') as f:
lines = f.readlines()

View File

@ -30,6 +30,7 @@ class HWTSLogParser:
output_filename (str): The output data path and name. Such as: './output_format_data_hwts_0.txt'.
"""
GRAPH_MODE_MAX_TASKID = 65000
_source_file_target_old = 'hwts.log.data.45.dev.profiler_default_tag'
_source_file_target = 'hwts.data'
_dst_file_title = 'title:45 HWTS data'
@ -107,10 +108,10 @@ class HWTSLogParser:
logger.info("Profiling: invalid hwts log record type %s", ms_type)
continue
if task_id < task_id_threshold:
if last_task_stream_map.get(stream_id, task_id) > task_id and self._dynamic_status:
flip_times += 1
task_id_str = str(stream_id) + "_" + str(task_id + flip_times * task_id_threshold)
if HWTSLogParser.GRAPH_MODE_MAX_TASKID < last_task_stream_map.get(stream_id, task_id)\
and task_id < last_task_stream_map.get(stream_id, task_id):
flip_times += 1
task_id_str = str(stream_id) + "_" + str(task_id + flip_times * task_id_threshold)
result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id,
blk_id, task_id_str, syscnt, stream_id))
last_task_stream_map[stream_id] = task_id

View File

@ -130,6 +130,15 @@ class Profiler:
_has_initialized = False
_ascend_profiling_options = ""
_ascend_job_id = ""
# Maps the integer id accepted through the `aicore_metrics` keyword argument to the
# metric name that is written into the "aic_metrics" entry of the profiling options.
_aicore_metrics_dict = {
0: "ArithmeticUtilization",
1: "PipeUtilization",
2: "Memory",
3: "MemoryL0",
4: "ResourceConflictRatio",
5: "MemoryUB",
-1: "None"
}
def __init__(self, **kwargs):
if Profiler._has_initialized:
@ -150,6 +159,10 @@ class Profiler:
self._rank_size = 0
self._ascend_profiler = None
_environment_check()
# default aicore_metrics type is ArithmeticUtilization
self._aicore_metrics_id = 0
self._l2_cache = "off"
self._parser_kwargs(kwargs)
# get device_id and device_target
self._get_devid_rankid_and_devtarget()
self._get_output_path(kwargs)
@ -262,8 +275,6 @@ class Profiler:
# Setup and start MindData Profiling
self._md_profiler = cde.GlobalContext.profiling_manager()
self._md_profiler.init()
if context.get_context("mode") == context.PYNATIVE_MODE:
raise RuntimeError("Pynative mode is not supported on GPU currently.")
self._parse_parameter_for_gpu(kwargs)
gpu_profiler = c_expression.Profiler
@ -320,11 +331,12 @@ class Profiler:
"bp_point": bp_point,
"training_trace": "on",
"task_trace": "on",
"aic_metrics": "ArithmeticUtilization",
"aic_metrics": Profiler._aicore_metrics_dict.get(self._aicore_metrics_id, "ArithmeticUtilization"),
"aicpu": "on",
"profile_memory": profile_memory,
"hccl": profiler_communication,
"parallel_strategy": "on"
"l2_cache": self._l2_cache,
"parallel_strategy": "on",
}
return profiling_options
@ -454,10 +466,7 @@ class Profiler:
else:
logger.info("No need to stop profiler because profiler has been stopped.")
if context.get_context("mode") == context.PYNATIVE_MODE:
self._ascend_pynative_analyse()
else:
self._ascend_graph_analyse()
self._ascend_graph_analyse()
# Call MSAdvisor function
try:
@ -701,17 +710,7 @@ class Profiler:
self._gpu_profiler.step_profiling_enable(True)
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
self._md_profiler.start()
if context.get_context("mode") == context.PYNATIVE_MODE:
self._ascend_pynative_start()
else:
self._ascend_graph_start()
def _ascend_pynative_start(self):
"""Ascend pynative mode start profiling."""
pynative_profiler = c_expression.Profiler
self._pynative_profiler = pynative_profiler.get_instance("PyNative")
self._pynative_profiler.init(self._output_path)
self._ascend_profiler.start()
self._ascend_graph_start()
def _ascend_graph_start(self):
"""Ascend graph mode start profiling."""
@ -763,8 +762,6 @@ class Profiler:
if self._device_target and self._device_target == DeviceTarget.GPU.value:
self._gpu_profiler.stop()
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
if context.get_context("mode") == context.PYNATIVE_MODE:
self._pynative_profiler.stop()
self._ascend_profiler.stop()
self._stop_time = int(time.time() * 10000000)
@ -1182,6 +1179,22 @@ class Profiler:
logger.warning("The target dir already exists. "
"There may be some old profiling data, and they will be rewritten in the end.")
def _parser_kwargs(self, kwargs):
"""Parse kwargs vale."""
self._aicore_metrics_id = kwargs.pop("aicore_metrics", 0)
if not isinstance(self._aicore_metrics_id, int) or self._aicore_metrics_id not in self._aicore_metrics_dict:
logger.warning("aicore_metrics is an invalid value, it will be set to 0.")
self._aicore_metrics_id = 0
l2_cache_enable = kwargs.pop("l2_cache", False)
if not isinstance(l2_cache_enable, bool):
logger.warning("l2_cache is an invalid value, it will be set to False.")
if l2_cache_enable:
self._l2_cache = "on"
else:
self._l2_cache = "off"
def _analyse_hccl_info(self):
"""Analyse hccl info."""
hccl_path = os.path.join(self._output_path, "hccl_info_{}".format(self._rank_id))