diff --git a/config/data_dump.json b/config/data_dump.json index 3df50432fc3..7eee5233e17 100644 --- a/config/data_dump.json +++ b/config/data_dump.json @@ -4,7 +4,7 @@ "path": "/test", "net_name": "ResNet50", "iteration": 0, - "input_output": 0, + "input_output": 2, "kernels": ["Default/Conv-op12"], "support_device": [0,1,2,3,4,5,6,7] }, diff --git a/config/e2e_dump_config.json b/config/e2e_dump_config.json deleted file mode 100644 index fdba941f971..00000000000 --- a/config/e2e_dump_config.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "DumpSettings": { - "enable": false, - "trans_flag": false, - "path": "/tmp/net/", - "net_name": "ResNet50", - "mode": 0, - "iteration": 0, - "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] - }, - - "DumpSettingsSpec": { - "enable": "true: dump enable, false: dump disable", - "trans_flag": "true: trans to host format, false: not trans format", - "path": "the dump file folder", - "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels, 1: dump kernels in kernels list", - "iteration": "0: all iteration, others: specified iteration ", - "kernels": "op's full scope name which need to be dump" - }, - "other": {} -} \ No newline at end of file diff --git a/config/e2e_dump_config_0.json b/config/e2e_dump_config_0.json deleted file mode 100644 index 64b18b8b553..00000000000 --- a/config/e2e_dump_config_0.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "DumpSettings": { - "enable": false, - "trans_flag": false, - "path": "/tmp/hccllog/0", - "net_name": "ResNet50", - "mode": 0, - "iteration": 0, - "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] - }, - - "DumpSettingsSpec": { - "enable": "true: dump enable, false: dump disable", - "trans_flag": "true: trans to host format, false: not trans format", - "path": "the dump file folder", - "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels, 1: dump kernels in kernels list", - "iteration": "0: all iteration, others: specified iteration ", - "kernels": "op's full scope name which need to be dump" - }, - "other": {} -} \ No newline at end of file diff --git a/config/e2e_dump_config_1.json b/config/e2e_dump_config_1.json deleted file mode 100644 index 14864877996..00000000000 --- a/config/e2e_dump_config_1.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "DumpSettings": { - "enable": false, - "trans_flag": false, - "path": "/tmp/hccllog/1", - "net_name": "ResNet50", - "mode": 0, - "iteration": 0, - "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] - }, - - "DumpSettingsSpec": { - "enable": "true: dump enable, false: dump disable", - "trans_flag": "true: trans to host format, false: not trans format", - "path": "the dump file folder", - "net_name": "net name eg:ResNet50", - "mode": "0: dump all kernels, 1: dump kernels in kernels list", - "iteration": "0: all iteration, others: specified iteration ", - "kernels": "op's full scope name which need to be dump" - }, - "other": {} -} \ No newline at end of file diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 6fd83824a67..ede3390455e 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -442,6 +442,7 @@ void AscendSession::InitRuntimeResource() { if (!runtime_instance->Init()) { MS_LOG(EXCEPTION) << "Kernel runtime init error."; } + DumpJsonParser::GetInstance().Parse(); MS_LOG(INFO) << "Finish!"; } diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index a6658f234e9..ff9c63941dd 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -74,6 +74,10 @@ bool DumpJsonParser::IsDumpEnabled() { void DumpJsonParser::Parse() { std::lock_guard guard(lock_); + if (already_parsed_) { + return; + } + already_parsed_ = true; if (!IsDumpEnabled()) { return; } @@ -305,6 +309,8 @@ void DumpJsonParser::JudgeDumpEnabled() { MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support"; } context->set_param(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_); + MS_LOG(INFO) << "Dump status, e2e_dump_enabled:" << e2e_dump_enabled_ + << " async_dump_enabled:" << async_dump_enabled_; } bool DumpJsonParser::NeedDump(const std::string &op_full_name) const { @@ -325,6 +331,9 @@ void DumpJsonParser::MatchKernel(const std::string &kernel_name) { } void DumpJsonParser::PrintUnusedKernel() { + if (!e2e_dump_enabled_ && !async_dump_enabled_) { + return; + } for (const auto &iter : kernels_) { if (iter.second == 0) { MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first; @@ -362,16 +371,6 @@ bool DumpJsonParser::OutputNeedDump() const { return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly; } -bool NeedAsyncDump(const CNodePtr &kernel) { - if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && - AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { - return false; - } - MS_EXCEPTION_IF_NULL(kernel); - // dump all kernel if mode is set 0 in data_dump.json - return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope()); -} - void DumpJsonParser::UpdateNeedDumpKernels(NotNull kernel_graph) { if (e2e_dump_enabled_) { MS_LOG(INFO) << "E2e dump no need to update dump kernel list"; @@ -391,9 +390,6 @@ void DumpJsonParser::UpdateNeedDumpKernels(NotNull update_kernels.try_emplace(input->fullname_with_scope(), 0); } } - } else if (NeedAsyncDump(kernel)) { - MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope(); - update_kernels.try_emplace(kernel->fullname_with_scope(), 0); } } kernels_.insert(update_kernels.begin(), update_kernels.end()); diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h index b0576abb8df..8dcfbd57232 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h @@ -72,6 +72,7 @@ class DumpJsonParser { uint32_t op_debug_mode_{0}; bool trans_flag_{false}; uint32_t cur_dump_iter_{0}; + bool already_parsed_{false}; void ParseCommonDumpSetting(const nlohmann::json &content); void ParseAsyncDumpSetting(const nlohmann::json &content); diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc b/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc index b250baa6e44..4bde9dab4e9 100644 --- a/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc +++ b/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc @@ -206,14 +206,17 @@ bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger } } MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter(); + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto device_id = context->get_param(MS_CTX_DEVICE_ID); + std::string net_name = dump_json_parser.net_name(); std::string iterator = std::to_string(dump_json_parser.cur_dump_iter()); std::string dump_path = dump_json_parser.path(); - if (dump_path.back() == '/') { - dump_path = dump_path + net_name + '/' + iterator; - } else { - dump_path = dump_path + '/' + net_name + '/' + iterator; + if (dump_path.back() != '/') { + dump_path += "/"; } + dump_path += (net_name + "/device_" + std::to_string(device_id) + "/iteration_" + iterator); DumpInput(graph, dump_path, debugger); DumpOutput(graph, dump_path, debugger); DumpParameters(graph, dump_path, debugger); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index 1a2ea749dbe..5980d3ee6c9 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -206,11 +206,8 @@ bool AscendKernelRuntime::Init() { SetContext(); return true; } - bool ret = false; - - DumpJsonParser::GetInstance().Parse(); // Start up profiling before rtSetDevice - ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); + bool ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed."; }