diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index ee2d07e7241..e3638264a2c 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -644,7 +644,7 @@ void AscendSession::PostExecuteGraph(const std::shared_ptr &kernel_ } #endif #ifndef ENABLE_SECURITY - DumpSetup(kernel_graph); + E2eDump::UpdateIterOldRTDump(kernel_graph.get()); #endif } @@ -1401,13 +1401,6 @@ void AscendSession::Execute(const std::shared_ptr &kernel_graph, bo } #ifndef ENABLE_SECURITY -void AscendSession::DumpSetup(const std::shared_ptr &kernel_graph) const { - MS_LOG(DEBUG) << "Start!"; - MS_EXCEPTION_IF_NULL(kernel_graph); - E2eDump::DumpSetup(kernel_graph.get()); - MS_LOG(DEBUG) << "Finish!"; -} - void AscendSession::Dump(const std::shared_ptr &kernel_graph) const { MS_LOG(DEBUG) << "Start!"; MS_EXCEPTION_IF_NULL(kernel_graph); diff --git a/mindspore/ccsrc/backend/session/ascend_session.h b/mindspore/ccsrc/backend/session/ascend_session.h index cd18129d99d..19c37380f07 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.h +++ b/mindspore/ccsrc/backend/session/ascend_session.h @@ -114,7 +114,6 @@ class AscendSession : public SessionBasic { void Execute(const std::shared_ptr &kernel_graph, bool is_task) const; #ifndef ENABLE_SECURITY void Dump(const std::shared_ptr &kernel_graph) const; - void DumpSetup(const std::shared_ptr &kernel_graph) const; #endif void LoadTensor(const std::shared_ptr &kernel_graph) const; // below functions are used for run op diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 2880c4134be..05e93742dbe 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -518,7 +518,7 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr &kernel_grap debugger_->PreExecute(kernel_graph); } - DumpSetup(kernel_graph); + 
E2eDump::UpdateIterOldRTDump(kernel_graph.get()); #endif #if ENABLE_CPU && ENABLE_GPU @@ -725,12 +725,6 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, } #ifdef ENABLE_DEBUGGER -void GPUSession::DumpSetup(const std::shared_ptr &kernel_graph) const { - MS_LOG(INFO) << "Start!"; - MS_EXCEPTION_IF_NULL(kernel_graph); - E2eDump::DumpSetup(kernel_graph.get()); - MS_LOG(INFO) << "Finish!"; -} void GPUSession::Dump(const std::shared_ptr &kernel_graph) const { // Dump graph and graph history file if e2e_dump is enabled and update cur_dump_iter for GPU old runtime. diff --git a/mindspore/ccsrc/backend/session/gpu_session.h b/mindspore/ccsrc/backend/session/gpu_session.h index 4b722fc8444..acbf3ebcce4 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.h +++ b/mindspore/ccsrc/backend/session/gpu_session.h @@ -94,8 +94,6 @@ class GPUSession : public SessionBasic { #ifdef ENABLE_DEBUGGER void Dump(const std::shared_ptr &kernel_graph) const; - void DumpSetup(const std::shared_ptr &kernel_graph) const; - bool DumpDataEnabledIteration() const; #endif diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc index 2464cbb1d5e..5b3f4a1abf6 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -1422,6 +1422,10 @@ void KernelGraph::SetOptimizerFlag() { bool KernelGraph::IsDatasetGraph() const { // check if there is InitDataSetQueue node const auto &nodes = execution_order_; + // The size of execution_order for the dataset graph is equal to 1. 
+ if (execution_order_.size() > 1) { + return false; + } for (const auto &node : nodes) { auto node_name = AnfAlgo::GetCNodeName(node); if (node_name == prim::kPrimInitDataSetQueue->name()) { diff --git a/mindspore/ccsrc/debug/data_dump/dump_utils.cc b/mindspore/ccsrc/debug/data_dump/dump_utils.cc index 1ceec4ef639..520b6d21ec6 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_utils.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_utils.cc @@ -26,6 +26,9 @@ #include "runtime/device/kernel_runtime_manager.h" #include "utils/utils.h" #include "debug/common.h" +#include "runtime/framework/device_tensor_store.h" + +using mindspore::runtime::DeviceTensorStore; namespace mindspore { uint32_t ConvertPhysicalDeviceId(uint32_t device_id) { @@ -90,6 +93,24 @@ void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull int_shapes, + NotNull host_type, NotNull device_type) { + const auto &device_tensors = DeviceTensorStore::GetInstance().Fetch(node.get()); + if (device_tensors.size() < 1) { + return nullptr; + } + auto device_addr = device_tensors[0]; + MS_EXCEPTION_IF_NULL(device_addr); + auto &dump_json_parser = DumpJsonParser::GetInstance(); + bool trans_flag = dump_json_parser.trans_flag(); + auto ref_node = device_addr->GetNodeIndex().first; + MS_EXCEPTION_IF_NULL(ref_node); + GetDumpIntShape(ref_node, PARAMETER_OUTPUT_INDEX, int_shapes, trans_flag); + *host_type = AnfAlgo::GetOutputInferDataType(ref_node, PARAMETER_OUTPUT_INDEX); + *device_type = AnfAlgo::GetOutputDeviceDataType(ref_node, PARAMETER_OUTPUT_INDEX); + return device_addr; +} + /* * Feature group: Dump. * Target device group: Ascend, CPU. 
diff --git a/mindspore/ccsrc/debug/data_dump/dump_utils.h b/mindspore/ccsrc/debug/data_dump/dump_utils.h index c5ac74d028f..0b27748d8a3 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_utils.h +++ b/mindspore/ccsrc/debug/data_dump/dump_utils.h @@ -19,10 +19,14 @@ #include #include +#include #include "backend/session/kernel_graph.h" #include "runtime/device/device_address.h" +using DeviceTensor = mindspore::device::DeviceAddress; +using DeviceTensorPtr = std::shared_ptr; + namespace mindspore { static const size_t PARAMETER_OUTPUT_INDEX = 0; static const size_t VALUE_NODE_OUTPUT_INDEX = 0; @@ -33,6 +37,9 @@ void GetFileKernelName(NotNull kernel_name); void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull int_shapes, bool trans_flag = false); +const DeviceTensorPtr GetParameterInfo(const AnfNodePtr &node, NotNull int_shapes, + NotNull host_type, NotNull device_type); + void DumpMemToFile(const std::string &file_path, const device::DeviceAddress &addr, const ShapeVector &int_shapes, const TypeId &type, bool trans_flag = false); // Get time stamp since epoch in microseconds diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump.cc b/mindspore/ccsrc/debug/data_dump/e2e_dump.cc index d0be2e47fea..39bf6b2d874 100644 --- a/mindspore/ccsrc/debug/data_dump/e2e_dump.cc +++ b/mindspore/ccsrc/debug/data_dump/e2e_dump.cc @@ -305,7 +305,6 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_ dump_name = node_name.substr(cst_prefix.length()); trans_flag = false; } - // check if output address exists, if not, return; if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) { return; @@ -334,6 +333,49 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_ } } +/* + * Feature group: Dump. + * Target device group: Ascend, GPU. + * Runtime category: MindRT. + * Description: This function is similar to DumpSingleAnfNode function but it is only for dumping parameters in mindRT. 
+ * This function uses GetParameterInfo to get dump info for the parameter node. + */ +void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag, + const Debugger *debugger) { + MS_EXCEPTION_IF_NULL(anf_node); + auto &dump_json_parser = DumpJsonParser::GetInstance(); + std::string node_name = GetKernelNodeName(anf_node); + if (!anf_node->isa() || !dump_json_parser.NeedDump(node_name)) { + return; + } + DumpJsonParser::GetInstance().MatchKernel(node_name); + GetFileKernelName(NOT_NULL(&node_name)); + ShapeVector int_shapes; + TypeId type; + TypeId device_type; + auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type)); + if (addr == nullptr) { + MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT."; + return; + } + uint64_t timestamp = GetTimeStamp(); + uint32_t task_id = 0; + uint32_t stream_id = 0; + std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' + + std::to_string(stream_id) + '.' 
+ std::to_string(timestamp) + ".output.0"; + if (IsDeviceTargetGPU()) { + if (dump_json_parser.IsStatisticDump()) { + TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0); + stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger); + } + if (dump_json_parser.IsTensorDump()) { + DumpGPUMemToFile(file_path, node_name, *addr, int_shapes, type, device_type, trans_flag, 0, debugger); + } + } else { + DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag); + } +} + void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); @@ -380,9 +422,16 @@ void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::str } } -void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_mode) { - uint32_t graph_id = graph->graph_id(); +/* + * Feature group: Dump. + * Target device group: Ascend, GPU. + * Runtime category: Old runtime. + * Description: This function is for updating dump iteration for GPU and ascend old runtime. + */ +void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); auto &dump_json_parser = DumpJsonParser::GetInstance(); + uint32_t graph_id = graph->graph_id(); if (IsDeviceTargetGPU()) { if (starting_graph_id == INT32_MAX) { starting_graph_id = graph_id; @@ -394,7 +443,7 @@ void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_m return; } // If device target is Ascend - if (sink_mode && graph->IsDatasetGraph()) { + if (graph->IsDatasetGraph()) { MS_LOG(INFO) << "No need to update iteration for dataset graph."; return; } @@ -403,29 +452,23 @@ void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_m dump_json_parser.UpdateDumpIter(); } -/* - * Feature group: Dump. - * Target device group: Ascend, GPU. - * Runtime category: Old runtime, MindRT. 
- * Description: This function is for updating dump iteration for GPU and ascend old runtime and ascend super - * kernel MindRT. - */ -void E2eDump::DumpSetup(const session::KernelGraph *graph) { - auto &dump_json_parser = DumpJsonParser::GetInstance(); - bool sink_mode = (ConfigManager::GetInstance().dataset_mode() || E2eDump::isDatasetGraph(graph)); - - if (dump_json_parser.async_dump_enabled() || dump_json_parser.e2e_dump_enabled()) { - UpdateIterDumpSetup(graph, sink_mode); - } -} - /* * Feature group: Dump. * Target device group: Ascend, GPU. * Runtime category: MindRT. - * Description: This function is for updating dump iteration for GPU and kernel by kernel ascend MindRT dump. + * Description: This function is for updating dump iteration for GPU and ascend MindRT dump. Please note that dump with + * dataset_sink_mode = True is not supported for GPU. */ void E2eDump::UpdateIterMindRTDump() { + auto debugger = Debugger::GetInstance(); + // Dataset graph is always the first graph in the list when dataset_sink_mode is true. + auto graph = (debugger->GetStepGraphPtrList())[0]; + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) { + MS_LOG(INFO) << "No need to update iteration for dataset graph."; + return; + } // update dump iter for GPU and kernel by kernel ascend dump. 
DumpJsonParser::GetInstance().UpdateDumpIter(); } @@ -464,7 +507,7 @@ void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) { MS_LOG(WARNING) << "Open file for saving graph global execution order failed."; return; } - if (sink_mode && json_parser.async_dump_enabled()) { + if (sink_mode && json_parser.async_dump_enabled() && !Debugger::GetInstance()->GetAscendKernelByKernelFlag()) { // for async dump when sink_mode = true, cur_dump_iter() = current_epoch // dump history for all iterations in the epoch Debugger::GetInstance()->UpdateGraphIterMap(graph->graph_id(), iter_num); @@ -501,16 +544,16 @@ void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter(); MS_LOG(INFO) << "Current graph id is " << graph_id; std::string dump_path = GenerateDumpPath(graph_id, rank_id); - std::string cst_path = GenerateDumpPath(graph_id, rank_id, true); - if (dump_json_parser.IsStatisticDump()) { TensorStatDump::OpenStatisticsFile(dump_path); } DumpInput(graph, dump_path, debugger); DumpOutput(graph, dump_path, debugger); - DumpParameters(graph, dump_path, debugger); - if (IsDeviceTargetGPU() && dump_json_parser.e2e_dump_enabled()) { - DumpConstantData(graph, cst_path, debugger); + if (!MsContext::GetInstance()->get_param(MS_CTX_ENABLE_MINDRT)) { + // Dump parameters for old runtime. For mindRT it is done in PostExecuteGraphDebugger. + DumpParameters(graph, dump_path, debugger); + // DumpConstantData for GPU old runtime. 
+ DumpConstantData(graph, rank_id, debugger); } if (dump_json_parser.IsStatisticDump()) { CsvWriter::GetInstance().CloseFile(); @@ -543,29 +586,29 @@ bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32 return success; } -bool E2eDump::DumpParametersData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) { - bool success = false; - uint32_t graph_id = graph->graph_id(); +/* + * Feature group: Dump. + * Target device group: Ascend, GPU. + * Runtime category: MindRT. + * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel + * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump). + */ +void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) { + uint32_t root_graph_id = debugger->GetCurrentRootGraphId(); auto &dump_json_parser = DumpJsonParser::GetInstance(); + if (dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) { + // Dump parameters for mindRT in async dump only for kernel by kernel mode. + return; + } if (dump_json_parser.DumpEnabledForIter()) { MS_LOG(INFO) << "DumpParameters. 
Current iteration is " << dump_json_parser.cur_dump_iter(); - MS_LOG(INFO) << "Current graph id is " << graph_id; - std::string dump_path = GenerateDumpPath(graph_id, rank_id); - DumpParameters(graph, dump_path, debugger); - success = true; - } - return success; -} -bool E2eDump::isDatasetGraph(const session::KernelGraph *graph) { - // check if there is GetNext or InitDataSetQueue node - const auto &nodes = graph->execution_order(); - for (const auto &node : nodes) { - auto node_name = AnfAlgo::GetCNodeName(node); - if (node_name == prim::kPrimGetNext->name() || node_name == prim::kPrimInitDataSetQueue->name()) { - return true; + MS_LOG(INFO) << "Current root graph id is " << root_graph_id; + std::string dump_path = GenerateDumpPath(root_graph_id, rank_id); + bool trans_flag = dump_json_parser.trans_flag(); + for (auto &item : debugger->GetParametersMindRT()) { + DumpSingleParameterNode(item, dump_path, trans_flag, debugger); } } - return false; } #ifdef ENABLE_D diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump.h b/mindspore/ccsrc/debug/data_dump/e2e_dump.h index 1bef8a2f7f2..ae475f8356b 100644 --- a/mindspore/ccsrc/debug/data_dump/e2e_dump.h +++ b/mindspore/ccsrc/debug/data_dump/e2e_dump.h @@ -38,10 +38,10 @@ class E2eDump { public: E2eDump() = default; ~E2eDump() = default; - static void DumpSetup(const session::KernelGraph *graph); - static void UpdateIterMindRTDump(); + static void UpdateIterOldRTDump(const session::KernelGraph *graph); + static void DumpRunIter(const KernelGraphPtr &graph_ptr, uint32_t rank_id = 0); static void DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr); @@ -51,13 +51,11 @@ class E2eDump { static void DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr); - static bool DumpParametersData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger); + static void DumpParametersData(uint32_t rank_id, const Debugger 
*debugger); static bool DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger = nullptr); - static bool isDatasetGraph(const session::KernelGraph *graph); - // Dump data when task error. static void DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path, std::string *kernel_name, const Debugger *debugger); @@ -91,7 +89,8 @@ class E2eDump { static void DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path, bool trans_flag, const Debugger *debugger); - static void UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_mode); + static void DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag, + const Debugger *debugger); #ifdef ENABLE_D static nlohmann::json ParseOverflowInfo(char *data_ptr); diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index 74fb277b0e6..8cdcdbc99dd 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -38,6 +38,7 @@ #include "runtime/hardware/device_context_manager.h" #include "debug/anf_ir_dump.h" #include "debug/anf_ir_utils.h" +#include "runtime/framework/device_tensor_store.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/proto_exporter.h" #else @@ -56,6 +57,7 @@ using debugger::WatchCondition_Condition_nan; using debugger::WatchCondition_Parameter; using debugger::WatchNode; using debugger::WatchpointHit; +using mindspore::runtime::DeviceTensorStore; namespace mindspore { @@ -287,6 +289,8 @@ void Debugger::Reset() { graph_proto_list_.clear(); graph_ptr_list_.clear(); graph_ptr_step_vec_.clear(); + parameters_mindRT_.clear(); + visited_root_graph_ids_.clear(); MS_LOG(INFO) << "Release Debugger resource."; } @@ -297,13 +301,15 @@ void Debugger::Reset() { * Description: Sets root_graph_id for all the graphs in the compiled graph list. 
Sets cur_root_graph_id_ and * prev_root_graph_id_ and calls PreExecute function for all the graphs. */ -void Debugger::PreExecuteGraphDebugger(const std::vector &graphs) { +void Debugger::PreExecuteGraphDebugger(const std::vector &graphs, + const std::vector &origin_parameters_order) { // MindRTBackend for GPU and Ascend if (device_target_ == kCPUDevice) { return; } // Store graphs that are run in one step. graph_ptr_step_vec_ = graphs; + parameters_mindRT_ = origin_parameters_order; prev_root_graph_id_ = cur_root_graph_id_; // set first run graph as the root graph cur_root_graph_id_ = graph_ptr_step_vec_[0]->graph_id(); @@ -474,21 +480,31 @@ uint32_t Debugger::GetRankID() { * Feature group: Dump. * Target device group: Ascend, GPU. * Runtime category: MindRT. - * Description: Dumps graph history and parameters for GPU and Ascend kernel-by-kernel MindRT. DumpConstantData for GPU. + * Description: When dump is enabled, this function: 1) Dumps parameters for the current root_graph_id to the + * root_graph's directory. 2) Dumps constant data once for each graph. 3) Dumps graph run history for each graph. */ -void Debugger::Dump(const KernelGraphPtr &kernel_graph) const { - if (!(ascend_kernel_by_kernel_ || device_target_ == kGPUDevice)) { +void Debugger::DumpParamsAndConstAndHistory() { + if (!CheckDebuggerDumpEnabled()) { return; } - uint32_t rank_id = GetRankID(); - E2eDump::DumpRunIter(kernel_graph, rank_id); - if (debugger_ && debugger_->DebuggerBackendEnabled()) { - MS_EXCEPTION_IF_NULL(kernel_graph); - (void)E2eDump::DumpParametersData(kernel_graph.get(), rank_id, debugger_.get()); - // Dump constant data for GPU mindRT. - E2eDump::DumpConstantData(kernel_graph.get(), rank_id, debugger_.get()); - } else { - DumpJsonParser::GetInstance().UpdateDumpIter(); + LoadParametersAllGraphs(); + (void)E2eDump::DumpParametersData(GetRankID(), debugger_.get()); + // Whether constant data was already dumped for the current root graph. 
+ bool cur_root_graph_checked = std::find(visited_root_graph_ids_.begin(), visited_root_graph_ids_.end(), + cur_root_graph_id_) != visited_root_graph_ids_.end(); + for (auto graph : graph_ptr_step_vec_) { + if (!cur_root_graph_checked) { + LoadConstsForGraph(graph); + // Dump constant data for GPU. + E2eDump::DumpConstantData(graph.get(), GetRankID(), debugger_.get()); + // Dump constant data for Ascend. + DumpConstantDataAscend(graph); + } + // Dump graph run history for each graph. + E2eDump::DumpRunIter(graph, GetRankID()); + } + if (!cur_root_graph_checked) { + visited_root_graph_ids_.push_back(cur_root_graph_id_); } } @@ -556,29 +572,15 @@ void Debugger::PostExecuteGraphDebugger() { DumpJsonParser::GetInstance().UpdateDumpIter(); return; } - // LoadParametersAndConst for all the graphs that have been run in the current step - if (debugger_ && device_target_ == kGPUDevice) { - for (auto graph : graph_ptr_step_vec_) { - debugger_->LoadParametersAndConst(graph); - } - } + DumpParamsAndConstAndHistory(); // debug used for dump - if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) { - // Dump Parameters and consts - for (auto graph : graph_ptr_step_vec_) { - debugger_->Dump(graph); - DumpConstantDataAscend(graph); - if (!debugger_->debugger_enabled()) { - debugger_->ClearCurrentData(); - } - } + if (CheckDebuggerDumpEnabled() && !debugger_enabled()) { + ClearCurrentData(); } if (debugger_) { debugger_->PostExecute(); } - if (ascend_kernel_by_kernel_ || device_target_ == kGPUDevice) { - E2eDump::UpdateIterMindRTDump(); - } + E2eDump::UpdateIterMindRTDump(); } /* @@ -1341,7 +1343,10 @@ bool Debugger::DumpTensorToFile(const std::string &tensor_name, bool trans_flag, } bool Debugger::LoadNewTensor(const std::shared_ptr &tensor, bool keep_prev) { - return debug_services_.get()->LoadNewTensor(tensor, keep_prev); + if (debug_services_ != nullptr) { + return debug_services_.get()->LoadNewTensor(tensor, keep_prev); + } + return false; } bool Debugger::debugger_enabled() 
const { return debugger_enabled_; } @@ -1543,6 +1548,37 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output } } +void Debugger::LoadSingleParameterMindRT(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto root_graph_id = cur_root_graph_id_; + // This function is only for loading parameters mindRT. + std::string node_name = GetKernelNodeName(node); + GetFileKernelName(NOT_NULL(&node_name)); + TypeId type; + TypeId device_type; + ShapeVector int_shapes; + auto device_addr = GetParameterInfo(node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type)); + if (device_addr == nullptr) { + MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT."; + return; + } + if (!IsTypeDebuggerSupported(type)) { + return; + } + auto format = kOpFormat_DEFAULT; + string tensor_name = node_name + ':' + "0"; + if (debug_services_ != nullptr) { + debug_services_->MoveTensorCurrentToPrev(tensor_name); + } + // Keep_prev is True for parameters. + bool ret = device_addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id); + + if (!ret) { + MS_LOG(ERROR) << "LoadMemToHost:" + << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; + } +} + /* * Feature group: Dump, Online debugger. * Target device group: Ascend, GPU. @@ -1593,6 +1629,43 @@ void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) { } } +/* + * Feature group: Dump. + * Target device group: GPU. + * Runtime category: MindRT. + * Description: This function is for loading parameters' data from device to host into tensor_list_map_ for GPU dump. + * Ascend does not use tensor_list_map_ for dump so it is not needed for ascend dump. + */ +void Debugger::LoadParametersAllGraphs() { + if (!(device_target_ == kGPUDevice && CheckDebuggerDumpEnabled())) { + return; + } + for (auto &node : parameters_mindRT_) { + LoadSingleParameterMindRT(node); + } +} + +/* + * Feature group: Dump. 
+ * Target device group: GPU. + * Runtime category: MindRT. + * Description: This function is for loading constant data from device to host into tensor_list_map_ for GPU dump. + * Ascend does not use tensor_list_map_ for dump so it is not needed for ascend dump. + */ +void Debugger::LoadConstsForGraph(const KernelGraphPtr &graph) { + if (!(device_target_ == kGPUDevice && CheckDebuggerDumpEnabled())) { + return; + } + // load value nodes + // get all constant values from the graph + MS_LOG(INFO) << "Start to load value nodes for graph " << graph->graph_id() << "."; + auto root_graph_id = graph->root_graph_id(); + const auto value_nodes = graph->graph_value_nodes(); + for (auto &item : value_nodes) { + LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX, root_graph_id); + } +} + /* * Feature group: Online debugger. * Target device group: Ascend. @@ -1683,7 +1756,10 @@ void Debugger::ClearCurrentData() { } bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) { - return debug_services_->TensorExistsInCurrent(tensor_name); + if (debug_services_ != nullptr) { + return debug_services_->TensorExistsInCurrent(tensor_name); + } + return false; } #ifdef ENABLE_D diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 52f368d8f23..51c631158a0 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -29,6 +29,7 @@ #ifdef ENABLE_D #include "debug/dump_data_builder.h" #endif +#include "runtime/device/device_address.h" using debugger::Chunk; using debugger::DataType; @@ -41,6 +42,8 @@ using debugger::WatchCondition; using debugger::WatchCondition_Parameter; using debugger::WatchNode; using debugger::WatchpointHit; +using DeviceTensor = mindspore::device::DeviceAddress; +using DeviceTensorPtr = std::shared_ptr; template using ProtoVector = google::protobuf::RepeatedPtrField; @@ -77,7 +80,8 @@ class Debugger : public std::enable_shared_from_this { // reset debugger void 
Reset(); - void PreExecuteGraphDebugger(const std::vector &graphs); + void PreExecuteGraphDebugger(const std::vector &graphs, + const std::vector &origin_parameters_order); // enable debugger // send graph and wait for command // do nothing if graph is set already @@ -87,6 +91,8 @@ class Debugger : public std::enable_shared_from_this { void SetAscendKernelByKernelFlag(bool value) { ascend_kernel_by_kernel_ = value; } + bool GetAscendKernelByKernelFlag() const { return ascend_kernel_by_kernel_; } + void StoreRunGraphIdList(uint32_t graph_id); // analyze tensors and wait for command @@ -97,8 +103,6 @@ class Debugger : public std::enable_shared_from_this { static uint32_t GetRankID(); - void Dump(const KernelGraphPtr &kernel_graph) const; - void DumpConstantDataAscend(const KernelGraphPtr &graph); void DumpSingleNode(const CNodePtr &node, uint32_t graph_id); @@ -144,6 +148,12 @@ class Debugger : public std::enable_shared_from_this { void LoadParametersAndConst(const KernelGraphPtr &graph); + void LoadParametersAllGraphs(); + + void LoadConstsForGraph(const KernelGraphPtr &graph); + + void DumpParamsAndConstAndHistory(); + void UpdateStepNum(const session::KernelGraph *graph); void UpdateStepNumGPU(); @@ -162,6 +172,8 @@ class Debugger : public std::enable_shared_from_this { uint32_t GetPrevRootGraphId() const { return prev_root_graph_id_; } + std::vector GetStepGraphPtrList() const { return graph_ptr_step_vec_; } + void SetGraphPtr(const KernelGraphPtr &graph_ptr) { graph_ptr_ = graph_ptr; } const KernelGraphPtr GetGraphPtr() const { return graph_ptr_; } @@ -180,6 +192,8 @@ class Debugger : public std::enable_shared_from_this { void UpdateGraphIterMap(uint32_t graph_id, int32_t iter_num); + std::vector GetParametersMindRT() const { return parameters_mindRT_; } + #ifdef ENABLE_D std::shared_ptr LoadDumpDataBuilder(const std::string &node_name); @@ -271,6 +285,8 @@ class Debugger : public std::enable_shared_from_this { void LoadSingleAnfnode(const AnfNodePtr &anf_node, 
const size_t output_index, uint32_t root_graph_id); + void LoadSingleParameterMindRT(const AnfNodePtr &anf_node); + // class members std::unique_ptr grpc_client_; @@ -301,6 +317,9 @@ class Debugger : public std::enable_shared_from_this { std::list graph_ptr_list_; // The vector of graph pointers that have been run in the current step. std::vector graph_ptr_step_vec_; + // The vector of all the parameters for the current step for mindRT. + std::vector parameters_mindRT_; + std::vector visited_root_graph_ids_; // map to store iter num in each epoch when dataset_sink_mode is true std::map graph_iter_num_map_; diff --git a/mindspore/ccsrc/debug/debugger/debugger_utils.cc b/mindspore/ccsrc/debug/debugger/debugger_utils.cc index 82a7bc8c66e..a3d1897b209 100644 --- a/mindspore/ccsrc/debug/debugger/debugger_utils.cc +++ b/mindspore/ccsrc/debug/debugger/debugger_utils.cc @@ -159,12 +159,18 @@ bool CheckReadData(const CNodePtr &cnode) { return read_data; } +bool IsDeviceTargetGPU() { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + return context->get_param(MS_CTX_DEVICE_TARGET) == kGPUDevice; +} + /* * Feature group: Dump, Online debugger. - * Target device group: GPU. + * Target device group: Ascend, GPU. * Runtime category: MindRT. * Description: Load inputs and outputs of the given node if needed and dump them if dump is enabled, then it performs - * PostExecuteNode function on the given node. + * PostExecuteNode function on the given node for GPU. 
*/ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, const DeviceContext *device_context) { @@ -194,9 +200,11 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, debugger->ClearCurrentData(); } } - // check if the node is last kernel - bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip"); - debugger->PostExecuteNode(cnode, last_kernel); + if (IsDeviceTargetGPU()) { + // check if the node is last kernel + bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip"); + debugger->PostExecuteNode(cnode, last_kernel); + } } /* @@ -210,10 +218,7 @@ std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) { std::string error_info = ""; bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph(); auto debugger = Debugger::GetInstance(); - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - bool is_gpu = (context->get_param(MS_CTX_DEVICE_TARGET) == kGPUDevice); - if (debugger->CheckDebuggerDumpEnabled() && sink_mode && is_gpu) { + if (debugger->CheckDebuggerDumpEnabled() && sink_mode && IsDeviceTargetGPU()) { error_info = "e2e_dump is not supported on GPU with dataset_sink_mode=True. 
Please set dataset_sink_mode=False"; } if (debugger->CheckDebuggerEnabled() && sink_mode) { @@ -250,17 +255,9 @@ void LoadDataForDebugger(const KernelGraphPtr &graph_ptr) { #endif } -void DumpSetup(const KernelGraphPtr &graph) { - MS_LOG(DEBUG) << "Start!"; - MS_EXCEPTION_IF_NULL(graph); - E2eDump::DumpSetup(graph.get()); - MS_LOG(DEBUG) << "Finish!"; -} - void Dump(const KernelGraphPtr &graph, uint32_t rank_id) { MS_LOG(DEBUG) << "Start!"; MS_EXCEPTION_IF_NULL(graph); - E2eDump::DumpRunIter(graph, rank_id); E2eDump::DumpData(graph.get(), rank_id); MS_LOG(DEBUG) << "Finish!"; } @@ -280,7 +277,6 @@ uint32_t GetRankID() { void SuperKernelE2eDump(const KernelGraphPtr &graph) { #ifndef ENABLE_SECURITY Dump(graph, GetRankID()); - DumpSetup(graph); #endif } diff --git a/mindspore/ccsrc/runtime/framework/actor/data_prepare_actor.cc b/mindspore/ccsrc/runtime/framework/actor/data_prepare_actor.cc index 6d7b9677f7a..1d469019ce9 100644 --- a/mindspore/ccsrc/runtime/framework/actor/data_prepare_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/data_prepare_actor.cc @@ -294,7 +294,8 @@ void DataPrepareActor::PrepareData(const std::vector> &in void DataPrepareActor::SendDebugReq(OpContext *const context) { ActorDispatcher::Send(*debug_aid_, &DebugActor::DebugOnStepBegin, graph_compiler_info_->graphs_, - graph_compiler_info_->device_contexts_, context, &GetAID()); + graph_compiler_info_->origin_parameters_order_, graph_compiler_info_->device_contexts_, context, + &GetAID()); } void DataPrepareActor::OnDebugFinish(OpContext *const context) { diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc index ce5bc0424d6..7998b2b461a 100644 --- a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc @@ -128,7 +128,9 @@ void DebugActor::DebugForGraph(const KernelGraphPtr &graph, const DeviceContext * Runtime category: MindRT. 
* Description: Checks dataset_sink_mode and generates the related error if any exist and calls PreExecuteGraphDebugger. */ -void DebugActor::DebugOnStepBegin(std::vector graphs, std::vector device_contexts, +void DebugActor::DebugOnStepBegin(const std::vector &graphs, + const std::vector &origin_parameters_order, + std::vector device_contexts, OpContext *const op_context, const AID *from_aid) { MS_EXCEPTION_IF_NULL(op_context); MS_EXCEPTION_IF_NULL(from_aid); @@ -144,7 +146,7 @@ void DebugActor::DebugOnStepBegin(std::vector graphs, std::vecto } auto debugger = Debugger::GetInstance(); if (debugger != nullptr && debugger->DebuggerBackendEnabled()) { - debugger->PreExecuteGraphDebugger(graphs); + debugger->PreExecuteGraphDebugger(graphs, origin_parameters_order); } #endif diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_actor.h b/mindspore/ccsrc/runtime/framework/actor/debug_actor.h index 824455c0314..5477d187967 100644 --- a/mindspore/ccsrc/runtime/framework/actor/debug_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/debug_actor.h @@ -43,8 +43,10 @@ class DebugActor : public ActorBase { OpContext *const op_context, const AID *from_aid); // The debug on step begin. - void DebugOnStepBegin(std::vector graphs, std::vector device_contexts, - OpContext *const op_context, const AID *from_aid); + void DebugOnStepBegin(const std::vector &graphs, + const std::vector &origin_parameters_order, + std::vector device_contexts, OpContext *const op_context, + const AID *from_aid); // The debug on step end. 
void DebugOnStepEnd(OpContext *const op_context, const AID *from_aid); diff --git a/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc b/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc index 97de4555e8d..74308bb9c7f 100644 --- a/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc @@ -278,6 +278,16 @@ bool AscendDeviceContext::IsGraphMode() { } void AscendDeviceContext::Destroy() { +#ifdef ENABLE_DEBUGGER + auto debugger = Debugger::GetInstance(); + if (debugger && debugger->debugger_enabled()) { + debugger->SetTrainingDone(true); + bool ret = debugger->SendMetadata(false); + if (!ret) { + MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; + } + } +#endif MS_LOG(INFO) << "Status record: Enter Destroy..."; if (!initialized_) { return; diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc index d87376af1cb..2a11831daeb 100644 --- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc @@ -153,7 +153,10 @@ void GPUDeviceContext::Destroy() { auto debugger = Debugger::GetInstance(); if (debugger && debugger->debugger_enabled()) { debugger->SetTrainingDone(true); - debugger->SendMetadata(false); + bool ret = debugger->SendMetadata(false); + if (!ret) { + MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; + } } #endif diff --git a/tests/st/dump/test_data_dump.py b/tests/st/dump/test_data_dump.py index 354727f69e8..73a2b1546ea 100644 --- a/tests/st/dump/test_data_dump.py +++ b/tests/st/dump/test_data_dump.py @@ -101,7 +101,7 @@ def run_e2e_dump(): add = Net() add(Tensor(x), Tensor(y)) if context.get_context("device_target") == "Ascend": - assert len(os.listdir(dump_file_path)) == 5 + assert len(os.listdir(dump_file_path)) == 3 output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy" elif 
context.get_context("device_target") == "CPU": assert len(os.listdir(dump_file_path)) == 5 @@ -271,7 +271,7 @@ def test_dump_with_diagnostic_path(): shutil.rmtree(diagnose_path) add = Net() add(Tensor(x), Tensor(y)) - assert len(os.listdir(dump_file_path)) == 5 + assert len(os.listdir(dump_file_path)) == 3 del os.environ['MINDSPORE_DUMP_CONFIG'] del os.environ['MS_DIAGNOSTIC_DATA_PATH']