diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc index c47febcb46b..47042c0edce 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc @@ -279,6 +279,9 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() { } } } + + global_mem_block_list_.clear(); + global_idle_mem_buf_map_.clear(); } void DynamicMemPoolBestFit::DumpDynamicMemPoolInfo() { diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc index 7771d8ee0f4..ec64e63b2fc 100644 --- a/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/nccl_wrapper.cc @@ -32,6 +32,10 @@ ncclUniqueId NCCLWrapper::nccl_unique_id() const { } void NCCLWrapper::InitNCCLComm() { + if (comm_init_done_) { + return; + } + for (auto group : group_info_) { std::string group_name = group.first; NcclGroupInfo group_info = group.second; diff --git a/mindspore/ccsrc/runtime/framework/graph_compiler.cc b/mindspore/ccsrc/runtime/framework/graph_compiler.cc index 04f22a4fe04..394dbec4c07 100644 --- a/mindspore/ccsrc/runtime/framework/graph_compiler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_compiler.cc @@ -24,6 +24,7 @@ #include "ir/tensor.h" #include "backend/optimizer/common/helper.h" #include "base/base_ref_utils.h" +#include "debug/dump_proto.h" #ifdef ENABLE_DEBUGGER #include "debug/debugger/debugger.h" #endif @@ -297,6 +298,14 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic auto &json_parser = DumpJsonParser::GetInstance(); json_parser.Parse(); + const auto &ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + bool save_graphs = ms_context->get_param(MS_CTX_SAVE_GRAPHS_FLAG); + // Dump .pb graph before graph optimization. + if (save_graphs) { + DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id())); + } + // Execute optimization pass. auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output()); device_context->OptimizeGraph(graph); @@ -308,8 +317,6 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic // 'KernelMod' is real executive object of kernel. device_context->CreateKernel(graph->execution_order()); - const auto &ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kGraphMode) { // Create device address for all anf nodes of graph. CreateDeviceAddress(graph, device_context); @@ -322,6 +329,12 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic session_->SetSummaryNodes(graph.get()); SetSummaryNodesRefCount(graph.get()); + + // Dump .pb graph after graph optimization. + if (save_graphs) { + DumpIRProto(graph, "after_opt_" + std::to_string(graph->graph_id())); + } + #ifdef ENABLE_DEBUGGER auto debugger = Debugger::GetInstance(); debugger->DumpInGraphCompiler(graph); @@ -329,6 +342,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic debugger->LoadGraphs(graph); } #endif + + session_->DumpGraph(graph); return graph->graph_id(); } diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc index b561c112ed0..3c57765d022 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc @@ -1962,22 +1962,10 @@ void GraphScheduler::LinkDeviceTensorStoreForAutoMonadActor(const std::vectoroutput_control_arrows_) { copy_actor->output_control_arrows_.emplace_back(output_contorl); - auto to_actor = FetchActor(output_contorl.Name()); - MS_EXCEPTION_IF_NULL(to_actor); - if (output_contorl.Name().find("_LoopCountActor") != string::npos) { - auto real_to_actor = dynamic_cast(to_actor); - MS_EXCEPTION_IF_NULL(real_to_actor); - real_to_actor->input_controls_num_++; - } else if (output_contorl.Name().find("copy_from") != string::npos) { - auto real_to_actor = dynamic_cast(to_actor); - MS_EXCEPTION_IF_NULL(real_to_actor); - real_to_actor->input_controls_num_++; - } else { - auto real_to_actor = dynamic_cast(to_actor); - MS_EXCEPTION_IF_NULL(real_to_actor); - real_to_actor->input_controls_num_++; - } } + // Move the control arrows from kernel actor to kernel actor users. + kernel_actor->output_control_arrows_.clear(); + // Link from kernel actor to copy actor. kernel_actor->output_control_arrows_.emplace_back(copy_actor->GetAID()); copy_actor->input_controls_num_++;