From 0683b7fd7578e527be2fd6652c4c68548cfb13e6 Mon Sep 17 00:00:00 2001
From: caifubi
Date: Fri, 17 Sep 2021 14:22:37 +0800
Subject: [PATCH] clean codex

---
 .../kernel_compiler/tbe/tbe_kernel_build.cc   |  10 +-
 .../ccsrc/backend/session/ascend_session.cc   |  35 ++--
 .../ccsrc/backend/session/ascend_session.h    |   2 +-
 .../ccsrc/backend/session/cpu_session.cc      |   6 +-
 .../ccsrc/backend/session/gpu_session.cc      |  10 +-
 .../ccsrc/debug/data_dump/dump_json_parser.cc |   6 +-
 .../ccsrc/debug/data_dump/dump_json_parser.h  |   2 +-
 .../device/ascend/ascend_kernel_runtime.cc    | 105 +++++------
 .../device/ascend/ascend_kernel_runtime.h     |  25 ++-
 .../device/ascend/ascend_memory_manager.cc    |   4 +-
 .../device/ascend/ascend_memory_manager.h     |   2 +-
 .../ascend/executor/aicpu_ext_info_handle.cc  |   8 +-
 .../ascend/executor/aicpu_ext_info_handle.h   |   3 +-
 .../ascend/profiling/profiling_manager.cc     |   2 +-
 .../ascend/profiling/profiling_manager.h      |   2 +-
 .../ascend/profiling/profiling_utils.cc       |   2 +-
 .../runtime/device/cpu/cpu_kernel_runtime.cc  |  14 +-
 .../runtime/device/cpu/cpu_kernel_runtime.h   |   6 +-
 .../runtime/device/gpu/gpu_kernel_runtime.cc  |  23 ++-
 .../runtime/device/gpu/gpu_kernel_runtime.h   |   8 +-
 .../ccsrc/runtime/device/kernel_runtime.cc    | 164 ++++++++----------
 .../ccsrc/runtime/device/kernel_runtime.h     |  53 +++---
 .../ccsrc/runtime/device/memory_manager.cc    |  15 +-
 .../ccsrc/runtime/device/memory_manager.h     |   2 +-
 mindspore/core/abstract/prim_arrays.cc        |   3 +-
 mindspore/core/abstract/prim_others.cc        |   2 +-
 .../ops/_grad_experimental/grad_inner_ops.py  |   1 +
 27 files changed, 241 insertions(+), 274 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
index 944b756e818..bb3508341b6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
@@ -812,8 +812,7 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no
   return format;
 }
 
-void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list,
-                      const AnfNodePtr &anf_node) {
+void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
   for (size_t i = 0; i < input_json.size(); i++) {
     for (size_t m = 0; m < input_json[i].size(); m++) {
       size_t size_i = 1;
@@ -840,8 +839,7 @@ void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *inp
   }
 }
 
-void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list,
-                       const AnfNodePtr &anf_node) {
+void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
   for (size_t i = 0; i < output_json.size(); i++) {
     for (size_t m = 0; m < output_json[i].size(); m++) {
       size_t size_i = 1;
@@ -878,8 +876,8 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
   input_size_list->clear();
   output_size_list->clear();
-  GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list, anf_node);
-  GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list, anf_node);
+  GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list);
+  GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list);
   return true;
 }
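The hunk above drops a parameter that the functions never read, which also simplifies every call site. A minimal standalone C++ sketch of the same cleanup (hypothetical names, not MindSpore code; the nested vector stands in for the nlohmann::json traversal):

// Before: void GetSizeList(const Json &json, std::vector<size_t> *out, const NodePtr &node);
// After: the unused node argument is gone, so callers no longer pass one.
#include <cstddef>
#include <iostream>
#include <vector>

void GetSizeList(const std::vector<std::vector<size_t>> &shapes, std::vector<size_t> *size_list) {
  for (const auto &shape : shapes) {
    size_t size_i = 1;  // product of all dimensions of one tensor
    for (size_t dim : shape) {
      size_i *= dim;
    }
    size_list->push_back(size_i);
  }
}

int main() {
  std::vector<size_t> sizes;
  GetSizeList({{2, 3}, {4}}, &sizes);
  for (size_t s : sizes) {
    std::cout << s << '\n';  // prints 6 and 4
  }
  return 0;
}

Compilers flag the old form under -Wunused-parameter, which is the class of warning this patch is cleaning up.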
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index f31a1c55fb2..fc4f7d32cde 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -733,8 +733,8 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
   if (!enable_mem_scheduler) {
     auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
     MS_EXCEPTION_IF_NULL(runtime_instance);
-    runtime_instance->AssignStaticMemoryInput(child_graph.get());
-    runtime_instance->AssignStaticMemoryValueNode(child_graph.get());
+    runtime_instance->AssignStaticMemoryInput(*child_graph);
+    runtime_instance->AssignStaticMemoryValueNode(*child_graph);
   }
 }
@@ -822,7 +822,7 @@ void AscendSession::BindAddressToTensor(
   }
 }
 
-void AscendSession::LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
+void AscendSession::LaunchFunc(const KernelGraphPtr &graph,
                                const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node,
                                bool is_dynamic_shape, const std::vector<tensor::TensorPtr> &input_tensors) {
   // Wait for AllReduce
@@ -887,7 +887,7 @@ void AscendSession::PrepareForOutputTensor(const KernelGraphPtr &graph,
   // Create DeviceAddress For Output Tensor(contain: Shape, Format, DType)
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
   runtime_instance->RunOpMallocPre(*graph, input_tensors);
-  runtime_instance->UpdateRefNodeOutputMem(graph.get());
+  runtime_instance->UpdateRefNodeOutputMem(*graph);
   // CREATE OUTPUT TENSOR ADDRESS
   UpdateOutputs(graph, outputs, input_tensors, tensor_to_node);
 }
@@ -951,7 +951,7 @@ void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_inf
   auto &task_manager = PynativeTaskManager::GetInstance();
   if (!cache_miss && task_manager.QueueEmpty()) {
     // Cache match and there are no task in Queue. Just Launch immediately.
-    LaunchFunc(graph, tensors_mask, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
+    LaunchFunc(graph, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
   } else {
     auto run_op_context = std::make_shared<RunOpContext>(graph_info, op_run_info->is_dynamic_shape, graph,
                                                          tensors_mask, *input_tensors, tensor_to_node);
@@ -1320,7 +1320,7 @@ void AscendSession::BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kerne
   }
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  if (!runtime_instance->GenDynamicKernel(kernel_graph.get())) {
+  if (!runtime_instance->GenDynamicKernel(*kernel_graph)) {
     MS_LOG(DEBUG) << "Graph:" << kernel_graph->graph_id() << " failed to generate dynamic kernel!";
   }
   MS_LOG(DEBUG) << "Finish!";
@@ -1460,7 +1460,7 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
   InitMemReuseExecOrder(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->AssignMemory(kernel_graph);
+  runtime_instance->AssignMemory(*kernel_graph);
   MS_LOG(INFO) << "Finish!";
 }
@@ -1469,7 +1469,7 @@ void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
 }
 
 void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &input_tensors,
@@ -1478,21 +1478,21 @@ void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &in
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph, tensor_to_node);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph, tensor_to_node);
 }
 
 void AscendSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
   MS_EXCEPTION_IF_NULL(graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->GenKernelEvents(graph);
+  runtime_instance->GenKernelEvents(*graph);
 }
 
 void AscendSession::RunOpMemoryClear(const KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpClearMemory(kernel_graph);
+  runtime_instance->RunOpClearMemory(*kernel_graph);
 }
 
 void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const {
@@ -1503,7 +1503,7 @@ void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const
   (void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  bool ret_ok = runtime_instance->Load(kernel_graph.get(), is_task_sink);
+  bool ret_ok = runtime_instance->Load(*kernel_graph, is_task_sink);
   if (!ret_ok) {
     MS_LOG(EXCEPTION) << "Load task error!";
   }
@@ -1525,7 +1525,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
     DumpSetup(kernel_graph);
 #endif
   }
-  bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink);
+  bool ret_ok = runtime_instance->Run(*kernel_graph, is_task_sink);
   if (is_task && is_task_sink) {
 #ifndef ENABLE_SECURITY
     Dump(kernel_graph);
@@ -1599,7 +1599,7 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  (void)runtime_instance->LoadData(kernel_graph.get());
+  (void)runtime_instance->LoadData(*kernel_graph);
   MS_LOG(INFO) << "Finish!";
 }
@@ -1884,8 +1884,8 @@ void AscendSession::AssignStaticMemory(NotNull<KernelGraphPtr> graph,
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
   runtime_instance->ClearGlobalIdleMem();
-  runtime_instance->AssignStaticMemoryInput(graph.get().get());
-  runtime_instance->AssignStaticMemoryValueNode(graph.get().get());
+  runtime_instance->AssignStaticMemoryInput(*graph.get());
+  runtime_instance->AssignStaticMemoryValueNode(*graph.get());
   for (auto &child_graph : graph->child_graph_order()) {
     AssignStaticMemory(NOT_NULL(child_graph.lock()), memo);
   }
@@ -1977,8 +1977,7 @@ void AscendSession::ExecuteAllTaskInQueue() {
     while (!launch_tasks.empty()) {
       auto &launch_task = launch_tasks.front();
       const auto &context = launch_task->context();
-      LaunchFunc(context->graph(), context->tensor_mask(), context->tensor_to_node(), context->is_dynamic_shape(),
-                 context->input_tensors());
+      LaunchFunc(context->graph(), context->tensor_to_node(), context->is_dynamic_shape(), context->input_tensors());
       launch_tasks.pop();
     }
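The recurring call-site pattern in this file is: validate the shared_ptr once, then hand `*kernel_graph` to a reference-taking callee. A minimal sketch of that shape (assumed names, not the real session API):

#include <iostream>
#include <memory>
#include <stdexcept>

struct Graph { int id = 0; };

// After the refactor the callee takes a reference and can assume validity.
void AssignStaticMemoryInput(const Graph &graph) { std::cout << "graph " << graph.id << '\n'; }

int main() {
  auto child_graph = std::make_shared<Graph>();
  // The caller validates once, then dereferences; this replaces passing
  // child_graph.get() and re-checking for null inside every callee.
  if (child_graph == nullptr) {
    throw std::runtime_error("child_graph is null");  // stand-in for MS_EXCEPTION_IF_NULL
  }
  AssignStaticMemoryInput(*child_graph);
  return 0;
}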
diff --git a/mindspore/ccsrc/backend/session/ascend_session.h b/mindspore/ccsrc/backend/session/ascend_session.h
index db7237f08da..c3914d7c0ea 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.h
+++ b/mindspore/ccsrc/backend/session/ascend_session.h
@@ -153,7 +153,7 @@ class AscendSession : public SessionBasic {
                     VectorRef *outputs) const;
   std::shared_ptr<device::Bucket> CreateBucket(uint32_t bucket_id, uint32_t bucket_size) override;
-  void LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
+  void LaunchFunc(const KernelGraphPtr &graph,
                   const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node,
                   bool is_dynamic_shape, const std::vector<tensor::TensorPtr> &input_tensors);
   KernelGraphPtr CreateKernelGraph(const GraphInfo &graph_info, OpRunInfo *op_run_info,
diff --git a/mindspore/ccsrc/backend/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc
index 88e5df0b4b2..b686afb16dd 100644
--- a/mindspore/ccsrc/backend/session/cpu_session.cc
+++ b/mindspore/ccsrc/backend/session/cpu_session.cc
@@ -209,7 +209,7 @@ void CPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
 }
 
 void CPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {
-  bool ret = runtime_.Run(kernel_graph.get(), false);
+  bool ret = runtime_.Run(*kernel_graph, false);
   if (!ret) {
     MS_LOG(EXCEPTION) << "Run graph failed";
   }
@@ -291,7 +291,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
   runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node);
   runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs);
 
-  bool ret = runtime_.Run(kernel_graph.get(), false);
+  bool ret = runtime_.Run(*kernel_graph, false);
   if (!ret) {
     MS_LOG(EXCEPTION) << "Run Op failed";
   }
@@ -301,7 +301,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
     UpdateOutputAbstract(kernel_graph, op_run_info);
   }
   SetOutputFlags(*outputs);
-  runtime_.RunOpClearMemory(kernel_graph.get());
+  runtime_.RunOpClearMemory(*kernel_graph);
 }
 
 void CPUSession::SetKernelInfo(const KernelGraph *kernel_graph) {
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index f5f757d94e6..3cc6094abe2 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -248,7 +248,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->AssignMemory(kernel_graph);
+  runtime_instance->AssignMemory(*kernel_graph);
 }
 
 void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors,
@@ -256,21 +256,21 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
 }
 
 void GPUSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
   MS_EXCEPTION_IF_NULL(graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->GenKernelEvents(graph);
+  runtime_instance->GenKernelEvents(*graph);
 }
 
 void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpClearMemory(kernel_graph);
+  runtime_instance->RunOpClearMemory(*kernel_graph);
 }
 
 namespace {
@@ -626,7 +626,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
 void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  if (!runtime_instance->Run(kernel_graph.get(), false)) {
+  if (!runtime_instance->Run(*kernel_graph, false)) {
    MS_LOG(EXCEPTION) << "GPU execute graph failed!";
   }
 }
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
index 56f5ea53257..a50065d8033 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
@@ -99,7 +99,7 @@ void DumpJsonParser::Parse() {
   std::ifstream json_file(dump_config_file.value());
   if (!json_file.is_open()) {
     MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."
-                      << " Errno:" << errno << " ErrInfo:" << strerror(errno);
+                      << " Errno:" << errno;
   }
 
   nlohmann::json j;
@@ -586,13 +586,13 @@ bool DumpJsonParser::OutputNeedDump() const {
   return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
 }
 
-void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
+void DumpJsonParser::UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph) {
   if (!async_dump_enabled_) {
     return;
   }
   MS_LOG(INFO) << "Update async dump kernel list for hccl";
   std::map update_kernels;
-  for (const auto &kernel : kernel_graph->execution_order()) {
+  for (const auto &kernel : kernel_graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(kernel);
     if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
         DumpJsonParser::GetInstance().NeedDump(GetKernelNodeName(kernel))) {
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
index 0bbb5e08c19..00d65127176 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
@@ -61,7 +61,7 @@ class DumpJsonParser {
   bool InputNeedDump() const;
   bool OutputNeedDump() const;
   std::string GetOpOverflowBinPath(uint32_t graph_id) const;
-  void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph);
+  void UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph);
 
   void ClearGraph() { graphs_.clear(); }
   void SaveGraph(session::KernelGraph *graph) { (void)graphs_.emplace_back(graph); }
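The UpdateNeedDumpKernels change swaps a NotNull pointer wrapper for a const reference, which expresses the same non-null contract in the type itself. A self-contained sketch with a minimal stand-in wrapper (the real NotNull lives in MindSpore's utils and differs in detail):

#include <cassert>
#include <iostream>

// Minimal stand-in for the NotNull<T> wrapper used in the old signature.
template <typename T>
class NotNull {
 public:
  explicit NotNull(T ptr) : ptr_(ptr) { assert(ptr != nullptr); }
  T get() const { return ptr_; }
  T operator->() const { return ptr_; }

 private:
  T ptr_;
};

struct KernelGraph { int graph_id = 7; };

// Before: the wrapper enforces non-null at the call boundary at run time.
void UpdateOld(NotNull<const KernelGraph *> g) { std::cout << g->graph_id << '\n'; }
// After: a const reference cannot be null by construction, so the wrapper
// (and the NOT_NULL(...) noise at call sites) disappears.
void UpdateNew(const KernelGraph &g) { std::cout << g.graph_id << '\n'; }

int main() {
  KernelGraph graph;
  UpdateOld(NotNull<const KernelGraph *>(&graph));
  UpdateNew(graph);
  return 0;
}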
MS_LOG(INFO) << "Start load step"; for (const auto &graph_ptr : debugger_->GetGraphPtrList()) { @@ -412,7 +412,7 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size return std::make_shared(device_ptr, device_size, format, type_id, node_index); } -bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { +bool AscendKernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) { if (!is_task_sink) { MS_LOG(INFO) << "Graph mode with not task sink"; GenKernelEvents(graph); @@ -428,10 +428,9 @@ bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; } -bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph &graph) { MS_LOG(INFO) << "GenDynamicKernel start"; - auto cnode_list = graph->execution_order(); + auto cnode_list = graph.execution_order(); std::vector dynamic_kernels; for (const auto &cnode : cnode_list) { MS_EXCEPTION_IF_NULL(cnode); @@ -445,15 +444,14 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) { dynamic_kernel->Initialize(); dynamic_kernels.emplace_back(dynamic_kernel); } - graph_dynamic_kernel_map_[graph->graph_id()] = std::move(dynamic_kernels); + graph_dynamic_kernel_map_[graph.graph_id()] = std::move(dynamic_kernels); MS_LOG(INFO) << "GenDynamicKernel end"; return true; } -bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +bool AscendKernelRuntime::GenTask(const session::KernelGraph &graph) { SetCurrentContext(); - if (graph->is_dynamic_shape()) { + if (graph.is_dynamic_shape()) { if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE && (ConfigManager::GetInstance().iter_num() > 1)) { MS_LOG(EXCEPTION) << "Dynamic shape is not supported with dataset_sink_mode."; } @@ -465,9 +463,9 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel"; return GenDynamicKernel(graph); } - MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id(); + MS_LOG(INFO) << "GenTask start. GraphId:" << graph.graph_id(); #ifndef ENABLE_SECURITY - DumpJsonParser::GetInstance().UpdateNeedDumpKernels(NOT_NULL(graph)); + DumpJsonParser::GetInstance().UpdateNeedDumpKernels(graph); #endif #ifdef MEM_REUSE_DEBUG if (!EnvConfigParser::GetInstance().GetSysMemreuse()) { @@ -476,19 +474,19 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { } #endif vector> task_info_list; - auto anf_node_list = graph->execution_order(); + auto anf_node_list = graph.execution_order(); auto task_generator = TaskGenerator(); - if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph->graph_id())) { + if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph.graph_id())) { return false; } // Store the task_info_list - auto insert_ret = task_map_.insert(std::make_pair(graph->graph_id(), task_info_list)); + auto insert_ret = task_map_.insert(std::make_pair(graph.graph_id(), task_info_list)); if (!insert_ret.second) { MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session."; } // Graph may have no compute node, such TensorAddGrad. 
if (task_info_list.empty()) { - MS_LOG(WARNING) << "Graph " << graph->graph_id() << " have no compute node"; + MS_LOG(WARNING) << "Graph " << graph.graph_id() << " have no compute node"; return true; } AscendStreamAssign &assign_instance = AscendStreamAssign::GetInstance(); @@ -500,13 +498,13 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { assign_instance.GetHcomStreams(&force_copy_stream_list); MS_LOG(INFO) << "Call DavinciModel total stream num:" << resource_manager.get_cur_stream_num() << ", total event num:" << resource_manager.get_cur_event_num() - << ", total label num:" << graph->label_num() + << ", total label num:" << graph.label_num() << ", wait_active_stream_list size:" << wait_active_stream_list.size() << ", force_copy_stream_list size:" << force_copy_stream_list.size(); auto model = std::make_shared( task_info_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0, - resource_manager.get_cur_stream_num(), graph->label_num(), resource_manager.get_cur_event_num(), 0); - auto ret = graph_model_map_.insert(std::make_pair(graph->graph_id(), model)); + resource_manager.get_cur_stream_num(), graph.label_num(), resource_manager.get_cur_event_num(), 0); + auto ret = graph_model_map_.insert(std::make_pair(graph.graph_id(), model)); if (!ret.second) { MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session."; } @@ -514,23 +512,22 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { return true; } -bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +bool AscendKernelRuntime::LoadTask(const session::KernelGraph &graph) { SetCurrentContext(); - if (graph->is_dynamic_shape()) { + if (graph.is_dynamic_shape()) { MS_LOG(INFO) << "Dynamic Shape Graph Skip Load Task Step"; return true; } - MS_LOG(INFO) << "LoadTask start. GraphId:" << graph->graph_id(); + MS_LOG(INFO) << "LoadTask start. GraphId:" << graph.graph_id(); if (GraphWithEmptyTaskList(graph)) { MS_LOG(WARNING) << "LoadTask end, task list is empty"; return true; } - auto model_iter = graph_model_map_.find(graph->graph_id()); + auto model_iter = graph_model_map_.find(graph.graph_id()); if (model_iter == graph_model_map_.end()) { - MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph LoadTask without GenTask."; + MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! 
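GenTask leans on std::map::insert to reject a duplicate GraphId: insert returns a pair whose bool member is false when the key already exists. A small sketch of that contract:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main() {
  std::map<std::uint32_t, std::vector<int>> task_map;
  // insert() returns {iterator, bool}; the bool is false when the key was
  // already present, which is how GenTask above detects a duplicate GraphId.
  auto first = task_map.insert({42, {1, 2, 3}});
  std::cout << std::boolalpha << first.second << '\n';  // true: new entry
  auto second = task_map.insert({42, {9}});
  std::cout << second.second << '\n';                   // false: duplicate key
  if (!second.second) {
    std::cout << "Duplicate GraphId! Please check in ascend_session.\n";
  }
  return 0;
}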
@@ -540,7 +537,7 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
 
 #ifndef ENABLE_SECURITY
   std::function<void *()> model_handle =
     std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
-  DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
+  DistributeDebugTask(graph, NOT_NULL(model_handle));
 #endif
 
   try {
@@ -556,9 +553,9 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
   if (ProfilingManager::GetInstance().IsProfiling()) {
     auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
     auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
-    ProfilingUtils::ReportProfilingData(task_ids, stream_ids, *graph);
+    ProfilingUtils::ReportProfilingData(task_ids, stream_ids, graph);
   }
-  LaunchDataDump(graph->graph_id());
+  LaunchDataDump(graph.graph_id());
 #endif
 
   ModelRunner::Instance().LoadModelComplete(model_iter->first);
   return true;
 }
 
@@ -566,18 +563,18 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
 #ifndef ENABLE_SECURITY
-void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
+void AscendKernelRuntime::DistributeDebugTask(const session::KernelGraph &graph,
                                               const NotNull<std::function<void *()>> &model_handle) {
   if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
     return;
   }
   MS_LOG(INFO) << "Start Distribute Debug Task";
-  auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
+  auto data_dumper = std::make_shared<DataDumper>(&graph, model_handle);
   MS_EXCEPTION_IF_NULL(data_dumper);
-  auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
+  auto ret = graph_data_dumper_.try_emplace(graph.graph_id(), data_dumper);
   data_dumper->OpDebugRegister();
   if (!ret.second) {
-    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
+    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph.graph_id() << " data dumper failed";
   }
 }
@@ -671,8 +668,7 @@ std::string AscendKernelRuntime::GetDumpPath() {
 }
 
 #ifndef ENABLE_SECURITY
-void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph &graph) {
   const std::string path = GetDumpPath();
   if (access(path.c_str(), F_OK) == 0) {
     if (!DeleteDumpDir(path)) {
@@ -697,10 +693,9 @@ void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *grap
 }
 #endif
 
-bool AscendKernelRuntime::Run(session::KernelGraph *const graph, bool is_task_sink) {
+bool AscendKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
   const uint64_t kUSecondInSecond = 1000000;
   SignalGuard sg(IntHandler);
-  MS_EXCEPTION_IF_NULL(graph);
   bool ret = false;
 
   if (is_task_sink) {
@@ -784,10 +779,9 @@ void AscendKernelRuntime::SetKernelModStream(const std::vector<CNodePtr> &kernel
                  [](const std::pair &item) { return item.second; });
 }
 
-void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto &kernels = graph->execution_order();
-  if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) {
+void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph &graph) {
+  auto &kernels = graph.execution_order();
+  if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) {
     return;
   }
   std::vector<size_t> last_stream_nodes;
@@ -840,7 +834,7 @@ void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
     }
   }
   ProcessBoundaryEvent(kernels, &kernel_post_run_events, last_stream_nodes);
-  graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events);
+  graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events);
 }
 
 void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
@@ -882,12 +876,11 @@ void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kern
   }
 }
 
-bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph->graph_id();
-  auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
+bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph &graph) {
+  MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph.graph_id();
+  auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
   if (iter == graph_dynamic_kernel_map_.end()) {
-    MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Not Found! Please generator executor first";
+    MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Not Found! Please generate the executor first";
     return false;
   }
@@ -919,16 +912,15 @@ bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *grap
   return true;
 }
 
-bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
-  current_graph_ = graph;
+bool AscendKernelRuntime::RunTask(const session::KernelGraph &graph) {
+  current_graph_ = &graph;
   SetCurrentContext();
-  MS_EXCEPTION_IF_NULL(graph);
-  if (graph->is_dynamic_shape()) {
+  if (graph.is_dynamic_shape()) {
     MS_LOG(INFO) << "Dynamic Shape Graph Run Task Async";
     return RunDynamicKernelAsync(graph);
   }
 
-  MS_LOG(INFO) << "RunTask start. GraphId:" << graph->graph_id();
+  MS_LOG(INFO) << "RunTask start. GraphId:" << graph.graph_id();
 
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -937,13 +929,13 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
     return true;
   }
 
-  if (!CheckGraphIdValid(graph->graph_id())) {
-    MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph RunTask without GenTask.";
+  if (!CheckGraphIdValid(graph.graph_id())) {
+    MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! Graph RunTask without GenTask.";
     return false;
   }
 
   try {
-    ModelRunner::Instance().RunModel(graph->graph_id());
+    ModelRunner::Instance().RunModel(graph.graph_id());
   } catch (const std::exception &) {
 #ifndef ENABLE_SECURITY
     DumpTaskExceptionInfo(graph);
@@ -1139,9 +1131,8 @@ bool AscendKernelRuntime::DestroyHccl() {
   return true;
 }
 
-bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph *graph) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto iter = task_map_.find(graph->graph_id());
+bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph &graph) const {
+  auto iter = task_map_.find(graph.graph_id());
   if (iter == task_map_.end()) {
     MS_LOG(EXCEPTION) << "Unknown graph ptr";
   }
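RunTask keeps a raw pointer to its reference parameter (current_graph_ = &graph), which is safe only while the caller keeps the graph alive; other paths such as the task-fail callback read it later. A sketch of the shape (hypothetical Runtime type, not the real class):

#include <iostream>

struct KernelGraph { int graph_id = 0; };

class Runtime {
 public:
  // Mirrors RunTask above: a reference parameter can still feed a pointer
  // member when other code paths need the graph later.
  bool RunTask(const KernelGraph &graph) {
    current_graph_ = &graph;  // caller must keep the graph alive meanwhile
    return current_graph_->graph_id >= 0;
  }

 private:
  const KernelGraph *current_graph_ = nullptr;
};

int main() {
  KernelGraph graph{3};
  Runtime runtime;
  std::cout << std::boolalpha << runtime.RunTask(graph) << '\n';  // true
  return 0;
}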
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
index 36a0643945b..27449652191 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
@@ -41,19 +41,19 @@ class AscendKernelRuntime : public KernelRuntime {
   AscendKernelRuntime() = default;
   ~AscendKernelRuntime() override;
   bool Init() override;
-  bool LoadData(session::KernelGraph *graph) override;
-  bool GenTask(const session::KernelGraph *graph);
-  void GenKernelEvents(const session::KernelGraph *graph) override;
+  bool LoadData(const session::KernelGraph &graph) override;
+  bool GenTask(const session::KernelGraph &graph);
+  void GenKernelEvents(const session::KernelGraph &graph) override;
   void SetKernelModStream(const std::vector<CNodePtr> &kernels, std::vector<size_t> *last_stream_nodes);
   void ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
                             std::vector<std::vector<std::function<void()>>> *kernel_run_events,
                             const std::vector<size_t> &last_stream_nodes);
-  bool GenDynamicKernel(const session::KernelGraph *graph) override;
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
-  bool LoadTask(const session::KernelGraph *graph);
-  bool RunTask(const session::KernelGraph *graph);
-  bool Load(session::KernelGraph *graph, bool is_task_sink) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
+  bool GenDynamicKernel(const session::KernelGraph &graph) override;
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override;
+  bool LoadTask(const session::KernelGraph &graph);
+  bool RunTask(const session::KernelGraph &graph);
+  bool Load(const session::KernelGraph &graph, bool is_task_sink) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
   void ClearGraphRuntimeResource(uint32_t graph_id) override;
   void ClearGlobalIdleMem() override;
   bool SyncStream() override;
@@ -91,18 +91,17 @@ class AscendKernelRuntime : public KernelRuntime {
   void ClearGraphModelMap();
   void ReleaseDeviceRes() override;
-  bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
+  bool GraphWithEmptyTaskList(const session::KernelGraph &graph) const;
   bool CheckGraphIdValid(GraphId graph_id) const;
 #ifndef ENABLE_SECURITY
-  void DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
-                           const NotNull<std::function<void *()>> &model_handle);
+  void DistributeDebugTask(const session::KernelGraph &graph, const NotNull<std::function<void *()>> &model_handle);
   void LaunchDataDump(GraphId graph_id);
   void ReportProfilingData();
 #endif
   static CNodePtr GetErrorNodeName(uint32_t streamid, uint32_t taskid);
   static std::string GetDumpPath();
 #ifndef ENABLE_SECURITY
-  static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
+  static void DumpTaskExceptionInfo(const session::KernelGraph &graph);
 #endif
   static void TaskFailCallback(rtExceptionInfo *task_fail_info);
   static bool DeleteDumpDir(const std::string &path);
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
index e29de84ac47..a47ff3197e2 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc
@@ -197,11 +197,11 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
   }
 }
 
-void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
+void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
   MemoryManager::MallocSomasDynamicMem(graph);
 #ifndef ENABLE_SECURITY
   if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable()) {
-    somas_reuse_util_ptr_->ConvertToProfilingNode(graph->graph_id());
+    somas_reuse_util_ptr_->ConvertToProfilingNode(graph.graph_id());
   }
 #endif
 }
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h
index 8aa14bbdfaa..6d460fd16c7 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h
@@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager {
   void *MallocMemFromMemPool(size_t size) override;
   void FreeMemFromMemPool(void *device_ptr) override;
   uint64_t GetDeviceMemSize();
-  void MallocSomasDynamicMem(const session::KernelGraph *graph) override;
+  void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
   uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
   std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
     return AscendMemoryPool::GetInstance().AllocContinuousTensorMem(total_size, size_list);
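MallocSomasDynamicMem shows the qualified base-class call that keeps the shared logic in one place while the override adds device-specific work. A compact sketch (stand-in types, not the real memory managers):

#include <iostream>

struct KernelGraph { int graph_id = 1; };

class MemoryManager {
 public:
  virtual ~MemoryManager() = default;
  virtual void MallocSomasDynamicMem(const KernelGraph &graph) {
    std::cout << "base allocation for graph " << graph.graph_id << '\n';
  }
};

class AscendMemoryManager : public MemoryManager {
 public:
  void MallocSomasDynamicMem(const KernelGraph &graph) override {
    // Qualified call runs the shared base logic first, exactly the shape of
    // the override above; device-specific profiling work follows it.
    MemoryManager::MallocSomasDynamicMem(graph);
    std::cout << "profiling hook for graph " << graph.graph_id << '\n';
  }
};

int main() {
  AscendMemoryManager manager;
  manager.MallocSomasDynamicMem(KernelGraph{});
  return 0;
}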
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc b/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc
index 2d5b3c8861d..b7f11141b27 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc
@@ -142,10 +142,9 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No
   }
 
   auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
-  auto data_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
   std::vector<int64_t> tmp_shape;
   std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
-  return UpdateShapeAndType(tmp_shape, data_type, NOT_NULL(input_shape_and_type_[input_index]));
+  return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index]));
 }
 
 bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node) {
@@ -171,8 +170,7 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const
 
   std::vector<int64_t> tmp_shape;
   std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
-  return UpdateShapeAndType(tmp_shape, AnfAlgo::GetOutputDeviceDataType(anf_node, output_index),
-                            NOT_NULL(output_shape_and_type_[output_index]));
+  return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index]));
 }
 
 bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
@@ -182,7 +180,7 @@ bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<s
   return true;
 }
 
-bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
+bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape,
                                              NotNull<AicpuShapeAndType *> shape_and_type) {
   if (shape.empty() || shape.size() > kernel::kMaxShapeDims) {
     MS_LOG(ERROR) << "Invalid shape:" << shape.size();
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.h b/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.h
index 641d1d3f9c7..766324a45bd 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.h
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.h
@@ -65,8 +65,7 @@ class AicpuExtInfoHandler {
   bool ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
   bool ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
 
-  static bool UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
-                                 NotNull<AicpuShapeAndType *> shape_and_type);
+  static bool UpdateShapeAndType(const std::vector<int64_t> &shape, NotNull<AicpuShapeAndType *> shape_and_type);
 
   static void GetShapeAndType(NotNull<const AicpuShapeAndType *> shape_and_type, NotNull<std::vector<int64_t> *> shape,
                               NotNull<TypeId *> data_type);
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
index 345546695ab..ca7b445128c 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
@@ -227,7 +227,7 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t len) {
   return RT_ERROR_NONE;
 }
 
-bool ProfilingManager::StopProfiling() {
+bool ProfilingManager::StopProfiling() const {
   MS_LOG(INFO) << "StopProfiling";
   if (!IsProfiling()) {
     MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode.";
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
index 90bd04adf84..864fbdcabbe 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
@@ -49,7 +49,7 @@ class ProfilingManager {
   bool ReportProfilingData(const map &op_taskId_map) const;
   bool ProfRegisterCtrlCallback() const;
   bool StartupProfiling(uint32_t device_id);
-  bool StopProfiling();
+  bool StopProfiling() const;
 
   inline bool IsProfiling() const {
     auto profiler_manager = profiler::ProfilerManager::GetInstance();
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc
index 0cb2cfabc18..f38033c6d23 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_utils.cc
@@ -210,7 +210,7 @@ void ProfilingUtils::GetTraceBpEnd(const session::KernelGraph &kernel_graph, con
   if (bp_end_str.empty()) {
     trace_info->trace_bp_end = trace_info->trace_iter_end;
   } else {
-    trace_info->trace_bp_end.insert(bp_end_str);
+    (void)trace_info->trace_bp_end.insert(bp_end_str);
   }
 }
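The (void) cast added in GetTraceBpEnd explicitly discards the pair that set::insert returns, which silences "return value not checked" findings from static analyzers. In isolation:

#include <set>
#include <string>

int main() {
  std::set<std::string> trace_bp_end;
  // std::set::insert returns {iterator, bool}; casting to void documents
  // that the result is deliberately ignored, the style of fix this patch
  // applies to satisfy the code checker.
  (void)trace_bp_end.insert("bp_end_node");
  return trace_bp_end.empty() ? 1 : 0;
}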
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
index 6d314182f80..0dafb0c30de 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
@@ -72,7 +72,8 @@ void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
   if (is_enable_mem_reuse) {
     MS_EXCEPTION_IF_NULL(mem_manager_);
     mem_manager_->ResetDynamicMemory();
-    AssignDynamicMemory(kernel_graph);
+    MS_EXCEPTION_IF_NULL(kernel_graph);
+    AssignDynamicMemory(*kernel_graph);
 #ifdef MEM_REUSE_DEBUG
     // Get normal graph ir for memreuse
     mindspore::memreuse::MemReuseChecker::GetInstance().CheckNormalIR(kernel_graph);
@@ -405,16 +406,15 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
   static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseSummaryRefCount(summary_outputs);
 }
 
-bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);
+bool CPUKernelRuntime::Run(const session::KernelGraph &kernel_graph, bool) {
+  static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(&kernel_graph);
 
-  auto kernels = kernel_graph->execution_order();
+  auto kernels = kernel_graph.execution_order();
 #ifndef ENABLE_SECURITY
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
-  uint32_t graph_id = kernel_graph->graph_id();
+  uint32_t graph_id = kernel_graph.graph_id();
 #endif
 #ifdef ENABLE_DUMP_IR
   std::string name = "mem_address_list";
@@ -490,7 +490,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
   }
 #ifndef ENABLE_SECURITY
   if (iter_dump_flag) {
-    CPUE2eDump::DumpParametersAndConst(kernel_graph, graph_id);
+    CPUE2eDump::DumpParametersAndConst(&kernel_graph, graph_id);
   }
   if (graph_id == 0) {
     dump_json_parser.UpdateDumpIter();
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
index 26f9610b4e2..f7ea815420c 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
@@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
   ~CPUKernelRuntime() override = default;
 
   bool Init();
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
   void AssignKernelAddress(session::KernelGraph *kernel_graph);
   void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
                            VectorRef *outputs, std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
@@ -44,8 +44,8 @@ class CPUKernelRuntime : public KernelRuntime {
                        VectorRef *outputs);
   void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
   void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
-  bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
+  bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
   DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kCPU; };
 
  protected:
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index 2a80eca2fb5..9a92267015c 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -431,7 +431,7 @@ void GPUKernelRuntime::FetchMemUnitSize(const session::KernelGraph *graph) {
   }
 }
 
-void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
+void GPUKernelRuntime::AssignMemory(const session::KernelGraph &graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   MS_EXCEPTION_IF_NULL(mem_manager_);
@@ -441,18 +441,17 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
   bool is_enable_dynamic_mem = context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL);
   if (is_enable_dynamic_mem) {
     // Use the dynamic memory pool.
-    InitKernelRefCount(graph);
-    InitMemorySwapInfo(graph);
-    InitKernelOutputAddress(graph);
-    InitKernelWorkspaceAddress(graph);
-    SaveGraphOutputNode(graph);
+    InitKernelRefCount(&graph);
+    InitMemorySwapInfo(&graph);
+    InitKernelOutputAddress(&graph);
+    InitKernelWorkspaceAddress(&graph);
+    SaveGraphOutputNode(&graph);
   } else {
     AssignDynamicMemory(graph);
   }
 }
 
-bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool GPUKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
   struct timeval start_time, end_time;
   (void)gettimeofday(&start_time, nullptr);
   bool ret = true;
@@ -462,7 +461,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
   bool is_enable_pynative_infer = context_ptr->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER);
   bool is_pynative_mode = (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
   if (is_enable_dynamic_mem && !is_pynative_mode && !is_enable_pynative_infer) {
-    auto graph_id = graph->graph_id();
+    auto graph_id = graph.graph_id();
     auto iter = mem_swap_map_.find(graph_id);
     if (iter == mem_swap_map_.end()) {
       MS_LOG(EXCEPTION) << "Find memory swap map failed.";
@@ -476,11 +475,11 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
     mem_reuse_util_ = mem_reuse_iter->second;
     MS_EXCEPTION_IF_NULL(mem_reuse_util_);
 
-    ret = RunOneStep(graph);
+    ret = RunOneStep(&graph);
   } else {
-    if (graph->is_dynamic_shape()) {
+    if (graph.is_dynamic_shape()) {
       // run dynamic shape graph in pynative
-      ret = RunOpLaunchKernelDynamic(graph);
+      ret = RunOpLaunchKernelDynamic(&graph);
     } else {
       ret = LaunchKernels(graph);
     }
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
index 5f3401cea03..08baac8981b 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
@@ -43,10 +43,10 @@ class GPUKernelRuntime : public KernelRuntime {
   bool Init() override;
   void ReleaseDeviceRes() override;
   void ClearGraphRuntimeResource(uint32_t graph_id) override;
-  void AssignMemory(session::KernelGraph *graph) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
-  bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
+  void AssignMemory(const session::KernelGraph &graph) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
+  bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
   DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kGPU; }
   std::shared_ptr<DeviceEvent> CreateDeviceEvent() override;
   void *compute_stream() const override { return stream_; }
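GPUKernelRuntime::AssignMemory shows the incremental-migration bridge: the new entry point takes a reference, and legacy helpers that still want a pointer receive &graph. Sketched with assumed names:

#include <iostream>

struct KernelGraph { int graph_id = 5; };

// Legacy helper that still takes a pointer.
void InitKernelRefCount(const KernelGraph *graph) { std::cout << graph->graph_id << '\n'; }

// Newly migrated entry point takes a reference; it bridges to the old
// helpers with &graph, as GPUKernelRuntime::AssignMemory does above.
void AssignMemory(const KernelGraph &graph) { InitKernelRefCount(&graph); }

int main() {
  KernelGraph graph;
  AssignMemory(graph);
  return 0;
}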
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index 1ffd669db10..1530a6c1be9 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -46,12 +46,11 @@ constexpr float kMaxMemReuseFactor = 0.8;
 constexpr float kMinMemReuseFactor = 0.5;
 constexpr float kRetryFactor = 0.1;
 namespace {
-std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto graph_inputs = graph->inputs();
+std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph &graph) {
+  auto graph_inputs = graph.inputs();
   std::vector<AnfNodePtr> result(graph_inputs.begin(), graph_inputs.end());
   std::set<AnfNodePtr> inputs_set(graph_inputs.begin(), graph_inputs.end());
-  auto kernels = graph->execution_order();
+  auto kernels = graph.execution_order();
   for (auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
     auto input_num = AnfAlgo::GetInputTensorNum(kernel);
@@ -71,9 +70,9 @@ std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
 constexpr size_t kMinInputSize = 2;
 KernelRuntime::~KernelRuntime() {}
 
-bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
+bool KernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) { return true; }
 
-bool KernelRuntime::LoadData(session::KernelGraph *) { return false; }
+bool KernelRuntime::LoadData(const session::KernelGraph &) { return false; }
 
 bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
   MS_EXCEPTION_IF_NULL(kernel);
@@ -85,7 +84,7 @@ bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_
   return false;
 }
 
-void KernelRuntime::AssignMemory(session::KernelGraph *graph) {
+void KernelRuntime::AssignMemory(const session::KernelGraph &graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
@@ -262,9 +261,8 @@ void KernelRuntime::RunOpMallocPre(const session::KernelGraph &graph,
   }
 }
 
-void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto kernels = kernel_graph->execution_order();
+void KernelRuntime::ResetNodeAddress(const session::KernelGraph &kernel_graph) {
+  auto kernels = kernel_graph.execution_order();
   for (auto &kernel : kernels) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
     MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -303,39 +301,38 @@ void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
   }
 }
 
-void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph,
+void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors,
+                                      const session::KernelGraph &graph,
                                       const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
-  MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(mem_manager_);
   mem_manager_->ResetDynamicMemory();
-  for (const auto &node : graph->execution_order()) {
+  for (const auto &node : graph.execution_order()) {
     RunOpAssignCommunicationOutput(node);
     RunOpAssignCommunicationInput(node);
   }
   RunOpAssignInputMemory(input_tensors, graph);
   AssignStaticMemoryValueNode(graph);
-  for (const auto &node : graph->execution_order()) {
+  for (const auto &node : graph.execution_order()) {
     RunOpAssignOutputMemory(node, tensor_to_node);
     RunOpAssignWorkSpaceMemory(node);
   }
   UpdateRefNodeOutputMem(graph);
 }
 
-void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) const {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::RunOpClearMemory(const session::KernelGraph &graph) const {
   // clear input parameter memory resource
-  for (const auto &input_node : graph->inputs()) {
+  for (const auto &input_node : graph.inputs()) {
     MS_EXCEPTION_IF_NULL(input_node);
     AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get());
   }
   // clear input value node memory resource
-  for (const auto &value_node : graph->graph_value_nodes()) {
+  for (const auto &value_node : graph.graph_value_nodes()) {
     MS_EXCEPTION_IF_NULL(value_node);
     AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get());
   }
-  for (const auto &cnode : graph->execution_order()) {
+  for (const auto &cnode : graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(cnode);
     // clear output memory resource
     size_t output_num = AnfAlgo::GetOutputTensorNum(cnode);
@@ -372,23 +369,22 @@ bool KernelRuntime::DumpDataEnabledIteration() {
 }
 #endif
 
-void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
+void KernelRuntime::AssignStaticMemory(const session::KernelGraph &graph) {
   AssignStaticMemoryInput(graph);
   AssignStaticMemoryValueNode(graph);
   AssignStaticMemoryOutput(graph);
 }
 
 void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors,
-                                           const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                           const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
-  if (input_tensors.size() != graph->inputs().size()) {
+  if (input_tensors.size() != graph.inputs().size()) {
     MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors.size()
-                      << " should be equal to graph input parameter size " << graph->inputs().size();
+                      << " should be equal to graph input parameter size " << graph.inputs().size();
   }
-  for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) {
-    auto item = graph->inputs()[input_index];
+  for (size_t input_index = 0; input_index < graph.inputs().size(); ++input_index) {
+    auto item = graph.inputs()[input_index];
     MS_EXCEPTION_IF_NULL(item);
     if (!item->isa<Parameter>()) {
       continue;
@@ -400,7 +396,9 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
       auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(current_tensor->device_address());
       if (output_address != nullptr && output_address->DeviceType() == GetTargetDeviceAddressType()) {
         if (output_address->ptr_ == nullptr) {
-          mem_manager_->MallocMemFromMemPool(output_address, output_address->size());
+          if (!mem_manager_->MallocMemFromMemPool(output_address, output_address->size())) {
+            MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << output_address->size();
+          }
         }
         AnfAlgo::SetOutputAddr(output_address, index, item.get());
@@ -448,7 +446,9 @@ void KernelRuntime::RunOpAssignOutputMemory(
     MS_EXCEPTION_IF_NULL(address);
     if (address->ptr() == nullptr) {
       MS_EXCEPTION_IF_NULL(mem_manager_);
-      mem_manager_->MallocMemFromMemPool(address, address->size());
+      if (!mem_manager_->MallocMemFromMemPool(address, address->size())) {
+        MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << address->size();
+      }
     }
     continue;
   }
@@ -489,14 +489,13 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
   }
 }
 
-void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph) {
+void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph) {
   if (pre_output_value == nullptr) {
     return;
   }
   std::vector<tensor::TensorPtr> pre_output_tensors;
   TensorValueToTensor(pre_output_value, &pre_output_tensors);
-  MS_EXCEPTION_IF_NULL(graph);
-  auto output_nodes = graph->outputs();
+  auto output_nodes = graph.outputs();
   if (pre_output_tensors.size() != output_nodes.size()) {
     MS_LOG(EXCEPTION) << "The size of pre output tensors [" << pre_output_tensors.size()
                       << "] is not equal to the size of output nodes of graph [" << output_nodes.size() << "]";
@@ -536,13 +535,12 @@ void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value
   }
 }
 
-void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
-  MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph->graph_id();
+  MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph.graph_id();
   auto graph_inputs = GetGraphInputs(graph);
-  auto graph_valid_input = graph->valid_inputs();
-  graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end());
+  auto graph_valid_input = graph.valid_inputs();
+  graph_inputs.insert(graph_inputs.end(), graph.child_graph_result().begin(), graph.child_graph_result().end());
   std::vector<AnfNodePtr> need_alloc_nodes;
   auto add_need_alloc_nodes = [&need_alloc_nodes, graph, this](const AnfNodePtr &node) {
     MS_EXCEPTION_IF_NULL(node);
@@ -553,7 +551,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
       return;
     }
     auto input_param = node->cast<ParameterPtr>();
-    if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph->graph_id())) {
+    if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph.graph_id())) {
      return;
    }
    need_alloc_nodes.push_back(node);
@@ -611,7 +609,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
       CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id, {item, index});
     MS_LOG(INFO) << "Assign Static Memory for Input node, size:" << tensor_size
                  << " node:" << item->fullname_with_scope() << " index: " << index;
-    if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) {
+    if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph.graph_id()) == nullptr) {
       MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
     }
     AnfAlgo::SetOutputAddr(device_address, index, item.get());
   }
   MS_LOG(INFO) << "AssignStaticMemoryInput end";
 }
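The newly checked MallocMemFromMemPool calls turn a silent allocation failure into an immediate exception at the allocation site, instead of a null-pointer dereference much later. A sketch of the idea (the bool-returning allocator here is a stand-in for the real pool API):

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

// Stand-in allocator: returns false when the pool is exhausted, matching the
// bool contract of MallocMemFromMemPool.
bool MallocMemFromMemPool(std::size_t size, std::size_t pool_left) { return size <= pool_left; }

void AssignOutput(std::size_t size, std::size_t pool_left) {
  // The patch wraps the call so a false return becomes an exception right
  // here, with the requested size in the message.
  if (!MallocMemFromMemPool(size, pool_left)) {
    throw std::runtime_error("Allocate memory failed, size:" + std::to_string(size));
  }
  std::cout << "allocated " << size << " bytes\n";
}

int main() {
  AssignOutput(256, 1024);     // succeeds
  try {
    AssignOutput(4096, 1024);  // fails fast with a clear message
  } catch (const std::exception &e) {
    std::cout << e.what() << '\n';
  }
  return 0;
}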
for (const auto &node : nodes) { @@ -647,9 +644,8 @@ void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph) MS_LOG(INFO) << "AssignStaticMemoryOutput end"; } -void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - auto &kernels = graph->execution_order(); +void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph &graph) { + auto &kernels = graph.execution_order(); for (auto &kernel : kernels) { MS_EXCEPTION_IF_NULL(kernel); auto output_num = AnfAlgo::GetOutputTensorNum(kernel); @@ -659,8 +655,8 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) { } for (size_t i = 0; i < output_num; ++i) { session::AnfWithOutIndex out_pair(kernel, i); - if (graph->IsInRefOutputMap(out_pair)) { - auto origin_pair = graph->GetRefCorrespondOutput(out_pair); + if (graph.IsInRefOutputMap(out_pair)) { + auto origin_pair = graph.GetRefCorrespondOutput(out_pair); MS_EXCEPTION_IF_NULL(origin_pair.first); auto origin_node_output_addr = AnfAlgo::GetMutableOutputAddr(origin_pair.first, origin_pair.second); MS_EXCEPTION_IF_NULL(origin_node_output_addr); @@ -682,10 +678,9 @@ void KernelRuntime::AssignCommunicationNodeMem(MemType type, const AnfNodePtr &n AssignWorkSpaceMem(type, node); } -void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - auto &kernels = graph->execution_order(); - if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) { +void KernelRuntime::GenKernelEvents(const session::KernelGraph &graph) { + auto &kernels = graph.execution_order(); + if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) { return; } auto kernel_events = @@ -736,7 +731,7 @@ void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) { kernel_post_run_events[i].emplace_back([post_event]() { post_event->WaitEvent(); }); } } - graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events); + graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events); } void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node) { @@ -989,15 +984,14 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const } } -void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +void KernelRuntime::AssignStaticMemoryValueNode(const session::KernelGraph &graph) { MS_EXCEPTION_IF_NULL(mem_manager_); - MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph->graph_id(); + MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph.graph_id(); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); // order the value nodes std::map value_nodes_map; - for (auto &node : graph->graph_value_nodes()) { + for (auto &node : graph.graph_value_nodes()) { MS_EXCEPTION_IF_NULL(node); value_nodes_map[node->fullname_with_scope()] = node; } @@ -1007,22 +1001,18 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(value_node); if (NodeOutputDeviceAddressExist(value_node, 0)) { MS_LOG(DEBUG) << "value_node[" << value_node->DebugString() << "] address already exist"; - - // TODO(jojo): PyNaitve Infer ? 
       auto device_address = AnfAlgo::GetMutableOutputAddr(value_node, 0);
       if (device_address->ptr_ == nullptr) {
         if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER)) {
           if (!mem_manager_->MallocMemFromMemPool(device_address, device_address->size_)) {
             MS_LOG(EXCEPTION) << "MallocMemFromMemPool failed";
           }
-        } else {
-          if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph->graph_id())) {
+          if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph.graph_id())) {
             MS_LOG(EXCEPTION) << "MallocMem kStaticMem failed";
           }
         }
       }
-
       continue;
     }
     auto &node_value = value_node->value();
@@ -1042,7 +1032,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
     } else {
       MS_LOG(INFO) << "Assign Static Memory for Value node, size:" << tensor_size
                    << " node:" << value_node->fullname_with_scope();
-      if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
+      if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph.graph_id()) == nullptr) {
         MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem
                           << ", tensor size is: " << tensor_size;
       }
@@ -1057,8 +1047,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
   MS_LOG(DEBUG) << "AssignStaticMemoryValueNode end";
 }
 
-void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::AssignDynamicMemory(const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -1078,7 +1067,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
   } else {
     MS_LOG(INFO) << "Memory Reuse is disable...";
   }
-  auto &execution_nodes = graph->execution_order();
+  auto &execution_nodes = graph.execution_order();
   std::vector<CNodePtr> compute_nodes;
   // communication nodes first
   for (auto &node : execution_nodes) {
@@ -1338,17 +1327,16 @@ void KernelRuntime::AssignKernelAddress(const std::shared_ptr<MemScheduler> &mem
   }
 }
 
 void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
-                                          const session::KernelGraph *graph, const AnfNodePtr &kernel, bool mock) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                          const session::KernelGraph &graph, const AnfNodePtr &kernel, bool mock) {
   MS_EXCEPTION_IF_NULL(mem_scheduler);
   MS_EXCEPTION_IF_NULL(kernel);
   auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   for (size_t j = 0; j < kernel_mod->GetOutputSizeList().size(); ++j) {
-    auto tensor = graph->GetNodeOutputTensor(std::make_pair(kernel, j));
+    auto tensor = graph.GetNodeOutputTensor(std::make_pair(kernel, j));
     auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, j, true);
     if (mock) {
-      if (graph->IsInternalOutput(kernel, j) && device_address != nullptr) {
+      if (graph.IsInternalOutput(kernel, j) && device_address != nullptr) {
        mem_scheduler->SetMemPriority(device_address.get(), kMemPriorityHigh);
       }
       continue;
@@ -1377,11 +1365,10 @@ void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &m
 }
 
 void KernelRuntime::InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
-                                          const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                          const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_scheduler);
-  auto &input_nodes = graph->input_nodes();
-  auto &input_tensors = graph->input_tensors();
+  auto &input_nodes = graph.input_nodes();
+  auto &input_tensors = graph.input_tensors();
   if (input_tensors.size() != input_nodes.size()) {
MS_LOG_EXCEPTION << "Invalid input tensor size:" << input_tensors.size() << " vs node size:" << input_nodes.size(); } @@ -1407,9 +1394,8 @@ void KernelRuntime::InitGraphInputTensors(const std::shared_ptr &m } } -bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel, +bool KernelRuntime::LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel, const std::shared_ptr &mem_scheduler, bool mock) { - MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(kernel); auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); @@ -1456,21 +1442,21 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNod return ret; } -bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock) { +bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph, bool mock) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); std::shared_ptr mem_scheduler = nullptr; auto enable_mem_scheduler = context_ptr->get_param(MS_CTX_ENABLE_MEM_SCHEDULER); if (enable_mem_scheduler) { - mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id()); + mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id()); MS_EXCEPTION_IF_NULL(mem_scheduler); mem_scheduler->SetMemHandler(mem_manager_); mem_scheduler->RecordMemUsage(); InitGraphInputTensors(mem_scheduler, graph); } - const auto &kernels = graph->execution_order(); + const auto &kernels = graph.execution_order(); std::vector dynamic_kernel_list; - auto iter = graph_dynamic_kernel_map_.find(graph->graph_id()); + auto iter = graph_dynamic_kernel_map_.find(graph.graph_id()); if (iter != graph_dynamic_kernel_map_.end()) { dynamic_kernel_list = iter->second; } @@ -1480,7 +1466,7 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock } std::vector>> kernel_pre_run_events; std::vector>> kernel_post_run_events; - auto events_iter = graph_kernel_events_map_.find(graph->graph_id()); + auto events_iter = graph_kernel_events_map_.find(graph.graph_id()); if (events_iter != graph_kernel_events_map_.end()) { kernel_pre_run_events = events_iter->second.first; kernel_post_run_events = events_iter->second.second; @@ -1528,13 +1514,12 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock return true; } -void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph &graph) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); auto enable_mem_scheduler = context_ptr->get_param(MS_CTX_ENABLE_MEM_SCHEDULER); if (enable_mem_scheduler) { - auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id()); + auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id()); if (mem_scheduler->need_record_event()) { (void)LaunchKernelMod(graph, true); } @@ -1551,8 +1536,7 @@ void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) { } } -bool KernelRuntime::LaunchKernels(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +bool KernelRuntime::LaunchKernels(const session::KernelGraph &graph) { UseMemSchedulerIfNeeded(graph); if (!LaunchKernelMod(graph)) { MS_LOG(ERROR) << "LaunchKernelMod failed!"; @@ -1574,11 +1558,10 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { } #if ((defined ENABLE_CPU) && (!defined 
_WIN32)) -void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph *graph, +void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index, size_t *const first_cache_size) { - MS_EXCEPTION_IF_NULL(graph); - for (const auto &kernel : graph->execution_order()) { + for (const auto &kernel : graph.execution_order()) { MS_EXCEPTION_IF_NULL(kernel); auto kernel_name = AnfAlgo::GetCNodeName(kernel); if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) { @@ -1647,13 +1630,12 @@ void KernelRuntime::CheckSparsePSEmbeddingCache(const CNodePtr &node) { } } -void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph) { AnfNodePtr first_cache_input_index = nullptr; size_t first_cache_size = 0; GetFirstPSEmbeddingCache(graph, &first_cache_input_index, &first_cache_size); MS_EXCEPTION_IF_NULL(first_cache_input_index); - for (const auto &kernel : graph->execution_order()) { + for (const auto &kernel : graph.execution_order()) { MS_EXCEPTION_IF_NULL(kernel); auto kernel_name = AnfAlgo::GetCNodeName(kernel); if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) { diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h index 5c78f11e757..bdef5a0252d 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -53,25 +53,26 @@ class KernelRuntime { KernelRuntime() = default; virtual ~KernelRuntime(); virtual bool Init() = 0; - virtual void AssignMemory(session::KernelGraph *graph); - void RunOpAssignMemory(const std::vector &input_tensors, session::KernelGraph *graph, + virtual void AssignMemory(const session::KernelGraph &graph); + void RunOpAssignMemory(const std::vector &input_tensors, const session::KernelGraph &graph, const std::map &tensor_to_node = {}); void RunOpAssignCommunicationOutput(const AnfNodePtr &node) const; void RunOpAssignCommunicationInput(const AnfNodePtr &node) const; - void RunOpClearMemory(const session::KernelGraph *graph) const; + void RunOpClearMemory(const session::KernelGraph &graph) const; void RunOpMallocPre(const session::KernelGraph &graph, const std::vector &input_tensors); #ifdef ENABLE_DEBUGGER static bool DumpDataEnabled(); static bool DumpDataEnabledIteration(); #endif - virtual bool LoadData(session::KernelGraph *graph); - virtual bool Load(session::KernelGraph *graph, bool is_task_sink); - virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0; - virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0; - virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0; - bool LaunchKernels(const session::KernelGraph *graph); - virtual void AssignStaticMemoryInput(const session::KernelGraph *graph); - virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph); + virtual bool LoadData(const session::KernelGraph &graph); + virtual bool Load(const session::KernelGraph &graph, bool is_task_sink); + virtual bool Run(const session::KernelGraph &graph, bool is_task_sink) = 0; + virtual bool GenDynamicKernel(const session::KernelGraph &graph) = 0; + virtual bool RunDynamicKernelAsync(const session::KernelGraph &graph) = 0; + bool LaunchKernels(const session::KernelGraph &graph); + virtual void AssignStaticMemoryInput(const session::KernelGraph &graph); + virtual 
void AssignStaticMemoryValueNode(const session::KernelGraph &graph); + virtual void ClearGraphRuntimeResource(uint32_t graph_id); virtual bool SyncStream() = 0; virtual bool MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) = 0; @@ -107,13 +108,13 @@ class KernelRuntime { virtual void PreInit() {} #endif virtual uint64_t GetAvailableMemMaxSize() const { return 0; } - virtual void GenKernelEvents(const session::KernelGraph *graph); + virtual void GenKernelEvents(const session::KernelGraph &graph); virtual std::shared_ptr CreateDeviceEvent() { return nullptr; } virtual std::shared_ptr CreateDeviceTimeEvent() { return nullptr; } virtual DeviceAddressType GetTargetDeviceAddressType() const = 0; virtual void *compute_stream() const { return nullptr; } virtual void *communication_stream() const { return nullptr; } - void UpdateRefNodeOutputMem(const session::KernelGraph *graph); + void UpdateRefNodeOutputMem(const session::KernelGraph &graph); virtual DeviceAddressPtr AssignExtraStaticMem(const TensorPtr &tensor, const AnfNodePtr &node, size_t index); virtual void *GetModelStream(uint32_t graph_id) const { return nullptr; } @@ -125,8 +126,8 @@ class KernelRuntime { virtual bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index); virtual bool KernelMemNotReuse(const AnfNodePtr &node); - void AssignStaticMemory(session::KernelGraph *graph); - void AssignDynamicMemory(session::KernelGraph *graph); + void AssignStaticMemory(const session::KernelGraph &graph); + void AssignDynamicMemory(const session::KernelGraph &graph); void AssignNodeOutputMem(MemType type, const AnfNodePtr &node, int index); void AssignWorkSpaceMem(MemType type, const AnfNodePtr &node); @@ -141,35 +142,35 @@ class KernelRuntime { virtual void KernelLaunchProfiling(const std::string &kernel_name) {} private: - void UseMemSchedulerIfNeeded(const session::KernelGraph *graph); - bool LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel, + void UseMemSchedulerIfNeeded(const session::KernelGraph &graph); + bool LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel, const std::shared_ptr &mem_scheduler, bool mock = false); - void ResetNodeAddress(session::KernelGraph *graph); + void ResetNodeAddress(const session::KernelGraph &graph); void AssignKernelAddress(const std::shared_ptr &mem_scheduler, const AnfNodePtr &kernel, AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces, AddressPtrList *kernel_outputs); static void GetOrMallocAddress(const std::shared_ptr &mem_scheduler, const DeviceAddress *device_address, const kernel::AddressPtr &kernel_addr); - void InitGraphInputTensors(const std::shared_ptr &mem_scheduler, const session::KernelGraph *graph); - void SyncNodeOutputTensors(const std::shared_ptr &mem_scheduler, const session::KernelGraph *graph, + void InitGraphInputTensors(const std::shared_ptr &mem_scheduler, const session::KernelGraph &graph); + void SyncNodeOutputTensors(const std::shared_ptr &mem_scheduler, const session::KernelGraph &graph, const AnfNodePtr &kernel, bool mock); - void AssignStaticMemoryOutput(const session::KernelGraph *graph); - bool LaunchKernelMod(const session::KernelGraph *graph, bool mock = false); + void AssignStaticMemoryOutput(const session::KernelGraph &graph); + bool LaunchKernelMod(const session::KernelGraph &graph, bool mock = false); void LaunchKernelEvent(const std::vector>> &run_events, size_t index) const; void DebugStreamSync(const CNodePtr &kernel); static void GenAddrCleanLaunchArgs(const CNodePtr 
&cnode, AddressPtrList *kernel_inputs, const std::shared_ptr &mem_schedule = nullptr); - void RunOpAssignInputMemory(const std::vector &input_tensors, const session::KernelGraph *graph); + void RunOpAssignInputMemory(const std::vector &input_tensors, const session::KernelGraph &graph); void RunOpAssignOutputMemory(const AnfNodePtr &kernel, const std::map &tensor_to_node = {}); void RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel); - void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph); + void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph); void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx); DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const; #if ((defined ENABLE_CPU) && (!defined _WIN32)) - void GetFirstPSEmbeddingCache(const session::KernelGraph *graph, AnfNodePtr *const first_cache_input_index, + void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index, size_t *const first_cache_size); - void CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph); + void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph); void CheckSparsePSEmbeddingCache(const CNodePtr &node); #endif void RunOpGetCommunicationInputInfo(const AnfNodePtr &node, size_t *total_size, diff --git a/mindspore/ccsrc/runtime/device/memory_manager.cc b/mindspore/ccsrc/runtime/device/memory_manager.cc index 1b51a63f6c8..b83f5f6e01c 100644 --- a/mindspore/ccsrc/runtime/device/memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/memory_manager.cc @@ -35,18 +35,17 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) { return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; } -void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); +void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) { SomasPtr somas_reuse_util_ptr = std::make_shared(); MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr); somas_reuse_util_ptr_ = somas_reuse_util_ptr; - if (!(somas_reuse_util_ptr->Allocate(graph))) { + if (!(somas_reuse_util_ptr->Allocate(&graph))) { MS_LOG(EXCEPTION) << "Somas Allocate Failed."; } size_t total_allocated_size = somas_reuse_util_ptr->GetTotalMemSize(); - MS_LOG(INFO) << "Graph " << graph->graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]"; + MS_LOG(INFO) << "Graph " << graph.graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]"; if (total_allocated_size > 0) { auto base_ptr = MallocDynamicMem(total_allocated_size, false); MS_LOG(INFO) << "Somas Reuse Memory Base Address [" << static_cast(base_ptr) << "], End Address [" @@ -59,18 +58,18 @@ void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) { #ifdef ENABLE_DUMP_IR SubModuleId module = SubModuleId::SM_OPTIMIZER; - std::string name = "somas_allocate_info." + std::to_string(graph->graph_id()); + std::string name = "somas_allocate_info." + std::to_string(graph.graph_id()); (void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasInfo()); - name = "somas_mem_info." + std::to_string(graph->graph_id()); + name = "somas_mem_info." 
+ std::to_string(graph.graph_id()); (void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasMemory()); #endif bool save_graphs = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG); if (save_graphs) { - std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph->graph_id()) + ".ir"); + std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph.graph_id()) + ".ir"); somas_reuse_util_ptr_->DumpSomasInfoIR(file_path); - std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir"); + std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph.graph_id()) + ".ir"); somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path); } } diff --git a/mindspore/ccsrc/runtime/device/memory_manager.h b/mindspore/ccsrc/runtime/device/memory_manager.h index 4e4dcdf913f..7def55738ad 100644 --- a/mindspore/ccsrc/runtime/device/memory_manager.h +++ b/mindspore/ccsrc/runtime/device/memory_manager.h @@ -44,7 +44,7 @@ class MemoryManager : public MemHandler { } virtual void ClearGlobalIdleMem() {} - virtual void MallocSomasDynamicMem(const session::KernelGraph *graph); + virtual void MallocSomasDynamicMem(const session::KernelGraph &graph); uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size, const DeviceAddressPtr &address, bool comm_mem); uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size); diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc index 9c72ad800f2..498a0aaf513 100644 --- a/mindspore/core/abstract/prim_arrays.cc +++ b/mindspore/core/abstract/prim_arrays.cc @@ -1231,7 +1231,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv AbstractBasePtr InferImplTensorCopySlices(const AnalysisEnginePtr &, const PrimitivePtr &primitive, const AbstractBasePtrList &args_spec_list) { auto &op_name = primitive->name(); - CheckArgsSize(op_name, args_spec_list, 5); + constexpr auto kTensorCopySlicesInputNum = 5; + CheckArgsSize(op_name, args_spec_list, kTensorCopySlicesInputNum); AbstractTensorPtr input = CheckArg(op_name, args_spec_list, 0); return std::make_shared(input->element(), input->shape()); } diff --git a/mindspore/core/abstract/prim_others.cc b/mindspore/core/abstract/prim_others.cc index dd98f8b9dab..38fe3673c92 100644 --- a/mindspore/core/abstract/prim_others.cc +++ b/mindspore/core/abstract/prim_others.cc @@ -493,7 +493,7 @@ AbstractBasePtr InferImplReduceScatter(const AnalysisEnginePtr &, const Primitiv if (tmp_shape.empty()) { MS_LOG(EXCEPTION) << "shape size is 0"; } - tmp_shape[0] = IntMulWithOverflowCheck(tmp_shape[0], rank_size); + tmp_shape[0] = LongMulWithOverflowCheck(tmp_shape[0], rank_size); return std::make_shared(x->element(), std::make_shared(tmp_shape)); } diff --git a/mindspore/ops/_grad_experimental/grad_inner_ops.py b/mindspore/ops/_grad_experimental/grad_inner_ops.py index be38eefaa61..23c558ffa12 100644 --- a/mindspore/ops/_grad_experimental/grad_inner_ops.py +++ b/mindspore/ops/_grad_experimental/grad_inner_ops.py @@ -20,6 +20,7 @@ from ..operations import _inner_ops as inner from .. import functional as F from ..composite.multitype_ops.zeros_like_impl import zeros_like + @bprop_getters.register(inner.TensorCopySlices) def get_bprop_tensor_copy_slices(self): """Generate bprop for TensorCopySlices"""
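Reviewer note (not part of the patch): the bulk of this change is one mechanical refactor. `session::KernelGraph *` parameters become `const session::KernelGraph &`, the repeated `MS_EXCEPTION_IF_NULL(graph)` guards disappear (a reference cannot be null), `graph->` becomes `graph.`, and call sites holding a smart pointer dereference once at the boundary (`*kernel_graph`). A minimal sketch of the pattern follows; `KernelGraph`, `AssignMemoryOld`, and `AssignMemoryNew` here are hypothetical stand-ins, not code from this repository.

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>

    struct KernelGraph {  // stand-in for session::KernelGraph
      uint32_t graph_id() const { return id_; }
      uint32_t id_ = 0;
    };

    // Before: a pointer parameter forces every callee to re-check for null.
    void AssignMemoryOld(const KernelGraph *graph) {
      if (graph == nullptr) {  // plays the role of MS_EXCEPTION_IF_NULL(graph)
        throw std::runtime_error("graph is nullptr");
      }
      std::cout << "graph " << graph->graph_id() << "\n";
    }

    // After: a reference documents "never null", so the guard is deleted
    // and member access switches from -> to .
    void AssignMemoryNew(const KernelGraph &graph) {
      std::cout << "graph " << graph.graph_id() << "\n";
    }

    int main() {
      KernelGraph g;
      AssignMemoryOld(&g);
      AssignMemoryNew(g);  // a caller with a shared_ptr would pass *graph_ptr
      return 0;
    }

The null check is not lost; it moves to the single point where the pointer is dereferenced (e.g. the `MS_EXCEPTION_IF_NULL(child_graph)` style check before `*child_graph`), instead of being repeated in every callee.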
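Reviewer note (not part of the patch): the prim_others.cc hunk swaps `IntMulWithOverflowCheck` for `LongMulWithOverflowCheck` because `tmp_shape` holds `int64_t` dimensions, so the overflow check must run at 64-bit width. The sketch below illustrates the idea with a hand-rolled `CheckedMulInt64` built on the GCC/Clang `__builtin_mul_overflow` builtin; it is an assumed stand-in, and MindSpore's actual helper is implemented differently.

    #include <cstdint>
    #include <stdexcept>

    // Stand-in for a 64-bit checked multiply such as LongMulWithOverflowCheck.
    int64_t CheckedMulInt64(int64_t a, int64_t b) {
      int64_t out = 0;
      if (__builtin_mul_overflow(a, b, &out)) {  // GCC/Clang builtin
        throw std::overflow_error("int64 multiply overflow");
      }
      return out;
    }

    int main() {
      // Scaling the first ReduceScatter output dimension by rank_size must be
      // checked at int64 width; a 32-bit check would mishandle large dims.
      int64_t first_dim = int64_t{1} << 40;
      int64_t rank_size = 8;
      int64_t scaled = CheckedMulInt64(first_dim, rank_size);  // fits in 64 bits
      (void)scaled;
      return 0;
    }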
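Reviewer note (not part of the patch): `LaunchKernelMod(graph, mock)` and `UseMemSchedulerIfNeeded` implement a record-then-replay scheme: when the MemScheduler still needs to record events, the graph is first walked with `mock = true`, so the scheduler observes the memory-access order without launching real kernels, and only then does the real launch run. A toy sketch of that control flow, with all names (`ToyScheduler`, `LaunchAll`) invented for illustration:

    #include <cstdio>
    #include <vector>

    // Toy stand-in for MemScheduler: collects buffer accesses on a mock pass.
    class ToyScheduler {
     public:
      bool need_record() const { return order_.empty(); }
      void Record(int buffer) { order_.push_back(buffer); }
     private:
      std::vector<int> order_;
    };

    // Mirrors the shape of LaunchKernelMod(graph, mock): the mock pass only
    // feeds the scheduler; the real pass launches the kernels.
    void LaunchAll(ToyScheduler *sched, bool mock) {
      const int buffers_per_kernel[] = {0, 1, 1, 2};
      for (int b : buffers_per_kernel) {
        if (mock) {
          sched->Record(b);
        } else {
          std::printf("launch kernel touching buffer %d\n", b);
        }
      }
    }

    int main() {
      ToyScheduler sched;
      if (sched.need_record()) {
        LaunchAll(&sched, /*mock=*/true);  // record pass
      }
      LaunchAll(&sched, /*mock=*/false);   // real pass
      return 0;
    }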