diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index b5069c4acb7..2ef959fa10d 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -171,7 +171,7 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) { device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get())); // build kernel BuildKernel(root_graph); - if (debugger_) { + if (debugger_ && debugger_->partial_memory()) { debugger_->PreExecute(root_graph); } SetSummaryNodes(root_graph.get()); @@ -248,7 +248,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { BuildKernel(graph); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (debugger_) { + if (debugger_ && debugger_->partial_memory()) { debugger_->PreExecute(graph); } if (ms_context->get_param(MS_CTX_PRECOMPILE_ONLY)) { @@ -312,6 +312,9 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vectorPreExecute(kernel_graph); + } #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) // Initialize parameter server InitPSParamAndOptim(kernel_graph, inputs); diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index d8d04908787..1ce0048b8db 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -278,9 +278,9 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList void GPUSession::RunGraph(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs) { auto &kernel_graph = graphs_[graph_id]; - PreIterationDbg(kernel_graph); // Load input data from user input LoadInputData(kernel_graph, inputs); + PreIterationDbg(kernel_graph); #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) // Initialize parameter server InitPSParamAndOptim(kernel_graph, inputs); diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index a8fb3e95a98..af49da4f626 100644 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -22,7 +22,6 @@ #include #include #include - #include "backend/session/session_context.h" #include "backend/session/kernel_graph.h" #include "backend/session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index f347c0351fc..4d09df8f84a 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -30,6 +30,7 @@ #include "pipeline/jit/pipeline.h" #include "backend/session/anf_runtime_algorithm.h" #include "runtime/device/kernel_runtime_manager.h" +#include "runtime/device/kernel_runtime.h" using debugger::EventReply; using debugger::GraphProto; @@ -47,6 +48,7 @@ namespace mindspore { DebuggerPtr Debugger::debugger_ = nullptr; std::mutex Debugger::instance_lock_; +static const size_t PRAMATER_OUTPUT_INDEX = 0; Debugger::Debugger() : grpc_client_(nullptr), @@ -62,7 +64,26 @@ Debugger::Debugger() is_dataset_graph_(false), partial_memory_(false), last_overflow_bin_(0), - overflow_bin_path_("") {} + overflow_bin_path_("") { + if (CheckDebuggerEnabled()) { + // configure partial memory reuse + partial_memory_ = CheckDebuggerPartialMemoryEnabled(); + + // switch memory reuse on or off + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + context_ptr->set_param(MS_CTX_ENABLE_MEM_REUSE, partial_memory_); + // print some message about memory reuse to user + if (partial_memory_) { + MS_LOG(WARNING) + << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first " + "step. 2. Tensor values are only available for nodes that are watched by any watchpoint."; + } else { + MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory " + "usage for large models."; + } + } +} void Debugger::Init(const uint32_t device_id, const std::string device_target) { // access lock for public method @@ -133,27 +154,6 @@ void Debugger::EnableDebugger() { MS_LOG(INFO) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051"; port = "50051"; } - - // configure partial memory reuse - const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM"); - if (env_partial_mem_str != nullptr) { - MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str; - if (std::strcmp(env_partial_mem_str, "1") == 0) { - partial_memory_ = true; - } - } - // switch memory reuse on or off - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - context_ptr->set_param(MS_CTX_ENABLE_MEM_REUSE, partial_memory_); - // print some message about memory reuse to user - if (partial_memory_) { - MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first " - "step. 2. Tensor values are only available for nodes that are watched by any watchpoint."; - } else { - MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory " - "usage for large models."; - } #ifdef ENABLE_D // set operation overflow info overflow_bin_path_ = DumpJsonParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_); @@ -195,9 +195,7 @@ void Debugger::EnableDebugger() { bool Debugger::CheckDebuggerDumpEnabled() { // see if dump is enabled if (device_target_ == kGPUDevice) { - auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); - MS_EXCEPTION_IF_NULL(runtime_instance); - return runtime_instance->DumpDataEnabled(); + return device::KernelRuntime::DumpDataEnabled(); } return false; } @@ -213,6 +211,17 @@ bool Debugger::CheckDebuggerEnabled() { return false; } +bool Debugger::CheckDebuggerPartialMemoryEnabled() { + const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM"); + if (env_partial_mem_str != nullptr) { + MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str; + if (std::strcmp(env_partial_mem_str, "1") == 0) { + return true; + } + } + return false; +} + bool Debugger::DebuggerBackendEnabled() { return CheckDebuggerDumpEnabled() || CheckDebuggerEnabled(); } void Debugger::Reset() { @@ -324,6 +333,7 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) { // only try to enable debugger if it is not a dataset graph EnableDebugger(); if (debugger_enabled_) { + LoadParameters(); // get graph proto and send to mindinsight SendGraphAndSuspend(GetGraphProto()); } @@ -839,4 +849,34 @@ bool Debugger::CheckPort(const char *port) { return true; } +void Debugger::LoadParameters() { + if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return; + if (!(num_step_ == 0 || device_target_ == kAscendDevice || + (device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration()))) + return; + MS_EXCEPTION_IF_NULL(graph_ptr_); + const auto ¶meters = graph_ptr_->inputs(); + // for parameters, set its execution order to be 0; + int exec_order = 0; + for (auto &item : parameters) { + if (!item->isa()) { + continue; + } + std::string parameter_name = item->fullname_with_scope(); + auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); + auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); + auto format = kOpFormat_DEFAULT; + string tensor_name = parameter_name + ':' + "0"; + ShapeVector int_shapes; + auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX); + (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), + [](size_t inner_item) { return SizeToInt(inner_item); }); + bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, true); + if (!ret) { + MS_LOG(ERROR) << "LoadMemToHost:" + << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; + } + } +} + } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 1661fa04027..6e4407b0c58 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -103,6 +103,8 @@ class Debugger : public std::enable_shared_from_this { void SendMetadata(); + void LoadParameters(); + private: // private constructor for singleton Debugger(); @@ -118,6 +120,8 @@ class Debugger : public std::enable_shared_from_this { // check if debugger enabled bool CheckDebuggerEnabled(); + bool CheckDebuggerPartialMemoryEnabled(); + // check and save graph pointer void CheckGraphPtr(const KernelGraphPtr &graph_ptr); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index 5f475f0c2ac..d761dfd7033 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -663,39 +663,25 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file } #ifdef ENABLE_DEBUGGER -bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order, +bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type, - size_t slot, Debugger *debugger, bool keep_prev) const { + size_t slot, bool keep_prev) const { bool ret = false; - DebugServices *debug_services = debugger->debug_services(); - MS_EXCEPTION_IF_NULL(debug_services); - TensorLoader *tensor_loader = debug_services->tensor_loader(); + TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader(); MS_EXCEPTION_IF_NULL(tensor_loader); // TensorData is freed up in AscendSession class auto tensor_data = std::make_shared(); tensor_data->SetName(tensor_name); tensor_data->SetExecutionOrder(execution_order); tensor_data->SetSlot(slot); - if (trans_flag) { - MS_LOG(INFO) << "E2E tensor name is " << tensor_name; - mindspore::tensor::TensorPtr out_tensor = std::make_shared(host_type, host_shape); - size_t host_size = out_tensor->data().nbytes(); - ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); - if (!ret) { - MS_LOG(ERROR) << "Copy device mem to host failed"; - return ret; - } - tensor_data->SetTensor(out_tensor); - } else { - mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape); - size_t host_size = out_tensor->data().nbytes(); - auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST); - if (ret_rt_memcpy != RT_ERROR_NONE) { - MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; - } - MS_LOG(INFO) << "E2E tensor name is " << tensor_name; - tensor_data->SetTensor(out_tensor); + mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape); + size_t host_size = out_tensor->data().nbytes(); + auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST); + if (ret_rt_memcpy != RT_ERROR_NONE) { + MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; } + MS_LOG(INFO) << "E2E tensor name is " << tensor_name; + tensor_data->SetTensor(out_tensor); ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev); return ret; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h index 10389a0796b..393525c2e3c 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h @@ -45,9 +45,8 @@ class AscendDeviceAddress : public DeviceAddress { bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type) const override; #ifdef ENABLE_DEBUGGER - bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt, - const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, - bool keep_prev) const; + bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, + const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override; #endif private: diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index b1213f79676..1946c630aa1 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -254,15 +254,10 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { auto ascend_addr = dynamic_cast(addr); MS_EXCEPTION_IF_NULL(ascend_addr); ShapeVector int_shapes; - if (trans_flag) { - int_shapes = trans::GetRuntimePaddingShape(node, j); - } else { - auto shape = AnfAlgo::GetOutputDeviceShape(node, j); - (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), - [](size_t inner_item) { return SizeToInt(inner_item); }); - } - auto ret = - ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger, false); + auto shape = AnfAlgo::GetOutputDeviceShape(node, j); + (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), + [](size_t inner_item) { return SizeToInt(inner_item); }); + auto ret = ascend_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; @@ -272,40 +267,6 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { } } -void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) { - MS_EXCEPTION_IF_NULL(graph); - // trans_flag: "true" means tensor values will be transfered to host format, otherwise not. - bool trans_flag = false; - const auto ¶meters = graph->inputs(); - // for parameters, set its execution order to be 0; - int exec_order = 0; - for (auto &item : parameters) { - if (!item->isa()) { - continue; - } - std::string parameter_name = item->fullname_with_scope(); - auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); - auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); - auto format = kOpFormat_DEFAULT; - string tensor_name = parameter_name + ':' + "0"; - auto ascend_addr = dynamic_cast(addr); - MS_EXCEPTION_IF_NULL(ascend_addr); - ShapeVector int_shapes; - if (trans_flag) { - int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX); - } else { - auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX); - (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), - [](size_t inner_item) { return SizeToInt(inner_item); }); - } - auto ret = - ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger, true); - if (!ret) { - MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name - << ", host_format:" << format << ".!"; - } - } -} } // namespace #endif @@ -319,7 +280,7 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debug // load output LoadOutput(graph, debugger); // load parameters - LoadParameters(graph, debugger); + if (debugger) debugger->LoadParameters(); #endif return true; } diff --git a/mindspore/ccsrc/runtime/device/device_address.h b/mindspore/ccsrc/runtime/device/device_address.h index ddae68d3dbd..7d32d11af5a 100644 --- a/mindspore/ccsrc/runtime/device/device_address.h +++ b/mindspore/ccsrc/runtime/device/device_address.h @@ -70,6 +70,12 @@ class DeviceAddress : public mindspore::DeviceSync { const ShapeVector &host_shape, TypeId host_type) const { return true; } +#ifdef ENABLE_DEBUGGER + virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, + const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const { + return true; + } +#endif protected: const void *ptr() const { return ptr_; } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 9dba9a8a245..fc3fab30c34 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -80,14 +80,14 @@ GPUDeviceAddress::~GPUDeviceAddress() { } #ifdef ENABLE_DEBUGGER bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, - const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, + const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const { bool ret = false; if (size_ == 0) { return true; } - DebugServices *debug_services = debugger->debug_services(); - TensorLoader *tensor_loader = debug_services->tensor_loader(); + + TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader(); mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape); size_t host_size = out_tensor->data().nbytes(); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h index c68108b9de2..a98f67786b8 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h @@ -44,8 +44,7 @@ class GPUDeviceAddress : public DeviceAddress { #ifdef ENABLE_DEBUGGER bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, - const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, - bool keep_prev) const; + const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override; #endif private: DeviceAddressStatus status_{DeviceAddressStatus::kInDevice}; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index c6c766e322a..74a16310971 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -111,7 +111,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX); (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); - auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, true); + auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost:" << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!"; @@ -130,7 +130,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j); (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), [](size_t inner_item) { return SizeToInt(inner_item); }); - auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, debugger, false); + auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false); if (!ret) { MS_LOG(ERROR) << "LoadMemToHost:" << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; @@ -148,36 +148,6 @@ void UpdateStepNum(Debugger *debugger, bool dump_enabled) { } } -void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) { - MS_EXCEPTION_IF_NULL(graph); - if (!(debugger && dump_enabled)) { - return; - } - const auto ¶meters = graph->inputs(); - // for parameters, set its execution order to be 0; - int exec_order = 0; - for (auto &item : parameters) { - if (!item->isa()) { - continue; - } - std::string parameter_name = item->fullname_with_scope(); - auto addr = AnfAlgo::GetOutputAddr(item, PARAMETER_OUTPUT_INDEX); - auto type = AnfAlgo::GetOutputInferDataType(item, PARAMETER_OUTPUT_INDEX); - auto format = kOpFormat_DEFAULT; - string tensor_name = parameter_name + ':' + "0"; - auto gpu_addr = dynamic_cast(addr); - ShapeVector int_shapes; - auto shape = AnfAlgo::GetOutputDeviceShape(item, PARAMETER_OUTPUT_INDEX); - (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), - [](size_t inner_item) { return SizeToInt(inner_item); }); - auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, debugger, true); - if (!ret) { - MS_LOG(ERROR) << "LoadMemToHost:" - << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; - } - } -} - void ClearCurrentData(Debugger *debugger, bool dump_enabled) { if (debugger && (debugger->debugger_enabled() || dump_enabled)) { DebugServices *debug_services = debugger->debug_services(); @@ -601,7 +571,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De } if (!mock) { // collect weights and bias for dump mode - LoadParameters(graph, debugger, dump_enabled); + if (debugger) debugger->LoadParameters(); CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); } ClearSwapInfo(mock); diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h index 78e8f80a670..69d7764e6f3 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -53,8 +53,8 @@ class KernelRuntime { void RunOpAssignMemory(const ValuePtr &pre_output_value, const std::vector &input_tensors, session::KernelGraph *graph); void RunOpClearMemory(const session::KernelGraph *graph); - bool DumpDataEnabled(); - bool DumpDataEnabledIteration(); + static bool DumpDataEnabled(); + static bool DumpDataEnabledIteration(); virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger); virtual bool Load(session::KernelGraph *graph, bool is_task_sink); virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;