From f4db1dabe031d48e89a6236b0dc1b42d3b0cd658 Mon Sep 17 00:00:00 2001
From: TinaMengtingZhang
Date: Fri, 17 Sep 2021 13:58:29 -0400
Subject: [PATCH] Merge code safety check to master

---
 .../ccsrc/backend/session/ascend_session.cc   |  1 +
 .../ccsrc/backend/session/session_basic.h     |  1 +
 mindspore/ccsrc/debug/common.cc               |  4 +-
 .../ccsrc/debug/data_dump/dump_json_parser.cc |  2 +-
 .../ccsrc/debug/data_dump/dump_json_parser.h  |  1 -
 mindspore/ccsrc/debug/debug_services.cc       | 26 ++++-----
 mindspore/ccsrc/debug/debug_services.h        | 23 +-------
 mindspore/ccsrc/debug/debugger/debugger.cc    | 56 ++++++++-----------
 mindspore/ccsrc/debug/debugger/debugger.h     | 17 ------
 .../ccsrc/debug/debugger/proto_exporter.cc    |  5 --
 mindspore/ccsrc/debug/tensor_load.h           |  6 --
 .../runtime/device/gpu/gpu_kernel_runtime.cc  |  2 +
 12 files changed, 44 insertions(+), 100 deletions(-)

diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index f31a1c55fb2..db56d9a0fed 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -1863,6 +1863,7 @@ void AscendSession::LoadGraphsToDbg(NotNull<KernelGraphPtr> graph,
   MS_LOG(INFO) << "Start to do LoadGraphsToDbg in graph: " << graph->graph_id();
+  MS_EXCEPTION_IF_NULL(debugger_);
   debugger_->LoadGraphs(graph);
   MS_LOG(INFO) << "graph_sum_: " << graph_sum_;
   for (auto &child_graph : graph->child_graph_order()) {
diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h
index 4d13b3d4c81..7c95e541231 100644
--- a/mindspore/ccsrc/backend/session/session_basic.h
+++ b/mindspore/ccsrc/backend/session/session_basic.h
@@ -161,6 +161,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
     debugger_ = Debugger::GetInstance();
     auto ms_context = MsContext::GetInstance();
     MS_EXCEPTION_IF_NULL(ms_context);
+    MS_EXCEPTION_IF_NULL(debugger_);
     debugger_->Init(device_id_, ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET));
   }
 #endif
diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc
index f42c4676324..267f3095f23 100644
--- a/mindspore/ccsrc/debug/common.cc
+++ b/mindspore/ccsrc/debug/common.cc
@@ -181,8 +181,8 @@ std::optional<std::string> Common::GetConfigFile(const std::string &env) {
   if (env.empty()) {
     MS_LOG(EXCEPTION) << "Invalid env";
   }
-  auto config_path_str = std::getenv(env.c_str());
-  if (config_path_str == nullptr) {
+  auto config_path_str = common::GetEnv(env);
+  if (config_path_str.empty()) {
     MS_LOG(ERROR) << "Please export env:" << env;
     return std::nullopt;
   }
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
index 56f5ea53257..0aca8e2e007 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
@@ -130,7 +130,7 @@ void DumpJsonParser::CopyJsonToDir(uint32_t rank_id) {
   }
   auto dump_config_file = Common::GetConfigFile(kMindsporeDumpConfig);
   if (!dump_config_file.has_value()) {
-    MS_LOG(EXCEPTION) << "Get dump config file failed";
+    MS_LOG(EXCEPTION) << "Get dump config file failed.";
   }
   std::ifstream json_file(dump_config_file.value());
   if (async_dump_enabled_ || e2e_dump_enabled_) {
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
index 0bbb5e08c19..e403115d6c8 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.h
@@ -52,7 +52,6 @@ class DumpJsonParser {
   std::string path() const { return path_; }
   std::string iteration_string() const { return iteration_; }
   std::string net_name() const { return net_name_; }
-  uint32_t input_output() const { return input_output_; }
   uint32_t op_debug_mode() const { return op_debug_mode_; }
   bool trans_flag() const { return trans_flag_; }
   uint32_t cur_dump_iter() const { return cur_dump_iter_; }
diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc
index 70b6b92c2b8..13302cee968 100644
--- a/mindspore/ccsrc/debug/debug_services.cc
+++ b/mindspore/ccsrc/debug/debug_services.cc
@@ -410,14 +410,18 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
                                      std::vector<unsigned int> *root_graph_id) {
   std::lock_guard<std::mutex> lg(lock_);
   auto t1 = std::chrono::high_resolution_clock::now();
-  if (watchpoint_table_.empty()) return;
+  if (watchpoint_table_.empty()) {
+    return;
+  }
   // vector to store execution order of tensors hit
   std::vector<int> exec_order;
   std::vector<std::string> time_stamps;
   int tensor_list_size = tensor_list->size();
   uint64_t tensor_list_byte_size = 0;
   MS_LOG(INFO) << "tensor list size: " << tensor_list_size;
-  if (tensor_list_size == 0) return;
+  if (tensor_list_size == 0) {
+    return;
+  }
   // default value for number of threads
   const int default_thread_num = 16;
   int max_thread_num = default_thread_num;
@@ -1165,7 +1169,7 @@ void DebugServices::ReadNodesTensors(const std::vector<std::string> &name, std::
   tensor_loader_->SearchTensors(name, &result_list);

   for (auto result : result_list) {
-    if (!std::get<1>(result)) {
+    if (std::get<1>(result) == nullptr) {
       continue;
     }
     ret_name->push_back(std::get<0>(result));
@@ -1205,7 +1209,7 @@ bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr
 }

 bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const {
-  if (kernel && w_name.length() > 0) {
+  if (kernel != nullptr && w_name.length() > 0) {
     auto input_size = AnfAlgo::GetInputTensorNum(kernel);
     for (size_t j = 0; j < input_size; ++j) {
       auto input_kernel = kernel->input(j + 1);
@@ -1221,14 +1225,8 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
 }
 #endif

-void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }
-
 std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }

-uint32_t DebugServices::GetTensorLoaderIterNum() const { return tensor_loader_->GetIterNum(); }
-
-void DebugServices::SetTensorLoaderIterNum(uint32_t iter_num) { tensor_loader_->set_iter_num(iter_num); }
-
 void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); }

 #ifdef ONLINE_DBG_MODE
@@ -1245,10 +1243,6 @@ bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, boo
   return tensor_loader_->LoadNewTensor(tensor, keep_prev);
 }

-std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::GetWatchpointTable() {
-  return watchpoint_table_;
-}
-
 void DebugServices::ResetLoadedTensors() {
   wp_id_cache_.clear();
   MS_LOG(INFO) << "Resetting loaded tensors";
@@ -1268,7 +1262,9 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNod
   for (size_t j = 0; j < output_size; ++j) {
     auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
     auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
-    if (tensor) result.push_back(tensor);
+    if (tensor != nullptr) {
+      result.push_back(tensor);
+    }
   }
   return result;
 }
diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h
index e8262fdc703..03085c1f49e 100644
--- a/mindspore/ccsrc/debug/debug_services.h
+++ b/mindspore/ccsrc/debug/debug_services.h
@@ -90,7 +90,9 @@ class DebugServices {
     bool hit;
     double_t actual_value;
     void Evaluate(double_t actualValue, std::string inequality_type) {
-      if (std::isnan(actualValue)) return;
+      if (std::isnan(actualValue)) {
+        return;
+      }
       actual_value = actualValue;

       // if cannot extract inequality type from watchpoint
@@ -164,17 +166,6 @@ class DebugServices {
              condition.type == SD_LT || condition.type == MAX_MIN_LT;
     }

-    bool min_max_enabled() const {
-      return condition.type == MAX_LT || condition.type == MAX_GT || condition.type == MIN_LT ||
-             condition.type == MIN_GT || condition.type == MAX_MIN_LT || condition.type == MAX_MIN_GT ||
-             (condition.type == INIT && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
-             (condition.type == TOO_LARGE && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
-             (condition.type == TOO_SMALL && (!parameter_list[1].disabled || !parameter_list[2].disabled));
-    }
-    // inf or nan related condition set
-    bool inf_nan_enabled() const {
-      return condition.type == HAS_INF || condition.type == HAS_NAN || condition.type == GENERAL_OVERFLOW;
-    }
     // mean or sd related condition set
     bool mean_sd_enabled() const {
       return condition.type == MEAN_LT || condition.type == MEAN_GT || condition.type == SD_LT ||
@@ -185,7 +176,6 @@ class DebugServices {
       return (condition.type == TOO_LARGE && !parameter_list[0].disabled) ||
              (condition.type == TOO_SMALL && !parameter_list[0].disabled);
     }
-    bool zero_percentage_enabled() const { return condition.type == ALL_ZERO || condition.type == INIT; }

     bool tensor_update_ratio_mean_enabled() const {
       return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL;
@@ -372,16 +362,11 @@ class DebugServices {
   bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
 #endif

-  void EmptyTensor();
-
   std::vector<std::shared_ptr<TensorData>> GetTensor() const;

   void AddAnalyzedTensorToCache(const bool recheck, const unsigned int id, const std::string &tensor_name);

-  uint32_t GetTensorLoaderIterNum() const;
-
-  void SetTensorLoaderIterNum(uint32_t iter_num);
-
   void EmptyCurrentTensor();

 #ifdef ONLINE_DBG_MODE
@@ -392,8 +377,6 @@ class DebugServices {

   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);

-  std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
-
   void ResetLoadedTensors();
 #ifdef ONLINE_DBG_MODE
   std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index 1c2cd228364..df7109f44f5 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -291,6 +291,7 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
   }
 }
 void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   // access lock for public method
   std::lock_guard<std::mutex> a_lock(access_lock_);
   CheckDatasetSinkMode();
@@ -379,7 +380,7 @@ uint32_t Debugger::GetRankID() {
 }
 void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
   uint32_t rank_id = GetRankID();
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     MS_EXCEPTION_IF_NULL(kernel_graph);
     (void)E2eDump::DumpParametersAndConstData(kernel_graph.get(), rank_id, debugger_.get());
   } else {
@@ -388,7 +389,7 @@ void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
 }

 void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id) {
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     uint32_t rank_id = GetRankID();
     (void)E2eDump::DumpSingleNodeData(node, graph_id, rank_id, debugger_.get());
   }
@@ -429,8 +430,10 @@ void Debugger::PostExecuteGraphDebugger() {
     return;
   }
   // LoadParametersAndConst for all the graphs
-  for (auto graph : graph_ptr_list_) {
-    debugger_->LoadParametersAndConst(graph);
+  if (debugger_) {
+    for (auto graph : graph_ptr_list_) {
+      debugger_->LoadParametersAndConst(graph);
+    }
   }
   // debug used for dump
   if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) {
@@ -453,7 +456,7 @@ void Debugger::PostExecute() {
   if (pipeline::GraphExecutorPy::GetDebugTerminate()) {
     return;
   }
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     // analyze tensor data and send the watchpoints been hit
     if (debugger_enabled_ && !is_dataset_graph_) {
       if (device_target_ != kGPUDevice) {
@@ -516,17 +519,8 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
   }
 }

-void Debugger::PostDebugOp() {
-  // access lock for public method
-  std::lock_guard<std::mutex> a_lock(access_lock_);
-  // suspend if debugger is enabled
-  if (debugger_enabled_ && !is_dataset_graph_) {
-    MS_LOG(INFO) << "Debugger suspend at debug_op";
-    CommandLoop();
-  }
-}
-
 void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   if (graph_ptr_ != graph_ptr) {
     MS_LOG(INFO) << "LoadGraphs Debugger got new graph: " << graph_ptr->graph_id();
     // save new graph_ptr
@@ -547,6 +541,7 @@ void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {

 // In single graph cases, check single graph ptr
 void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   if (graph_ptr_ != graph_ptr) {
     MS_LOG(INFO) << "CheckGraphPtr Debugger got new graph: " << graph_ptr->graph_id();
     // save new graph_ptr
@@ -566,6 +561,7 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {

 void Debugger::CheckDatasetGraph() {
   // print parameter node names
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   const auto &params = graph_ptr_->inputs();
   for (const auto &param : params) {
     MS_LOG(INFO) << "param: " << GetKernelNodeName(param);
@@ -602,6 +598,7 @@ void Debugger::SendHeartbeat(int32_t period) {

   SetEnableHeartbeat(CheckDebuggerEnabled());
   while (enable_heartbeat_) {
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendHeartbeat(heartbeat);

     if (reply.status() != reply.OK) {
@@ -624,6 +621,7 @@ void Debugger::SendHeartbeat(int32_t period) {
 void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
   if (SendMetadata(true)) {
     // send graph to MindInsight server
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendGraph(graph_proto);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: SendGraph failed";
@@ -635,6 +633,7 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {

 bool Debugger::SendMetadata(bool version_check) {
   // prepare metadata
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
   Metadata metadata;
   metadata.set_device_name(device_name);
@@ -647,6 +646,7 @@ bool Debugger::SendMetadata(bool version_check) {
   // set graph number to not_dataset_graph_sum_
   metadata.set_graph_num(not_dataset_graph_sum_);

+  MS_EXCEPTION_IF_NULL(grpc_client_);
   EventReply reply_metadata = grpc_client_->SendMetadata(metadata);

   bool ret = false;
@@ -681,6 +681,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot
   if (!SendMetadata(true)) {
     return;
   }
+  MS_EXCEPTION_IF_NULL(grpc_client_);
   // send multiple graphs to MindInsight server
   // split graph into chunks if one graph is larger than chunk size
   std::list<Chunk> chunked_graph_proto_list;
@@ -716,6 +717,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot

 void Debugger::CommandLoop() {
   // prepare metadata
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());

   Metadata metadata;
@@ -732,6 +734,7 @@ void Debugger::CommandLoop() {

   while (!run) {
     // wait for command
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->WaitForCommand(metadata);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: WaitForCommand failed";
@@ -885,6 +888,7 @@ void Debugger::ViewValueLevel(const EventReply &reply) {
     }
     MS_LOG(INFO) << "tensor dtype: " << tensor.data_type();
   }
+  MS_EXCEPTION_IF_NULL(grpc_client_);
   EventReply send_tensors_reply = grpc_client_->SendTensors(tensors);
   if (send_tensors_reply.status() != debugger::EventReply::OK) {
     MS_LOG(ERROR) << "Error: SendTensors failed";
@@ -1127,6 +1131,7 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
 void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) {
   // send info about watchpoint
   if (!points.empty()) {
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendWatchpointHits(points);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: SendWatchpointHits failed";
@@ -1141,16 +1146,6 @@ bool Debugger::DumpTensorToFile(const std::string &tensor_name, bool trans_flag,
                                 device_type, addr_format, slot);
 }

-bool Debugger::DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const {
-  return debug_services_.get()->IsWatchPoint(kernel_name, kernel);
-}
-
-void Debugger::EmptyTensor() { debug_services_.get()->EmptyTensor(); }
-
-void Debugger::SetTensorLoaderIterNum(uint32_t iter_num) { debug_services_.get()->SetTensorLoaderIterNum(iter_num); }
-
-uint32_t Debugger::GetTensorLoaderIterNum() const { return debug_services_.get()->GetTensorLoaderIterNum(); }
-
 bool Debugger::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
   return debug_services_.get()->LoadNewTensor(tensor, keep_prev);
 }
@@ -1273,14 +1268,6 @@ void Debugger::SetCurNode(const std::string &cur_name) {

 std::string Debugger::run_level() const { return run_level_; }

-void Debugger::SetStepNum(int32_t cur_num_step) {
-  // access lock for public method
-  std::lock_guard<std::mutex> a_lock(access_lock_);
-  num_step_ = cur_num_step;
-}
-
-int32_t Debugger::step_num() const { return num_step_; }
-
 void Debugger::SetTrainingDone(bool training_done) { training_done_ = training_done; }

 bool Debugger::CheckPort(const std::string &port) const {
@@ -1377,6 +1364,7 @@ void Debugger::LoadParametersAndConst() {
 void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) {
   if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   // load parameters
   MS_LOG(INFO) << "Start to load Parameters for graph " << graph->graph_id() << ".";
   const auto &parameters = graph_ptr_->inputs();
@@ -1432,6 +1420,8 @@ void Debugger::LoadGraphOutputs() {
 }

 void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(debugger_);
   // update step number if we are processing the first graph (to support multigraph)
   if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()) &&
      (graph->graph_id() == debugger_->GetFirstRunGraphId())) {
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index c4bec014895..7e5bf3efe71 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -102,21 +102,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);

-  // suspend the execution after a debug_op
-  void PostDebugOp();
-
   bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
                         const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
                         TypeId device_type, const std::string &addr_format, size_t slot) const;

-  bool DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const;
-
-  void EmptyTensor();
-
-  void SetTensorLoaderIterNum(uint32_t iter_num);
-
-  uint32_t GetTensorLoaderIterNum() const;
-
   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);

   bool debugger_enabled() const;
@@ -129,10 +118,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   std::string run_level() const;

-  void SetStepNum(int32_t cur_num_step);
-
-  int32_t step_num() const;
-
   // check if any feature that uses the debugger backend is enabled
   bool DebuggerBackendEnabled() const;
@@ -291,8 +276,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
 using DebuggerPtr = std::shared_ptr<Debugger>;
 // get debugger ModelProto
-std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph);
-
 ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph);

 // for getting proto DataType from Type of Tensor
diff --git a/mindspore/ccsrc/debug/debugger/proto_exporter.cc b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
index ca90c4cdeb5..a708cdac13c 100644
--- a/mindspore/ccsrc/debug/debugger/proto_exporter.cc
+++ b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
@@ -506,11 +506,6 @@ void DebuggerProtoExporter::ExportValueNodes(const std::map

 void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(debugger::IR_VERSION); }

-std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph) {
-  DebuggerProtoExporter exporter;
-  return exporter.GetFuncGraphProtoString(func_graph);
-}
-
 debugger::ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph) {
   DebuggerProtoExporter exporter;
   return exporter.GetFuncGraphProto(func_graph);
diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h
index 4b825ae471d..99d914a934c 100644
--- a/mindspore/ccsrc/debug/tensor_load.h
+++ b/mindspore/ccsrc/debug/tensor_load.h
@@ -120,10 +120,6 @@ class TensorLoader {
     return nullptr;
   }

-  uint32_t GetIterNum() const { return iter_num_; }
-
-  std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map_; }
-
   std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
     if (tensor_list_map_.find(tensor_name + ":prev") != tensor_list_map_.end()) {
       return tensor_list_map_[tensor_name + ":prev"];
@@ -152,8 +148,6 @@ class TensorLoader {

   void EmptyCurrentTensor() { tensor_list_map_.clear(); }

-  void set_iter_num(uint32_t iter_num) { this->iter_num_ = iter_num; }
-
   bool EnableMemoryControl() { return mem_total_ > 0; }

   void AppendToCacheEvictQueue(const std::string &tensor_name) {
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index 2a80eca2fb5..7d17fcfb22c 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -793,6 +793,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
     gpu_kernel->PostExecute();
   }
 #ifdef ENABLE_DEBUGGER
+  MS_EXCEPTION_IF_NULL(debugger_);
   // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
   LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
                  dump_enabled, kernel == last_kernel);
@@ -803,6 +804,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
   if (!UpdateMemorySwapTask(kernel, mock, profiling)) {
 #ifdef ENABLE_DEBUGGER
     if (!mock) {
+      MS_EXCEPTION_IF_NULL(debugger_);
      // invalidate current data collected by the debugger
      debugger_->ClearCurrentData();
    }
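
Note on the pattern: the hunks above apply two safety idioms throughout the debugger code path: (1) validate every nullable pointer (debugger_, graph_ptr_, grpc_client_) with MS_EXCEPTION_IF_NULL before its first dereference, and (2) read environment variables through the std::string-returning common::GetEnv instead of raw std::getenv, so no unchecked char* escapes into callers. The sketch below is a minimal, self-contained illustration of both idioms; CHECK_NOT_NULL and GetEnv here are simplified stand-ins written for this note, not MindSpore's actual MS_EXCEPTION_IF_NULL macro or common::GetEnv implementation.

// illustration only: simplified stand-ins for the patterns in this patch
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

// Stand-in for MS_EXCEPTION_IF_NULL: fail loudly at the check site instead of
// crashing later on a null dereference.
#define CHECK_NOT_NULL(ptr)                                           \
  do {                                                                \
    if ((ptr) == nullptr) {                                           \
      throw std::runtime_error(std::string("null pointer: ") + #ptr); \
    }                                                                 \
  } while (false)

// Stand-in for common::GetEnv: wrap std::getenv so callers receive a
// std::string and test .empty() instead of comparing a raw char* to nullptr.
std::string GetEnv(const std::string &name) {
  const char *value = std::getenv(name.c_str());
  return value == nullptr ? std::string() : std::string(value);
}

struct Debugger {
  void LoadGraphs() { std::cout << "loading graphs\n"; }
};

int main() {
  // Idiom 1: check before the first dereference, as each hunk does for
  // debugger_, graph_ptr_, and grpc_client_.
  std::shared_ptr<Debugger> debugger = std::make_shared<Debugger>();
  CHECK_NOT_NULL(debugger);
  debugger->LoadGraphs();

  // Idiom 2: string-based env lookup; an empty result means "not set",
  // mirroring the common.cc hunk.
  std::string config_path = GetEnv("MINDSPORE_DUMP_CONFIG");
  if (config_path.empty()) {
    std::cout << "Please export env: MINDSPORE_DUMP_CONFIG\n";
  }
  return 0;
}

With the string-returning wrapper, the "not set" case is an empty string rather than a null pointer, which is why the common.cc hunk can replace the nullptr comparison with .empty() without changing behavior.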