diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index b680b6b782b..02673880397 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -1003,18 +1003,9 @@ void AscendSession::DumpAllGraphs(const std::vector &all_graphs)
 void AscendSession::LoadTensor(const std::shared_ptr &kernel_graph) const {
   MS_LOG(INFO) << "Start!";
   MS_EXCEPTION_IF_NULL(kernel_graph);
-#ifdef ENABLE_DEBUGGER
-  if (debugger_->DebuggerBackendEnabled()) {
-    auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
-    MS_EXCEPTION_IF_NULL(runtime_instance);
-    // TensorData will be freed up here
-    debugger_->EmptyTensor();
-    uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
-    debugger_->SetTensorLoaderIterNum(++iter_num);
-    (void)runtime_instance->LoadData(kernel_graph.get());
-    debugger_->EmptyPrevTensor();
-  }
-#endif
+  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
+  MS_EXCEPTION_IF_NULL(runtime_instance);
+  (void)runtime_instance->LoadData(kernel_graph.get());
   MS_LOG(INFO) << "Finish!";
 }
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index 994c5c5f6d5..8e20e858376 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -360,7 +360,9 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector
-  PreIterationDbg(kernel_graph);
+  if (debugger_) {
+    debugger_->PreExecute(kernel_graph, graph_sum_);
+  }
 #if ENABLE_CPU && ENABLE_GPU
   // Initialize parameter server
   InitPSParamAndOptim(kernel_graph, inputs);
@@ -372,7 +374,6 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector
 DumpDataEnabledIteration();
 }

-void GPUSession::PreIterationDbg(const std::shared_ptr &kernel_graph) const {
-  if (debugger_) {
-    debugger_->PreExecute(kernel_graph, graph_sum_);
-  }
-  PreLoadTensor(kernel_graph);
-}
-
 void GPUSession::PostIterationDbg(const std::shared_ptr &kernel_graph) const {
   bool dump_enabled = DumpDataEnabledIteration();
   // debug used for dump
@@ -463,30 +457,6 @@ void GPUSession::PostIterationDbg(const std::shared_ptr &kernel_gra
   }
 }

-void GPUSession::PreLoadTensor(const std::shared_ptr &kernel_graph) const {
-  bool dump_enabled = DumpDataEnabledIteration();
-  if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
-    return;
-  }
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
-  MS_EXCEPTION_IF_NULL(runtime_instance);
-  debugger_->EmptyTensor();
-  uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
-  debugger_->SetTensorLoaderIterNum(++iter_num);
-}
-
-void GPUSession::PostLoadTensor(const std::shared_ptr &kernel_graph) const {
-  bool dump_enabled = DumpDataEnabledIteration();
-  if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
-    return;
-  }
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
-  MS_EXCEPTION_IF_NULL(runtime_instance);
-  debugger_->EmptyPrevTensor();
-}
-
 void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr &kernel_graph) const {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
diff --git a/mindspore/ccsrc/backend/session/gpu_session.h b/mindspore/ccsrc/backend/session/gpu_session.h
index 024a9b4fb7d..3ff72d58523 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.h
+++ b/mindspore/ccsrc/backend/session/gpu_session.h
@@ -75,14 +75,8 @@ class GPUSession : public SessionBasic {
   bool DumpDataEnabledIteration() const;

-  void PreIterationDbg(const std::shared_ptr &kernel_graph) const;
-
   void PostIterationDbg(const std::shared_ptr &kernel_graph) const;

-  void PreLoadTensor(const std::shared_ptr &kernel_graph) const;
-
-  void PostLoadTensor(const std::shared_ptr &kernel_graph) const;
-
   void SyncValueNodeDeviceAddr(const std::shared_ptr &kernel_graph) const;

   void CleanValueNodeDeviceAddr(const std::shared_ptr &kernel_graph) const;
diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc
index 530afe20a13..2505b48552c 100644
--- a/mindspore/ccsrc/debug/debug_services.cc
+++ b/mindspore/ccsrc/debug/debug_services.cc
@@ -66,7 +66,7 @@ void DebugServices::CheckWatchpoints(std::vector *name, std::vector
                                      std::vector> *parameters, std::vector *error_codes,
                                      const std::vector &op_overflows,
                                      const std::vector> &tensor_list,
-                                     const bool init_dbg_suspend) {
+                                     const bool init_dbg_suspend, const bool step_end, const bool recheck) {
   std::lock_guard lg(lock_);
   if (watchpoint_table.empty()) return;
@@ -75,13 +75,26 @@ void DebugServices::CheckWatchpoints(std::vector *name, std::vector
     const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':'));
     const auto tensor_slot = std::to_string(tensor->GetSlot());
     mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
+    // no elements to analyze
+    if (tensor_ptr->DataSize() == 0) continue;
     int tensor_dtype = tensor_ptr->data_type_c();
     std::vector watchpoints_to_check;
     std::string qualified_tensor_name;
     for (auto w_table_item : watchpoint_table) {
       auto wp = std::get<1>(w_table_item);
-      if (wp.condition.type == INIT && !init_dbg_suspend) continue;
+      // check ONLY init conditions on initial suspended state.
+      // skip other conditions on initial suspended state
+      // skip init condition on all the other states
+      if ((wp.condition.type == INIT) ^ init_dbg_suspend) continue;
+      if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue;
+
+      // check change conditions only on step end.
+      if (wp.change_condition() && !step_end) continue;
+
+      // if recheck, ignore the cache results and reanalyze everything.
+      // if not a recheck, check only unanalyzed tensors
+      if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
       std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
       if (!found.empty()) {
         qualified_tensor_name = found;
@@ -174,6 +187,10 @@ void DebugServices::CheckWatchpoints(std::vector *name, std::vector
         error_code = std::get<1>(item);
         parameter_list = std::get<2>(item);
       }
+      // add analyzed tensor to cache
+      if (!recheck) {
+        wp_id_cache[tensor_name].insert(wp.id);
+      }

       if (is_hit || error_code) {
         name->push_back(qualified_tensor_name);
@@ -238,28 +255,6 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
   }
 }

-void DebugServices::AddWeightsBiasInputs(std::vector> *tensor_list,
-                                         const CNodePtr &kernel) {
-  if (kernel) {
-    auto input_size = AnfAlgo::GetInputTensorNum(kernel);
-    for (size_t j = 0; j < input_size; ++j) {
-      auto input_kernel = kernel->input(j + 1);
-      std::string input_kernel_name = input_kernel->fullname_with_scope();
-      auto found_dot = input_kernel_name.find_last_of('.');
-      if (found_dot != std::string::npos &&
-          (input_kernel_name.substr(found_dot + 1) == "weight" || input_kernel_name.substr(found_dot + 1) == "bias")) {
-        std::string locate_tensor = input_kernel_name + ":0";
-        std::map> tensor_map = tensor_loader_->GetTensorMap();
-        std::map>::iterator iter;
-        iter = tensor_map.find(locate_tensor);
-        if (iter != tensor_map.end()) {
-          tensor_list->push_back(iter->second);
-        }
-      }
-    }
-  }
-}
-
 void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }

 std::vector> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }
@@ -292,4 +287,32 @@ std::unordered_map DebugServices::Get
   return watchpoint_table;
 }

+void DebugServices::ResetLoadedTensors() {
+  wp_id_cache.clear();
+  MS_LOG(INFO) << "Resetting loaded tensors";
+  tensor_loader_->MoveParametersCurrentToPrev();
+  tensor_loader_->EmptyCurrentTensor();
+  // will move parameters from previous to current map
+  tensor_loader_->SwapCurrentPrev();
+}
+
+std::vector> DebugServices::GetNodeTensor(const CNodePtr &kernel) {
+  MS_EXCEPTION_IF_NULL(kernel);
+  std::vector> result;
+  auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
+  auto kernel_name = kernel->fullname_with_scope();
+  for (size_t j = 0; j < output_size; ++j) {
+    auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
+    auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
+    if (tensor) result.push_back(tensor);
+  }
+  return result;
+}
+bool DebugServices::TensorExistsInCurrent(std::string tensor_name) {
+  return tensor_loader_->TensorExistsInCurrent(tensor_name);
+}
+void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) {
+  tensor_loader_->MoveTensorCurrentToPrev(tensor_name);
+}
+
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h
index 67bb38a5df7..890a3569685 100644
--- a/mindspore/ccsrc/debug/debug_services.h
+++ b/mindspore/ccsrc/debug/debug_services.h
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -160,6 +161,10 @@ class DebugServices {
     bool range_enabled() const {
       return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled);
     }
+
+    bool change_condition() const {
+      return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL || condition.type == NOT_CHANGED;
+    }
   } watchpoint_t;

   void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
@@ -171,7 +176,8 @@ class DebugServices {
   void CheckWatchpoints(std::vector *name, std::vector *slot, std::vector *condition,
                         std::vector *watchpoint_id, std::vector> *parameters,
                         std::vector *error_code, const std::vector &op_overflows,
-                        const std::vector> &tensor_list, bool init_dbg_suspend);
+                        const std::vector> &tensor_list, bool init_dbg_suspend,
+                        const bool step_end, const bool recheck);

   void ReadNodesTensors(std::vector name, std::vector *ret_name, std::vector *data_ptr,
                         std::vector *data_size,
@@ -181,8 +187,6 @@ class DebugServices {

   bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;

-  void AddWeightsBiasInputs(std::vector> *tensor_list, const CNodePtr &kernel);
-
   void EmptyTensor();

   std::vector> GetTensor() const;
@@ -205,9 +209,19 @@ class DebugServices {

   std::unordered_map GetWatchpointTable();

+  void ResetLoadedTensors();
+
+  std::vector> GetNodeTensor(const CNodePtr &kernel);
+
+  bool TensorExistsInCurrent(std::string tensor_name);
+
+  void MoveTensorCurrentToPrev(std::string tensor_name);
+
 private:
   std::mutex lock_;

+  // to keep track of watchpoints that have been checked already for a tensor in current step
+  std::unordered_map> wp_id_cache;
   std::unordered_map watchpoint_table;

   TensorLoader *tensor_loader_;
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index af2c9064d37..e23fce7ebe6 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -313,20 +313,16 @@ void Debugger::PostExecute() {
   }
   if (debugger_->DebuggerBackendEnabled()) {
     // analyze tensor data and send the watchpoints been hit
-    if (run_level_ == "node") {
-      MS_LOG(INFO) << "Debugger is in node level mode ";
-      return;
-    }
     if (debugger_enabled_ && !is_dataset_graph_) {
       if (device_target_ != kGPUDevice) {
         num_step_++;
-        MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
-        SendWatchpoints(CheckWatchpoints());
-        CommandLoop();
-      } else {
-        CommandLoop();
       }
+      MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
+      SendWatchpoints(CheckWatchpoints());
+      CommandLoop();
     }
+    // Only keep parameters in the current map
+    debug_services_->ResetLoadedTensors();
   }
 }
@@ -596,7 +592,7 @@ void Debugger::CommandLoop() {
         MS_LOG(INFO) << "RunCMD";
         if (GetRunLevel(reply) == "recheck") {
           MS_LOG(INFO) << "rechecking all watchpoints";
-          SendWatchpoints(CheckWatchpoints());
+          SendWatchpoints(CheckWatchpoints("", nullptr, true));
         } else {
          // no longer the initial suspension.
          initial_suspend_ = false;
@@ -705,9 +701,6 @@ void Debugger::SetWatchpoint(const ProtoVector &nodes, const WatchCon
     return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()};
   });
   debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list);
-  if (initial_suspend_ &&
-      static_cast(condition.condition()) == DebugServices::CONDITION_TYPE::INIT)
-    SendWatchpoints(CheckWatchpoints());
 }

 void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }
@@ -780,7 +773,8 @@ void Debugger::Exit() {
   }
 }

-std::list Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) {
+std::list Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,
+                                     bool recheck) {
   std::vector name;
   std::vector slot;
   std::vector condition;
@@ -795,11 +789,10 @@ std::list Debugger::CheckWatchpoints(const std::string &watchnode
   if (watchnode.empty()) {
     tensor_list = debug_services_->GetTensor();
   } else {
-    tensor_list = debug_services_->GetNodeTensorMap(watchnode);
-    debug_services_->AddWeightsBiasInputs(&tensor_list, kernel);
+    tensor_list = debug_services_->GetNodeTensor(kernel);
   }
   debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, &parameters, &error_codes, overflow_ops,
-                                    tensor_list, initial_suspend_);
+                                    tensor_list, initial_suspend_, watchnode.empty(), recheck);
   std::list hits;
   for (unsigned int i = 0; i < name.size(); i++) {
     WatchpointHit hit;
@@ -1045,7 +1038,7 @@ std::vector Debugger::CheckOpOverflow() {
   }
   closedir(d);

-  if (op_names.size()) {
+  if (!op_names.empty()) {
     MS_LOG(ERROR) << "These operation overflows are detected " << op_names;
   }
@@ -1091,12 +1084,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
   if (!anf_node->isa() && !anf_node->isa()) {
     return;
   }
-  bool keep_prev;
-  if (anf_node->isa()) {
-    keep_prev = true;
-  } else {
-    keep_prev = false;
-  }
   // for parameters and value nodes, set its execution order to be 0;
   int exec_order = 0;
   std::string node_name = anf_node->fullname_with_scope();
@@ -1114,6 +1101,13 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
     auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
     (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                          [](size_t inner_item) { return SizeToInt(inner_item); });
+    bool keep_prev;
+    if (anf_node->isa()) {
+      keep_prev = true;
+      debug_services_->MoveTensorCurrentToPrev(tensor_name);
+    } else {
+      keep_prev = false;
+    }
     bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
@@ -1123,9 +1117,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
 void Debugger::LoadParametersAndConst() {
   if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
-  if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
-        (device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
-    return;
   MS_EXCEPTION_IF_NULL(graph_ptr_);
   // load parameters
   MS_LOG(INFO) << "Start to load Parameters!";
@@ -1199,5 +1190,8 @@ void Debugger::ClearCurrentData() {
   if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
     debug_services_->EmptyCurrentTensor();
 }
+bool Debugger::TensorExistsInCurrent(std::string tensor_name) {
+  return debug_services_->TensorExistsInCurrent(tensor_name);
+}
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index 5e79d18b8e1..ee9196e5df0 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -145,6 +145,8 @@ class Debugger : public std::enable_shared_from_this {
   std::list GetGraphPtrList() { return graph_ptr_list_; }

+  bool TensorExistsInCurrent(std::string tensor_name);
+
 private:
   // private constructor for singleton
   Debugger();
@@ -197,7 +199,7 @@ class Debugger : public std::enable_shared_from_this {
   // analyze tensors and check watchpoint conditions
   // return names of tensors and what condition they hit
   std::list CheckWatchpoints(const std::string &watchnode = std::string(),
-                             const CNodePtr &kernel = NULL);
+                             const CNodePtr &kernel = nullptr, bool recheck = false);

   // send watchpoints that hit
   void SendWatchpoints(const std::list &points);
diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h
index 16a58976eb6..af69519b80b 100644
--- a/mindspore/ccsrc/debug/tensor_load.h
+++ b/mindspore/ccsrc/debug/tensor_load.h
@@ -33,6 +33,44 @@ class TensorLoader {

   ~TensorLoader() { EmptyTensor(); }

+  void MoveTensorCurrentToPrev(std::string tensor_name) {
+    auto handle = tensor_list_map.extract(tensor_name);
+    if (!handle.empty()) {
+      MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
+      prev_tensor_list_map.insert(std::move(handle));
+    }
+  }
+
+  void SwapCurrentPrev() { tensor_list_map.swap(prev_tensor_list_map); }
+
+  bool TensorExistsInCurrent(std::string tensor_name) {
+    return tensor_list_map.find(tensor_name) != tensor_list_map.end();
+  }
+
+  // only parameters will return true
+  bool PrevTensorExistsInCurrent(std::string tensor_name) { return TensorExistsInCurrent(tensor_name + ":prev"); }
+
+  void MoveParametersCurrentToPrev() {
+    MS_LOG(INFO) << "Moving parameters from current map to previous map";
+    auto iter = tensor_list_map.begin();
+    while (iter != tensor_list_map.end()) {
+      auto key = iter->first;
+      if (PrevTensorExistsInCurrent(key)) {
+        // :prev tensor only exists for parameter. Move it to prev
+        ++iter;
+        MoveTensorCurrentToPrev(key);
+      } else {
+        ++iter;
+      }
+    }
+  }
+
+  bool IsPrevTensor(std::string tensor_name) {
+    const std::string suffix = ":prev";
+    if (tensor_name.length() <= suffix.length()) return false;
+    return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
+  }
+
   bool LoadNewTensor(std::shared_ptr tensor, bool keep_prev) {
     std::lock_guard lg(lock_);
     if (keep_prev) {
@@ -43,20 +81,32 @@ class TensorLoader {
         tensor_list_map.insert(std::move(handle));
       }
     }
-    tensor_list.push_back(tensor);
     tensor_list_map[tensor->GetName()] = tensor;  // use [] instead of insert to ensure latest value
     auto node_name = tensor->GetName();
     node_name = node_name.substr(0, node_name.find_first_of(":"));
     node_tensor_map.insert({node_name, tensor});
     return true;
   }
-  std::vector> GetTensor() { return tensor_list; }
+
+  std::vector> GetTensor() {
+    std::vector> tensor_list;
+    for (auto &it : tensor_list_map) {
+      if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second);
+    }
+    return tensor_list;
+  }
+
+  std::shared_ptr GetTensor(const std::string &tensor_name) {
+    auto iter = tensor_list_map.find(tensor_name);
+    if (iter != tensor_list_map.end()) return iter->second;
+    return nullptr;
+  }

   uint32_t GetIterNum() { return iter_num; }

   std::map> GetTensorMap() { return tensor_list_map; }

-  std::shared_ptr GetPrevTensor(std::string tensor_name) {
+  std::shared_ptr GetPrevTensor(const std::string &tensor_name) {
     if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) {
       return tensor_list_map[tensor_name + ":prev"];
     }
@@ -91,14 +141,13 @@ class TensorLoader {
     prev_tensor_list_map.clear();
     node_tensor_map.clear();
     tensor_list_map.swap(prev_tensor_list_map);
-    tensor_list.clear();
   }

   void EmptyPrevTensor() { prev_tensor_list_map.clear(); }

   void EmptyCurrentTensor() {
     tensor_list_map.clear();
-    tensor_list.clear();
+    node_tensor_map.clear();
   }

   void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }
@@ -142,7 +191,6 @@ class TensorLoader {
   }

 private:
-  std::vector> tensor_list;
   std::map> tensor_list_map;
   std::multimap> node_tensor_map;
   std::map> prev_tensor_list_map;
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
index 2525d78aabe..336fa3395e6 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
@@ -674,6 +674,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
                                         const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
                                         size_t slot, bool keep_prev) const {
   bool ret = false;
+  if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
+    MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
+    return true;
+  }
   // TensorData is freed up in AscendSession class
   auto tensor_data = std::make_shared();
   tensor_data->SetName(tensor_name);
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
index c1a80a78e7f..b70b7dbb41d 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
@@ -296,8 +296,6 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
 #ifdef ENABLE_DEBUGGER
   MS_LOG(INFO) << "Start load step";
-  uint32_t cur_iter = 0;
-  MS_LOG(INFO) << "Cur iter is " << cur_iter;
   for (auto graph_ptr : debugger_->GetGraphPtrList()) {
     debugger_->SetGraphPtr(graph_ptr);
     // load output
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
index 75bb1d5262f..af2570598bd 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
@@ -87,6 +87,11 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
     return true;
   }

+  if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
+    MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
+    return true;
+  }
+
   mindspore::tensor::TensorPtr out_tensor = std::make_shared(type_id_, host_shape);
   size_t host_size = out_tensor->data().nbytes();
   auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index 9ee99d06485..cfb20a39379 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -154,8 +154,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
     std::vector real_outputs;
     real_outputs = CheckRealOutput(node_name, output_size);

-    for (std::vector::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) {
-      auto j = *it;
+    for (int j : real_outputs) {
       auto addr = kernel_outputs[j];
       auto type = AnfAlgo::GetOutputInferDataType(kernel, j);
       auto format = kOpFormat_DEFAULT;