!10021 Fixed double stutter issue at the end of step for debugger
From: @adelshafiei
Reviewed-by: @john_tzanakakis, @wangyue01
Signed-off-by: @wangyue01
commit 59d2affa97
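What the change does: at run level "node", stepping to the final node of a graph used to suspend the debugger twice, once in PostExecuteNode for the node itself and once more in PostExecute at the end of the graph, which showed up as a double stutter. The GPU launch loop now computes the last real kernel up front and threads a last_kernel flag through LoadKernelData into PostExecuteNode, which skips its per-node suspension for that kernel. Below is a minimal standalone sketch of that control flow; all names are stand-ins for the MindSpore debugger hooks, not the actual implementation.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

namespace {
// Stand-in for Debugger::CommandLoop(): record each suspension point.
void CommandLoop(const std::string &where) { std::cout << "suspended after " << where << '\n'; }

// Stand-in for Debugger::PostExecuteNode(kernel, last_kernel): suspend after
// every stepped node except the last one, since PostExecute() already
// suspends at the end of the graph and stopping here too would pause twice.
void PostExecuteNode(const std::string &kernel, bool last_kernel) {
  if (!last_kernel) {
    CommandLoop(kernel);
  }
}

// Stand-in for Debugger::PostExecute(): the end-of-graph suspension.
void PostExecute() { CommandLoop("end of graph"); }
}  // namespace

int main() {
  const std::vector<std::string> kernels = {"Conv2D", "BiasAdd", "ReLU"};
  for (std::size_t i = 0; i < kernels.size(); ++i) {
    PostExecuteNode(kernels[i], /*last_kernel=*/i + 1 == kernels.size());
  }
  PostExecute();  // exactly one stop for the final kernel, not two
  return 0;
}

Running the sketch prints one suspension each for Conv2D and BiasAdd and a single end-of-graph suspension for ReLU, where the unpatched flow would have stopped twice on ReLU.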
@@ -361,7 +361,7 @@ bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) {
   return false;
 }
 
-void Debugger::PostExecuteNode(const CNodePtr &kernel) {
+void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
   // access lock for public method
   std::lock_guard<std::mutex> a_lock(access_lock_);
   if (pipeline::ExecutorPy::GetDebugTerminate()) {
@@ -380,8 +380,9 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel) {
       hit_empty_flag = false;
     }
   }
-  if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
+  if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_) && !last_kernel) {
     // if kernel is not watchpoint and is next_to or continue_to node, suspend
+    // No need to suspend if this is the last node in graph since PostExecute suspends at the end of graph
     CommandLoop();
   }
   return;
@@ -83,7 +83,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
 
   bool ReadNodeDataRequired(const CNodePtr &kernel);
 
-  void PostExecuteNode(const CNodePtr &kernel);
+  void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);
 
   // suspend the execution after a debug_op
   void PostDebugOp();
@@ -104,7 +104,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr,
-                    bool dump_enabled) {
+                    bool dump_enabled, bool last_kernel) {
   // check if we should read the kernel data
   bool read_data = false;
   auto &dump_json_parser = DumpJsonParser::GetInstance();
@@ -179,7 +179,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
       }
     }
  }
-  debugger->PostExecuteNode(kernel);
+  debugger->PostExecuteNode(kernel, last_kernel);
 }
 }  // namespace
 
@@ -586,6 +586,19 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *graph) {
   }
 }
 
+CNodePtr GetLastKernel(const session::KernelGraph *graph) {
+  const auto &kernels = graph->execution_order();
+  CNodePtr last_kernel;
+  for (const auto &kernel : kernels) {
+    if (AnfAlgo::IsInplaceNode(kernel, "skip")) {
+      continue;
+    } else {
+      last_kernel = kernel;
+    }
+  }
+  return last_kernel;
+}
+
 bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(mem_reuse_util_);
@@ -610,7 +623,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
     profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
     profiler_inst->SetStepTraceOpName(profiling_trace);
   }
+  CNodePtr last_kernel = GetLastKernel(graph);
   for (const auto &kernel : kernels) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
     MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -666,7 +679,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
 
       // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
       LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
-                     dump_enabled);
+                     dump_enabled, kernel == last_kernel);
     }
     exec_order = exec_order + 1;
     FreeKernelDynamicRes(kernel);
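One design point worth noting: GetLastKernel cannot simply take the back of the execution order, because its skip branch suggests that inplace nodes marked "skip" can trail the order without being launched, so the scan keeps overwriting last_kernel with the most recent non-skip kernel. A toy sketch of that scan with assumed stand-in types (Kernel, KernelPtr) rather than the real CNodePtr and AnfAlgo:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Kernel {
  std::string name;
  bool skip;  // stand-in for the AnfAlgo::IsInplaceNode(kernel, "skip") test
};
using KernelPtr = std::shared_ptr<Kernel>;

KernelPtr GetLastKernel(const std::vector<KernelPtr> &kernels) {
  KernelPtr last_kernel;
  for (const auto &kernel : kernels) {
    if (!kernel->skip) {
      last_kernel = kernel;  // ends up holding the final non-skip kernel
    }
  }
  return last_kernel;
}

int main() {
  const std::vector<KernelPtr> order = {
      std::make_shared<Kernel>(Kernel{"MatMul", false}),
      std::make_shared<Kernel>(Kernel{"ReLU", false}),
      std::make_shared<Kernel>(Kernel{"InplaceSkip", true}),  // trails the order, never stepped
  };
  const KernelPtr last = GetLastKernel(order);
  for (const auto &kernel : order) {
    // shared_ptr operator== compares the managed pointers, so this flags
    // exactly the node returned above (here: ReLU, not InplaceSkip)
    std::cout << kernel->name << " last_kernel=" << (kernel == last) << '\n';
  }
  return 0;
}

Since CNodePtr is a shared pointer, the kernel == last_kernel argument in the launch loop is likewise a cheap pointer-identity comparison rather than a structural one.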