!10021 Fixed double stutter issue at the end of step for debugger
From: @adelshafiei Reviewed-by: @john_tzanakakis,@wangyue01 Signed-off-by: @wangyue01
This commit is contained in:
commit
59d2affa97
|
@ -361,7 +361,7 @@ bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) {
|
|||
return false;
|
||||
}
|
||||
|
||||
void Debugger::PostExecuteNode(const CNodePtr &kernel) {
|
||||
void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
|
||||
// access lock for public method
|
||||
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||
if (pipeline::ExecutorPy::GetDebugTerminate()) {
|
||||
|
@ -380,8 +380,9 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel) {
|
|||
hit_empty_flag = false;
|
||||
}
|
||||
}
|
||||
if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
|
||||
if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_) && !last_kernel) {
|
||||
// if kernel is not watchpoint and is next_to or continue_to node, suspend
|
||||
// No need to suspend if this is the last node in graph since PostExecute suspends at the end of graph
|
||||
CommandLoop();
|
||||
}
|
||||
return;
|
||||
|
|
|
@ -83,7 +83,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
|
||||
bool ReadNodeDataRequired(const CNodePtr &kernel);
|
||||
|
||||
void PostExecuteNode(const CNodePtr &kernel);
|
||||
void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);
|
||||
|
||||
// suspend the execution after a debug_op
|
||||
void PostDebugOp();
|
||||
|
|
|
@ -104,7 +104,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs,
|
||||
const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
|
||||
const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr,
|
||||
bool dump_enabled) {
|
||||
bool dump_enabled, bool last_kernel) {
|
||||
// check if we should read the kernel data
|
||||
bool read_data = false;
|
||||
auto &dump_json_parser = DumpJsonParser::GetInstance();
|
||||
|
@ -179,7 +179,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
}
|
||||
}
|
||||
}
|
||||
debugger->PostExecuteNode(kernel);
|
||||
debugger->PostExecuteNode(kernel, last_kernel);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
@ -586,6 +586,19 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g
|
|||
}
|
||||
}
|
||||
|
||||
// Walks the graph's execution order and returns the final kernel for which
// AnfAlgo::IsInplaceNode(kernel, "skip") is false.
//
// graph: dereferenced without a check, so the caller must pass a non-null pointer.
// Returns a default-constructed CNodePtr when the execution order is empty or
// every kernel in it is reported as a "skip" in-place node.
CNodePtr GetLastKernel(const session::KernelGraph *graph) {
  CNodePtr last_kernel;
  for (const auto &node : graph->execution_order()) {
    // Only kernels that are not "skip" in-place nodes are candidates; each later
    // candidate overwrites the earlier one so the last one wins.
    if (!AnfAlgo::IsInplaceNode(node, "skip")) {
      last_kernel = node;
    }
  }
  return last_kernel;
}
|
||||
|
||||
bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(mem_reuse_util_);
|
||||
|
@ -610,7 +623,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
|
||||
profiler_inst->SetStepTraceOpName(profiling_trace);
|
||||
}
|
||||
|
||||
CNodePtr last_kernel = GetLastKernel(graph);
|
||||
for (const auto &kernel : kernels) {
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
|
@ -666,7 +679,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
|
||||
// called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
|
||||
LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
|
||||
dump_enabled);
|
||||
dump_enabled, kernel == last_kernel);
|
||||
}
|
||||
exec_order = exec_order + 1;
|
||||
FreeKernelDynamicRes(kernel);
|
||||
|
|
Loading…
Reference in New Issue