!10021 Fixed double stutter issue at the end of step for debugger
From: @adelshafiei
Reviewed-by: @john_tzanakakis, @wangyue01
Signed-off-by: @wangyue01
commit 59d2affa97
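What the change does: at run level "node", stepping to the final node of a graph used to suspend the debugger twice, once in PostExecuteNode for the node itself and once more in PostExecute at the end of the graph, which showed up as a double stutter. The GPU launch loop now computes the last real kernel up front and threads a last_kernel flag through LoadKernelData into PostExecuteNode, which skips its per-node suspension for that kernel. Below is a minimal standalone sketch of that control flow; all names are stand-ins for the MindSpore debugger hooks, not the actual implementation.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

namespace {
// Stand-in for Debugger::CommandLoop(): record each suspension point.
void CommandLoop(const std::string &where) { std::cout << "suspended after " << where << '\n'; }

// Stand-in for Debugger::PostExecuteNode(kernel, last_kernel): suspend after
// every stepped node except the last one, since PostExecute() already
// suspends at the end of the graph and stopping here too would pause twice.
void PostExecuteNode(const std::string &kernel, bool last_kernel) {
  if (!last_kernel) {
    CommandLoop(kernel);
  }
}

// Stand-in for Debugger::PostExecute(): the end-of-graph suspension.
void PostExecute() { CommandLoop("end of graph"); }
}  // namespace

int main() {
  const std::vector<std::string> kernels = {"Conv2D", "BiasAdd", "ReLU"};
  for (std::size_t i = 0; i < kernels.size(); ++i) {
    PostExecuteNode(kernels[i], /*last_kernel=*/i + 1 == kernels.size());
  }
  PostExecute();  // exactly one stop for the final kernel, not two
  return 0;
}

Running the sketch prints one suspension each for Conv2D and BiasAdd and a single end-of-graph suspension for ReLU, where the unpatched flow would have stopped twice on ReLU.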
@@ -361,7 +361,7 @@ bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) {
   return false;
 }
 
-void Debugger::PostExecuteNode(const CNodePtr &kernel) {
+void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
   // access lock for public method
   std::lock_guard<std::mutex> a_lock(access_lock_);
   if (pipeline::ExecutorPy::GetDebugTerminate()) {
@@ -380,8 +380,9 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel) {
       hit_empty_flag = false;
     }
   }
-  if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
+  if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_) && !last_kernel) {
     // if kernel is not watchpoint and is next_to or continue_to node, suspend
+    // No need to suspend if this is the last node in graph since PostExecute suspends at the end of graph
     CommandLoop();
   }
   return;
@@ -83,7 +83,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
 
   bool ReadNodeDataRequired(const CNodePtr &kernel);
 
-  void PostExecuteNode(const CNodePtr &kernel);
+  void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);
 
   // suspend the execution after a debug_op
   void PostDebugOp();
@@ -104,7 +104,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr,
-                    bool dump_enabled) {
+                    bool dump_enabled, bool last_kernel) {
   // check if we should read the kernel data
   bool read_data = false;
   auto &dump_json_parser = DumpJsonParser::GetInstance();
@@ -179,7 +179,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
       }
     }
  }
-  debugger->PostExecuteNode(kernel);
+  debugger->PostExecuteNode(kernel, last_kernel);
 }
 }  // namespace
 
@@ -586,6 +586,19 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *graph) {
   }
 }
 
+CNodePtr GetLastKernel(const session::KernelGraph *graph) {
+  const auto &kernels = graph->execution_order();
+  CNodePtr last_kernel;
+  for (const auto &kernel : kernels) {
+    if (AnfAlgo::IsInplaceNode(kernel, "skip")) {
+      continue;
+    } else {
+      last_kernel = kernel;
+    }
+  }
+  return last_kernel;
+}
+
 bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(mem_reuse_util_);
@@ -610,7 +623,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
     profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
     profiler_inst->SetStepTraceOpName(profiling_trace);
   }
+  CNodePtr last_kernel = GetLastKernel(graph);
   for (const auto &kernel : kernels) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
     MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -666,7 +679,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
 
       // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
       LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
-                     dump_enabled);
+                     dump_enabled, kernel == last_kernel);
     }
     exec_order = exec_order + 1;
     FreeKernelDynamicRes(kernel);
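One design point worth noting: GetLastKernel cannot simply take the back of the execution order, because its skip branch suggests that inplace nodes marked "skip" can trail the order without being launched, so the scan keeps overwriting last_kernel with the most recent non-skip kernel. A toy sketch of that scan with assumed stand-in types (Kernel, KernelPtr) rather than the real CNodePtr and AnfAlgo:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Kernel {
  std::string name;
  bool skip;  // stand-in for the AnfAlgo::IsInplaceNode(kernel, "skip") test
};
using KernelPtr = std::shared_ptr<Kernel>;

KernelPtr GetLastKernel(const std::vector<KernelPtr> &kernels) {
  KernelPtr last_kernel;
  for (const auto &kernel : kernels) {
    if (!kernel->skip) {
      last_kernel = kernel;  // ends up holding the final non-skip kernel
    }
  }
  return last_kernel;
}

int main() {
  const std::vector<KernelPtr> order = {
      std::make_shared<Kernel>(Kernel{"MatMul", false}),
      std::make_shared<Kernel>(Kernel{"ReLU", false}),
      std::make_shared<Kernel>(Kernel{"InplaceSkip", true}),  // trails the order, never stepped
  };
  const KernelPtr last = GetLastKernel(order);
  for (const auto &kernel : order) {
    // shared_ptr operator== compares the managed pointers, so this flags
    // exactly the node returned above (here: ReLU, not InplaceSkip)
    std::cout << kernel->name << " last_kernel=" << (kernel == last) << '\n';
  }
  return 0;
}

Since CNodePtr is a shared pointer, the kernel == last_kernel argument in the launch loop is likewise a cheap pointer-identity comparison rather than a structural one.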