!20544 Fix the infinite loop for multigraph on GPU

Merge pull request !20544 from maning202007/master
This commit is contained in:
i-robot 2021-07-20 11:34:40 +00:00 committed by Gitee
commit 2c36f092e3
4 changed files with 29 additions and 32 deletions

View File

@ -451,7 +451,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
// build kernel
BuildKernel(root_graph);
if (debugger_ && debugger_->partial_memory()) {
debugger_->PreExecute(root_graph, graph_sum_);
debugger_->PreExecute(root_graph);
}
SetSummaryNodes(root_graph.get());
// Alloc memory for child graph's inputs
@ -540,7 +540,7 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (debugger_ && debugger_->partial_memory()) {
debugger_->PreExecute(graph, graph_sum_);
debugger_->PreExecute(graph);
}
if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
MS_LOG(INFO) << "Precompile only, stop in build kernel step";
@ -588,7 +588,7 @@ bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedInser
void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
if (debugger_) {
debugger_->PreExecute(kernel_graph, graph_sum_);
debugger_->PreExecute(kernel_graph);
}
#if ENABLE_CPU && ENABLE_D
// Initialize parameter server

View File

@ -477,7 +477,7 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
if (debugger_) {
debugger_->PreExecute(kernel_graph, graph_sum_);
debugger_->PreExecute(kernel_graph);
}
DumpSetup(kernel_graph);

View File

@ -271,16 +271,15 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
if (device_target_ != kGPUDevice) {
return;
}
uint32_t graph_sum = graphs.size();
for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) {
const auto &graph = graphs[graph_index];
if (debugger_) {
debugger_->PreExecute(graph, graph_sum);
debugger_->PreExecute(graph);
}
DumpSetup(graph);
}
}
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
CheckDatasetSinkMode();
@ -294,10 +293,8 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
rungraph_id_list_.push_back(graph_id);
}
}
// check and save graph_ptr, suspend if graph is new
MS_LOG(INFO) << "total number graph: " << graph_sum;
// multiple graphs
if (graph_sum > 1) {
if (graph_proto_list_.size() > 1) {
// there are more than one graphs are not dataset_graph
if (not_dataset_graph_sum_ > 0) {
// only try to enable debugger if they are not all dataset graphs
@ -305,32 +302,21 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
EnableDebugger();
}
if (debugger_enabled_) {
if (graph_proto_list_.size()) {
// only send compiled graphs once.
auto dbg_graph_ptr = graph_ptr_;
// use current graph ptr to load parameters
graph_ptr_ = graph_ptr;
LoadParametersAndConst();
// revert graph ptr to original value
graph_ptr_ = dbg_graph_ptr;
// only send compiled graphs once at the initial step.
auto dbg_graph_ptr = graph_ptr_;
// use current graph ptr to load parameters
graph_ptr_ = graph_ptr;
LoadParametersAndConst();
// revert graph ptr to original value
graph_ptr_ = dbg_graph_ptr;
SendMultiGraphsAndSuspend(graph_proto_list_);
SendMultiGraphsAndSuspend(graph_proto_list_);
graph_proto_list_.clear();
} else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
// stop only when receive the first sub run graph for each step
// if we have stopped for the last kernel before, no need to stop again
if (pipeline::ExecutorPy::GetDebugTerminate()) {
return;
}
if (!(run_level_ == "node" && suspended_at_last_kernel_)) {
CommandLoop();
}
debug_services_->ResetLoadedTensors();
}
graph_proto_list_.clear();
}
}
} else if (graph_proto_list_.size() == 1) {
// single graph, and not the initial step
if (device_target_ == kGPUDevice && num_step_ != 0) {
if (debugger_enabled_ && !(run_level_ == "node" && suspended_at_last_kernel_)) {
CommandLoop();
@ -342,6 +328,17 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
graph_ptr_ = nullptr;
CheckGraphPtr(graph_ptr);
}
} else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
// Multiple graph, and not the initial step,
// stop only when receive the first sub run graph for each step
// if we have stopped for the last kernel before, no need to stop again
if (pipeline::ExecutorPy::GetDebugTerminate()) {
return;
}
if (!(run_level_ == "node" && suspended_at_last_kernel_)) {
CommandLoop();
}
debug_services_->ResetLoadedTensors();
}
// resets for the new graph
suspended_at_last_kernel_ = 0;

View File

@ -77,7 +77,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// enable debugger
// send graph and wait for command
// do nothing if graph is set already
void PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum = 1);
void PreExecute(const KernelGraphPtr &graph_ptr);
// analyze tensors and wait for command
// don't need a graph_ptr because it is saved during pre_execute