!20544 Fix the forever loop for multigraph on gpu
Merge pull request !20544 from maning202007/master
This commit is contained in:
commit
2c36f092e3
|
@ -451,7 +451,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
|
|||
// build kernel
|
||||
BuildKernel(root_graph);
|
||||
if (debugger_ && debugger_->partial_memory()) {
|
||||
debugger_->PreExecute(root_graph, graph_sum_);
|
||||
debugger_->PreExecute(root_graph);
|
||||
}
|
||||
SetSummaryNodes(root_graph.get());
|
||||
// Alloc memory for child graph's inputs
|
||||
|
@ -540,7 +540,7 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
|
|||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (debugger_ && debugger_->partial_memory()) {
|
||||
debugger_->PreExecute(graph, graph_sum_);
|
||||
debugger_->PreExecute(graph);
|
||||
}
|
||||
if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
|
||||
MS_LOG(INFO) << "Precompile only, stop in build kernel step";
|
||||
|
@ -588,7 +588,7 @@ bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedInser
|
|||
void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(kernel_graph, graph_sum_);
|
||||
debugger_->PreExecute(kernel_graph);
|
||||
}
|
||||
#if ENABLE_CPU && ENABLE_D
|
||||
// Initialize parameter server
|
||||
|
|
|
@ -477,7 +477,7 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
|
|||
void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(kernel_graph, graph_sum_);
|
||||
debugger_->PreExecute(kernel_graph);
|
||||
}
|
||||
|
||||
DumpSetup(kernel_graph);
|
||||
|
|
|
@ -271,16 +271,15 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
|
|||
if (device_target_ != kGPUDevice) {
|
||||
return;
|
||||
}
|
||||
uint32_t graph_sum = graphs.size();
|
||||
for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) {
|
||||
const auto &graph = graphs[graph_index];
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(graph, graph_sum);
|
||||
debugger_->PreExecute(graph);
|
||||
}
|
||||
DumpSetup(graph);
|
||||
}
|
||||
}
|
||||
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
|
||||
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
|
||||
// access lock for public method
|
||||
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||
CheckDatasetSinkMode();
|
||||
|
@ -294,10 +293,8 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
|
|||
rungraph_id_list_.push_back(graph_id);
|
||||
}
|
||||
}
|
||||
// check and save graph_ptr, suspend if graph is new
|
||||
MS_LOG(INFO) << "total number graph: " << graph_sum;
|
||||
// multiple graphs
|
||||
if (graph_sum > 1) {
|
||||
if (graph_proto_list_.size() > 1) {
|
||||
// there is more than one graph that is not a dataset_graph
|
||||
if (not_dataset_graph_sum_ > 0) {
|
||||
// only try to enable debugger if they are not all dataset graphs
|
||||
|
@ -305,32 +302,21 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
|
|||
EnableDebugger();
|
||||
}
|
||||
if (debugger_enabled_) {
|
||||
if (graph_proto_list_.size()) {
|
||||
// only send compiled graphs once.
|
||||
auto dbg_graph_ptr = graph_ptr_;
|
||||
// use current graph ptr to load parameters
|
||||
graph_ptr_ = graph_ptr;
|
||||
LoadParametersAndConst();
|
||||
// revert graph ptr to original value
|
||||
graph_ptr_ = dbg_graph_ptr;
|
||||
// only send compiled graphs once at the initial step.
|
||||
auto dbg_graph_ptr = graph_ptr_;
|
||||
// use current graph ptr to load parameters
|
||||
graph_ptr_ = graph_ptr;
|
||||
LoadParametersAndConst();
|
||||
// revert graph ptr to original value
|
||||
graph_ptr_ = dbg_graph_ptr;
|
||||
|
||||
SendMultiGraphsAndSuspend(graph_proto_list_);
|
||||
SendMultiGraphsAndSuspend(graph_proto_list_);
|
||||
|
||||
graph_proto_list_.clear();
|
||||
} else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
|
||||
// stop only when receiving the first sub run graph of each step
|
||||
// if we have stopped for the last kernel before, no need to stop again
|
||||
if (pipeline::ExecutorPy::GetDebugTerminate()) {
|
||||
return;
|
||||
}
|
||||
if (!(run_level_ == "node" && suspended_at_last_kernel_)) {
|
||||
CommandLoop();
|
||||
}
|
||||
debug_services_->ResetLoadedTensors();
|
||||
}
|
||||
graph_proto_list_.clear();
|
||||
}
|
||||
}
|
||||
} else if (graph_proto_list_.size() == 1) {
|
||||
// single graph, and not the initial step
|
||||
if (device_target_ == kGPUDevice && num_step_ != 0) {
|
||||
if (debugger_enabled_ && !(run_level_ == "node" && suspended_at_last_kernel_)) {
|
||||
CommandLoop();
|
||||
|
@ -342,6 +328,17 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum) {
|
|||
graph_ptr_ = nullptr;
|
||||
CheckGraphPtr(graph_ptr);
|
||||
}
|
||||
} else if (graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
|
||||
// Multiple graphs, and not the initial step:
|
||||
// stop only when receiving the first sub run graph of each step
|
||||
// if we have stopped for the last kernel before, no need to stop again
|
||||
if (pipeline::ExecutorPy::GetDebugTerminate()) {
|
||||
return;
|
||||
}
|
||||
if (!(run_level_ == "node" && suspended_at_last_kernel_)) {
|
||||
CommandLoop();
|
||||
}
|
||||
debug_services_->ResetLoadedTensors();
|
||||
}
|
||||
// resets for the new graph
|
||||
suspended_at_last_kernel_ = 0;
|
||||
|
|
|
@ -77,7 +77,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// enable debugger
|
||||
// send graph and wait for command
|
||||
// do nothing if graph is set already
|
||||
void PreExecute(const KernelGraphPtr &graph_ptr, uint32_t graph_sum = 1);
|
||||
void PreExecute(const KernelGraphPtr &graph_ptr);
|
||||
|
||||
// analyze tensors and wait for command
|
||||
// don't need a graph_ptr because it is saved during pre_execute
|
||||
|
|
Loading…
Reference in New Issue