!19252 add dump pb and nccl protected

Merge pull request !19252 from limingqi107/bug_fix3
This commit is contained in:
i-robot 2021-07-02 12:00:45 +00:00 committed by Gitee
commit d0bab79fde
4 changed files with 27 additions and 17 deletions

View File

@ -279,6 +279,9 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() {
}
}
}
global_mem_block_list_.clear();
global_idle_mem_buf_map_.clear();
}
void DynamicMemPoolBestFit::DumpDynamicMemPoolInfo() {

View File

@ -32,6 +32,10 @@ ncclUniqueId NCCLWrapper::nccl_unique_id() const {
}
void NCCLWrapper::InitNCCLComm() {
if (comm_init_done_) {
return;
}
for (auto group : group_info_) {
std::string group_name = group.first;
NcclGroupInfo group_info = group.second;

View File

@ -24,6 +24,7 @@
#include "ir/tensor.h"
#include "backend/optimizer/common/helper.h"
#include "base/base_ref_utils.h"
#include "debug/dump_proto.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
@ -297,6 +298,14 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse();
const auto &ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
bool save_graphs = ms_context->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
// Dump .pb graph before graph optimization.
if (save_graphs) {
DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id()));
}
// Execute optimization pass.
auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
device_context->OptimizeGraph(graph);
@ -308,8 +317,6 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
// 'KernelMod' is real executive object of kernel.
device_context->CreateKernel(graph->execution_order());
const auto &ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
// Create device address for all anf nodes of graph.
CreateDeviceAddress(graph, device_context);
@ -322,6 +329,12 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
session_->SetSummaryNodes(graph.get());
SetSummaryNodesRefCount(graph.get());
// Dump .pb graph after graph optimization.
if (save_graphs) {
DumpIRProto(graph, "after_opt_" + std::to_string(graph->graph_id()));
}
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
debugger->DumpInGraphCompiler(graph);
@ -329,6 +342,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
debugger->LoadGraphs(graph);
}
#endif
session_->DumpGraph(graph);
return graph->graph_id();
}

View File

@ -1962,22 +1962,10 @@ void GraphScheduler::LinkDeviceTensorStoreForAutoMonadActor(const std::vector<Ke
// Link from copy actor to kernel actor users.
for (auto &output_contorl : kernel_actor->output_control_arrows_) {
copy_actor->output_control_arrows_.emplace_back(output_contorl);
auto to_actor = FetchActor(output_contorl.Name());
MS_EXCEPTION_IF_NULL(to_actor);
if (output_contorl.Name().find("_LoopCountActor") != string::npos) {
auto real_to_actor = dynamic_cast<LoopCountActor *>(to_actor);
MS_EXCEPTION_IF_NULL(real_to_actor);
real_to_actor->input_controls_num_++;
} else if (output_contorl.Name().find("copy_from") != string::npos) {
auto real_to_actor = dynamic_cast<CopyActor *>(to_actor);
MS_EXCEPTION_IF_NULL(real_to_actor);
real_to_actor->input_controls_num_++;
} else {
auto real_to_actor = dynamic_cast<KernelActor *>(to_actor);
MS_EXCEPTION_IF_NULL(real_to_actor);
real_to_actor->input_controls_num_++;
}
}
// Move the control arrows from kernel actor to kernel actor users.
kernel_actor->output_control_arrows_.clear();
// Link from kernel actor to copy actor.
kernel_actor->output_control_arrows_.emplace_back(copy_actor->GetAID());
copy_actor->input_controls_num_++;