!6198 [Data Dump] Bug fix

Merge pull request !6198 from caifubi/dump
This commit is contained in:
mindspore-ci-bot 2020-09-15 20:47:23 +08:00 committed by Gitee
commit fe735a0f83
9 changed files with 20 additions and 88 deletions

View File

@ -4,7 +4,7 @@
"path": "/test",
"net_name": "ResNet50",
"iteration": 0,
"input_output": 0,
"input_output": 2,
"kernels": ["Default/Conv-op12"],
"support_device": [0,1,2,3,4,5,6,7]
},

View File

@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/net/",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iteration, others: specified iteration ",
"kernels": "op's full scope name which need to be dump"
},
"other": {}
}

View File

@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/hccllog/0",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iteration, others: specified iteration ",
"kernels": "op's full scope name which need to be dump"
},
"other": {}
}

View File

@ -1,22 +0,0 @@
{
"DumpSettings": {
"enable": false,
"trans_flag": false,
"path": "/tmp/hccllog/1",
"net_name": "ResNet50",
"mode": 0,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"enable": "true: dump enable, false: dump disable",
"trans_flag": "true: trans to host format, false: not trans format",
"path": "the dump file folder",
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "0: all iteration, others: specified iteration ",
"kernels": "op's full scope name which need to be dump"
},
"other": {}
}

View File

@ -442,6 +442,7 @@ void AscendSession::InitRuntimeResource() {
if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
}
DumpJsonParser::GetInstance().Parse();
MS_LOG(INFO) << "Finish!";
}

View File

@ -74,6 +74,10 @@ bool DumpJsonParser::IsDumpEnabled() {
void DumpJsonParser::Parse() {
std::lock_guard<std::mutex> guard(lock_);
if (already_parsed_) {
return;
}
already_parsed_ = true;
if (!IsDumpEnabled()) {
return;
}
@ -305,6 +309,8 @@ void DumpJsonParser::JudgeDumpEnabled() {
MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support";
}
context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_);
MS_LOG(INFO) << "Dump status, e2e_dump_enabled:" << e2e_dump_enabled_
<< " async_dump_enabled:" << async_dump_enabled_;
}
bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {
@ -325,6 +331,9 @@ void DumpJsonParser::MatchKernel(const std::string &kernel_name) {
}
void DumpJsonParser::PrintUnusedKernel() {
if (!e2e_dump_enabled_ && !async_dump_enabled_) {
return;
}
for (const auto &iter : kernels_) {
if (iter.second == 0) {
MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first;
@ -362,16 +371,6 @@ bool DumpJsonParser::OutputNeedDump() const {
return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
}
// Returns true when `kernel` is a TBE, AICPU or AKG kernel and the dump json
// parser reports that its full scope name needs to be dumped.
// Raises (via MS_EXCEPTION_IF_NULL) when `kernel` is null.
bool NeedAsyncDump(const CNodePtr &kernel) {
  // Validate the pointer before it is handed to any helper or dereferenced.
  MS_EXCEPTION_IF_NULL(kernel);
  // Query the kernel type once instead of once per comparison.
  const auto kernel_type = AnfAlgo::GetKernelType(kernel);
  if (kernel_type != TBE_KERNEL && kernel_type != AICPU_KERNEL && kernel_type != AKG_KERNEL) {
    return false;
  }
  // dump all kernel if mode is set 0 in data_dump.json
  return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
}
void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
if (e2e_dump_enabled_) {
MS_LOG(INFO) << "E2e dump no need to update dump kernel list";
@ -391,9 +390,6 @@ void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *>
update_kernels.try_emplace(input->fullname_with_scope(), 0);
}
}
} else if (NeedAsyncDump(kernel)) {
MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
update_kernels.try_emplace(kernel->fullname_with_scope(), 0);
}
}
kernels_.insert(update_kernels.begin(), update_kernels.end());

View File

@ -72,6 +72,7 @@ class DumpJsonParser {
uint32_t op_debug_mode_{0};
bool trans_flag_{false};
uint32_t cur_dump_iter_{0};
bool already_parsed_{false};
void ParseCommonDumpSetting(const nlohmann::json &content);
void ParseAsyncDumpSetting(const nlohmann::json &content);

View File

@ -206,14 +206,17 @@ bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger
}
}
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
std::string net_name = dump_json_parser.net_name();
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
std::string dump_path = dump_json_parser.path();
if (dump_path.back() == '/') {
dump_path = dump_path + net_name + '/' + iterator;
} else {
dump_path = dump_path + '/' + net_name + '/' + iterator;
if (dump_path.back() != '/') {
dump_path += "/";
}
dump_path += (net_name + "/device_" + std::to_string(device_id) + "/iteration_" + iterator);
DumpInput(graph, dump_path, debugger);
DumpOutput(graph, dump_path, debugger);
DumpParameters(graph, dump_path, debugger);

View File

@ -206,11 +206,8 @@ bool AscendKernelRuntime::Init() {
SetContext();
return true;
}
bool ret = false;
DumpJsonParser::GetInstance().Parse();
// Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
bool ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}