diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index a7820eaa734..d755d76a43e 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -835,8 +835,10 @@ void AscendSession::InitRuntimeResource() {
   if (!runtime_instance->Init()) {
     MS_LOG(EXCEPTION) << "Kernel runtime init error.";
   }
-  auto env_hccl_mode = common::GetEnv("MS_ENABLE_HCCL");
-  if (!env_hccl_mode.empty() && env_hccl_mode != std::to_string(0)) {
+  auto env_table_file = common::GetEnv("RANK_TABLE_FILE");
+  auto env_rank_id = common::GetEnv("RANK_ID");
+  if (!(env_table_file.empty() || env_rank_id.empty())) {
+    // get actual rank id if it's distribution training case.
     rank_id_ = GetRankId();
   }
   DumpInit(rank_id_);
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
index bb14934b14d..c9b8231deac 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
@@ -206,10 +206,12 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
     return false;
   }
   std::string npy_header = GenerateNpyHeader(shape, type);
-  fd << npy_header;
-  (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
-  fd.close();
-  ChangeFileMode(file_path, S_IRUSR);
+  if (!npy_header.empty()) {
+    fd << npy_header;
+    (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
+    fd.close();
+    ChangeFileMode(file_path, S_IRUSR);
+  }
   return true;
 }
 
diff --git a/mindspore/ccsrc/debug/data_dump/npy_header.cc b/mindspore/ccsrc/debug/data_dump/npy_header.cc
index 1746703e635..3da6fa73c4e 100644
--- a/mindspore/ccsrc/debug/data_dump/npy_header.cc
+++ b/mindspore/ccsrc/debug/data_dump/npy_header.cc
@@ -109,7 +109,8 @@ std::string GenerateNpyHeader(const ShapeVector &shape, TypeId type_id, bool for
   };
   auto type_desc = type_desc_map.find(type_id);
   if (type_desc == type_desc_map.end()) {
-    MS_LOG(EXCEPTION) << "Not support dump the " << TypeIdToType(type_id)->ToString() << " data to npy file.";
+    MS_LOG(WARNING) << "Not support dump the " << TypeIdToType(type_id)->ToString() << " data to npy file.";
+    return std::string();
   }
   NpyHeader npy_header{type_desc->second, fortran_order, shape};
   return npy_header.str();
diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
index 124c938f9b7..2a9d844ed02 100644
--- a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
@@ -140,9 +140,10 @@ void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_inf
   }
   uint32_t graph_id = kernel_graph_->graph_id();
   uint32_t rank_id = 0;
-  auto env_hccl_mode = common::GetEnv("MS_ENABLE_HCCL");
-  if (!env_hccl_mode.empty() && env_hccl_mode != std::to_string(0)) {
-    // get actual rank id if hcck is initiated.
+  auto env_table_file = common::GetEnv("RANK_TABLE_FILE");
+  auto env_rank_id = common::GetEnv("RANK_ID");
+  if (!(env_table_file.empty() || env_rank_id.empty())) {
+    // get actual rank id if it's distribution training case.
     if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) {
       MS_LOG(INFO) << "Failed to get rank id.";
     }