!16936 Unify dump directory structure

From: @tina_mengting_zhang
Reviewed-by: @john_tzanakakis,@pandoublefeng
Signed-off-by: @john_tzanakakis
This commit is contained in:
mindspore-ci-bot 2021-05-29 04:12:48 +08:00 committed by Gitee
commit 17099b048f
18 changed files with 149 additions and 190 deletions

View File

@ -897,10 +897,12 @@ void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const {
MS_LOG(INFO) << "Finish!"; MS_LOG(INFO) << "Finish!";
} }
void DumpInit() { void DumpInit(uint32_t device_id) {
auto &json_parser = DumpJsonParser::GetInstance(); auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse(); json_parser.Parse();
json_parser.CopyJsonToDir(); json_parser.CopyJsonToDir(device_id);
json_parser.CopyHcclJsonToDir(device_id);
json_parser.CopyMSCfgJsonToDir(device_id);
if (json_parser.async_dump_enabled()) { if (json_parser.async_dump_enabled()) {
if (AdxDataDumpServerInit() != 0) { if (AdxDataDumpServerInit() != 0) {
MS_LOG(EXCEPTION) << "Adx data dump server init failed"; MS_LOG(EXCEPTION) << "Adx data dump server init failed";
@ -915,7 +917,7 @@ void AscendSession::InitRuntimeResource() {
if (!runtime_instance->Init()) { if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Kernel runtime init error."; MS_LOG(EXCEPTION) << "Kernel runtime init error.";
} }
DumpInit(); DumpInit(device_id_);
MS_LOG(INFO) << "Finish!"; MS_LOG(INFO) << "Finish!";
} }
@ -1240,16 +1242,8 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs)
DumpIR("trace_code_graph", graph, true, kWholeStack); DumpIR("trace_code_graph", graph, true, kWholeStack);
} }
std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id()); std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id());
if (json_parser.e2e_dump_enabled()) { if (json_parser.e2e_dump_enabled() || json_parser.async_dump_enabled()) {
std::string root_dir = json_parser.path() + "/" + json_parser.net_name() + "/device_" + std::to_string(device_id); std::string root_dir = json_parser.path() + "/rank_" + std::to_string(device_id);
std::string target_dir = root_dir + "/graphs";
std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir";
DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack);
DumpIR("trace_code_graph", graph, true, kWholeStack, ir_file_path);
DumpGraphExeOrder("ms_execution_order_graph_" + std::to_string(graph->graph_id()) + ".csv", root_dir,
graph->execution_order());
} else if (json_parser.async_dump_enabled()) {
std::string root_dir = json_parser.path() + "/device_" + std::to_string(device_id);
std::string target_dir = root_dir + "/graphs"; std::string target_dir = root_dir + "/graphs";
std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir"; std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir";
DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack); DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack);

View File

@ -47,7 +47,9 @@ namespace mindspore {
namespace session { namespace session {
void CPUSession::Init(uint32_t device_id) { void CPUSession::Init(uint32_t device_id) {
// Dump json config file if dump is enabled // Dump json config file if dump is enabled
DumpJsonParser::GetInstance().Parse(); auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse();
json_parser.CopyMSCfgJsonToDir(device_id);
InitExecutor(kCPUDevice, device_id); InitExecutor(kCPUDevice, device_id);
} }

View File

@ -106,7 +106,8 @@ void GPUSession::Init(uint32_t device_id) {
ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id); ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id);
auto &json_parser = DumpJsonParser::GetInstance(); auto &json_parser = DumpJsonParser::GetInstance();
// Dump json config file if dump is enabled // Dump json config file if dump is enabled
json_parser.CopyJsonToDir(); json_parser.CopyJsonToDir(device_id);
json_parser.CopyMSCfgJsonToDir(device_id);
MS_LOG(INFO) << "Set device id " << device_id << " for gpu session."; MS_LOG(INFO) << "Set device id " << device_id << " for gpu session.";
InitExecutor(kGPUDevice, device_id); InitExecutor(kGPUDevice, device_id);
} }
@ -391,7 +392,7 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
} }
if (json_parser.e2e_dump_enabled()) { if (json_parser.e2e_dump_enabled()) {
std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id()); std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id());
std::string root_dir = json_parser.path() + "/" + json_parser.net_name() + "/device_" + std::to_string(device_id); std::string root_dir = json_parser.path() + "/rank_" + std::to_string(device_id);
std::string target_dir = root_dir + "/graphs"; std::string target_dir = root_dir + "/graphs";
std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir"; std::string ir_file_path = target_dir + "/" + "ms_output_" + final_graph + ".ir";
DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack); DumpIRProtoWithSrcInfo(graph, final_graph, target_dir, kDebugWholeStack);

View File

@ -29,7 +29,7 @@ void CPUE2eDump::DumpCNodeData(const CNodePtr &node, uint32_t graph_id) {
MS_LOG(DEBUG) << "E2e dump CNode data start: " << kernel_name << ", current iteration is " MS_LOG(DEBUG) << "E2e dump CNode data start: " << kernel_name << ", current iteration is "
<< dump_json_parser.cur_dump_iter(); << dump_json_parser.cur_dump_iter();
std::string dump_path = GenerateDumpPath(); std::string dump_path = GenerateDumpPath(graph_id);
if (dump_json_parser.InputNeedDump()) { if (dump_json_parser.InputNeedDump()) {
DumpCNodeInputs(node, dump_path); DumpCNodeInputs(node, dump_path);
} }
@ -134,7 +134,7 @@ void CPUE2eDump::DumpParametersAndConst(const session::KernelGraph *graph, uint3
MS_LOG(INFO) << "Start e2e dump parameters and Const values"; MS_LOG(INFO) << "Start e2e dump parameters and Const values";
std::map<std::string, size_t> const_map; std::map<std::string, size_t> const_map;
GetConstantId(graph, &const_map); GetConstantId(graph, &const_map);
const std::string &dump_path = GenerateDumpPath(); const std::string &dump_path = GenerateDumpPath(graph_id);
// dump parameters // dump parameters
const auto &parameters = graph->inputs(); const auto &parameters = graph->inputs();

View File

@ -27,7 +27,6 @@ constexpr auto kCommonDumpSettings = "common_dump_settings";
constexpr auto kAsyncDumpSettings = "async_dump_settings"; constexpr auto kAsyncDumpSettings = "async_dump_settings";
constexpr auto kE2eDumpSettings = "e2e_dump_settings"; constexpr auto kE2eDumpSettings = "e2e_dump_settings";
constexpr auto kDumpMode = "dump_mode"; constexpr auto kDumpMode = "dump_mode";
constexpr auto kDumpFormat = "dump_format";
constexpr auto kPath = "path"; constexpr auto kPath = "path";
constexpr auto kNetName = "net_name"; constexpr auto kNetName = "net_name";
constexpr auto kIteration = "iteration"; constexpr auto kIteration = "iteration";
@ -44,8 +43,6 @@ constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG";
} // namespace } // namespace
namespace mindspore { namespace mindspore {
uint32_t DumpJsonParser::dump_format_ = 0;
auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) { auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) {
auto iter = content.find(key); auto iter = content.find(key);
if (iter == content.end()) { if (iter == content.end()) {
@ -116,7 +113,7 @@ void DumpJsonParser::Parse() {
JudgeDumpEnabled(); JudgeDumpEnabled();
} }
void DumpJsonParser::CopyJsonToDir() { void DumpJsonParser::CopyJsonToDir(uint32_t device_id) {
this->Parse(); this->Parse();
if (!IsDumpEnabled()) { if (!IsDumpEnabled()) {
return; return;
@ -127,7 +124,8 @@ void DumpJsonParser::CopyJsonToDir() {
} }
std::ifstream json_file(dump_config_file.value()); std::ifstream json_file(dump_config_file.value());
if (async_dump_enabled_ || e2e_dump_enabled_) { if (async_dump_enabled_ || e2e_dump_enabled_) {
auto realpath = Common::GetRealPath(path_ + "/.metadata/data_dump.json"); auto realpath =
Common::GetRealPath(path_ + "/rank_" + std::to_string(device_id) + "/.dump_metadata/data_dump.json");
if (!realpath.has_value()) { if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed in CopyJsonDir."; MS_LOG(ERROR) << "Get real path failed in CopyJsonDir.";
} }
@ -137,6 +135,47 @@ void DumpJsonParser::CopyJsonToDir() {
ChangeFileMode(realpath.value(), S_IRUSR); ChangeFileMode(realpath.value(), S_IRUSR);
} }
} }
void DumpJsonParser::CopyHcclJsonToDir(uint32_t device_id) {
if (!IsDumpEnabled()) {
return;
}
std::string config_path = common::GetEnv("MINDSPORE_HCCL_CONFIG_PATH");
if (config_path.empty()) {
return;
}
std::ifstream json_file(config_path);
auto realpath = Common::GetRealPath(path_ + "/rank_" + std::to_string(device_id) + "/.dump_metadata/hccl.json");
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed in CopyHcclJsonToDir.";
} else {
std::ofstream json_copy(realpath.value());
json_copy << json_file.rdbuf();
json_copy.close();
ChangeFileMode(realpath.value(), S_IRUSR);
}
}
// Writes a small JSON file describing the MindSpore run to
// <path_>/rank_<device_id>/.dump_metadata/config.json and then calls ChangeFileMode on it with S_IRUSR.
// The JSON holds two keys: "device_target" (read from the MsContext) and "ms_version".
// Returns early when dump is disabled, or (after logging an error) when the path cannot be resolved.
void DumpJsonParser::CopyMSCfgJsonToDir(uint32_t device_id) {
  if (!IsDumpEnabled()) {
    return;
  }
  const std::string rank_dir = path_ + "/rank_" + std::to_string(device_id);
  auto config_path = Common::GetRealPath(rank_dir + "/.dump_metadata/config.json");
  if (!config_path.has_value()) {
    MS_LOG(ERROR) << "Get real path failed in CopyMSConfigJsonToDir.";
    return;
  }

  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);

  nlohmann::json ms_info;
  ms_info["device_target"] = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  // NOTE(review): the version string is a fixed literal here, not read from the build — confirm intent.
  ms_info["ms_version"] = "1.2.0";

  std::ofstream config_file(config_path.value());
  config_file << ms_info;
  config_file.close();
  ChangeFileMode(config_path.value(), S_IRUSR);
}
bool DumpJsonParser::GetIterDumpFlag() { bool DumpJsonParser::GetIterDumpFlag() {
return e2e_dump_enabled_ && (iteration_ == 0 || cur_dump_iter_ == iteration_); return e2e_dump_enabled_ && (iteration_ == 0 || cur_dump_iter_ == iteration_);
} }
@ -148,8 +187,7 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
return false; return false;
} }
std::string file_format = dump_format_ == 1 ? ".npy" : ".bin"; auto realpath = Common::GetRealPath(filename + ".npy");
auto realpath = Common::GetRealPath(filename + file_format);
if (!realpath.has_value()) { if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed."; MS_LOG(ERROR) << "Get real path failed.";
return false; return false;
@ -160,10 +198,8 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
return false; return false;
} }
if (dump_format_ == 1) { std::string npy_header = GenerateNpyHeader(shape, type);
std::string npy_header = GenerateNpyHeader(shape, type); fd << npy_header;
fd << npy_header;
}
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
fd.close(); fd.close();
return true; return true;
@ -186,7 +222,6 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) {
ParseInputOutput(*input_output); ParseInputOutput(*input_output);
ParseKernels(*kernels); ParseKernels(*kernels);
ParseSupportDevice(*support_device); ParseSupportDevice(*support_device);
ParseDumpFormat(*common_dump_settings);
} }
void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) { void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) {
@ -240,18 +275,7 @@ void DumpJsonParser::ParseDumpMode(const nlohmann::json &content) {
CheckJsonUnsignedType(content, kDumpMode); CheckJsonUnsignedType(content, kDumpMode);
dump_mode_ = content; dump_mode_ = content;
if (dump_mode_ != 0 && dump_mode_ != 1) { if (dump_mode_ != 0 && dump_mode_ != 1) {
MS_LOG(EXCEPTION) << "Dump config parse failed, dump_mode should be 0 or 1, but got " << dump_format_; MS_LOG(EXCEPTION) << "Dump config parse failed, dump_mode should be 0 or 1, but got " << dump_mode_;
}
}
void DumpJsonParser::ParseDumpFormat(const nlohmann::json &content) {
auto iter = content.find(kDumpFormat);
if (iter == content.end()) {
return;
}
dump_format_ = *iter;
if (dump_format_ != 0 && dump_format_ != 1) {
MS_LOG(EXCEPTION) << "Dump config parse failed, dump_format should be 0(.bin) or 1(.npy), but got " << dump_format_;
} }
} }

View File

@ -35,7 +35,9 @@ class DumpJsonParser {
void Parse(); void Parse();
static bool DumpToFile(const std::string &filename, const void *data, size_t len, const ShapeVector &shape, static bool DumpToFile(const std::string &filename, const void *data, size_t len, const ShapeVector &shape,
TypeId type); TypeId type);
void CopyJsonToDir(); void CopyJsonToDir(uint32_t device_id);
void CopyHcclJsonToDir(uint32_t device_id);
void CopyMSCfgJsonToDir(uint32_t device_id);
bool NeedDump(const std::string &op_full_name) const; bool NeedDump(const std::string &op_full_name) const;
void MatchKernel(const std::string &kernel_name); void MatchKernel(const std::string &kernel_name);
void PrintUnusedKernel(); void PrintUnusedKernel();
@ -63,7 +65,6 @@ class DumpJsonParser {
DISABLE_COPY_AND_ASSIGN(DumpJsonParser) DISABLE_COPY_AND_ASSIGN(DumpJsonParser)
std::mutex lock_; std::mutex lock_;
static uint32_t dump_format_;
bool async_dump_enabled_{false}; bool async_dump_enabled_{false};
bool e2e_dump_enabled_{false}; bool e2e_dump_enabled_{false};
uint32_t dump_mode_{0}; uint32_t dump_mode_{0};
@ -86,7 +87,6 @@ class DumpJsonParser {
auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key); auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key);
void ParseDumpMode(const nlohmann::json &content); void ParseDumpMode(const nlohmann::json &content);
void ParseDumpFormat(const nlohmann::json &content);
void ParseDumpPath(const nlohmann::json &content); void ParseDumpPath(const nlohmann::json &content);
void ParseNetName(const nlohmann::json &content); void ParseNetName(const nlohmann::json &content);
void ParseIteration(const nlohmann::json &content); void ParseIteration(const nlohmann::json &content);

View File

@ -34,7 +34,7 @@ uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
return kernel_runtime->device_id(); return kernel_runtime->device_id();
} }
std::string GenerateDumpPath(const uint32_t *device_id) { std::string GenerateDumpPath(uint32_t graph_id, const uint32_t *device_id) {
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string net_name = dump_json_parser.net_name(); std::string net_name = dump_json_parser.net_name();
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter()); std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
@ -42,12 +42,9 @@ std::string GenerateDumpPath(const uint32_t *device_id) {
if (dump_path.back() != '/') { if (dump_path.back() != '/') {
dump_path += "/"; dump_path += "/";
} }
if (device_id == nullptr) { uint32_t physical_device = device_id == nullptr ? 0 : ConvertPhysicalDeviceId(*device_id);
dump_path += (net_name + "/iteration_" + iterator); dump_path +=
} else { ("rank_" + std::to_string(physical_device) + "/" + net_name + "/" + std::to_string(graph_id) + "/" + iterator);
auto physical_device = ConvertPhysicalDeviceId(*device_id);
dump_path += (net_name + "/device_" + std::to_string(physical_device) + "/iteration_" + iterator);
}
return dump_path; return dump_path;
} }

View File

@ -27,7 +27,7 @@ namespace mindspore {
static const size_t PARAMETER_OUTPUT_INDEX = 0; static const size_t PARAMETER_OUTPUT_INDEX = 0;
static const size_t VALUE_NODE_OUTPUT_INDEX = 0; static const size_t VALUE_NODE_OUTPUT_INDEX = 0;
std::string GenerateDumpPath(const uint32_t *device_id = nullptr); std::string GenerateDumpPath(uint32_t graph_id, const uint32_t *device_id = nullptr);
void GetFileKernelName(NotNull<std::string *> kernel_name); void GetFileKernelName(NotNull<std::string *> kernel_name);

View File

@ -88,7 +88,13 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag); GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(node, j); auto type = AnfAlgo::GetOutputInferDataType(node, j);
auto device_type = AnfAlgo::GetOutputDeviceDataType(node, j); auto device_type = AnfAlgo::GetOutputDeviceDataType(node, j);
std::string file_path = dump_path + '/' + *kernel_name + '_' + "output_" + std::to_string(j); std::string op_type = AnfAlgo::GetCNodeName(node);
uint32_t task_id = 0;
uint32_t stream_id = 0;
uint64_t timestamp = GetTimeStamp();
std::string file_path = dump_path + '/' + op_type + '.' + *kernel_name + '.' + std::to_string(task_id) + '.' +
std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
std::to_string(j);
if (IsDeviceTargetGPU()) { if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), int_shapes, type, device_type, DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), int_shapes, type, device_type,
trans_flag, j, debugger); trans_flag, j, debugger);
@ -98,6 +104,12 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
} }
} }
// Returns the current std::chrono::system_clock time as the number of microseconds
// elapsed since that clock's epoch.
uint64_t E2eDump::GetTimeStamp() {
  const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
  return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
}
void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) { void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
@ -143,12 +155,16 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
tensor_name = node->fullname_with_scope(); tensor_name = node->fullname_with_scope();
slot = j; slot = j;
} }
ShapeVector int_shapes; ShapeVector int_shapes;
GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag); GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(input, index); auto type = AnfAlgo::GetOutputInferDataType(input, index);
auto device_type = AnfAlgo::GetOutputDeviceDataType(input, index); auto device_type = AnfAlgo::GetOutputDeviceDataType(input, index);
std::string file_path = dump_path + '/' + *kernel_name + '_' + "input_" + std::to_string(j); std::string op_type = AnfAlgo::GetCNodeName(node);
uint64_t timestamp = GetTimeStamp();
uint32_t task_id = 0;
uint32_t stream_id = 0;
std::string file_path = dump_path + '/' + op_type + '.' + *kernel_name + '.' + std::to_string(task_id) + '.' +
std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
if (IsDeviceTargetGPU()) { if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, tensor_name, NOT_NULL(addr), int_shapes, type, device_type, trans_flag, slot, DumpGPUMemToFile(file_path, tensor_name, NOT_NULL(addr), int_shapes, type, device_type, trans_flag, slot,
debugger); debugger);
@ -190,8 +206,7 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_
GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag); GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(anf_node, output_index); auto type = AnfAlgo::GetOutputInferDataType(anf_node, output_index);
auto device_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); auto device_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
std::string file_path = dump_path + '/' + dump_name + "_output_0";
std::string file_path = dump_path + '/' + dump_name + '_' + "output_0";
if (IsDeviceTargetGPU()) { if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, node_name, NOT_NULL(addr), int_shapes, type, device_type, trans_flag, 0, debugger); DumpGPUMemToFile(file_path, node_name, NOT_NULL(addr), int_shapes, type, device_type, trans_flag, 0, debugger);
} else { } else {
@ -235,7 +250,7 @@ bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t device_id, co
} }
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter(); MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
MS_LOG(INFO) << "Current graph id is " << graph_id; MS_LOG(INFO) << "Current graph id is " << graph_id;
std::string dump_path = GenerateDumpPath(&device_id); std::string dump_path = GenerateDumpPath(graph_id, &device_id);
DumpInput(graph, dump_path, debugger); DumpInput(graph, dump_path, debugger);
DumpOutput(graph, dump_path, debugger); DumpOutput(graph, dump_path, debugger);

View File

@ -55,6 +55,9 @@ class E2eDump {
static void DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path, static void DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
bool trans_flag, std::map<std::string, size_t> *const_map, const Debugger *debugger); bool trans_flag, std::map<std::string, size_t> *const_map, const Debugger *debugger);
inline static unsigned int starting_graph_id = INT32_MAX; inline static unsigned int starting_graph_id = INT32_MAX;
// Get time stamp since epoch in microseconds
static uint64_t GetTimeStamp();
}; };
} // namespace mindspore } // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_ #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_

View File

@ -164,19 +164,11 @@ class TensorLoader {
MS_LOG(ERROR) << "Dump file path is null!"; MS_LOG(ERROR) << "Dump file path is null!";
return false; return false;
} }
std::string shape = "shape";
if (host_shape.size()) {
for (auto &value : host_shape) {
shape = shape + '_' + std::to_string(value);
}
} else {
shape = shape + "_0";
}
std::string path = ""; std::string path = "";
if (trans_flag) { if (trans_flag) {
path = filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt; path = filepath + '.' + host_fmt;
} else { } else {
path = filepath + '_' + shape + '_' + TypeIdToType(device_type)->ToString() + '_' + addr_format; path = filepath + '.' + addr_format;
} }
MS_LOG(INFO) << "Dump path is " << path; MS_LOG(INFO) << "Dump path is " << path;

View File

@ -667,16 +667,8 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
MS_LOG(ERROR) << "Dump file path is null!"; MS_LOG(ERROR) << "Dump file path is null!";
return ret; return ret;
} }
std::string shape = "shape";
if (host_shape.size()) {
for (auto &value : host_shape) {
shape = shape + '_' + std::to_string(value);
}
} else {
shape = shape + "_0";
}
if (trans_flag) { if (trans_flag) {
std::string path = filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt; std::string path = filepath + '.' + host_fmt;
MS_LOG(INFO) << "E2E Dump path is " << path; MS_LOG(INFO) << "E2E Dump path is " << path;
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape); mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
size_t host_size = out_tensor->data().nbytes(); size_t host_size = out_tensor->data().nbytes();
@ -692,7 +684,7 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
if (ret_rt_memcpy != RT_ERROR_NONE) { if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
} }
std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_; std::string path = filepath + '.' + format_;
MS_LOG(INFO) << "E2E Dump path is " << path; MS_LOG(INFO) << "E2E Dump path is " << path;
ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_, host_shape_, type_id_); ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_, host_shape_, type_id_);
} }

View File

@ -137,14 +137,13 @@ void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_inf
if (dump_path.empty()) { if (dump_path.empty()) {
MS_LOG(EXCEPTION) << "Dump path invalid"; MS_LOG(EXCEPTION) << "Dump path invalid";
} }
uint32_t graph_id = kernel_graph_->graph_id();
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
dump_info->set_dump_path("/" + dump_path + "/device_" + std::to_string(device_id) + "/"); dump_info->set_dump_path("/" + dump_path + "/rank_" + std::to_string(device_id) + "/");
MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path; MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path;
dump_info->set_model_name(DumpJsonParser::GetInstance().net_name());
dump_info->set_model_name(DumpJsonParser::GetInstance().net_name() + "_graph_" +
std::to_string(kernel_graph_->graph_id()));
dump_info->set_dump_step(std::to_string(DumpJsonParser::GetInstance().iteration())); dump_info->set_dump_step(std::to_string(DumpJsonParser::GetInstance().iteration()));
dump_info->set_model_id(kernel_graph_->graph_id()); dump_info->set_model_id(graph_id);
dump_info->set_flag(kAicpuLoadFlag); dump_info->set_flag(kAicpuLoadFlag);
const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();

View File

@ -29,15 +29,7 @@ bool CPUDeviceAddress::DumpMemToFile(const std::string &filepath, const std::str
MS_LOG(ERROR) << "Dump file path is null!"; MS_LOG(ERROR) << "Dump file path is null!";
return ret; return ret;
} }
std::string shape = "shape"; std::string path = filepath + '.' + format_;
if (host_shape.empty()) {
shape += "_0";
} else {
for (auto &value : host_shape) {
shape += '_' + std::to_string(value);
}
}
std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_;
MS_LOG(DEBUG) << "E2E Dump path is " << path; MS_LOG(DEBUG) << "E2E Dump path is " << path;
ret = DumpJsonParser::DumpToFile(path, ptr_, size_, host_shape, host_type); ret = DumpJsonParser::DumpToFile(path, ptr_, size_, host_shape, host_type);
return ret; return ret;

View File

@ -1,16 +0,0 @@
{
"common_dump_settings": {
"dump_mode": 0,
"path": "/test",
"net_name": "Net",
"iteration": 0,
"input_output": 0,
"kernels": ["Default/Conv-op12"],
"support_device": [0,1,2,3,4,5,6,7],
"dump_format": 1
},
"e2e_dump_settings": {
"enable": true,
"trans_flag": false
}
}

View File

@ -65,7 +65,7 @@ def test_async_dump():
change_current_dump_json('async_dump.json', dump_path) change_current_dump_json('async_dump.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/async_dump.json" os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/async_dump.json"
device_id = context.get_context("device_id") device_id = context.get_context("device_id")
dump_file_path = dump_path + '/device_{}/Net_graph_0/0/0/'.format(device_id) dump_file_path = dump_path + '/rank_{}/Net/0/0/'.format(device_id)
if os.path.isdir(dump_path): if os.path.isdir(dump_path):
shutil.rmtree(dump_path) shutil.rmtree(dump_path)
add = Net() add = Net()
@ -74,92 +74,49 @@ def test_async_dump():
assert len(os.listdir(dump_file_path)) == 1 assert len(os.listdir(dump_file_path)) == 1
def run_e2e_dump_bin(): def run_e2e_dump():
if sys.platform != 'linux': if sys.platform != 'linux':
return return
pwd = os.getcwd() pwd = os.getcwd()
dump_path = pwd + '/e2e_dump' dump_path = pwd + '/e2e_dump'
change_current_dump_json('e2e_dump_bin.json', dump_path) change_current_dump_json('e2e_dump.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump_bin.json' os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump.json'
device_id = context.get_context("device_id") if context.get_context("device_target") == "Ascend":
dump_file_path = dump_path device_id = context.get_context("device_id")
else:
device_id = 0
dump_file_path = dump_path + '/rank_{}/Net/0/1/'.format(device_id)
if os.path.isdir(dump_path): if os.path.isdir(dump_path):
shutil.rmtree(dump_path) shutil.rmtree(dump_path)
add = Net() add = Net()
add(Tensor(x), Tensor(y)) add(Tensor(x), Tensor(y))
if context.get_context("device_target") == "Ascend": time.sleep(5)
dump_file_path += '/Net/device_{}/iteration_1/'.format(device_id) assert len(os.listdir(dump_file_path)) == 5
output_name = "Default--Add-op1_output_0_shape_2_3_Float32_DefaultFormat.bin" if context.get_context("device_target") == "CPU":
else: output_name = "Default--Add-op3_output_0.DefaultFormat.npy"
dump_file_path += '/Net/iteration_1/' output_path = dump_file_path + output_name
output_name = "Default--Add-op3_output_0_shape_2_3_Float32_DefaultFormat.bin" real_path = os.path.realpath(output_path)
output_path = dump_file_path + output_name output = np.load(real_path)
real_path = os.path.realpath(output_path) expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
output = np.fromfile(real_path, dtype=np.float32) assert output.dtype == expect.dtype
expect = np.array([8, 10, 12, 14, 16, 18], np.float32) assert np.array_equal(output, expect)
assert output.dtype == expect.dtype
assert np.array_equal(output, expect)
def run_e2e_dump_npy():
if sys.platform != 'linux':
return
pwd = os.getcwd()
dump_path = pwd + '/e2e_dump'
change_current_dump_json('e2e_dump_npy.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump_npy.json'
device_id = context.get_context("device_id")
dump_file_path = dump_path
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
add = Net()
add(Tensor(x), Tensor(y))
if context.get_context("device_target") == "Ascend":
dump_file_path += '/Net/device_{}/iteration_1/'.format(device_id)
output_name = "Default--Add-op1_output_0_shape_2_3_Float32_DefaultFormat.npy"
else:
dump_file_path += '/Net/iteration_1/'
output_name = "Default--Add-op3_output_0_shape_2_3_Float32_DefaultFormat.npy"
output_path = dump_file_path + output_name
real_path = os.path.realpath(output_path)
output = np.load(real_path)
expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
assert output.dtype == expect.dtype
assert np.array_equal(output, expect)
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_e2e_dump_bin(): def test_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
run_e2e_dump_bin() run_e2e_dump()
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_e2e_dump_npy():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
run_e2e_dump_npy()
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_x86_cpu @pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_cpu_e2e_dump_bin(): def test_cpu_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="CPU") context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
run_e2e_dump_bin() run_e2e_dump()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cpu_e2e_dump_npy():
context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target="CPU")
run_e2e_dump_npy()
class ReluReduceMeanDenseRelu(Cell): class ReluReduceMeanDenseRelu(Cell):
@ -216,7 +173,7 @@ def test_async_dump_net_multi_layer_mode1():
label = Tensor(np.zeros(shape=(32, 1000)).astype(np.float32)) label = Tensor(np.zeros(shape=(32, 1000)).astype(np.float32))
net_dict = train_network(inputs, label) net_dict = train_network(inputs, label)
dump_path = "/tmp/async_dump/{}/device_{}/test_graph_0/0/0/".format(test_name, device_id) dump_path = "/tmp/async_dump/{}/rank_{}/test/0/0/".format(test_name, device_id)
dump_file = os.listdir(dump_path) dump_file = os.listdir(dump_path)
dump_file_name = "" dump_file_name = ""
for file in dump_file: for file in dump_file:

View File

@ -31,7 +31,7 @@ class TestMemoryDumper : public UT::Common {
TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) { TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) {
int len = 1000; int len = 1000;
int data[1000] = {0}; int data[len] = {0};
for (uint32_t i = 0; i < len; i++) { for (uint32_t i = 0; i < len; i++) {
data[i] = i % 10; data[i] = i % 10;
} }
@ -41,15 +41,18 @@ TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) {
ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {10, 100}, kNumberTypeInt32); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {10, 100}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open((filename + ".bin").c_str(), O_RDONLY); int fd = open((filename + ".npy").c_str(), O_RDONLY);
int readBack[1000] = {0}; int header_size = 32;
int readSize = read(fd, readBack, len * sizeof(int)); int npylen = len + header_size;
int readBack[npylen] = {0};
int readSize = read(fd, readBack, npylen * sizeof(int));
(void)close(fd); (void)close(fd);
ASSERT_EQ(readSize, len * sizeof(int)); ASSERT_EQ(readSize, npylen * sizeof(int));
ret = true; ret = true;
for (uint32_t i = 0; i < len; i++) { for (uint32_t i = 0; i < len; i++) {
if (data[i] != readBack[i]) { // Skip the size of npy header.
if (data[i] != readBack[i+header_size]) {
ret = false; ret = false;
break; break;
} }
@ -63,7 +66,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) {
TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) { TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) {
int len = 1000; int len = 1000;
int data[1000] = {0}; int data[len] = {0};
for (uint32_t i = 0; i < len; i++) { for (uint32_t i = 0; i < len; i++) {
data[i] = i % 10; data[i] = i % 10;
} }
@ -73,15 +76,18 @@ TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) {
ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector{100, 10}, kNumberTypeInt32); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector{100, 10}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open((filename + ".bin").c_str(), O_RDONLY); int fd = open((filename + ".npy").c_str(), O_RDONLY);
int readBack[1000] = {0}; int header_size = 32;
int readSize = read(fd, readBack, len * sizeof(int)); int npylen = len + header_size;
int readBack[npylen] = {0};
int readSize = read(fd, readBack, npylen * sizeof(int));
(void)close(fd); (void)close(fd);
ASSERT_EQ(readSize, len * sizeof(int)); ASSERT_EQ(readSize, npylen * sizeof(int));
ret = true; ret = true;
for (uint32_t i = 0; i < len; i++) { for (uint32_t i = 0; i < len; i++) {
if (data[i] != readBack[i]) { // Skip the size of npy header.
if (data[i] != readBack[i+header_size]) {
ret = false; ret = false;
break; break;
} }
@ -105,7 +111,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileNotExistDir) {
int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {1,}, kNumberTypeInt32); int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {1,}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open((filename + ".bin").c_str(), O_RDONLY); int fd = open((filename + ".npy").c_str(), O_RDONLY);
int readBack[1000] = {0}; int readBack[1000] = {0};
int readSize = read(fd, readBack, len * sizeof(int)); int readSize = read(fd, readBack, len * sizeof(int));
(void)close(fd); (void)close(fd);
@ -113,7 +119,8 @@ TEST_F(TestMemoryDumper, test_DumpToFileNotExistDir) {
ret = true; ret = true;
for (uint32_t i = 0; i < len; i++) { for (uint32_t i = 0; i < len; i++) {
if (data[i] != readBack[i]) { // Skip the size of npy header.
if (data[i] != readBack[i+1]) {
ret = false; ret = false;
break; break;
} }