!49787 add pynative mode operator overflow check for dump

Merge pull request !49787 from maoyaomin/mym_debugger_kernel_dumper
This commit is contained in:
i-robot 2023-03-08 06:42:11 +00:00 committed by Gitee
commit 9236c3b4e9
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 55 additions and 62 deletions

View File

@ -88,9 +88,6 @@ bool DumpJsonParser::IsDumpEnabled() {
auto context = MsContext::GetInstance(); auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context); MS_EXCEPTION_IF_NULL(context);
if (context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
MS_LOG(EXCEPTION) << "Dump is disabled in PyNative mode. Please set mode to GRAPH_MODE in context.";
}
return true; return true;
} }

View File

@ -555,12 +555,8 @@ void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
return; return;
} }
std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/"; std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
std::string graph_str; std::string graph_str =
if (Debugger::GetInstance()->GetAscendKernelByKernelFlag()) { IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
graph_str = std::to_string(graph->graph_id());
} else {
graph_str = IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
}
std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv"; std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
auto real_path = Common::CreatePrefixPath(file_name_to_check); auto real_path = Common::CreatePrefixPath(file_name_to_check);
if (!real_path.has_value()) { if (!real_path.has_value()) {

View File

@ -513,7 +513,12 @@ void Debugger::DumpParamsAndConstAndHistory() {
for (auto kernel_graph = executed_graph_ptr_set_.cbegin(); kernel_graph != executed_graph_ptr_set_.cend(); for (auto kernel_graph = executed_graph_ptr_set_.cbegin(); kernel_graph != executed_graph_ptr_set_.cend();
++kernel_graph) { ++kernel_graph) {
// Dump graph run history for each graph. // Dump graph run history for each graph.
E2eDump::DumpRunIter(*kernel_graph, GetRankID()); if (Debugger::GetInstance()->GetAscendKernelByKernelFlag() &&
(*kernel_graph)->graph_id() != (*kernel_graph)->root_graph_id()) {
MS_LOG(INFO) << "current graph graph_id = " << (*kernel_graph)->graph_id() << " is not root graph.";
} else {
E2eDump::DumpRunIter(*kernel_graph, GetRankID());
}
} }
if (!cur_root_graph_checked) { if (!cur_root_graph_checked) {
visited_root_graph_ids_.push_back(cur_root_graph_id_); visited_root_graph_ids_.push_back(cur_root_graph_id_);

View File

@ -215,7 +215,7 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph);
auto graph_id = kernel_graph->graph_id(); auto graph_id = kernel_graph->graph_id();
// for GPU, nodes are dumped in graph_id directory. // for GPU, nodes are dumped in graph_id directory.
if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) { if (IsDeviceTargetGPU()) {
debugger->DumpSingleNode(cnode, graph_id); debugger->DumpSingleNode(cnode, graph_id);
} else { } else {
// for Ascend, nodes are dumped in root_graph_id directory. // for Ascend, nodes are dumped in root_graph_id directory.

View File

@ -16,6 +16,7 @@
#include "plugin/device/ascend/hal/device/dump/kernel_dumper.h" #include "plugin/device/ascend/hal/device/dump/kernel_dumper.h"
#include <algorithm> #include <algorithm>
#include <utility>
#ifndef ENABLE_SECURITY #ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h" #include "debug/data_dump/dump_json_parser.h"
#endif #endif
@ -44,7 +45,7 @@ static constexpr uint64_t kOpDebugMemorySize = 2048;
const size_t kDebugP2pSize = 8UL; const size_t kDebugP2pSize = 8UL;
} // namespace } // namespace
DUMPER_REG(kAscendDevice, KernelDumper); DUMPER_REG(kAscendDevice, KernelDumper);
std::mutex KernelDumper::debug_register_mutex_; std::mutex KernelDumper::dumper_mutex_;
std::map<rtStream_t, std::unique_ptr<OpDebugTask>> KernelDumper::op_debug_tasks; std::map<rtStream_t, std::unique_ptr<OpDebugTask>> KernelDumper::op_debug_tasks;
std::map<uint32_t, bool> KernelDumper::is_data_map; std::map<uint32_t, bool> KernelDumper::is_data_map;
std::map<std::string, std::string> KernelDumper::stream_task_graphs; std::map<std::string, std::string> KernelDumper::stream_task_graphs;
@ -80,9 +81,17 @@ KernelDumper::~KernelDumper() {
} }
void KernelDumper::OpLoadDumpInfo(const CNodePtr &kernel) { void KernelDumper::OpLoadDumpInfo(const CNodePtr &kernel) {
std::lock_guard<std::mutex> lock(debug_register_mutex_); auto stream = AscendStreamMng::GetInstance().GetStream(AnfAlgo::GetStreamId(kernel));
aicpu::dump::OpMappingInfo dump_info; if (stream == nullptr) {
SetOpMappingInfo(NOT_NULL(&dump_info), kernel); stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex);
}
if (DumpJsonParser::GetInstance().op_debug_mode() > 0) {
auto rt_ret = rtStreamSynchronize(stream);
dumper_mutex_.unlock();
if (rt_ret != ACL_ERROR_RT_AICORE_OVER_FLOW) {
return;
}
}
if (!KernelNeedDump(kernel)) { if (!KernelNeedDump(kernel)) {
return; return;
@ -91,10 +100,9 @@ void KernelDumper::OpLoadDumpInfo(const CNodePtr &kernel) {
MS_LOG(WARNING) << "[KernelDumper] kernel [" << kernel->UniqueName() << "] is a non-task node, skip dump."; MS_LOG(WARNING) << "[KernelDumper] kernel [" << kernel->UniqueName() << "] is a non-task node, skip dump.";
return; return;
} }
auto stream = AscendStreamMng::GetInstance().GetStream(AnfAlgo::GetStreamId(kernel)); aicpu::dump::OpMappingInfo dump_info;
if (stream == nullptr) { SetOpMappingInfo(NOT_NULL(&dump_info), kernel);
stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex);
}
DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope()); DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope());
aicpu::dump::Task task; aicpu::dump::Task task;
ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
@ -105,7 +113,7 @@ void KernelDumper::OpLoadDumpInfo(const CNodePtr &kernel) {
graph_id_ = AnfAlgo::GetGraphId(kernel.get()); graph_id_ = AnfAlgo::GetGraphId(kernel.get());
std::string stream_task_id = std::to_string(stream_id_) + std::to_string(task_id_); std::string stream_task_id = std::to_string(stream_id_) + std::to_string(task_id_);
KernelDumper::stream_task_graphs.emplace(stream_task_id, kernel->fullname_with_scope()); KernelDumper::stream_task_graphs.emplace(stream_task_id, kernel->fullname_with_scope());
MS_LOG(INFO) << "[DataDump] Get runtime info graph_id:" << graph_id_ << " stream_id:" << stream_id_ MS_LOG(INFO) << "[KernelDumper] Get runtime info graph_id:" << graph_id_ << " stream_id:" << stream_id_
<< " task_id:" << task_id_ << " fullname:" << kernel->fullname_with_scope(); << " task_id:" << task_id_ << " fullname:" << kernel->fullname_with_scope();
} }
@ -114,12 +122,12 @@ void KernelDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_i
dump_info->set_dump_path(dump_path_); dump_info->set_dump_path(dump_path_);
dump_info->set_model_name(net_name_); dump_info->set_model_name(net_name_);
dump_info->set_dump_step(iteration_); dump_info->set_dump_step(iteration_);
auto graph_id = AnfAlgo::GetGraphId(kernel.get());
dump_info->set_model_id(graph_id);
dump_info->set_flag(kAicpuLoadFlag);
FuncGraphPtr f_graph = kernel->func_graph(); FuncGraphPtr f_graph = kernel->func_graph();
auto kernel_graph_ = f_graph->cast<KernelGraphPtr>(); auto kernel_graph_ = f_graph->cast<KernelGraphPtr>();
auto root_graph_id = kernel_graph_->root_graph_id();
dump_info->set_model_id(root_graph_id);
dump_info->set_flag(kAicpuLoadFlag);
auto input_ctrl_tensors = kernel_graph_->device_loop_control_tensors(); auto input_ctrl_tensors = kernel_graph_->device_loop_control_tensors();
if (input_ctrl_tensors.size() > 0) { if (input_ctrl_tensors.size() > 0) {
auto kCurLoopCountName = "current_loop_count"; auto kCurLoopCountName = "current_loop_count";
@ -225,7 +233,6 @@ void KernelDumper::ExecutorDumpOp(const aicpu::dump::OpMappingInfo &op_mapping_i
MS_LOG(ERROR) << "[KernelDumper] Call rt api rtCpuKernelLaunch Failed, rt_ret = " << rt_ret; MS_LOG(ERROR) << "[KernelDumper] Call rt api rtCpuKernelLaunch Failed, rt_ret = " << rt_ret;
return; return;
} }
rtStreamSynchronize(stream_);
} }
void KernelDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) { void KernelDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) {
@ -375,7 +382,6 @@ void KernelDumper::MallocP2PDebugMem(const void *const op_debug_addr) {
} }
void KernelDumper::OpDebugRegisterForStream(const CNodePtr &kernel) { void KernelDumper::OpDebugRegisterForStream(const CNodePtr &kernel) {
std::lock_guard<std::mutex> lock(register_mutex_);
uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode(); uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
auto iter = kOverflowModeStr.find(op_debug_mode); auto iter = kOverflowModeStr.find(op_debug_mode);
if (iter == kOverflowModeStr.end()) { if (iter == kOverflowModeStr.end()) {
@ -384,6 +390,7 @@ void KernelDumper::OpDebugRegisterForStream(const CNodePtr &kernel) {
if (op_debug_mode == kNoOverflow) { if (op_debug_mode == kNoOverflow) {
return; return;
} }
dumper_mutex_.lock();
auto stream = AscendStreamMng::GetInstance().GetStream(AnfAlgo::GetStreamId(kernel)); auto stream = AscendStreamMng::GetInstance().GetStream(AnfAlgo::GetStreamId(kernel));
if (stream == nullptr) { if (stream == nullptr) {
stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex); stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex);
@ -391,6 +398,8 @@ void KernelDumper::OpDebugRegisterForStream(const CNodePtr &kernel) {
if (KernelDumper::op_debug_tasks.find(stream) != KernelDumper::op_debug_tasks.end()) { if (KernelDumper::op_debug_tasks.find(stream) != KernelDumper::op_debug_tasks.end()) {
return; return;
} else { } else {
std::string stream_id = std::to_string(AnfAlgo::GetStreamId(kernel));
KernelDumper::stream_task_graphs.emplace(stream_id, "KernelDumper");
auto graph_id = AnfAlgo::GetGraphId(kernel.get()); auto graph_id = AnfAlgo::GetGraphId(kernel.get());
if (KernelDumper::is_data_map.find(graph_id) != KernelDumper::is_data_map.end()) { if (KernelDumper::is_data_map.find(graph_id) != KernelDumper::is_data_map.end()) {
return; return;

View File

@ -72,6 +72,7 @@ class KernelDumper : public debug::OverflowDumper {
static std::map<rtStream_t, std::unique_ptr<OpDebugTask>> op_debug_tasks; static std::map<rtStream_t, std::unique_ptr<OpDebugTask>> op_debug_tasks;
static std::map<uint32_t, bool> is_data_map; static std::map<uint32_t, bool> is_data_map;
static std::map<std::string, std::string> stream_task_graphs; static std::map<std::string, std::string> stream_task_graphs;
static std::mutex dumper_mutex_;
string dump_path_; string dump_path_;
string net_name_; string net_name_;
@ -79,7 +80,6 @@ class KernelDumper : public debug::OverflowDumper {
private: private:
// Support multi-thread. // Support multi-thread.
static std::mutex debug_register_mutex_;
bool load_flag_; bool load_flag_;
uint32_t graph_id_; uint32_t graph_id_;
uint32_t task_id_{0U}; uint32_t task_id_{0U};
@ -91,7 +91,6 @@ class KernelDumper : public debug::OverflowDumper {
void *dev_load_mem_ = nullptr; void *dev_load_mem_ = nullptr;
void *proto_dev_mem_ = nullptr; void *proto_dev_mem_ = nullptr;
void *proto_size_dev_mem_ = nullptr; void *proto_size_dev_mem_ = nullptr;
std::mutex register_mutex_;
std::string overflow_dump_filename = "debug_files"; std::string overflow_dump_filename = "debug_files";
void *p2p_debug_addr_ = nullptr; void *p2p_debug_addr_ = nullptr;
void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info, const CNodePtr &kernel); void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info, const CNodePtr &kernel);

View File

@ -43,6 +43,7 @@
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h" #include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h" #include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
#include "plugin/device/ascend/hal/device/dump/ascend_dump.h" #include "plugin/device/ascend/hal/device/dump/ascend_dump.h"
#include "debug/data_dump/overflow_dumper.h"
using Adx::AdxRegDumpProcessCallBack; using Adx::AdxRegDumpProcessCallBack;
using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingManager;
@ -370,7 +371,13 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex); stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex);
} }
MS_EXCEPTION_IF_NULL(stream); MS_EXCEPTION_IF_NULL(stream);
#ifdef ENABLE_DEBUGGER
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
auto register_dumper = debug::OverflowDumper::GetInstance(kAscendDevice);
register_dumper->Init();
register_dumper->OpDebugRegisterForStream(kernel);
}
#endif
bool is_dynamic_shape = common::AnfAlgo::IsDynamicShape(kernel); bool is_dynamic_shape = common::AnfAlgo::IsDynamicShape(kernel);
if (!is_dynamic_shape || !(common::AnfAlgo::GetBooleanAttr(kernel, kAttrMSFunction))) { if (!is_dynamic_shape || !(common::AnfAlgo::GetBooleanAttr(kernel, kAttrMSFunction))) {
auto iter = node_atomics_persistent_cache_.find(kernel); auto iter = node_atomics_persistent_cache_.find(kernel);
@ -399,6 +406,12 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
return false; return false;
} }
} }
#ifdef ENABLE_DEBUGGER
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
auto kernel_dumper = debug::OverflowDumper::GetInstance(kAscendDevice);
kernel_dumper->OpLoadDumpInfo(kernel);
}
#endif
#ifndef ENABLE_SECURITY #ifndef ENABLE_SECURITY
auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance(); auto ascend_instance = profiler::ascend::AscendProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(ascend_instance); MS_EXCEPTION_IF_NULL(ascend_instance);

View File

@ -24,7 +24,6 @@
#ifndef ENABLE_SECURITY #ifndef ENABLE_SECURITY
#include "debug/data_dump/cpu_e2e_dump.h" #include "debug/data_dump/cpu_e2e_dump.h"
#include "debug/data_dump/e2e_dump.h" #include "debug/data_dump/e2e_dump.h"
#include "debug/data_dump/overflow_dumper.h"
#include "utils/ms_context.h" #include "utils/ms_context.h"
#endif #endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
@ -80,18 +79,9 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
#endif #endif
} else if (device_context->GetDeviceType() == device::DeviceType::kAscend) { } else if (device_context->GetDeviceType() == device::DeviceType::kAscend) {
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#ifndef ENABLE_SECURITY
auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
graph_id_sets_.insert(kernel_graph->graph_id());
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
auto kernel_dumper = debug::OverflowDumper::GetInstance(kAscendDevice);
kernel_dumper->Init();
kernel_dumper->OpDebugRegisterForStream(cnode);
kernel_dumper->OpLoadDumpInfo(cnode);
}
#endif
auto debugger = Debugger::GetInstance(); auto debugger = Debugger::GetInstance();
if (debugger != nullptr) { if (debugger != nullptr) {
auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
debugger->InsertExecutedGraph(kernel_graph); debugger->InsertExecutedGraph(kernel_graph);
debugger->SetAscendKernelByKernelFlag(true); debugger->SetAscendKernelByKernelFlag(true);
bool read_data = CheckReadData(cnode); bool read_data = CheckReadData(cnode);
@ -186,7 +176,7 @@ void DebugActor::DebugOnStepBegin(const std::vector<KernelGraphPtr> &graphs,
return kernel->fullname_with_scope().find("InitDataSetQueue") != std::string::npos; return kernel->fullname_with_scope().find("InitDataSetQueue") != std::string::npos;
}); });
} }
if (!is_data_map_ && !graphs[0]->is_graph_run_mode()) { if (!is_data_map_) {
auto kCurLoopCountName = "current_loop_count"; auto kCurLoopCountName = "current_loop_count";
for (size_t i = 0; i < graphs.size(); i++) { for (size_t i = 0; i < graphs.size(); i++) {
const auto &graph_ = graphs[i]; const auto &graph_ = graphs[i];
@ -200,7 +190,7 @@ void DebugActor::DebugOnStepBegin(const std::vector<KernelGraphPtr> &graphs,
} }
auto tensor = device_loop_control_tensors.at(kCurLoopCountName); auto tensor = device_loop_control_tensors.at(kCurLoopCountName);
MS_EXCEPTION_IF_NULL(tensor); MS_EXCEPTION_IF_NULL(tensor);
auto *cur_val = static_cast<int32_t *>(tensor->data_c()); auto *cur_val = static_cast<int64_t *>(tensor->data_c());
MS_EXCEPTION_IF_NULL(cur_val); MS_EXCEPTION_IF_NULL(cur_val);
*cur_val = current_step; *cur_val = current_step;
tensor->set_sync_status(kNeedSyncHostToDevice); tensor->set_sync_status(kNeedSyncHostToDevice);
@ -236,21 +226,6 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const
} }
#endif #endif
#ifdef ENABLE_DEBUGGER
#ifndef ENABLE_SECURITY
if (DumpJsonParser::GetInstance().async_dump_enabled() && DumpJsonParser::GetInstance().op_debug_mode() > 0 &&
Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
uint32_t rank_id = Debugger::GetRankID();
std::set<uint32_t>::iterator graph_id_iter;
for (graph_id_iter = graph_id_sets_.begin(); graph_id_iter != graph_id_sets_.end(); ++graph_id_iter) {
auto graph_id = *graph_id_iter;
DeleteNoOverflowFile(rank_id, graph_id);
}
graph_id_sets_.clear();
}
#endif
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance(); auto debugger = Debugger::GetInstance();
if (debugger != nullptr) { if (debugger != nullptr) {

View File

@ -55,7 +55,7 @@ class DebugActor : public ActorBase {
// The debug on step end. // The debug on step end.
void DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid); void DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid);
static inline uint32_t current_step{0}; static inline uint64_t current_step{0};
private: private:
// class members // class members
@ -63,7 +63,6 @@ class DebugActor : public ActorBase {
// Support multi-thread. // Support multi-thread.
std::mutex debug_mutex_; std::mutex debug_mutex_;
std::set<uint32_t> graph_id_sets_;
}; };
} // namespace runtime } // namespace runtime

View File

@ -101,8 +101,8 @@ def run_multi_root_graph_dump(device, dump_mode, test_name):
time.sleep(2) time.sleep(2)
execution_order_path = os.path.join(dump_path, 'rank_0', 'execution_order') execution_order_path = os.path.join(dump_path, 'rank_0', 'execution_order')
# Multi root graph script: check dump data dir and graph history files and see if iteration number is matched. # Multi root graph script: check dump data dir and graph history files and see if iteration number is matched.
if device == "GPU" or os.environ.get('GRAPH_OP_RUN') == "1": if device == "GPU":
# In GPU or KernelByKernel, we have 4 kernel graphs folders under rank_0 dir. # In GPU, we have 4 kernel graphs folders under rank_0 dir.
# In graph history dir, there are 2 files for each graph (ms_execution_order and ms_global_execution_order). # In graph history dir, there are 2 files for each graph (ms_execution_order and ms_global_execution_order).
assert len(os.listdir(dump_file_path)) == 4 assert len(os.listdir(dump_file_path)) == 4
assert len(os.listdir(execution_order_path)) == 8 assert len(os.listdir(execution_order_path)) == 8
@ -111,7 +111,7 @@ def run_multi_root_graph_dump(device, dump_mode, test_name):
check_graph_structure(dump_file_path, execution_order_path, '2', ['1', '3']) check_graph_structure(dump_file_path, execution_order_path, '2', ['1', '3'])
check_graph_structure(dump_file_path, execution_order_path, '3', ['5']) check_graph_structure(dump_file_path, execution_order_path, '3', ['5'])
else: else:
# In Ascend Super Kernel, we have 2 root graphs folders under rank_0 dir. # In Ascend, we have 2 root graphs folders under rank_0 dir.
# In graph history dir, there are 4 ms_execution_order files and 2 ms_global_execution_order files. # In graph history dir, there are 4 ms_execution_order files and 2 ms_global_execution_order files.
# Each graph should have 3 iterations. Each graph was executed once per epoch. # Each graph should have 3 iterations. Each graph was executed once per epoch.
# Graph 0 was executed in even iterations, graph 1 was executed in odd iterations. # Graph 0 was executed in even iterations, graph 1 was executed in odd iterations.