!12550 [MS][RDR] Record func_graph in the pipeline and record task debug info

From: @louie5
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2021-02-27 15:22:24 +08:00 committed by Gitee
commit 5524280075
8 changed files with 45 additions and 15 deletions

View File

@ -1017,7 +1017,7 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs)
for (auto &graph : all_graphs) { for (auto &graph : all_graphs) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
std::string tag = "graph_build"; std::string tag = "graph_build";
mindspore::RDR::RecordAnfGraph(SUBMODULE_ID, tag, graph, true, ".ir;.pb", graph->graph_id()); mindspore::RDR::RecordAnfGraph(SUBMODULE_ID, tag, graph, true, ".ir;.pb");
if (save_graphs) { if (save_graphs) {
std::string file_name = "graph_build_" + std::to_string(graph->graph_id()) + ".ir"; std::string file_name = "graph_build_" + std::to_string(graph->graph_id()) + ".ir";
DumpIR(file_name, graph, true, kWholeStack); DumpIR(file_name, graph, true, kWholeStack);

View File

@ -86,6 +86,9 @@
#include "ps/util.h" #include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h" #include "ps/ps_cache/ps_cache_manager.h"
#endif #endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif
namespace mindspore { namespace mindspore {
namespace session { namespace session {
@ -408,6 +411,10 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
} }
// Build kernel if node is cnode // Build kernel if node is cnode
BuildKernel(graph); BuildKernel(graph);
#ifdef ENABLE_DUMP_IR
std::string tag = "graph_build";
mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, tag, graph, false, ".ir,.pb");
#endif
// Get summary nodes. // Get summary nodes.
SetSummaryNodes(graph.get()); SetSummaryNodes(graph.get());
// Dump .pb graph after graph optimization // Dump .pb graph after graph optimization

View File

@ -16,6 +16,7 @@
#include "debug/rdr/graph_recorder.h" #include "debug/rdr/graph_recorder.h"
#include "mindspore/core/base/base.h" #include "mindspore/core/base/base.h"
#include "mindspore/core/ir/func_graph.h" #include "mindspore/core/ir/func_graph.h"
#include "backend/session/kernel_graph.h"
#include "mindspore/core/utils/log_adapter.h" #include "mindspore/core/utils/log_adapter.h"
#include "debug/anf_ir_dump.h" #include "debug/anf_ir_dump.h"
#include "debug/anf_ir_utils.h" #include "debug/anf_ir_utils.h"
@ -57,16 +58,17 @@ void DumpIRProto(const std::string &, const FuncGraphPtr &) {
void GraphRecorder::Export() { void GraphRecorder::Export() {
bool save_flag = false; bool save_flag = false;
int graph_id = -1;
auto tmp_realpath = GetFileRealPath(); if (func_graph_->isa<session::KernelGraph>()) {
auto kernel_graph = func_graph_->cast<KernelGraphPtr>();
graph_id = kernel_graph->graph_id();
}
std::string suffix = graph_id >= 0 ? std::to_string(graph_id) : "";
auto tmp_realpath = GetFileRealPath(suffix);
if (!tmp_realpath.has_value()) { if (!tmp_realpath.has_value()) {
return; return;
} }
std::string realpath = tmp_realpath.value(); std::string realpath = tmp_realpath.value();
if (graph_id_ >= 0) {
realpath += "_" + std::to_string(graph_id_);
}
if (graph_type_.find(".dat") != std::string::npos) { if (graph_type_.find(".dat") != std::string::npos) {
save_flag = true; save_flag = true;
AnfExporter exporter(""); AnfExporter exporter("");
@ -81,7 +83,7 @@ void GraphRecorder::Export() {
if (full_name_) { if (full_name_) {
DumpIRForRDR(realpath_ir, func_graph_, true, kTopStack); DumpIRForRDR(realpath_ir, func_graph_, true, kTopStack);
} else { } else {
DumpIRForRDR(realpath_ir, func_graph_, false, kOff); DumpIRForRDR(realpath_ir, func_graph_, false, kWholeStack);
} }
} }
if (graph_type_.find(".pb") != std::string::npos) { if (graph_type_.find(".pb") != std::string::npos) {

View File

@ -28,8 +28,8 @@ class GraphRecorder : public BaseRecorder {
public: public:
GraphRecorder() : BaseRecorder(), func_graph_(nullptr), graph_type_("") {} GraphRecorder() : BaseRecorder(), func_graph_(nullptr), graph_type_("") {}
GraphRecorder(const std::string &module, const std::string &tag, const FuncGraphPtr &graph, GraphRecorder(const std::string &module, const std::string &tag, const FuncGraphPtr &graph,
const std::string &file_type, int graph_id) const std::string &file_type)
: BaseRecorder(module, tag), func_graph_(graph), graph_type_(file_type), graph_id_(graph_id) {} : BaseRecorder(module, tag), func_graph_(graph), graph_type_(file_type) {}
~GraphRecorder() {} ~GraphRecorder() {}
void SetModule(const std::string &module) { module_ = module; } void SetModule(const std::string &module) { module_ = module; }
void SetGraphType(const std::string &file_type) { graph_type_ = file_type; } void SetGraphType(const std::string &file_type) { graph_type_ = file_type; }
@ -41,7 +41,6 @@ class GraphRecorder : public BaseRecorder {
private: private:
FuncGraphPtr func_graph_; FuncGraphPtr func_graph_;
std::string graph_type_; std::string graph_type_;
int graph_id_;
bool full_name_{false}; bool full_name_{false};
}; };
using GraphRecorderPtr = std::shared_ptr<GraphRecorder>; using GraphRecorderPtr = std::shared_ptr<GraphRecorder>;

View File

@ -75,9 +75,9 @@ bool RecordTaskDebugInfo(SubModuleId module, const std::string &tag,
#ifdef __linux__ #ifdef __linux__
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name, bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
const std::string &file_type, int graph_id) { const std::string &file_type) {
std::string submodule_name = std::string(GetSubModuleName(module)); std::string submodule_name = std::string(GetSubModuleName(module));
GraphRecorderPtr graph_recorder = std::make_shared<GraphRecorder>(submodule_name, tag, graph, file_type, graph_id); GraphRecorderPtr graph_recorder = std::make_shared<GraphRecorder>(submodule_name, tag, graph, file_type);
graph_recorder->SetDumpFlag(full_name); graph_recorder->SetDumpFlag(full_name);
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(graph_recorder)); bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(graph_recorder));
return ans; return ans;
@ -115,7 +115,7 @@ void ClearAll() { mindspore::RecorderManager::Instance().ClearAll(); }
#else #else
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name, bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
const std::string &file_type, int graph_id) { const std::string &file_type) {
static bool already_printed = false; static bool already_printed = false;
std::string submodule_name = std::string(GetSubModuleName(module)); std::string submodule_name = std::string(GetSubModuleName(module));
if (already_printed) { if (already_printed) {

View File

@ -37,7 +37,7 @@ using TaskDebugInfoPtr = std::shared_ptr<device::ascend::tasksink::TaskDebugInfo
#endif // ENABLE_D #endif // ENABLE_D
namespace RDR { namespace RDR {
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name, bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
const std::string &file_type = ".ir;.pb;.dat", int graph_id = -1); const std::string &file_type = ".ir;.pb;.dat");
bool RecordGraphExecOrder(const SubModuleId module, const std::string &tag, bool RecordGraphExecOrder(const SubModuleId module, const std::string &tag,
const std::vector<CNodePtr> &final_exec_order, int graph_id = 0); const std::vector<CNodePtr> &final_exec_order, int graph_id = 0);
bool RecordString(SubModuleId module, const std::string &tag, const std::string &data, bool RecordString(SubModuleId module, const std::string &tag, const std::string &data,

View File

@ -724,6 +724,21 @@ void Pipeline::Run() {
if (!result) { if (!result) {
MS_LOG(EXCEPTION) << "Pipeline running to end, failed in step:" << action.first; MS_LOG(EXCEPTION) << "Pipeline running to end, failed in step:" << action.first;
} }
#ifdef ENABLE_DUMP_IR
MS_LOG(INFO) << "Clone func_graph.";
std::string tag = GetBaseNameForIR(i, action.first);
if (resource_->func_graph() != nullptr) {
auto graph_clone = BasicClone(resource_->func_graph());
if (graph_clone != nullptr) {
mindspore::RDR::RecordAnfGraph(SUBMODULE_ID, tag, graph_clone, false, ".ir");
} else {
MS_LOG(WARNING) << "Clone func_graph failed in pipeline, no func_graph recording in RDR.";
}
} else {
MS_LOG(WARNING) << "Resource's func_graph is empty in pipeline, no func_graph recording in RDR";
}
MS_LOG(INFO) << "Clone func_graph end.";
#endif
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG) && resource_->func_graph() != nullptr) { if (MsContext::GetInstance()->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG) && resource_->func_graph() != nullptr) {
auto graph = resource_->func_graph(); auto graph = resource_->func_graph();
if (graph != nullptr) { if (graph != nullptr) {

View File

@ -21,6 +21,9 @@
#include "utils/ms_utils.h" #include "utils/ms_utils.h"
#include "runtime/device/ascend/profiling/profiling_utils.h" #include "runtime/device/ascend/profiling/profiling_utils.h"
#include "runtime/device/ascend/profiling/profiling_manager.h" #include "runtime/device/ascend/profiling/profiling_manager.h"
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif
namespace mindspore { namespace mindspore {
namespace device { namespace device {
@ -36,6 +39,10 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
return false; return false;
} }
MS_LOG(INFO) << "GenTasks end..."; MS_LOG(INFO) << "GenTasks end...";
#ifdef ENABLE_DUMP_IR
string task_info_tag = "task_info_graph";
mindspore::RDR::RecordTaskDebugInfo(SUBMODULE_ID, task_info_tag, task_debug_info_list_, graph_id);
#endif
auto context_ptr = MsContext::GetInstance(); auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(context_ptr);
bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG); bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);