!12500 [MS][RDR] support recording task debug info

From: @louie5
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2021-02-24 20:02:57 +08:00 committed by Gitee
commit 013f26f2e0
12 changed files with 187 additions and 12 deletions

View File

@ -984,9 +984,6 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
#endif
MS_LOG(EXCEPTION) << "run task error!";
}
#ifdef ENABLE_DUMP_IR
mindspore::RDR::ClearAll();
#endif
MS_LOG(INFO) << "Finish!";
}

View File

@ -9,6 +9,9 @@ set(_DEBUG_SRC_LIST
)
if(ENABLE_DUMP_IR)
if(ENABLE_D)
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/rdr/task_debug_info_recorder.cc")
endif()
list(APPEND _DEBUG_SRC_LIST
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/base_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_exec_order_recorder.cc"
@ -29,7 +32,6 @@ if(ENABLE_DEBUGGER)
"${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc"
)
endif()
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
list(APPEND _DEBUG_SRC_LIST "data_dump/dump_json_parser.cc")

View File

@ -15,7 +15,6 @@
*/
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_
#define MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_
#include <memory>
#include <string>
#include <sstream>
@ -75,7 +74,6 @@ class BaseRecorder {
std::string filename_;
std::string timestamp_; // year,month,day,hour,minute,second
};
using BaseRecorderPtr = std::shared_ptr<BaseRecorder>;
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_

View File

@ -86,7 +86,6 @@ void GraphRecorder::Export() {
}
if (graph_type_.find(".pb") != std::string::npos) {
save_flag = true;
protobuf::DumpIRProto(realpath + ".pb", func_graph_); // save *.pb file
}
if (!save_flag) {

View File

@ -15,7 +15,6 @@
*/
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_GRAPH_RECORDER_H_
#define MINDSPORE_CCSRC_DEBUG_RDR_GRAPH_RECORDER_H_
#include <vector>
#include <string>
#include <memory>

View File

@ -22,7 +22,10 @@
#include "debug/rdr/stream_exec_order_recorder.h"
#include "mindspore/core/ir/func_graph.h"
#include "mindspore/core/ir/anf.h"
#ifdef ENABLE_D
#include "runtime/device/ascend/tasksink/task_generator.h"
#include "debug/rdr/task_debug_info_recorder.h"
#endif // ENABLE_D
namespace mindspore {
namespace {
static const char *GetSubModuleName(SubModuleId module_id) {
@ -59,6 +62,17 @@ static const char *GetSubModuleName(SubModuleId module_id) {
}
} // namespace
namespace RDR {
#ifdef ENABLE_D
// Wraps the given task debug info list in a TaskDebugInfoRecorder and hands it to the
// RecorderManager singleton.
// module: sub-module id, translated to its name through GetSubModuleName.
// tag: tag forwarded to the recorder.
// task_debug_info_list: entries copied into the recorder.
// graph_id: graph identifier stored in the recorder.
// Returns whatever RecorderManager::RecordObject reports.
bool RecordTaskDebugInfo(SubModuleId module, const std::string &tag,
                         const std::vector<TaskDebugInfoPtr> &task_debug_info_list, int graph_id) {
  const std::string module_name(GetSubModuleName(module));
  auto recorder = std::make_shared<TaskDebugInfoRecorder>(module_name, tag, task_debug_info_list, graph_id);
  return mindspore::RecorderManager::Instance().RecordObject(std::move(recorder));
}
#endif // ENABLE_D
#ifdef __linux__
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
const std::string &file_type, int graph_id) {
@ -152,6 +166,15 @@ void TriggerAll() {
already_printed = true;
MS_LOG(WARNING) << "The RDR presently only support linux os.";
}
// Stub variant of ClearAll (compiled before the closing "#endif  // __linux__"): it clears
// nothing and only emits a warning, at most once per process.
void ClearAll() {
  static bool warned_once = false;
  if (!warned_once) {
    // Flip the flag before logging so later calls return silently.
    warned_once = true;
    MS_LOG(WARNING) << "The RDR presently only support linux os.";
  }
}
#endif // __linux__
} // namespace RDR
} // namespace mindspore

View File

@ -15,7 +15,6 @@
*/
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_RUNNING_DATA_RECORDER_H_
#define MINDSPORE_CCSRC_DEBUG_RDR_RUNNING_DATA_RECORDER_H_
#include <vector>
#include <string>
#include <memory>
@ -26,6 +25,16 @@ class FuncGraph;
class CNode;
using FuncGraphPtr = std::shared_ptr<FuncGraph>;
using CNodePtr = std::shared_ptr<CNode>;
// Forward declaration of device::ascend::tasksink::TaskDebugInfo, only visible when ENABLE_D
// is defined. It lets this header name TaskDebugInfoPtr with an incomplete type
// (presumably to avoid pulling the Ascend task generator header in here - confirm).
#ifdef ENABLE_D
namespace device {
namespace ascend {
namespace tasksink {
class TaskDebugInfo;
} // namespace tasksink
} // namespace ascend
} // namespace device
// Shared-ownership handle to one task debug info entry.
using TaskDebugInfoPtr = std::shared_ptr<device::ascend::tasksink::TaskDebugInfo>;
#endif // ENABLE_D
namespace RDR {
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
const std::string &file_type = ".ir;.pb;.dat", int graph_id = -1);
@ -35,6 +44,10 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
const std::string &filename = "");
bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
const std::vector<CNodePtr> &exec_order);
#ifdef ENABLE_D
// Records a list of task debug info entries (declared only when ENABLE_D is defined).
// module: sub-module id the record is attributed to.
// tag: tag attached to the record.
// task_debug_info_list: entries to record.
// graph_id: graph identifier for the record; defaults to 0.
// Returns a bool success indicator (the definition forwards the result of
// RecorderManager::RecordObject).
bool RecordTaskDebugInfo(SubModuleId module, const std::string &tag,
const std::vector<TaskDebugInfoPtr> &task_debug_info_list, int graph_id = 0);
#endif // ENABLE_D
void TriggerAll();
void ClearAll();
} // namespace RDR

View File

@ -0,0 +1,28 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/rdr/task_debug_info_recorder.h"
#include "runtime/device/ascend/tasksink/task_generator.h"
namespace mindspore {
// Dumps the stored task debug info to "<path>.ir", where <path> is obtained from
// GetFileRealPath using the graph id rendered as a string. Does nothing when
// GetFileRealPath yields no value.
void TaskDebugInfoRecorder::Export() {
  const auto resolved_path = GetFileRealPath(std::to_string(graph_id_));
  if (resolved_path.has_value()) {
    device::ascend::tasksink::TaskGenerator::DumpTaskInfo(resolved_path.value() + ".ir", task_debug_info_);
  }
}
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
#define MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
#include <vector>
#include <string>
#include <memory>
#include "debug/rdr/base_recorder.h"
namespace mindspore {
// Forward declaration of the task debug info type: this header only needs the name to
// declare shared pointers to it, while the matching .cc file includes the task generator
// header that the dump call requires.
namespace device {
namespace ascend {
namespace tasksink {
class TaskDebugInfo;
} // namespace tasksink
} // namespace ascend
} // namespace device
// Shared-ownership handle to one task debug info entry.
using TaskDebugInfoPtr = std::shared_ptr<device::ascend::tasksink::TaskDebugInfo>;
// Recorder that keeps a list of task debug info entries together with a graph id and
// writes them out when Export() is called.
class TaskDebugInfoRecorder : public BaseRecorder {
 public:
  // Creates an empty recorder; graph_id_ falls back to its in-class default of 0.
  TaskDebugInfoRecorder() = default;

  // module / tag: forwarded to the BaseRecorder constructor.
  // task_debug_info: entries to keep (the vector of shared pointers is copied).
  // graph_id: graph identifier, used by Export() to build the output file name.
  TaskDebugInfoRecorder(const std::string &module, const std::string &tag,
                        const std::vector<TaskDebugInfoPtr> &task_debug_info, int graph_id)
      : BaseRecorder(module, tag), graph_id_(graph_id), task_debug_info_(task_debug_info) {}

  // Writes the stored entries to a file; defined in task_debug_info_recorder.cc.
  virtual void Export();

 private:
  // Initialized in-class so a default-constructed recorder never reads an indeterminate
  // value in Export(); 0 matches the default graph id of RDR::RecordTaskDebugInfo.
  int graph_id_{0};
  std::vector<TaskDebugInfoPtr> task_debug_info_;
};
using TaskDebugInfoRecorderPtr = std::shared_ptr<TaskDebugInfoRecorder>;
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_

View File

@ -62,6 +62,9 @@
#include "transform/graph_ir/df_graph_manager.h"
#include "transform/graph_ir/op_adapter_map.h"
#endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif
namespace mindspore {
// namespace to support intermediate representation definition
@ -346,6 +349,9 @@ void ExecutorPy::DelNetRes(const std::string &id) {
// Releases executor-level resources: clears the primitive Python objects held by the
// resource memory cleaner, clears the RDR records when built with ENABLE_DUMP_IR, and
// finally resets executor_ to nullptr.
void ExecutorPy::ClearRes() {
MS_LOG(INFO) << "Clean executor resource!";
Resource::mem_cleaner().ClearPrimitivePyPythonObj();
#ifdef ENABLE_DUMP_IR
// Clear the running data recorder before executor_ is reset.
mindspore::RDR::ClearAll();
#endif
executor_ = nullptr;
}

View File

@ -250,6 +250,56 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
}
#ifdef ENABLE_DUMP_IR
void TaskGenerator::DumpTaskInfo(const string &real_filename,
const std::vector<TaskDebugInfoPtr> &task_debug_info_list) {
OrderedMap<AnfNodePtr, int32_t> para_map;
ChangeFileMode(real_filename, S_IRWXU);
std::ofstream fout(real_filename);
if (!fout.is_open()) {
MS_LOG(ERROR) << "Open dump file '" << real_filename << "' failed!";
return;
}
size_t index = 0;
for (auto &task_debug_info : task_debug_info_list) {
fout << "op_name:" << task_debug_info->op_name_ << "\n"
<< "task_index:" << index << "\t"
<< "task_num:" << task_debug_info->task_num_ << "\t"
<< "task0_stream_id:" << task_debug_info->stream_id_ << "\t"
<< "task0_type:" << task_debug_info->type_ << "\t"
<< "task0_dump_flag:" << task_debug_info->dump_flag_ << "\n";
index++;
if (task_debug_info->input_addrs_.size()) {
fout << "input address:";
for (auto &input : task_debug_info->input_addrs_) {
fout << input->addr << "(" << input->size << ")\t";
}
fout << "\n";
}
if (task_debug_info->output_addrs_.size()) {
fout << "output address:";
for (auto &output : task_debug_info->output_addrs_) {
fout << output->addr << "(" << output->size << ")\t";
}
fout << "\n";
}
if (task_debug_info->workspace_addrs_.size()) {
fout << "workspace address:";
for (auto &workspace : task_debug_info->workspace_addrs_) {
fout << workspace->addr << "(" << workspace->size << ")\t";
}
fout << "\n";
}
fout << "\n";
}
fout.close();
// set file mode to read only by user
ChangeFileMode(real_filename, S_IRUSR);
}
void TaskGenerator::DumpTaskInfo(const std::string &real_filename) {
if (real_filename.size() > PATH_MAX) {
MS_LOG(ERROR) << "File path " << real_filename << " is too long.";
@ -322,8 +372,18 @@ void TaskGenerator::DumpTaskInfo(const std::string &real_filename) {
return;
}
already_printed = true;
MS_LOG(WARNING) << "The functionality of dumping function graph IR is disabled, "
<< "please recompile source to enable it. See help of building script.";
MS_LOG(WARNING) << "The functionality of dumping task debug info is disabled, "
<< "please enable ENABLE_DUMP_IR with '-D on' and recomiple source.";
}
// Stub used when the dump functionality is compiled out: ignores both arguments and only
// logs a warning, at most once per process, telling the user how to enable it.
void TaskGenerator::DumpTaskInfo(const string &real_filename,
                                 const std::vector<TaskDebugInfoPtr> &task_debug_info_list) {
  static bool already_printed = false;
  if (already_printed) {
    return;
  }
  // Set the flag before logging so repeated calls stay silent.
  already_printed = true;
  MS_LOG(WARNING) << "The functionality of dumping task debug info is disabled, "
                  << "please enable ENABLE_DUMP_IR with '-D on' and recompile source.";
}
#endif
} // namespace tasksink

View File

@ -58,6 +58,8 @@ class TaskGenerator {
bool GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
uint32_t graph_id);
// Returns a copy of the task debug info list held by this generator (the vector is
// returned by value; the entries themselves are shared pointers).
std::vector<TaskDebugInfoPtr> GetTaskDebugInfo() const { return task_debug_info_list_; }
// Dumps the given task debug info entries to real_filename. Static, so it can be called
// without a TaskGenerator instance. In builds without ENABLE_DUMP_IR the definition only
// logs a one-time warning.
static void DumpTaskInfo(const string &real_filename, const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
private:
std::vector<TaskDebugInfoPtr> task_debug_info_list_;