forked from mindspore-Ecosystem/mindspore
!12500 [MS][RDR] support recording task debug info
From: @louie5 Reviewed-by: Signed-off-by:
This commit is contained in:
commit
013f26f2e0
|
@ -984,9 +984,6 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
|
|||
#endif
|
||||
MS_LOG(EXCEPTION) << "run task error!";
|
||||
}
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
mindspore::RDR::ClearAll();
|
||||
#endif
|
||||
MS_LOG(INFO) << "Finish!";
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,9 @@ set(_DEBUG_SRC_LIST
|
|||
)
|
||||
|
||||
if(ENABLE_DUMP_IR)
|
||||
if(ENABLE_D)
|
||||
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/rdr/task_debug_info_recorder.cc")
|
||||
endif()
|
||||
list(APPEND _DEBUG_SRC_LIST
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/base_recorder.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_exec_order_recorder.cc"
|
||||
|
@ -29,7 +32,6 @@ if(ENABLE_DEBUGGER)
|
|||
"${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
|
||||
list(APPEND _DEBUG_SRC_LIST "data_dump/dump_json_parser.cc")
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
@ -75,7 +74,6 @@ class BaseRecorder {
|
|||
std::string filename_;
|
||||
std::string timestamp_; // year,month,day,hour,minute,second
|
||||
};
|
||||
|
||||
using BaseRecorderPtr = std::shared_ptr<BaseRecorder>;
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_BASE_RECORDER_H_
|
||||
|
|
|
@ -86,7 +86,6 @@ void GraphRecorder::Export() {
|
|||
}
|
||||
if (graph_type_.find(".pb") != std::string::npos) {
|
||||
save_flag = true;
|
||||
|
||||
protobuf::DumpIRProto(realpath + ".pb", func_graph_); // save *.pb file
|
||||
}
|
||||
if (!save_flag) {
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_GRAPH_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_RDR_GRAPH_RECORDER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
|
|
@ -22,7 +22,10 @@
|
|||
#include "debug/rdr/stream_exec_order_recorder.h"
|
||||
#include "mindspore/core/ir/func_graph.h"
|
||||
#include "mindspore/core/ir/anf.h"
|
||||
|
||||
#ifdef ENABLE_D
|
||||
#include "runtime/device/ascend/tasksink/task_generator.h"
|
||||
#include "debug/rdr/task_debug_info_recorder.h"
|
||||
#endif // ENABLE_D
|
||||
namespace mindspore {
|
||||
namespace {
|
||||
static const char *GetSubModuleName(SubModuleId module_id) {
|
||||
|
@ -59,6 +62,17 @@ static const char *GetSubModuleName(SubModuleId module_id) {
|
|||
}
|
||||
} // namespace
|
||||
namespace RDR {
|
||||
#ifdef ENABLE_D
|
||||
// Wraps the given task debug info list in a TaskDebugInfoRecorder and hands it to the
// RecorderManager singleton.
//   module               - submodule id; converted to its name through GetSubModuleName.
//   tag                  - tag forwarded to the recorder.
//   task_debug_info_list - task debug info entries forwarded to the recorder.
//   graph_id             - graph id forwarded to the recorder.
// Returns whatever RecorderManager::RecordObject returns.
bool RecordTaskDebugInfo(SubModuleId module, const std::string &tag,
                         const std::vector<TaskDebugInfoPtr> &task_debug_info_list, int graph_id) {
  const std::string module_name(GetSubModuleName(module));
  TaskDebugInfoRecorderPtr recorder =
    std::make_shared<TaskDebugInfoRecorder>(module_name, tag, task_debug_info_list, graph_id);
  return mindspore::RecorderManager::Instance().RecordObject(std::move(recorder));
}
|
||||
#endif // ENABLE_D
|
||||
|
||||
#ifdef __linux__
|
||||
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
|
||||
const std::string &file_type, int graph_id) {
|
||||
|
@ -152,6 +166,15 @@ void TriggerAll() {
|
|||
already_printed = true;
|
||||
MS_LOG(WARNING) << "The RDR presently only support linux os.";
|
||||
}
|
||||
|
||||
// Variant of ClearAll that performs no clearing: it only logs a warning, and only on
// the first call. Later calls return without doing anything.
void ClearAll() {
  static bool warned_once = false;
  if (!warned_once) {
    warned_once = true;
    MS_LOG(WARNING) << "The RDR presently only support linux os.";
  }
}
|
||||
#endif // __linux__
|
||||
} // namespace RDR
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_RUNNING_DATA_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_RDR_RUNNING_DATA_RECORDER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
@ -26,6 +25,16 @@ class FuncGraph;
|
|||
class CNode;
|
||||
using FuncGraphPtr = std::shared_ptr<FuncGraph>;
|
||||
using CNodePtr = std::shared_ptr<CNode>;
|
||||
#ifdef ENABLE_D
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
namespace tasksink {
|
||||
class TaskDebugInfo;
|
||||
} // namespace tasksink
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
using TaskDebugInfoPtr = std::shared_ptr<device::ascend::tasksink::TaskDebugInfo>;
|
||||
#endif // ENABLE_D
|
||||
namespace RDR {
|
||||
bool RecordAnfGraph(const SubModuleId module, const std::string &tag, const FuncGraphPtr &graph, bool full_name,
|
||||
const std::string &file_type = ".ir;.pb;.dat", int graph_id = -1);
|
||||
|
@ -35,6 +44,10 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
|
|||
const std::string &filename = "");
|
||||
bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
|
||||
const std::vector<CNodePtr> &exec_order);
|
||||
#ifdef ENABLE_D
|
||||
bool RecordTaskDebugInfo(SubModuleId module, const std::string &tag,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list, int graph_id = 0);
|
||||
#endif // ENABLE_D
|
||||
void TriggerAll();
|
||||
void ClearAll();
|
||||
} // namespace RDR
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "debug/rdr/task_debug_info_recorder.h"
|
||||
#include "runtime/device/ascend/tasksink/task_generator.h"
|
||||
|
||||
namespace mindspore {
|
||||
void TaskDebugInfoRecorder::Export() {
|
||||
auto realpath = GetFileRealPath(std::to_string(graph_id_));
|
||||
if (!realpath.has_value()) {
|
||||
return;
|
||||
}
|
||||
std::string file_path = realpath.value() + ".ir";
|
||||
device::ascend::tasksink::TaskGenerator::DumpTaskInfo(file_path, task_debug_info_);
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "debug/rdr/base_recorder.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
namespace tasksink {
|
||||
class TaskDebugInfo;
|
||||
} // namespace tasksink
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
|
||||
using TaskDebugInfoPtr = std::shared_ptr<device::ascend::tasksink::TaskDebugInfo>;
|
||||
class TaskDebugInfoRecorder : public BaseRecorder {
|
||||
public:
|
||||
TaskDebugInfoRecorder() {}
|
||||
TaskDebugInfoRecorder(const std::string &module, const std::string &tag,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info, int graph_id)
|
||||
: BaseRecorder(module, tag), graph_id_(graph_id), task_debug_info_(task_debug_info) {}
|
||||
virtual void Export();
|
||||
|
||||
private:
|
||||
int graph_id_;
|
||||
std::vector<TaskDebugInfoPtr> task_debug_info_;
|
||||
};
|
||||
using TaskDebugInfoRecorderPtr = std::shared_ptr<TaskDebugInfoRecorder>;
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
|
@ -62,6 +62,9 @@
|
|||
#include "transform/graph_ir/df_graph_manager.h"
|
||||
#include "transform/graph_ir/op_adapter_map.h"
|
||||
#endif
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
#include "debug/rdr/running_data_recorder.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
// namespace to support intermediate representation definition
|
||||
|
@ -346,6 +349,9 @@ void ExecutorPy::DelNetRes(const std::string &id) {
|
|||
// Releases executor-level resources, in this order:
//   1. calls Resource::mem_cleaner().ClearPrimitivePyPythonObj();
//   2. calls mindspore::RDR::ClearAll(), only when built with ENABLE_DUMP_IR;
//   3. resets executor_ to nullptr.
void ExecutorPy::ClearRes() {
|
||||
MS_LOG(INFO) << "Clean executor resource!";
|
||||
Resource::mem_cleaner().ClearPrimitivePyPythonObj();
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
mindspore::RDR::ClearAll();
|
||||
#endif
|
||||
executor_ = nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -250,6 +250,56 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
|
|||
}
|
||||
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
void TaskGenerator::DumpTaskInfo(const string &real_filename,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list) {
|
||||
OrderedMap<AnfNodePtr, int32_t> para_map;
|
||||
ChangeFileMode(real_filename, S_IRWXU);
|
||||
std::ofstream fout(real_filename);
|
||||
|
||||
if (!fout.is_open()) {
|
||||
MS_LOG(ERROR) << "Open dump file '" << real_filename << "' failed!";
|
||||
return;
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
for (auto &task_debug_info : task_debug_info_list) {
|
||||
fout << "op_name:" << task_debug_info->op_name_ << "\n"
|
||||
<< "task_index:" << index << "\t"
|
||||
<< "task_num:" << task_debug_info->task_num_ << "\t"
|
||||
<< "task0_stream_id:" << task_debug_info->stream_id_ << "\t"
|
||||
<< "task0_type:" << task_debug_info->type_ << "\t"
|
||||
<< "task0_dump_flag:" << task_debug_info->dump_flag_ << "\n";
|
||||
index++;
|
||||
if (task_debug_info->input_addrs_.size()) {
|
||||
fout << "input address:";
|
||||
for (auto &input : task_debug_info->input_addrs_) {
|
||||
fout << input->addr << "(" << input->size << ")\t";
|
||||
}
|
||||
fout << "\n";
|
||||
}
|
||||
|
||||
if (task_debug_info->output_addrs_.size()) {
|
||||
fout << "output address:";
|
||||
for (auto &output : task_debug_info->output_addrs_) {
|
||||
fout << output->addr << "(" << output->size << ")\t";
|
||||
}
|
||||
fout << "\n";
|
||||
}
|
||||
|
||||
if (task_debug_info->workspace_addrs_.size()) {
|
||||
fout << "workspace address:";
|
||||
for (auto &workspace : task_debug_info->workspace_addrs_) {
|
||||
fout << workspace->addr << "(" << workspace->size << ")\t";
|
||||
}
|
||||
fout << "\n";
|
||||
}
|
||||
fout << "\n";
|
||||
}
|
||||
|
||||
fout.close();
|
||||
// set file mode to read only by user
|
||||
ChangeFileMode(real_filename, S_IRUSR);
|
||||
}
|
||||
void TaskGenerator::DumpTaskInfo(const std::string &real_filename) {
|
||||
if (real_filename.size() > PATH_MAX) {
|
||||
MS_LOG(ERROR) << "File path " << real_filename << " is too long.";
|
||||
|
@ -322,8 +372,18 @@ void TaskGenerator::DumpTaskInfo(const std::string &real_filename) {
|
|||
return;
|
||||
}
|
||||
already_printed = true;
|
||||
MS_LOG(WARNING) << "The functionality of dumping function graph IR is disabled, "
|
||||
<< "please recompile source to enable it. See help of building script.";
|
||||
MS_LOG(WARNING) << "The functionality of dumping task debug info is disabled, "
|
||||
<< "please enable ENABLE_DUMP_IR with '-D on' and recomiple source.";
|
||||
}
|
||||
void TaskGenerator::DumpTaskInfo(const string &real_filename,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list) {
|
||||
static bool already_printed = false;
|
||||
if (already_printed) {
|
||||
return;
|
||||
}
|
||||
already_printed = true;
|
||||
MS_LOG(WARNING) << "The functionality of dumping task debug info is disabled, "
|
||||
<< "please enable ENABLE_DUMP_IR with '-D on' and recomiple source.";
|
||||
}
|
||||
#endif
|
||||
} // namespace tasksink
|
||||
|
|
|
@ -58,6 +58,8 @@ class TaskGenerator {
|
|||
|
||||
bool GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
|
||||
uint32_t graph_id);
|
||||
std::vector<TaskDebugInfoPtr> GetTaskDebugInfo() const { return task_debug_info_list_; }
|
||||
static void DumpTaskInfo(const string &real_filename, const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
|
||||
|
||||
private:
|
||||
std::vector<TaskDebugInfoPtr> task_debug_info_list_;
|
||||
|
|
Loading…
Reference in New Issue