Combine Async Dump and E2E Dump

This commit is contained in:
caifubi 2020-09-12 22:11:39 +08:00 committed by chujinjin
parent daf6739b22
commit 372c2e7951
30 changed files with 878 additions and 984 deletions

View File

@ -23,7 +23,7 @@ usage()
{ {
echo "Usage:" echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" echo " [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I arm64|arm32|x86_64] [-K] \\" echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I arm64|arm32|x86_64] [-K] \\"
echo " [-B on|off] [-w on|off] [-E] [-l on|off] [-n full|lite|off]" echo " [-B on|off] [-w on|off] [-E] [-l on|off] [-n full|lite|off]"
echo "" echo ""
@ -46,7 +46,6 @@ usage()
echo " -j[n] Set the threads when building (Default: -j8)" echo " -j[n] Set the threads when building (Default: -j8)"
echo " -e Use gpu, d or cpu" echo " -e Use gpu, d or cpu"
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
echo " -Q Enable dump memory, default off"
echo " -D Enable dumping of function graph ir, default on" echo " -D Enable dumping of function graph ir, default on"
echo " -z Compile dataset & mindrecord, default on" echo " -z Compile dataset & mindrecord, default on"
echo " -n Compile minddata with mindspore lite, available: off, lite, full, default is lite" echo " -n Compile minddata with mindspore lite, available: off, lite, full, default is lite"
@ -89,7 +88,6 @@ checkopts()
ENABLE_LOAD_IR="off" ENABLE_LOAD_IR="off"
ENABLE_TIMELINE="off" ENABLE_TIMELINE="off"
ENABLE_DUMP2PROTO="on" ENABLE_DUMP2PROTO="on"
ENABLE_DUMPE2E="off"
ENABLE_DUMP_IR="on" ENABLE_DUMP_IR="on"
COMPILE_MINDDATA="on" COMPILE_MINDDATA="on"
COMPILE_MINDDATA_LITE="off" COMPILE_MINDDATA_LITE="off"
@ -108,7 +106,7 @@ checkopts()
ENABLE_GPU="off" ENABLE_GPU="off"
# Process the options # Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:swB:En:T:' opt while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:swB:En:T:' opt
do do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in case "${opt}" in
@ -229,11 +227,6 @@ checkopts()
ENABLE_DUMP2PROTO="$OPTARG" ENABLE_DUMP2PROTO="$OPTARG"
echo "enable dump anf graph to proto file" echo "enable dump anf graph to proto file"
;; ;;
Q)
check_on_off $OPTARG Q
ENABLE_DUMPE2E="$OPTARG"
echo "enable dump end to end"
;;
D) D)
check_on_off $OPTARG D check_on_off $OPTARG D
ENABLE_DUMP_IR="$OPTARG" ENABLE_DUMP_IR="$OPTARG"
@ -301,9 +294,6 @@ checkopts()
done done
} }
checkopts "$@" checkopts "$@"
if [[ "X$ENABLE_GPU" = "Xon" ]] && [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
ENABLE_DEBUGGER="on"
fi
echo "---------------- MindSpore: build start ----------------" echo "---------------- MindSpore: build start ----------------"
mkdir -pv "${BUILD_PATH}/package/mindspore/lib" mkdir -pv "${BUILD_PATH}/package/mindspore/lib"
git submodule update --init graphengine git submodule update --init graphengine
@ -350,9 +340,6 @@ build_mindspore()
if [[ "X$ENABLE_DUMP2PROTO" = "Xon" ]]; then if [[ "X$ENABLE_DUMP2PROTO" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_PROTO=ON" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_PROTO=ON"
fi fi
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then if [[ "X$ENABLE_MPI" = "Xon" ]]; then

View File

@ -13,7 +13,6 @@ option(USE_GLOG "Use glog to output log" OFF)
option(ENABLE_PROFILE "Enable pipeline profile, default off" OFF) option(ENABLE_PROFILE "Enable pipeline profile, default off" OFF)
option(ENABLE_TIMELINE "Enable time line record" OFF) option(ENABLE_TIMELINE "Enable time line record" OFF)
option(ENABLE_DUMP_PROTO "Enable dump anf graph to file in ProtoBuffer format, default on" ON) option(ENABLE_DUMP_PROTO "Enable dump anf graph to file in ProtoBuffer format, default on" ON)
option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON) option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON)
option(ENABLE_MPI "enable mpi" OFF) option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF) option(ENABLE_AKG "enable akg" OFF)
@ -116,10 +115,6 @@ if(ENABLE_MINDDATA)
endif() endif()
endif() endif()
if(ENABLE_DUMP_E2E)
add_compile_definitions(ENABLE_DUMP_E2E)
endif()
if(ENABLE_DEBUGGER) if(ENABLE_DEBUGGER)
add_compile_definitions(ENABLE_DEBUGGER) add_compile_definitions(ENABLE_DEBUGGER)
endif() endif()

View File

@ -1,17 +1,19 @@
{ {
"DumpSettings": { "common_dump_settings": {
"dump_mode": 0,
"path": "/test",
"net_name": "ResNet50", "net_name": "ResNet50",
"dump_mode": 1,
"op_debug_mode": 3,
"iteration": 0, "iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] "input_output": 0,
"kernels": ["Default/Conv-op12"],
"support_device": [0,1,2,3,4,5,6,7]
}, },
"e2e_dump_settings": {
"DumpSettingsSpec": { "enable": false,
"net_name": "net name eg:ResNet50", "trans_flag": false
"dump_mode": "0: dump all kernels, 1: dump kernels in kernels list", },
"op_debug_mode": "0: close debug, 1: debug ai-core overflow, 2: debug atomic overflow, 3: debug all overflow", "async_dump_settings": {
"iteration": "specified iteration ", "enable": false,
"kernels": "op's full scope name which need to be dump" "op_debug_mode": 0
} }
} }

View File

@ -21,7 +21,7 @@
#include <memory> #include <memory>
#include "framework/ge_runtime/task_info.h" #include "framework/ge_runtime/task_info.h"
#include "backend/kernel_compiler/kernel.h" #include "backend/kernel_compiler/kernel.h"
#include "debug/data_dump_parser.h" #include "debug/data_dump/dump_json_parser.h"
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
namespace mindspore { namespace mindspore {
@ -32,7 +32,9 @@ class AscendKernelMod : public KernelMod {
const std::vector<AddressPtr> &, uint32_t) = 0; const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; } uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; } uint32_t stream_id() { return stream_id_; }
virtual bool NeedDump() { return DataDumpParser::GetInstance().NeedDump(kernel_name_); } virtual bool NeedDump() {
return DumpJsonParser::GetInstance().NeedDump(kernel_name_) && DumpJsonParser::GetInstance().async_dump_enabled();
}
protected: protected:
uint32_t block_dim_{1}; uint32_t block_dim_{1};

View File

@ -38,8 +38,10 @@
#include "backend/optimizer/common/helper.h" #include "backend/optimizer/common/helper.h"
#include "runtime/device/kernel_runtime_manager.h" #include "runtime/device/kernel_runtime_manager.h"
#include "utils/config_manager.h" #include "utils/config_manager.h"
#include "debug/data_dump/dump_json_parser.h"
#include "debug/tensor_load.h" #include "debug/tensor_load.h"
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h" #include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/anf_ir_dump.h" #include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h" #include "debug/dump_proto.h"
@ -329,8 +331,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
LoadTensor(kernel_graph); LoadTensor(kernel_graph);
} }
#endif #endif
// dump used for debug
Dump(kernel_graph);
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
// debugger post-execution processing // debugger post-execution processing
if (debugger_) { if (debugger_) {
@ -565,6 +565,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance); MS_EXCEPTION_IF_NULL(runtime_instance);
bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink); bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink);
Dump(kernel_graph);
if (!ret_ok) { if (!ret_ok) {
MS_LOG(EXCEPTION) << "run task error!"; MS_LOG(EXCEPTION) << "run task error!";
} }
@ -574,9 +575,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start!"; MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_); E2eDumpUtil::DumpData(kernel_graph.get());
MS_EXCEPTION_IF_NULL(runtime_instance);
(void)runtime_instance->DumpData(kernel_graph.get());
MS_LOG(INFO) << "Finish!"; MS_LOG(INFO) << "Finish!";
} }

View File

@ -47,6 +47,7 @@
#include "utils/ms_utils.h" #include "utils/ms_utils.h"
#include "common/trans.h" #include "common/trans.h"
#include "utils/ms_context.h" #include "utils/ms_context.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/tensor_load.h" #include "debug/tensor_load.h"
#include "debug/dump_proto.h" #include "debug/dump_proto.h"
@ -350,14 +351,10 @@ void GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const { void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
#ifdef ENABLE_DUMP_E2E
if (debugger_->DebuggerBackendEnabled()) { if (debugger_->DebuggerBackendEnabled()) {
MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); E2eDumpUtil::DumpData(kernel_graph.get(), debugger_.get());
MS_EXCEPTION_IF_NULL(runtime_instance);
(void)runtime_instance->DumpData(kernel_graph.get(), debugger_.get());
} }
#endif
} }
bool GPUSession::DumpDataEnabledIteration() const { bool GPUSession::DumpDataEnabledIteration() const {

View File

@ -16,16 +16,11 @@ if (ENABLE_DEBUGGER)
) )
endif (ENABLE_DEBUGGER) endif (ENABLE_DEBUGGER)
if (ENABLE_D) if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
list(APPEND _DEBUG_SRC_LIST list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc" list(APPEND _DEBUG_SRC_LIST "data_dump/dump_json_parser.cc")
) list(APPEND _DEBUG_SRC_LIST "data_dump/e2e_dump_util.cc")
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc")
endif() endif()
if (ENABLE_DUMP_E2E)
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
endif (ENABLE_DUMP_E2E)
set_property(SOURCE ${_DEBUG_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG) set_property(SOURCE ${_DEBUG_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG)
add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST}) add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST})

View File

@ -0,0 +1,401 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump/dump_json_parser.h"
#include <fstream>
#include "utils/log_adapter.h"
#include "debug/common.h"
#include "utils/ms_context.h"
#include "utils/convert_utils_base.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace {
// Names of the top-level sections of the dump configuration JSON.
constexpr auto kCommonDumpSettings = "common_dump_settings";
constexpr auto kAsyncDumpSettings = "async_dump_settings";
constexpr auto kE2eDumpSettings = "e2e_dump_settings";
// Keys looked up inside the "common_dump_settings" section.
constexpr auto kDumpMode = "dump_mode";
constexpr auto kPath = "path";
constexpr auto kNetName = "net_name";
constexpr auto kIteration = "iteration";
constexpr auto kInputOutput = "input_output";
constexpr auto kKernels = "kernels";
constexpr auto kSupportDevice = "support_device";
// Keys looked up inside the async/e2e sections ("enable" is read from both).
constexpr auto kEnable = "enable";
constexpr auto kOpDebugMode = "op_debug_mode";
constexpr auto kTransFlag = "trans_flag";
// Values of "input_output" that InputNeedDump()/OutputNeedDump() compare against.
constexpr auto kDumpInputAndOutput = 0;
constexpr auto kDumpInputOnly = 1;
constexpr auto kDumpOutputOnly = 2;
// Environment variable that is queried to locate the dump configuration.
constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG";
} // namespace
namespace mindspore {
// Finds `key` in the JSON value `content` and returns the iterator that points at it.
// A missing key is reported through MS_LOG(EXCEPTION).
auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) {
  auto entry = content.find(key);
  const bool key_missing = (entry == content.end());
  if (key_missing) {
    MS_LOG(EXCEPTION) << "Check dump json failed, " << key << " not found";
  }
  return entry;
}
// Returns everything that can still be read from the stream's buffer as one string.
std::string GetIfstreamString(const std::ifstream &ifstream) {
  std::ostringstream contents;
  contents << ifstream.rdbuf();
  return contents.str();
}
// Decides whether the dump configuration should be parsed at all.
// Returns false when the MINDSPORE_DUMP_CONFIG environment variable is not set or when the
// execution mode stored in MsContext is PyNative; returns true otherwise.
bool DumpJsonParser::IsDumpEnabled() {
  const char *env_value = std::getenv(kMindsporeDumpConfig);
  if (env_value == nullptr) {
    MS_LOG(INFO) << "Dump config path is null";
    return false;
  }
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  const bool is_pynative = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
  if (is_pynative) {
    MS_LOG(INFO) << "Dump is disabled in PyNative mode";
  }
  return !is_pynative;
}
void DumpJsonParser::Parse() {
std::lock_guard<std::mutex> guard(lock_);
if (!IsDumpEnabled()) {
return;
}
auto dump_config_file = Common::GetConfigFile(kMindsporeDumpConfig);
if (!dump_config_file.has_value()) {
MS_LOG(EXCEPTION) << "Get dump config file failed";
}
std::ifstream json_file(dump_config_file.value());
if (!json_file.is_open()) {
MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed.";
}
nlohmann::json j;
try {
json_file >> j;
} catch (nlohmann::json::parse_error &e) {
MS_LOG(ERROR) << "Dump json contents:" << GetIfstreamString(json_file);
MS_LOG(EXCEPTION) << "Parse dump json failed, error:" << e.what();
}
// convert json to string
std::stringstream ss;
ss << j;
std::string cfg = ss.str();
MS_LOG(INFO) << "Dump json:" << cfg;
ParseCommonDumpSetting(j);
ParseAsyncDumpSetting(j);
ParseE2eDumpSetting(j);
JudgeDumpEnabled();
}
bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len) {
if (filename.empty() || data == nullptr || len == 0) {
MS_LOG(ERROR) << "Incorrect parameter.";
return false;
}
auto realpath = Common::GetRealPath(filename);
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed.";
return false;
}
std::ofstream fd;
fd.open(realpath.value(), std::ios::binary | std::ios::out);
if (!fd.is_open()) {
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
return false;
}
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
fd.close();
return true;
}
// Reads the mandatory "common_dump_settings" section.
// Every required key is looked up first (a missing key raises an exception before any member
// is modified); afterwards each value is validated and stored by its dedicated Parse* helper.
void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) {
  const auto section_it = CheckJsonKeyExist(content, kCommonDumpSettings);
  const nlohmann::json &section = *section_it;

  const auto mode_it = CheckJsonKeyExist(section, kDumpMode);
  const auto path_it = CheckJsonKeyExist(section, kPath);
  const auto net_name_it = CheckJsonKeyExist(section, kNetName);
  const auto iteration_it = CheckJsonKeyExist(section, kIteration);
  const auto input_output_it = CheckJsonKeyExist(section, kInputOutput);
  const auto kernels_it = CheckJsonKeyExist(section, kKernels);
  const auto support_device_it = CheckJsonKeyExist(section, kSupportDevice);

  ParseDumpMode(*mode_it);
  ParseDumpPath(*path_it);
  ParseNetName(*net_name_it);
  ParseIteration(*iteration_it);
  ParseInputOutput(*input_output_it);
  ParseKernels(*kernels_it);
  ParseSupportDevice(*support_device_it);
}
// Reads the optional "async_dump_settings" section.
// When the section is present, both "enable" and "op_debug_mode" must exist inside it.
void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) {
  const auto section_it = content.find(kAsyncDumpSettings);
  if (section_it != content.end()) {
    const auto enable_it = CheckJsonKeyExist(*section_it, kEnable);
    const auto debug_mode_it = CheckJsonKeyExist(*section_it, kOpDebugMode);
    async_dump_enabled_ = ParseEnable(*enable_it);
    ParseOpDebugMode(*debug_mode_it);
    return;
  }
  // The section is optional, so its absence is only worth an info message.
  MS_LOG(INFO) << "No async_dump_settings";
}
// Reads the optional "e2e_dump_settings" section.
// When the section is present, both "enable" and "trans_flag" must exist inside it and must
// be booleans; when it is absent the e2e members keep their current values.
void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) {
  // Use a non-throwing lookup, as ParseAsyncDumpSetting does: CheckJsonKeyExist raises an
  // exception for a missing key, which made the "section absent" branch below unreachable.
  auto e2e_dump_setting = content.find(kE2eDumpSettings);
  if (e2e_dump_setting == content.end()) {
    MS_LOG(INFO) << "No e2e_dump_settings";
    return;
  }
  auto e2e_dump_enable = CheckJsonKeyExist(*e2e_dump_setting, kEnable);
  auto trans_flag = CheckJsonKeyExist(*e2e_dump_setting, kTransFlag);
  e2e_dump_enabled_ = ParseEnable(*e2e_dump_enable);
  trans_flag_ = ParseEnable(*trans_flag);
}
// Raises an exception naming `key` unless `content` holds an unsigned number.
void CheckJsonUnsignedType(const nlohmann::json &content, const std::string &key) {
  if (content.is_number_unsigned()) {
    return;
  }
  MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be unsigned int type";
}
// Raises an exception naming `key` unless `content` holds a string.
void CheckJsonStringType(const nlohmann::json &content, const std::string &key) {
  if (content.is_string()) {
    return;
  }
  MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be string type";
}
// Raises an exception naming `key` unless `content` holds an array.
void CheckJsonArrayType(const nlohmann::json &content, const std::string &key) {
  if (content.is_array()) {
    return;
  }
  MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be array type";
}
// Stores "dump_mode"; the value must be an unsigned number equal to 0 or 1.
void DumpJsonParser::ParseDumpMode(const nlohmann::json &content) {
  CheckJsonUnsignedType(content, kDumpMode);
  dump_mode_ = content.get<uint32_t>();
  const bool mode_is_valid = (dump_mode_ == 0) || (dump_mode_ == 1);
  if (!mode_is_valid) {
    MS_LOG(EXCEPTION) << "Dump Json Parse Failed. dump_mode should be 0 or 1";
  }
}
// Stores "path"; the value must be a non-empty string made only of letters, digits,
// '-', '_' and '/'. Violations are raised through MS_LOG(EXCEPTION).
void DumpJsonParser::ParseDumpPath(const nlohmann::json &content) {
  CheckJsonStringType(content, kPath);
  path_ = content;
  // The lambda takes unsigned char: passing a negative plain char (any non-ASCII byte) to
  // ::isalpha / ::isdigit is undefined behaviour.
  if (!std::all_of(path_.begin(), path_.end(), [](unsigned char c) {
        return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/';
      })) {
    MS_LOG(EXCEPTION) << "Dump path only support alphabets, digit or {'-', '_', '/'}, but got:" << path_;
  }
  if (path_.empty()) {
    MS_LOG(EXCEPTION) << "Dump path is empty";
  }
}
// Stores "net_name"; the value must be a string made only of letters, digits, '-' and '_'.
// Violations are raised through MS_LOG(EXCEPTION).
void DumpJsonParser::ParseNetName(const nlohmann::json &content) {
  CheckJsonStringType(content, kNetName);
  net_name_ = content;
  // The lambda takes unsigned char: passing a negative plain char (any non-ASCII byte) to
  // ::isalpha / ::isdigit is undefined behaviour.
  if (!std::all_of(net_name_.begin(), net_name_.end(),
                   [](unsigned char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_'; })) {
    // The message names net_name; it previously said "Dump path", copied from ParseDumpPath.
    MS_LOG(EXCEPTION) << "Dump net_name only support alphabets, digit or {'-', '_'}, but got:" << net_name_;
  }
}
// Stores "iteration"; the value must be an unsigned number.
void DumpJsonParser::ParseIteration(const nlohmann::json &content) {
  CheckJsonUnsignedType(content, kIteration);
  iteration_ = content.get<uint32_t>();
}
// Stores "input_output"; the value must be an unsigned number equal to one of
// kDumpInputAndOutput (0), kDumpInputOnly (1) or kDumpOutputOnly (2).
void DumpJsonParser::ParseInputOutput(const nlohmann::json &content) {
  CheckJsonUnsignedType(content, kInputOutput);
  input_output_ = content;
  // input_output_ is unsigned, so the former "< 0" test could never be true; validate
  // against the named constants instead.
  const bool is_valid = input_output_ == kDumpInputAndOutput || input_output_ == kDumpInputOnly ||
                        input_output_ == kDumpOutputOnly;
  if (!is_valid) {
    MS_LOG(EXCEPTION) << "Dump Json Parse Failed. input_output should be 0, 1, 2";
  }
}
// Registers every entry of the "kernels" array in kernels_ with a match count of 0.
// Each entry is serialized with dump() and stripped of double quotes to obtain the kernel
// name; a name that is already registered only triggers a warning.
void DumpJsonParser::ParseKernels(const nlohmann::json &content) {
  CheckJsonArrayType(content, kKernels);
  for (const auto &kernel : content) {
    auto kernel_str = kernel.dump();
    kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end());
    MS_LOG(INFO) << "Need dump kernel:" << kernel_str;
    // Pass key and mapped value as separate arguments. The former braced form
    // try_emplace({kernel_str, 0}) built the key through std::string's (str, pos) substring
    // constructor and only value-initialized the counter by coincidence.
    auto ret = kernels_.try_emplace(kernel_str, 0);
    if (!ret.second) {
      MS_LOG(WARNING) << "Duplicate dump kernel name:" << kernel_str;
    }
  }
}
// Collects every entry of the "support_device" array into support_devices_.
// An id that is already present only triggers a warning.
void DumpJsonParser::ParseSupportDevice(const nlohmann::json &content) {
  CheckJsonArrayType(content, kSupportDevice);
  for (const auto &entry : content) {
    const uint32_t device_id = entry;
    MS_LOG(INFO) << "Dump support device:" << device_id;
    const bool newly_added = support_devices_.emplace(device_id).second;
    if (newly_added) {
      continue;
    }
    MS_LOG(WARNING) << "Duplicate support device:" << device_id;
  }
}
// Returns the boolean held by `content`; any other JSON type raises an exception.
bool DumpJsonParser::ParseEnable(const nlohmann::json &content) {
  const bool holds_bool = content.is_boolean();
  if (!holds_bool) {
    MS_LOG(EXCEPTION) << "Dump Json Parse Failed. 'enable' should be boolean type";
  }
  return content.get<bool>();
}
// Stores "op_debug_mode"; the value must be an unsigned number in the range 0..3.
void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) {
  CheckJsonUnsignedType(content, kOpDebugMode);
  op_debug_mode_ = content;
  // op_debug_mode_ is unsigned, so only the upper bound needs checking; the former "< 0"
  // comparison was always false.
  if (op_debug_mode_ > 3) {
    MS_LOG(EXCEPTION) << "Dump Json Parse Failed. op_debug_mode should be 0, 1, 2, 3";
  }
}
// Reconciles the parsed enable flags with the runtime context:
//  - async dump is switched off when the device target is GPU;
//  - on Ascend, async dump is switched off when e2e dump is enabled as well;
//  - both dumps are switched off when the current device id is not in support_devices_.
// Finally MS_CTX_ENABLE_MEM_REUSE is set to the negation of the e2e dump switch.
void DumpJsonParser::JudgeDumpEnabled() {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);

  const auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  if (device_target == kGPUDevice) {
    async_dump_enabled_ = false;
  }
  const bool both_enabled_on_ascend = (device_target == kAscendDevice) && async_dump_enabled_ && e2e_dump_enabled_;
  if (both_enabled_on_ascend) {
    async_dump_enabled_ = false;
    MS_LOG(INFO) << "Disable async dump";
  }

  if (!(async_dump_enabled_ || e2e_dump_enabled_)) {
    MS_LOG(WARNING) << "Dump json parse failed. Dump not enabled";
  }

  const auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
  if (support_devices_.count(device_id) == 0) {
    async_dump_enabled_ = false;
    e2e_dump_enabled_ = false;
    MS_LOG(WARNING) << "Dump not enabled. device_id:" << device_id << " not support";
  }

  ms_context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !e2e_dump_enabled_);
}
// Tells whether the kernel `op_full_name` has to be dumped: always when dump_mode_ is 0,
// otherwise only when the name is registered in kernels_.
bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {
  const bool dump_every_kernel = (dump_mode_ == 0);
  return dump_every_kernel || kernels_.count(op_full_name) > 0;
}
// Increments the match counter of `kernel_name` when it is registered in kernels_;
// unknown names are ignored.
void DumpJsonParser::MatchKernel(const std::string &kernel_name) {
  auto matched = kernels_.find(kernel_name);
  if (matched != kernels_.end()) {
    ++matched->second;
    MS_LOG(INFO) << "Match dump kernel:" << matched->first << " match times:" << matched->second;
  }
}
// Logs a warning for every registered kernel whose match counter is still 0.
void DumpJsonParser::PrintUnusedKernel() {
  for (const auto &[kernel_name, match_times] : kernels_) {
    if (match_times != 0) {
      continue;
    }
    MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << kernel_name;
  }
}
// Builds the directory string
//   /var/log/npu/ide_daemon/dump[<DATA_DUMP_PATH>_]<device_id>/<net_name>_<graph_id>/<dump_mode>/<iteration>/
// where the bracketed part is only present when the DATA_DUMP_PATH environment variable is set.
std::string DumpJsonParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const {
  std::string bin_path("/var/log/npu/ide_daemon/dump");
  if (const char *env_dump_path = std::getenv("DATA_DUMP_PATH"); env_dump_path != nullptr) {
    bin_path += env_dump_path;
    bin_path += "_";
  }
  bin_path += std::to_string(device_id) + "/";
  bin_path += net_name_ + "_" + std::to_string(graph_id) + "/";
  bin_path += std::to_string(dump_mode_) + "/";
  bin_path += std::to_string(iteration_) + "/";
  return bin_path;
}
// True when input_output_ selects "input and output" or "input only".
bool DumpJsonParser::InputNeedDump() const {
  switch (input_output_) {
    case kDumpInputAndOutput:
    case kDumpInputOnly:
      return true;
    default:
      return false;
  }
}
// True when input_output_ selects "input and output" or "output only".
bool DumpJsonParser::OutputNeedDump() const {
  switch (input_output_) {
    case kDumpInputAndOutput:
    case kDumpOutputOnly:
      return true;
    default:
      return false;
  }
}
// Tells whether `kernel` takes part in async dump: its kernel type must be TBE, AICPU or
// AKG, and DumpJsonParser::NeedDump() must accept its full scope name.
// A null `kernel` is reported through MS_EXCEPTION_IF_NULL.
bool NeedAsyncDump(const CNodePtr &kernel) {
  // Validate the node before it is handed to GetKernelType; the check used to come after
  // the node had already been used.
  MS_EXCEPTION_IF_NULL(kernel);
  const auto kernel_type = AnfAlgo::GetKernelType(kernel);
  if (kernel_type != TBE_KERNEL && kernel_type != AICPU_KERNEL && kernel_type != AKG_KERNEL) {
    return false;
  }
  // dump all kernel if mode is set 0 in data_dump.json
  return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
}
// Extends kernels_ with the kernels of `kernel_graph` that async dump has to cover.
// For an HCCL kernel accepted by NeedDump(), the CNode producers of its inputs are added
// instead of the kernel itself; every other kernel is added when NeedAsyncDump() accepts it.
// Nothing is changed when e2e dump is enabled.
void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
  if (e2e_dump_enabled_) {
    MS_LOG(INFO) << "E2e dump no need to update dump kernel list";
    // Leave the kernel list untouched, as the message states; the return was missing.
    return;
  }
  std::map<std::string, uint32_t> update_kernels;
  for (const auto &kernel : kernel_graph->execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL && NeedDump(kernel->fullname_with_scope())) {
      auto input_size = AnfAlgo::GetInputTensorNum(kernel);
      for (size_t i = 0; i < input_size; ++i) {
        auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
        auto input = input_with_index.first;
        MS_EXCEPTION_IF_NULL(input);
        if (input->isa<CNode>()) {
          MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
                       << " Input:" << input->fullname_with_scope();
          update_kernels.try_emplace(input->fullname_with_scope(), 0);
        }
      }
    } else if (NeedAsyncDump(kernel)) {
      MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
      update_kernels.try_emplace(kernel->fullname_with_scope(), 0);
    }
  }
  // insert() keeps entries that already exist, so match counters collected so far survive.
  kernels_.insert(update_kernels.begin(), update_kernels.end());
}
} // namespace mindspore

View File

@ -0,0 +1,96 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_
#include <string>
#include <map>
#include <set>
#include <mutex>
#include "nlohmann/json.hpp"
#include "utils/ms_utils.h"
#include "backend/session/kernel_graph.h"
namespace mindspore {
// Singleton that holds the data dump configuration (common, async and e2e settings) and
// answers queries about which kernels have to be dumped.
class DumpJsonParser {
 public:
  // Returns the single process-wide instance.
  static DumpJsonParser &GetInstance() {
    static DumpJsonParser instance;
    return instance;
  }

  // Loads the dump configuration JSON and fills the members below.
  void Parse();
  // Writes `len` bytes from `data` to `filename`; returns whether that succeeded.
  static bool DumpToFile(const std::string &filename, const void *data, size_t len);

  // Whether the kernel with the given full name has to be dumped.
  bool NeedDump(const std::string &op_full_name) const;
  // Records that a configured kernel has been encountered.
  void MatchKernel(const std::string &kernel_name);
  // Reports the configured kernels that were never matched.
  void PrintUnusedKernel();

  // Read accessors for the parsed settings.
  bool async_dump_enabled() const { return async_dump_enabled_; }
  bool e2e_dump_enabled() const { return e2e_dump_enabled_; }
  uint32_t dump_mode() const { return dump_mode_; }
  std::string path() const { return path_; }
  std::string net_name() const { return net_name_; }
  uint32_t iteration() const { return iteration_; }
  uint32_t input_output() const { return input_output_; }
  uint32_t op_debug_mode() const { return op_debug_mode_; }
  bool trans_flag() const { return trans_flag_; }
  // const like every other read accessor, so it can be called through a const reference.
  uint32_t cur_dump_iter() const { return cur_dump_iter_; }
  // Advances the dump iteration counter by one.
  void UpdateDumpIter() { ++cur_dump_iter_; }

  // Whether kernel inputs / outputs are selected by input_output_.
  bool InputNeedDump() const;
  bool OutputNeedDump() const;

  std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;
  void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph);

 private:
  DumpJsonParser() = default;
  ~DumpJsonParser() = default;
  DISABLE_COPY_AND_ASSIGN(DumpJsonParser)

  std::mutex lock_;
  bool async_dump_enabled_{false};
  bool e2e_dump_enabled_{false};
  uint32_t dump_mode_{0};
  std::string path_;
  std::string net_name_;
  uint32_t iteration_{0};
  uint32_t input_output_{0};
  // Kernel full name mapped to a per-kernel counter.
  std::map<std::string, uint32_t> kernels_;
  std::set<uint32_t> support_devices_;
  uint32_t op_debug_mode_{0};
  bool trans_flag_{false};
  uint32_t cur_dump_iter_{0};

  void ParseCommonDumpSetting(const nlohmann::json &content);
  void ParseAsyncDumpSetting(const nlohmann::json &content);
  void ParseE2eDumpSetting(const nlohmann::json &content);
  bool IsDumpEnabled();

  // Deduced return type: usable only where the definition is visible.
  auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key);

  void ParseDumpMode(const nlohmann::json &content);
  void ParseDumpPath(const nlohmann::json &content);
  void ParseNetName(const nlohmann::json &content);
  void ParseIteration(const nlohmann::json &content);
  void ParseInputOutput(const nlohmann::json &content);
  void ParseKernels(const nlohmann::json &content);
  void ParseSupportDevice(const nlohmann::json &content);
  bool ParseEnable(const nlohmann::json &content);
  void ParseOpDebugMode(const nlohmann::json &content);
  void JudgeDumpEnabled();
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_

View File

@ -0,0 +1,222 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump/e2e_dump_util.h"
#include <algorithm>
#include "debug/data_dump/dump_json_parser.h"
#include "common/trans.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/ms_context.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h"
#include "debug/tensor_load.h"
#include "debug/debugger/debugger.h"
#endif
namespace {
// File-local constant with value 0.
// NOTE(review): the identifier looks like a misspelling of PARAMETER_OUTPUT_INDEX; it is
// left as is because its uses are not visible in this part of the file.
const size_t PRAMATER_OUTPUT_INDEX = 0;
}
namespace mindspore {
// Rewrites `*kernel_name` in place, replacing every "/" with "--".
void E2eDumpUtil::GetFileKernelName(NotNull<std::string *> kernel_name) {
  const std::string separator = "/";
  const std::string replacement = "--";
  // After each replacement the search resumes right behind the inserted text.
  for (auto pos = kernel_name->find(separator); pos != std::string::npos;
       pos = kernel_name->find(separator, pos + replacement.size())) {
    kernel_name->replace(pos, separator.size(), replacement);
  }
}
// True when the device target stored in MsContext equals kGPUDevice.
bool E2eDumpUtil::IsDeviceTargetGPU() {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  const auto &device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  return device_target == kGPUDevice;
}
void E2eDumpUtil::DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr,
bool trans_flag, const ShapeVector &int_shapes, const TypeId &type) {
auto format = kOpFormat_DEFAULT;
auto ret = addr->DumpMemToFile(trans_flag, file_path, format, int_shapes, type);
if (!ret) {
MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << file_path << ", host_format:" << format
<< ".!";
}
}
// Dumps slot `slot` of the tensor recorded for `original_kernel_name` to `file_path` through
// the debugger's tensor loader, using the default host format. A failed dump is logged as
// an error. The function body is compiled only when ENABLE_DEBUGGER is defined; otherwise
// the call does nothing.
void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
                                   NotNull<const device::DeviceAddress *> addr, bool trans_flag,
                                   const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger) {
#ifdef ENABLE_DEBUGGER
  // Each link of the debugger -> debug services -> tensor loader chain is validated before
  // it is dereferenced, so a missing object raises a framework exception instead of crashing.
  MS_EXCEPTION_IF_NULL(debugger);
  auto format = kOpFormat_DEFAULT;
  DebugServices *debug_services = debugger->debug_services();
  MS_EXCEPTION_IF_NULL(debug_services);
  TensorLoader *tensor_loader = debug_services->tensor_loader();
  MS_EXCEPTION_IF_NULL(tensor_loader);
  auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, type,
                                             addr->type_id(), addr->format(), slot);
  if (!ret) {
    MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << file_path
                  << ", host_format:" << format;
  }
#endif
}
// Produces the shape used for dumping output `index` of `node`.
// With trans_flag set, `*int_shapes` is overwritten with the runtime padding shape; otherwise
// the device shape is appended to `*int_shapes` element by element after SizeToInt conversion.
void E2eDumpUtil::GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag,
                                  NotNull<ShapeVector *> int_shapes) {
  if (!trans_flag) {
    auto device_shape = AnfAlgo::GetOutputDeviceShape(node, index);
    for (size_t dim : device_shape) {
      int_shapes->push_back(SizeToInt(dim));
    }
    return;
  }
  *int_shapes = trans::GetRuntimePaddingShape(node, index);
}
// Dumps every output tensor of the kernels in `graph` that the dump configuration selects.
// Does nothing when outputs are not selected by DumpJsonParser::OutputNeedDump(). Each tensor
// goes to "<dump_path>/<kernel name with '/' replaced by '--'>_output_<index>"; on a GPU
// target the data is written through `debugger`, otherwise directly from the device address.
void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump output";
  const bool trans_flag = dump_json_parser.trans_flag();
  // The device target is queried once here rather than once per dumped output.
  const bool is_gpu_target = IsDeviceTargetGPU();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    const std::string original_kernel_name = node->fullname_with_scope();
    if (!dump_json_parser.NeedDump(original_kernel_name)) {
      continue;
    }
    dump_json_parser.MatchKernel(original_kernel_name);
    // The file name uses the escaped kernel name; the tensor lookup on GPU needs the original one.
    std::string file_kernel_name = original_kernel_name;
    GetFileKernelName(NOT_NULL(&file_kernel_name));
    auto output_size = AnfAlgo::GetOutputTensorNum(node);
    for (size_t j = 0; j < output_size; ++j) {
      auto addr = AnfAlgo::GetOutputAddr(node, j);
      ShapeVector int_shapes;
      GetDumpIntShape(node, j, trans_flag, NOT_NULL(&int_shapes));
      auto type = AnfAlgo::GetOutputInferDataType(node, j);
      std::string file_path = dump_path + '/' + file_kernel_name + '_' + "output_" + std::to_string(j);
      if (is_gpu_target) {
        DumpGPUMemToFile(file_path, original_kernel_name, NOT_NULL(addr), trans_flag, int_shapes, type, j, debugger);
      } else {
        DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
      }
    }
  }
}
// Dumps every input tensor of each kernel in the graph's execution order that the dump configuration selects.
// The data of input j is taken from the output address of the node feeding it, and written as
// "<dump_path>/<file kernel name>_input_<j>". Does nothing when the configuration does not request input dumping.
void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &parser = DumpJsonParser::GetInstance();
  if (!parser.InputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump input";
  bool trans_flag = parser.trans_flag();
  const auto &kernels = graph->execution_order();
  for (const auto &kernel : kernels) {
    MS_EXCEPTION_IF_NULL(kernel);
    // The result is not used; the call is kept so that whatever it does for `kernel` still happens.
    (void)AnfAlgo::GetCNodeName(kernel);
    std::string file_kernel_name = kernel->fullname_with_scope();
    if (!parser.NeedDump(file_kernel_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(file_kernel_name);
    // From here on the name is only used to build file names.
    GetFileKernelName(NOT_NULL(&file_kernel_name));
    auto input_num = AnfAlgo::GetInputTensorNum(kernel);
    for (size_t j = 0; j < input_num; ++j) {
      auto producer_with_index = AnfAlgo::GetPrevNodeOutput(kernel, j);
      auto producer = producer_with_index.first;
      auto producer_index = producer_with_index.second;
      auto device_addr = AnfAlgo::GetOutputAddr(producer, producer_index);
      ShapeVector dump_shape;
      GetDumpIntShape(producer, producer_index, trans_flag, NOT_NULL(&dump_shape));
      auto data_type = AnfAlgo::GetOutputInferDataType(producer, producer_index);
      std::string in_file = dump_path + '/' + file_kernel_name + '_' + "input_" + std::to_string(j);
      if (!IsDeviceTargetGPU()) {
        DumpMemToFile(in_file, NOT_NULL(device_addr), trans_flag, dump_shape, data_type);
        continue;
      }
      // NOTE(review): the GPU path looks the tensor up by this kernel's own name and input position j, not by
      // the producer node/index used for the address above - confirm this is the key TensorLoader expects.
      DumpGPUMemToFile(in_file, kernel->fullname_with_scope(), NOT_NULL(device_addr), trans_flag, dump_shape,
                       data_type, j, debugger);
    }
  }
}
// Dumps the Parameter nodes among the graph inputs that the dump configuration selects.
// Each parameter is written as "<dump_path>/<parameter name>_output_0" using output PRAMATER_OUTPUT_INDEX.
// Graph inputs that are not Parameters are skipped; a null input raises through MS_EXCEPTION_IF_NULL.
void E2eDumpUtil::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  MS_LOG(INFO) << "Start e2e dump parameters";
  bool trans_flag = dump_json_parser.trans_flag();
  const auto &parameters = graph->inputs();
  for (auto &item : parameters) {
    // Same guard the kernel loops apply to their nodes: fail with a diagnosable exception, not a null dereference.
    MS_EXCEPTION_IF_NULL(item);
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    if (!dump_json_parser.NeedDump(parameter_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(parameter_name);
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    ShapeVector int_shapes;
    GetDumpIntShape(item, PRAMATER_OUTPUT_INDEX, trans_flag, NOT_NULL(&int_shapes));
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    std::string file_path = dump_path + '/' + parameter_name + '_' + "output_0";
    if (IsDeviceTargetGPU()) {
      DumpGPUMemToFile(file_path, parameter_name, NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger);
    } else {
      DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
    }
  }
}
// Entry point of the e2e dump for one executed graph.
// The dump iteration counter is advanced on every call, even when dumping is disabled. When e2e dump is enabled
// and the configured iteration is either 0 or equal to the current one, inputs, outputs and parameters are
// dumped below "<path>/<net_name>/<iteration>".
// Always returns true.
bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  dump_json_parser.UpdateDumpIter();
  auto dump_flag = dump_json_parser.e2e_dump_enabled();
  if (!dump_flag) {
    MS_LOG(INFO) << "E2e dump is disabled, skip dump step";
    return true;
  }

  // A configured iteration of 0 dumps every iteration; any other value selects exactly that iteration.
  if (dump_json_parser.iteration() != 0) {
    if (dump_json_parser.cur_dump_iter() != dump_json_parser.iteration()) {
      return true;
    }
  }
  MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
  std::string net_name = dump_json_parser.net_name();
  std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
  std::string dump_path = dump_json_parser.path();
  // std::string::back() on an empty string is undefined behaviour, so the last character is only inspected
  // when there is one. An empty base path yields the relative directory "<net_name>/<iteration>".
  if (!dump_path.empty() && dump_path.back() != '/') {
    dump_path += '/';
  }
  dump_path += net_name + '/' + iterator;
  DumpInput(graph, dump_path, debugger);
  DumpOutput(graph, dump_path, debugger);
  DumpParameters(graph, dump_path, debugger);
  return true;
}
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
#include <string>
#include "backend/session/kernel_graph.h"
#include "runtime/device/device_address.h"
#ifndef ENABLE_DEBUGGER
class Debugger;
#endif
namespace mindspore {
// Static helpers that write tensors of an executed kernel graph to files (e2e dump).
// Only DumpData() is public; the other members are the steps it is built from.
class E2eDumpUtil {
public:
E2eDumpUtil() = default;
~E2eDumpUtil() = default;
// Runs the dump for `graph`. `debugger` may be omitted (defaults to nullptr); it is forwarded to the
// GPU dump path.
static bool DumpData(const session::KernelGraph *graph, Debugger *debugger = nullptr);
private:
// Dump the outputs / inputs of the graph's kernels, and the graph's parameters, below `dump_path`.
static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
static void DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
static void DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
// Rewrites *kernel_name in place before it is used in a file name.
// NOTE(review): presumably replaces '/' so the name is a valid file name - confirm against the definition.
static void GetFileKernelName(NotNull<std::string *> kernel_name);
// Dumps through the device address itself.
static void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr, bool trans_flag,
const ShapeVector &int_shapes, const TypeId &type);
// Dumps through the debugger's tensor loader; used when IsDeviceTargetGPU() is true.
static void DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
NotNull<const device::DeviceAddress *> addr, bool trans_flag,
const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger);
// Fills *int_shapes with the shape to dump for output `index` of `node`.
static void GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, NotNull<ShapeVector *> int_shapes);
// Selects between DumpGPUMemToFile and DumpMemToFile in the dump loops.
static bool IsDeviceTargetGPU();
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_

View File

@ -1,236 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump_parser.h"
#include <fstream>
#include "utils/ms_context.h"
#include "debug/common.h"
// Name handed to Common::GetConfigFile() to locate the dump config file.
// NOTE(review): "Ptah" is a typo for "Path" in the identifier only; the string value is spelled correctly.
static constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
// Environment variable that must be set to 1 for data dump to be enabled.
static constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
// Environment variable holding the dump path.
static constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
// Keys expected inside the "DumpSettings" object of the config JSON.
static constexpr auto kConfigDumpMode = "dump_mode";
static constexpr auto kConfigOpDebugMode = "op_debug_mode";
static constexpr auto kConfigNetName = "net_name";
static constexpr auto kConfigIteration = "iteration";
static constexpr auto kConfigKernels = "kernels";
namespace mindspore {
void DataDumpParser::ResetParam() {
enable_ = false;
net_name_.clear();
dump_mode_ = 0;
dump_step_ = 0;
kernel_map_.clear();
}
// Reports whether data dump is switched on through the ENABLE_DATA_DUMP environment variable.
// Returns false when the variable is unset or does not parse to 1. When it is 1 but the execution mode is
// PyNative, the problem is raised through MS_LOG(EXCEPTION).
bool DataDumpParser::DumpEnabled() const {
  const char *env_value = std::getenv(kEnableDataDump);
  if (env_value == nullptr) {
    MS_LOG(INFO) << "[DataDump] enable dump is null. If you want to dump data, please export ENABLE_DATA_DUMP";
    return false;
  }

  if (std::atoi(env_value) != 1) {
    MS_LOG(WARNING) << "[DataDump] If you want to dump data, please export ENABLE_DATA_DUMP=1";
    return false;
  }

  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  const bool is_pynative = ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode;
  if (is_pynative) {
    MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump";
  }
  return true;
}
// Reads the dump path from the DATA_DUMP_PATH environment variable.
// Returns an empty optional (after logging an error) when the variable is unset. A path containing anything
// other than letters, digits, '-', '_' or '/' is rejected through MS_LOG(EXCEPTION).
std::optional<std::string> DataDumpParser::GetDumpPath() const {
  auto dump_path = std::getenv(kDataDumpPath);
  if (dump_path == nullptr) {
    MS_LOG(ERROR) << "[DataDump] dump path is null. If you want to dump data, please export DATA_DUMP_PATH";
    return {};
  }
  std::string dump_path_str(dump_path);
  const auto is_allowed = [](char c) {
    // The <cctype> classifiers are undefined for negative values, which a plain char holds for non-ASCII bytes;
    // they must be given the value as unsigned char.
    const auto uc = static_cast<unsigned char>(c);
    return ::isalpha(uc) || ::isdigit(uc) || c == '-' || c == '_' || c == '/';
  };
  if (!std::all_of(dump_path_str.begin(), dump_path_str.end(), is_allowed)) {
    MS_LOG(EXCEPTION) << "[DataDump] dump path only support alphabets, digit or {'-', '_', '/'}, but got:"
                      << dump_path_str;
  }
  return dump_path_str;
}
// Returns whatever can still be read from the stream's buffer, starting at its current position, as a string.
// A stream with nothing readable yields an empty string.
std::string GetIfstreamString(const std::ifstream &ifstream) {
  std::stringstream contents;
  contents << ifstream.rdbuf();
  return contents.str();
}
void DataDumpParser::ParseDumpConfig() {
std::lock_guard<std::mutex> guard(lock_);
MS_LOG(INFO) << "[DataDump] parse start";
if (!DumpEnabled()) {
MS_LOG(INFO) << "[DataDump] dump not enable";
return;
}
ResetParam();
auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah);
if (!dump_config_file.has_value()) {
MS_LOG(EXCEPTION) << "[DataDump] Get config file failed";
}
std::ifstream json_file(dump_config_file.value());
if (!json_file.is_open()) {
MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed.";
}
nlohmann::json j;
try {
json_file >> j;
} catch (nlohmann::json::parse_error &e) {
MS_LOG(ERROR) << "[DataDump] json contents:" << GetIfstreamString(json_file);
MS_LOG(EXCEPTION) << "[DataDump] parse json failed, error:" << e.what();
}
if (j.find("DumpSettings") == j.end()) {
MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist.";
}
nlohmann::json dump_settings = j.at("DumpSettings");
// convert json to string
std::stringstream ss;
ss << dump_settings;
std::string cfg = ss.str();
MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg;
if (!IsConfigExist(dump_settings)) {
MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid";
}
if (!ParseDumpSetting(dump_settings)) {
MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed";
}
}
// Tells whether the operator named `op_full_name` should be dumped.
// Never when dump is disabled; always when dump_mode_ is 0; otherwise only when the name is listed in kernel_map_.
bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
  if (!DumpEnabled()) {
    return false;
  }
  const bool dump_everything = (dump_mode_ == 0);
  if (dump_everything) {
    return true;
  }
  return kernel_map_.find(op_full_name) != kernel_map_.end();
}
// Returns true when `key` is present in `dump_settings`; otherwise logs the missing key as an error and
// returns false.
bool CheckConfigKey(const nlohmann::json &dump_settings, const std::string &key) {
  const bool present = dump_settings.find(key) != dump_settings.end();
  if (present) {
    return true;
  }
  MS_LOG(ERROR) << "[DataDump] DumpSettings key:" << key << " is not exist.";
  return false;
}
// Verifies that all required keys are present in `dump_settings`.
// Keys are checked in a fixed order and the check stops at the first missing one (which CheckConfigKey logs).
bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
  const char *const required_keys[] = {kConfigDumpMode, kConfigNetName, kConfigOpDebugMode, kConfigIteration,
                                       kConfigKernels};
  for (const char *key : required_keys) {
    if (!CheckConfigKey(dump_settings, key)) {
      return false;
    }
  }
  return true;
}
// Validates the value types of the dump settings and stores them in this parser.
// Returns false (with enable_ cleared) when a value has the wrong JSON type. Out-of-range dump_mode or
// op_debug_mode values are rejected by CheckDumpMode / CheckOpDebugMode. On success enable_ is set and every
// entry of "kernels" is registered in kernel_map_ with a match count of 0.
bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
  const auto &mode_json = dump_settings.at(kConfigDumpMode);
  const auto &op_debug_json = dump_settings.at(kConfigOpDebugMode);
  const auto &net_name_json = dump_settings.at(kConfigNetName);
  const auto &iteration_json = dump_settings.at(kConfigIteration);
  const auto &kernels_json = dump_settings.at(kConfigKernels);

  const bool types_ok = mode_json.is_number_unsigned() && op_debug_json.is_number_unsigned() &&
                        net_name_json.is_string() && iteration_json.is_number_unsigned() && kernels_json.is_array();
  if (!types_ok) {
    MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
    enable_ = false;
    return false;
  }

  CheckDumpMode(mode_json);
  CheckOpDebugMode(op_debug_json);

  enable_ = true;
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  dump_mode_ = mode_json;
  op_debug_mode_ = op_debug_json;
  net_name_ = net_name_json;
  dump_step_ = iteration_json;
  for (const auto &kernel_json : kernels_json) {
    // dump() serialises the element as JSON text; the double quotes are stripped to get the bare kernel name.
    auto name = kernel_json.dump();
    name.erase(std::remove(name.begin(), name.end(), '\"'), name.end());
    MS_LOG(INFO) << "[DataDump] Need dump kernel:" << name;
    kernel_map_.insert({name, 0});
  }
  return true;
}
// Records that the configured kernel `kernel_name` was matched once more and logs the new count.
// Names that are not in kernel_map_ are ignored.
void DataDumpParser::MatchKernel(const std::string &kernel_name) {
  auto entry = kernel_map_.find(kernel_name);
  if (entry != kernel_map_.end()) {
    entry->second += 1;
    MS_LOG(INFO) << "Match dump kernel:" << entry->first << " match times:" << entry->second;
  }
}
// Logs a warning for every configured kernel whose match count is still 0.
void DataDumpParser::PrintUnusedKernel() {
  for (const auto &entry : kernel_map_) {
    if (entry.second != 0) {
      continue;
    }
    MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << entry.first;
  }
}
// Rejects, through MS_LOG(EXCEPTION), any dump_mode other than 0 or 1.
void DataDumpParser::CheckDumpMode(uint32_t dump_mode) const {
  const bool valid = (dump_mode == 0 || dump_mode == 1);
  if (!valid) {
    MS_LOG(EXCEPTION) << "[DataDump] dump_mode in config json should be 0 or 1";
  }
}
// Rejects, through MS_LOG(EXCEPTION), any op_debug_mode outside [0, 3].
void DataDumpParser::CheckOpDebugMode(uint32_t op_debug_mode) const {
  // The value is unsigned, so it cannot be below 0; only the upper bound needs checking.
  if (op_debug_mode > 3) {
    MS_LOG(EXCEPTION) << "[DataDump] op_debug_mode in config json file should be [0-3]";
  }
}
// Builds the directory path of the operator-overflow dump files:
//   "/var/log/npu/ide_daemon/dump" [+ $DATA_DUMP_PATH + "_"] + <device_id> + "/" + <net_name> + "_" + <graph_id>
//   + "/" + <dump_mode> + "/" + <dump_step> + "/"
// The bracketed part is only present when the DATA_DUMP_PATH environment variable is set.
std::string DataDumpParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const {
  std::string overflow_dir = "/var/log/npu/ide_daemon/dump";
  const char *env_dump_path = std::getenv("DATA_DUMP_PATH");
  if (env_dump_path != nullptr) {
    overflow_dir += env_dump_path;
    overflow_dir += "_";
  }
  overflow_dir += std::to_string(device_id) + "/";
  overflow_dir += net_name_ + "_" + std::to_string(graph_id) + "/";
  overflow_dir += std::to_string(dump_mode_) + "/";
  overflow_dir += std::to_string(dump_step_) + "/";
  return overflow_dir;
}
} // namespace mindspore

View File

@ -1,67 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
#define MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
#include <string>
#include <map>
#include <mutex>
#include <optional>
#include "nlohmann/json.hpp"
#include "utils/ms_utils.h"
namespace mindspore {
// Process-wide holder of the async data-dump configuration, obtained through GetInstance().
// The settings are read by ParseDumpConfig() and exposed through the accessors below.
class DataDumpParser {
public:
// Returns the single shared instance (a function-local static).
static DataDumpParser &GetInstance() {
static DataDumpParser instance;
return instance;
}
// Reads and validates the dump config file; a no-op when dump is not enabled.
void ParseDumpConfig();
// Whether the operator with this full name should be dumped.
bool NeedDump(const std::string &op_full_name) const;
// Whether dump is enabled through the environment.
bool DumpEnabled() const;
// Dump path taken from the environment; empty optional when it is not set.
std::optional<std::string> GetDumpPath() const;
// Plain accessors for the parsed settings.
bool enable() const { return enable_; }
const std::string &net_name() const { return net_name_; }
uint32_t dump_mode() const { return dump_mode_; }
uint32_t op_debug_mode() const { return op_debug_mode_; }
uint32_t dump_step() const { return dump_step_; }
// Counts a match for a configured kernel name.
void MatchKernel(const std::string &kernel_name);
// Warns about configured kernel names that were never matched.
void PrintUnusedKernel();
// Directory path of the operator-overflow dump files for the given graph and device.
std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;
private:
// Construction and destruction are private: instances only exist through GetInstance().
DataDumpParser() = default;
virtual ~DataDumpParser() = default;
DISABLE_COPY_AND_ASSIGN(DataDumpParser);
void ResetParam();
bool IsConfigExist(const nlohmann::json &dump_settings) const;
bool ParseDumpSetting(const nlohmann::json &dump_settings);
void CheckDumpMode(uint32_t dump_mode) const;
void CheckOpDebugMode(uint32_t op_debug_mode) const;
// Held by ParseDumpConfig() for the duration of a parse.
std::mutex lock_;
bool enable_{false};
std::string net_name_;
uint32_t op_debug_mode_{0};
uint32_t dump_mode_{0};
uint32_t dump_step_{0};
// Configured kernel name -> number of times MatchKernel() saw it.
std::map<std::string, uint32_t> kernel_map_;
};
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_

View File

@ -25,7 +25,7 @@
#include <utility> #include <utility>
#include <map> #include <map>
#include "debug/debugger/debugger.h" #include "debug/debugger/debugger.h"
#include "debug/data_dump_parser.h" #include "debug/data_dump/dump_json_parser.h"
#include "pipeline/jit/pipeline.h" #include "pipeline/jit/pipeline.h"
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h" #include "runtime/device/kernel_runtime_manager.h"
@ -137,7 +137,7 @@ void Debugger::EnableDebugger() {
} }
#ifdef ENABLE_D #ifdef ENABLE_D
// set operation overflow info // set operation overflow info
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_); overflow_bin_path_ = DumpJsonParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
// new overflow dump files will have a timestamp greater than last_overflow_bin_ // new overflow dump files will have a timestamp greater than last_overflow_bin_
last_overflow_bin_ = 0; last_overflow_bin_ = 0;
DIR *d; DIR *d;

View File

@ -1,178 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/e2e_dump.h"
#include <limits.h>
#include <fstream>
#include <string>
#include <optional>
#include <nlohmann/json.hpp>
#include "utils/log_adapter.h"
#include "utils/system/file_system.h"
#include "utils/system/env.h"
#include "utils/convert_utils.h"
#include "utils/ms_context.h"
#include "debug/common.h"
using json = nlohmann::json;
namespace mindspore {
Dump::Dump()
: dump_enable_(false),
trans_flag_(false),
dump_path_("/tmp/"),
dump_net_name_("net_name"),
dump_mode_(0),
dump_iter_(0),
cur_iter_(0) {}
// Tells whether `kernel_name` should be dumped: always in mode 0 (dump all kernels), otherwise only when the
// name is listed in dump_kernels_.
bool Dump::IsKernelNeedDump(const std::string &kernel_name) {
  if (dump_mode_ == 0) {
    // Dump All Kernels mode
    return true;
  }
  const auto last = dump_kernels_.end();
  return std::find(dump_kernels_.begin(), last, kernel_name) != last;
}
bool Dump::ParseDumpConfig(const std::string &dump_config_file) {
std::ifstream jsonFile(dump_config_file);
if (!jsonFile.is_open()) {
MS_LOG(ERROR) << dump_config_file << " open failed.";
dump_enable_ = false;
return false;
}
json j;
jsonFile >> j;
if (j.find("DumpSettings") == j.end()) {
MS_LOG(ERROR) << "DumpSettings is not exist.";
dump_enable_ = false;
return false;
} else {
json dumpSettings = j.at("DumpSettings");
// convert json to string
std::stringstream ss;
ss << dumpSettings;
std::string cfg = ss.str();
MS_LOG(INFO) << "E2E Dump Settings Json: " << cfg;
if (!IsConfigExist(dumpSettings)) {
return false;
}
if (!IsConfigValid(dumpSettings)) {
return false;
}
}
return true;
}
// Checks that every required key is present in `dumpSettings`.
// When one is missing, an error is logged, dumping is disabled and false is returned.
bool Dump::IsConfigExist(const nlohmann::json &dumpSettings) {
  const char *const required_keys[] = {"trans_flag", "enable", "mode", "path", "net_name", "iteration", "kernels"};
  for (const char *key : required_keys) {
    if (dumpSettings.find(key) != dumpSettings.end()) {
      continue;
    }
    MS_LOG(ERROR) << "DumpSettings keys is not exist.";
    dump_enable_ = false;
    return false;
  }
  return true;
}
// Validates the JSON types of the dump settings and copies them into this object.
// Returns false (with dumping disabled) on a type mismatch. On success the settings are stored, the context's
// MS_CTX_ENABLE_MEM_REUSE parameter is set to the opposite of the "enable" value, and every entry of "kernels"
// is appended to dump_kernels_.
bool Dump::IsConfigValid(const nlohmann::json &dumpSettings) {
  const auto &trans_flag_json = dumpSettings.at("trans_flag");
  const auto &enable_json = dumpSettings.at("enable");
  const auto &mode_json = dumpSettings.at("mode");
  const auto &path_json = dumpSettings.at("path");
  const auto &net_name_json = dumpSettings.at("net_name");
  const auto &iteration_json = dumpSettings.at("iteration");
  const auto &kernels_json = dumpSettings.at("kernels");

  const bool types_ok = enable_json.is_boolean() && trans_flag_json.is_boolean() && mode_json.is_number() &&
                        path_json.is_string() && net_name_json.is_string() && iteration_json.is_number() &&
                        kernels_json.is_array();
  if (!types_ok) {
    MS_LOG(ERROR) << "Element's type in Dump config json is invalid.";
    dump_enable_ = false;
    return false;
  }

  dump_enable_ = enable_json;
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  // dump_enable_ is true, close mem reuse
  ms_context->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !dump_enable_);
  trans_flag_ = trans_flag_json;
  dump_mode_ = mode_json;
  dump_path_ = path_json;
  dump_net_name_ = net_name_json;
  dump_iter_ = iteration_json;
  for (const auto &kernel_json : kernels_json) {
    dump_kernels_.push_back(kernel_json);
  }
  return true;
}
bool Dump::SetDumpConfFromJsonFile() {
const char *config_path_str = std::getenv("MINDSPORE_CONFIG_PATH");
if (config_path_str != nullptr) {
MS_LOG(INFO) << "Getenv MINDSPORE_CONFIG_PATH :" << config_path_str;
} else {
MS_LOG(INFO) << "No need E2E Dump. please export MINDSPORE_CONFIG_PATH eg: MINDSPORE_CONFIG_PATH=/etc";
dump_enable_ = false;
return false;
}
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
char real_path[PATH_MAX] = {0};
if (nullptr == realpath(config_path_str, real_path)) {
MS_LOG(ERROR) << "Env e2e dump path error, " << config_path_str;
dump_enable_ = false;
return false;
}
std::string dump_config_file = std::string(real_path) + "/e2e_dump_config_" + std::to_string(id) + ".json";
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
MS_EXCEPTION_IF_NULL(fs);
if (!fs->FileExist(dump_config_file)) {
MS_LOG(ERROR) << dump_config_file << " not exist.";
dump_enable_ = false;
return false;
}
return ParseDumpConfig(dump_config_file);
}
bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) {
if (filename.empty() || data == nullptr || len == 0) {
MS_LOG(ERROR) << "Incorrect parameter.";
return false;
}
auto realpath = Common::GetRealPath(filename);
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed.";
return false;
}
std::ofstream fd;
fd.open(realpath.value(), std::ios::binary | std::ios::out);
if (!fd.is_open()) {
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
return false;
}
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
fd.close();
return true;
}
} // namespace mindspore

View File

@ -1,70 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_E2E_DUMP_H
#define MINDSPORE_E2E_DUMP_H
#include <stdint.h>
#include <string>
#include <vector>
#include <iostream>
#include <memory>
#include <nlohmann/json.hpp>
namespace mindspore {
// Holds the e2e dump configuration read from a JSON config file, plus the running iteration counter.
class Dump {
public:
Dump();
~Dump() = default;
// Accessors for the parsed settings.
bool dump_enable() const { return dump_enable_; }
bool trans_flag() const { return trans_flag_; }
std::string dump_path() const { return dump_path_; }
std::string dump_net_name() const { return dump_net_name_; }
uint32_t dump_iter() const { return dump_iter_; }
// Advances the current-iteration counter by one.
void UpdataCurIter() { cur_iter_++; }
uint32_t cur_iter() const { return cur_iter_; }
// Whether the kernel with this name should be dumped.
bool IsKernelNeedDump(const std::string &kernel_name);
// Locates the config file through the MINDSPORE_CONFIG_PATH environment variable and parses it.
bool SetDumpConfFromJsonFile();
// Writes `len` bytes at `data` to `filename`.
static bool DumpToFile(const std::string &filename, const void *data, size_t len);
protected:
bool dump_enable_;
bool trans_flag_;
std::string dump_path_;
std::string dump_net_name_;
// 0 means every kernel is dumped; otherwise only those listed in dump_kernels_.
uint32_t dump_mode_;
uint32_t dump_iter_;
uint32_t cur_iter_;
std::vector<std::string> dump_kernels_;
private:
bool ParseDumpConfig(const std::string &dump_config_file);
// Presence check and type check/store of the "DumpSettings" entries.
bool IsConfigExist(const nlohmann::json &dumpSettings);
bool IsConfigValid(const nlohmann::json &dumpSettings);
};
// Shared-ownership handle to a Dump configuration.
using DumpConfPtr = std::shared_ptr<Dump>;
} // namespace mindspore
#endif // MINDSPORE_E2E_DUMP_H

View File

@ -24,10 +24,8 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "debug/tensor_data.h" #include "debug/tensor_data.h"
#include "debug/data_dump/dump_json_parser.h"
#include "ir/dtype.h" #include "ir/dtype.h"
#ifdef ENABLE_DUMP_E2E
#include "debug/e2e_dump.h"
#endif
namespace mindspore { namespace mindspore {
class TensorLoader { class TensorLoader {
public: public:
@ -98,7 +96,6 @@ class TensorLoader {
void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }
#ifdef ENABLE_DUMP_E2E
bool DumpTensorToFile(std::string tensor_name, bool trans_flag, const std::string &filepath, bool DumpTensorToFile(std::string tensor_name, bool trans_flag, const std::string &filepath,
const std::string &host_fmt, const std::vector<int> &host_shape, TypeId host_type, const std::string &host_fmt, const std::vector<int> &host_shape, TypeId host_type,
TypeId addr_type_id, std::string addr_format, size_t slot) const { TypeId addr_type_id, std::string addr_format, size_t slot) const {
@ -132,12 +129,11 @@ class TensorLoader {
mindspore::tensor::TensorPtr out_tensor = node->GetTensor(); mindspore::tensor::TensorPtr out_tensor = node->GetTensor();
size_t host_size = out_tensor->data().nbytes(); size_t host_size = out_tensor->data().nbytes();
ret = mindspore::Dump::DumpToFile(path, out_tensor->data_c(), host_size); ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size);
} }
return ret; return ret;
} }
#endif
private: private:
std::vector<std::shared_ptr<TensorData>> tensor_list; std::vector<std::shared_ptr<TensorData>> tensor_list;

View File

@ -30,9 +30,7 @@
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "utils/utils.h" #include "utils/utils.h"
#include "common/trans.h" #include "common/trans.h"
#ifdef ENABLE_DUMP_E2E #include "debug/data_dump/dump_json_parser.h"
#include "debug/e2e_dump.h"
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#include "debug/tensor_load.h" #include "debug/tensor_load.h"
#endif #endif
@ -622,7 +620,6 @@ AscendDeviceAddress::~AscendDeviceAddress() {
} }
} }
#ifdef ENABLE_DUMP_E2E
bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &filepath, const std::string &host_fmt, bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const { const ShapeVector &host_shape, TypeId host_type) const {
bool ret = false; bool ret = false;
@ -649,7 +646,7 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file
MS_LOG(ERROR) << "Copy device mem to host failed"; MS_LOG(ERROR) << "Copy device mem to host failed";
return ret; return ret;
} }
ret = mindspore::Dump::DumpToFile(path, out_tensor->data_c(), host_size); ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size);
} else { } else {
auto host_tmp = std::vector<uint8_t>(size_); auto host_tmp = std::vector<uint8_t>(size_);
auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
@ -659,12 +656,11 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file
std::string path = std::string path =
filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension; filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension;
MS_LOG(INFO) << "E2E Dump path is " << path; MS_LOG(INFO) << "E2E Dump path is " << path;
ret = mindspore::Dump::DumpToFile(path, host_tmp.data(), size_); ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_);
} }
return ret; return ret;
} }
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order, bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order,

View File

@ -42,10 +42,8 @@ class AscendDeviceAddress : public DeviceAddress {
bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override; bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override;
bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override; bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override;
DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; } DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; }
#ifdef ENABLE_DUMP_E2E
bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt, bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const; const ShapeVector &host_shape, TypeId host_type) const override;
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt, bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger, const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,

View File

@ -39,6 +39,7 @@
#include "backend/kernel_compiler/tbe/tbe_utils.h" #include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "runtime/device/ascend/ascend_memory_manager.h" #include "runtime/device/ascend/ascend_memory_manager.h"
#include "debug/tensor_load.h" #include "debug/tensor_load.h"
#include "debug/data_dump/dump_json_parser.h"
#include "utils/shape_utils.h" #include "utils/shape_utils.h"
#ifdef MEM_REUSE_DEBUG #ifdef MEM_REUSE_DEBUG
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h" #include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
@ -115,7 +116,7 @@ void AscendKernelRuntime::ClearGraphModelMap() {
} }
graph_data_dumper_.clear(); graph_data_dumper_.clear();
// tell users which dump kernel name not used // tell users which dump kernel name not used
DataDumpParser::GetInstance().PrintUnusedKernel(); DumpJsonParser::GetInstance().PrintUnusedKernel();
for (auto &iter : graph_model_map_) { for (auto &iter : graph_model_map_) {
MS_LOG(INFO) << "Ge UnloadModel " << iter.first; MS_LOG(INFO) << "Ge UnloadModel " << iter.first;
@ -206,15 +207,8 @@ bool AscendKernelRuntime::Init() {
return true; return true;
} }
bool ret = false; bool ret = false;
#ifdef ENABLE_DUMP_E2E
ret = SetDumpConf();
if (!ret) {
MS_LOG(INFO) << "No dump conf to set!";
}
#endif
DataDumpParser::GetInstance().ParseDumpConfig();
DumpJsonParser::GetInstance().Parse();
// Start up profiling before rtSetDevice // Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) { if (!ret) {
@ -233,124 +227,6 @@ bool AscendKernelRuntime::Init() {
return ret; return ret;
} }
#ifdef ENABLE_DUMP_E2E
namespace {
// Dumps the output tensors of every kernel in the graph's execution order that the dump
// configuration selects. Each output j of a selected kernel is written to
// "<dump_path>/<kernel_name>_output_<j>", where kernel_name is the node's full scope name
// with every '/' replaced by "--".
// A failed dump of one tensor is logged and does not stop the remaining dumps.
void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(dump_conf);
  bool trans_flag = dump_conf->trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = node->fullname_with_scope();
    if (!dump_conf->IsKernelNeedDump(kernel_name)) {
      continue;
    }
    // Replace every '/' of the scoped name with "--" before it becomes a file-name component.
    const std::string strsrc = "/";
    const std::string strdst = "--";
    std::string::size_type pos = 0;
    std::string::size_type srclen = strsrc.size();
    std::string::size_type dstlen = strdst.size();
    while ((pos = kernel_name.find(strsrc, pos)) != std::string::npos) {
      kernel_name.replace(pos, srclen, strdst);
      pos += dstlen;
    }
    auto output_size = AnfAlgo::GetOutputTensorNum(node);
    for (size_t j = 0; j < output_size; ++j) {
      auto addr = AnfAlgo::GetOutputAddr(node, j);
      ShapeVector int_shapes;
      if (trans_flag) {
        int_shapes = trans::GetRuntimePaddingShape(node, j);
      } else {
        auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
        (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                             [](size_t inner_item) { return SizeToInt(inner_item); });
      }
      auto type = AnfAlgo::GetOutputInferDataType(node, j);
      auto format = kOpFormat_DEFAULT;
      string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j);
      auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
      // dynamic_cast yields nullptr when addr is null or is not an AscendDeviceAddress;
      // fail loudly instead of dereferencing a null pointer.
      MS_EXCEPTION_IF_NULL(ascend_addr);
      auto ret = ascend_addr->DumpMemToFile(trans_flag, filepath, format, int_shapes, type);
      if (!ret) {
        MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << filepath
                      << ", host_format:" << format << ".!";
      }
    }
  }
}
// Dumps the device memory of every graph input that is a Parameter and is selected by the
// dump configuration. Each selected parameter is written to
// "<dump_path>/<parameter_name>_output_0"; the parameter name is used as-is.
// A failed dump of one parameter is logged and does not stop the remaining dumps.
void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(dump_conf);
  bool trans_flag = dump_conf->trans_flag();
  const auto &parameters = graph->inputs();
  for (auto &item : parameters) {
    MS_EXCEPTION_IF_NULL(item);
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    if (!dump_conf->IsKernelNeedDump(parameter_name)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    ShapeVector int_shapes;
    if (trans_flag) {
      int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX);
    } else {
      auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
    }
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string filepath = dump_path + '/' + parameter_name + '_' + "output_0";
    auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
    // dynamic_cast yields nullptr when addr is null or is not an AscendDeviceAddress;
    // fail loudly instead of dereferencing a null pointer.
    MS_EXCEPTION_IF_NULL(ascend_addr);
    auto ret = ascend_addr->DumpMemToFile(trans_flag, filepath, format, int_shapes, type);
    if (!ret) {
      MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << filepath
                    << ", host_format:" << format << ".!";
    }
  }
}
} // namespace
#endif
// Runs one end-to-end dump step for the graph when built with ENABLE_DUMP_E2E; without that
// macro the function only validates graph. The debugger argument is not used here.
// UpdataCurIter() is called on every invocation, before the enable and iteration checks.
// Nothing is dumped when dumping is disabled, or when a non-zero dump_iter is configured and
// differs from the current iteration. Output goes under "<dump_path>/<net_name>/<cur_iter>".
// Always returns true (failures surface as exceptions or error logs).
bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_DUMP_E2E
  MS_LOG(INFO) << "Start dump step";
  DumpConfPtr dump_conf = GetDumpConf();
  MS_EXCEPTION_IF_NULL(dump_conf);
  dump_conf->UpdataCurIter();
  if (!dump_conf->dump_enable()) {
    MS_LOG(INFO) << "Dump flag is disable, pass dump step";
    return true;
  }
  uint32_t cur_iter = dump_conf->cur_iter();
  // dump_iter == 0 means "dump every iteration"; otherwise only the matching one is dumped.
  if (dump_conf->dump_iter() != 0 && cur_iter != dump_conf->dump_iter()) {
    return true;
  }
  MS_LOG(INFO) << "Cur iter is " << cur_iter;
  std::string net_name = dump_conf->dump_net_name();
  std::string iterator = to_string(cur_iter);
  std::string dump_path = dump_conf->dump_path();
  // Calling back() on an empty string is undefined behaviour, so an empty path is treated
  // like a path that lacks the trailing '/'.
  if (dump_path.empty() || dump_path.back() != '/') {
    dump_path += '/';
  }
  dump_path += net_name + '/' + iterator;
  // dump output
  DumpOutput(graph, dump_path, dump_conf);
  // dump parameters
  DumpParameters(graph, dump_path, dump_conf);
#endif
  return true;
}
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
namespace { namespace {
void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) { void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
@ -482,6 +358,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
MS_EXCEPTION(NotExistsError) << "session::KernelGraph is NULL!"; MS_EXCEPTION(NotExistsError) << "session::KernelGraph is NULL!";
} }
MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id(); MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id();
DumpJsonParser::GetInstance().UpdateNeedDumpKernels(NOT_NULL(graph));
#ifdef MEM_REUSE_DEBUG #ifdef MEM_REUSE_DEBUG
auto context_ptr = MsContext::GetInstance(); auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(context_ptr);
@ -580,9 +457,10 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph, void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
NotNull<std::function<void *()>> model_handle) { NotNull<std::function<void *()>> model_handle) {
if (!DataDumpParser::GetInstance().DumpEnabled()) { if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
return; return;
} }
MS_LOG(INFO) << "Start Distribute Debug Task";
auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle); auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
MS_EXCEPTION_IF_NULL(data_dumper); MS_EXCEPTION_IF_NULL(data_dumper);
auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
@ -593,9 +471,10 @@ void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph
} }
void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) { void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
if (!DataDumpParser::GetInstance().DumpEnabled()) { if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
return; return;
} }
MS_LOG(INFO) << "Start Launch Dump Data";
auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id); auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) { if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
auto &data_dumper = dumper_iter->second; auto &data_dumper = dumper_iter->second;

View File

@ -25,7 +25,6 @@
#include "framework/ge_runtime/davinci_model.h" #include "framework/ge_runtime/davinci_model.h"
#include "runtime/device/kernel_runtime_manager.h" #include "runtime/device/kernel_runtime_manager.h"
#include "backend/session/session_basic.h" #include "backend/session/session_basic.h"
#include "debug/data_dump_parser.h"
#include "runtime/device/ascend/dump/data_dumper.h" #include "runtime/device/ascend/dump/data_dumper.h"
using ge::model_runner::TaskInfo; using ge::model_runner::TaskInfo;
@ -39,7 +38,6 @@ class AscendKernelRuntime : public KernelRuntime {
AscendKernelRuntime() = default; AscendKernelRuntime() = default;
~AscendKernelRuntime() override; ~AscendKernelRuntime() override;
bool Init() override; bool Init() override;
bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr) override;
bool LoadData(session::KernelGraph *graph, Debugger *debugger) override; bool LoadData(session::KernelGraph *graph, Debugger *debugger) override;
bool GenTask(const session::KernelGraph *graph); bool GenTask(const session::KernelGraph *graph);
bool LoadTask(const session::KernelGraph *graph); bool LoadTask(const session::KernelGraph *graph);

View File

@ -27,7 +27,7 @@
#include "runtime/device/ascend/dump/ge_dump.h" #include "runtime/device/ascend/dump/ge_dump.h"
#include "proto/op_mapping_info.pb.h" #include "proto/op_mapping_info.pb.h"
#include "utils/ms_context.h" #include "utils/ms_context.h"
#include "debug/data_dump_parser.h" #include "debug/data_dump/dump_json_parser.h"
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h" #include "debug/debugger/debugger.h"
#endif #endif
@ -68,6 +68,27 @@ DataDumper::~DataDumper() {
ReleaseDevMem(&op_debug_dump_args_); ReleaseDevMem(&op_debug_dump_args_);
} }
// Fills kernel_map with the kernels of kernel_graph_ that have to be dumped, keyed by their
// full scope name. try_emplace keeps the first entry when a name is seen more than once.
//  - For an HCCL kernel that the dump config selects, the kernel itself is not added; the
//    CNodes that produce its inputs are added instead.
//  - Any other kernel is added when KernelNeedDump() accepts it.
void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const {
  // Same guard as LoadDumpInfo(): the graph pointer is dereferenced right below.
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  for (const auto &kernel : kernel_graph_->execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
        DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) {
      auto input_size = AnfAlgo::GetInputTensorNum(kernel);
      for (size_t i = 0; i < input_size; ++i) {
        auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
        auto input = input_with_index.first;
        MS_EXCEPTION_IF_NULL(input);
        // Only inputs produced by another CNode are registered; other node kinds are skipped.
        if (input->isa<CNode>()) {
          MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
                       << " Input:" << input->fullname_with_scope();
          kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>());
        }
      }
    } else if (KernelNeedDump(kernel)) {
      MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
      kernel_map->try_emplace(kernel->fullname_with_scope(), kernel);
    }
  }
}
void DataDumper::LoadDumpInfo() { void DataDumper::LoadDumpInfo() {
MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
MS_EXCEPTION_IF_NULL(kernel_graph_); MS_EXCEPTION_IF_NULL(kernel_graph_);
@ -83,7 +104,7 @@ void DataDumper::LoadDumpInfo() {
} }
MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope();
dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); dump_kernel_names_.emplace_back(kernel->fullname_with_scope());
DataDumpParser::GetInstance().MatchKernel(kernel->fullname_with_scope()); DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope());
aicpu::dump::Task task; aicpu::dump::Task task;
ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
@ -115,16 +136,16 @@ void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_inf
auto context_ptr = MsContext::GetInstance(); auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(context_ptr);
MS_EXCEPTION_IF_NULL(kernel_graph_); MS_EXCEPTION_IF_NULL(kernel_graph_);
auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); auto dump_path = DumpJsonParser::GetInstance().path();
if (!dump_path.has_value()) { if (dump_path.empty()) {
MS_LOG(EXCEPTION) << "Dump path invalid"; MS_LOG(EXCEPTION) << "Dump path invalid";
} }
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
dump_info->set_dump_path("/" + dump_path.value() + "_" + std::to_string(device_id) + "/"); dump_info->set_dump_path("/" + dump_path + "_" + std::to_string(device_id) + "/");
MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path;
dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); dump_info->set_model_name(DumpJsonParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id()));
dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); dump_info->set_dump_step(std::to_string(DumpJsonParser::GetInstance().iteration()));
dump_info->set_model_id(kernel_graph_->graph_id()); dump_info->set_model_id(kernel_graph_->graph_id());
dump_info->set_flag(kAicpuLoadFlag); dump_info->set_flag(kAicpuLoadFlag);
@ -164,7 +185,7 @@ bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
} }
MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel);
// dump all kernel if mode is set 0 in data_dump.json // dump all kernel if mode is set 0 in data_dump.json
return DataDumpParser::GetInstance().NeedDump(kernel->fullname_with_scope()); return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
} }
void DataDumper::UnloadDumpInfo() { void DataDumper::UnloadDumpInfo() {
@ -258,7 +279,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo
} }
void DataDumper::OpDebugRegister() { void DataDumper::OpDebugRegister() {
uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode(); uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
auto iter = kOverflowModeStr.find(op_debug_mode); auto iter = kOverflowModeStr.find(op_debug_mode);
if (iter == kOverflowModeStr.end()) { if (iter == kOverflowModeStr.end()) {
MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode; MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode;
@ -294,7 +315,7 @@ void DataDumper::OpDebugRegister() {
} }
void DataDumper::OpDebugUnregister() { void DataDumper::OpDebugUnregister() {
uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode(); uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
if (op_debug_mode == kNoOverflow) { if (op_debug_mode == kNoOverflow) {
MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister."; MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister.";
return; return;
@ -337,6 +358,10 @@ void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
} }
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
if (!DumpJsonParser::GetInstance().OutputNeedDump()) {
MS_LOG(INFO) << "Skip dump output";
return;
}
MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
auto input_size = AnfAlgo::GetInputTensorNum(kernel); auto input_size = AnfAlgo::GetInputTensorNum(kernel);
auto output_size = AnfAlgo::GetOutputTensorNum(kernel); auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
@ -367,6 +392,10 @@ void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::T
} }
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
if (!DumpJsonParser::GetInstance().InputNeedDump()) {
MS_LOG(INFO) << "Skip dump input";
return;
}
MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
auto input_size = AnfAlgo::GetInputTensorNum(kernel); auto input_size = AnfAlgo::GetInputTensorNum(kernel);
uint64_t offset = 0; uint64_t offset = 0;

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_DUMP_DATADUMP_H_ #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_DUMP_DATADUMP_H_
#include <tuple> #include <tuple>
#include <map> #include <map>
#include <set>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
@ -63,6 +64,7 @@ class DataDumper {
void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const; void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
void SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo *> dump_info) const; void SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const; void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;
void GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const;
std::function<void *()> model_handle_; std::function<void *()> model_handle_;
uint32_t debug_task_id_; uint32_t debug_task_id_;

View File

@ -66,6 +66,10 @@ class DeviceAddress : public mindspore::DeviceSync {
virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; } virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; }
virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; } virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; }
void *GetMutablePtr() const override { return ptr_; } void *GetMutablePtr() const override { return ptr_; }
// Virtual hook for writing this address's memory to filepath. This base implementation
// ignores all arguments, writes nothing and returns true; subclasses may override it.
virtual bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const {
return true;
}
protected: protected:
const void *ptr() const { return ptr_; } const void *ptr() const { return ptr_; }

View File

@ -33,6 +33,7 @@
#include "ir/dtype.h" #include "ir/dtype.h"
#include "profiler/device/gpu/gpu_profiling.h" #include "profiler/device/gpu/gpu_profiling.h"
#include "utils/shape_utils.h" #include "utils/shape_utils.h"
#include "debug/data_dump/dump_json_parser.h"
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h" #include "debug/debug_services.h"
#endif #endif
@ -51,19 +52,12 @@ bool GPUKernelRuntime::Init() {
GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory(); GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory();
return true; return true;
} }
bool ret = false; bool ret = InitDevice();
#ifdef ENABLE_DUMP_E2E
ret = SetDumpConf();
if (!ret) {
MS_LOG(INFO) << "No dump conf to set!";
}
#endif
ret = InitDevice();
if (!ret) { if (!ret) {
MS_LOG(ERROR) << "InitDevice error."; MS_LOG(ERROR) << "InitDevice error.";
return ret; return ret;
} }
DumpJsonParser::GetInstance().Parse();
mem_manager_ = std::make_shared<GPUMemoryManager>(); mem_manager_ = std::make_shared<GPUMemoryManager>();
MS_EXCEPTION_IF_NULL(mem_manager_); MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory(); mem_manager_->MallocDeviceMemory();
@ -79,146 +73,6 @@ bool GPUKernelRuntime::Init() {
return ret; return ret;
} }
#ifdef ENABLE_DUMP_E2E
namespace {
// Dumps the output tensors of every kernel in the graph's execution order that the dump
// configuration selects, using the debugger's tensor loader. Each output j of a selected
// kernel is written to "<dump_path>/<kernel_name>_output_<j>", where kernel_name is the
// node's full scope name with every '/' replaced by "--"; the tensor itself is looked up by
// the unmodified scope name and slot j.
// A failed dump of one tensor is logged and does not stop the remaining dumps.
void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf,
                Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(dump_conf);
  bool trans_flag = dump_conf->trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = node->fullname_with_scope();
    if (!dump_conf->IsKernelNeedDump(kernel_name)) {
      continue;
    }
    // Replace every '/' of the scoped name with "--" before it becomes a file-name component.
    const std::string strsrc = "/";
    const std::string strdst = "--";
    std::string::size_type pos = 0;
    std::string::size_type srclen = strsrc.size();
    std::string::size_type dstlen = strdst.size();
    while ((pos = kernel_name.find(strsrc, pos)) != std::string::npos) {
      kernel_name.replace(pos, srclen, strdst);
      pos += dstlen;
    }
    auto output_size = AnfAlgo::GetOutputTensorNum(node);
    for (size_t j = 0; j < output_size; ++j) {
      auto addr = AnfAlgo::GetOutputAddr(node, j);
      MS_EXCEPTION_IF_NULL(addr);
      TypeId addr_type_id = addr->type_id();
      std::string addr_format = addr->format();
      ShapeVector int_shapes;
      if (trans_flag) {
        int_shapes = trans::GetRuntimePaddingShape(node, j);
      } else {
        auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
        (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                             [](size_t inner_item) { return SizeToInt(inner_item); });
      }
      auto type = AnfAlgo::GetOutputInferDataType(node, j);
      auto format = kOpFormat_DEFAULT;
      string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j);
      // The debugger is only needed once there is something to dump, so it is validated here
      // rather than at function entry.
      MS_EXCEPTION_IF_NULL(debugger);
      DebugServices *debug_services = debugger->debug_services();
      MS_EXCEPTION_IF_NULL(debug_services);
      TensorLoader *tensor_loader = debug_services->tensor_loader();
      MS_EXCEPTION_IF_NULL(tensor_loader);
      std::string original_kernel_name = node->fullname_with_scope();
      size_t slot = j;
      auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, filepath, format, int_shapes, type,
                                                 addr_type_id, addr_format, slot);
      if (!ret) {
        // Previously this message was assembled into a local string and silently dropped.
        MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << filepath
                      << ", host_format:" << format << ".!";
      }
    }
  }
}
// Dumps every graph input that is a Parameter and is selected by the dump configuration,
// using the debugger's tensor loader (slot 0). Each selected parameter is written to
// "<dump_path>/<parameter_name>_output_0"; the parameter name is used as-is.
// A failed dump of one parameter is logged and does not stop the remaining dumps.
void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_path, DumpConfPtr dump_conf,
                    Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(dump_conf);
  bool trans_flag = dump_conf->trans_flag();
  const auto &parameters = graph->inputs();
  for (auto &item : parameters) {
    MS_EXCEPTION_IF_NULL(item);
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    if (!dump_conf->IsKernelNeedDump(parameter_name)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(item, PARAMETER_OUTPUT_INDEX);
    MS_EXCEPTION_IF_NULL(addr);
    TypeId addr_type_id = addr->type_id();
    std::string addr_format = addr->format();
    ShapeVector int_shapes;
    if (trans_flag) {
      int_shapes = trans::GetRuntimePaddingShape(item, PARAMETER_OUTPUT_INDEX);
    } else {
      auto shape = AnfAlgo::GetOutputDeviceShape(item, PARAMETER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
    }
    auto type = AnfAlgo::GetOutputInferDataType(item, PARAMETER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string filepath = dump_path + '/' + parameter_name + '_' + "output_0";
    // The debugger is only needed once there is something to dump, so it is validated here
    // rather than at function entry.
    MS_EXCEPTION_IF_NULL(debugger);
    DebugServices *debug_services = debugger->debug_services();
    MS_EXCEPTION_IF_NULL(debug_services);
    TensorLoader *tensor_loader = debug_services->tensor_loader();
    MS_EXCEPTION_IF_NULL(tensor_loader);
    std::string original_kernel_name = parameter_name;
    size_t slot = 0;
    auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, filepath, format, int_shapes, type,
                                               addr_type_id, addr_format, slot);
    if (!ret) {
      // Previously this message was assembled into a local string and silently dropped.
      MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << filepath
                    << ", host_format:" << format << ".!";
    }
  }
}
} // namespace
// Runs one end-to-end dump step for the graph through the debugger's tensor loader.
// UpdataCurIter() is called on every invocation, before the enable and iteration checks.
// Nothing is dumped when dumping is disabled, or when a non-zero dump_iter is configured and
// differs from the current iteration. Output goes under "<dump_path>/<net_name>/<cur_iter>".
// Always returns true (failures surface as exceptions or error logs).
bool GPUKernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_LOG(INFO) << "Start dump step";
  DumpConfPtr dump_conf = GetDumpConf();
  MS_EXCEPTION_IF_NULL(dump_conf);
  dump_conf->UpdataCurIter();
  if (!dump_conf->dump_enable()) {
    MS_LOG(INFO) << "Dump flag is disable, pass dump step";
    return true;
  }
  uint32_t cur_iter = dump_conf->cur_iter();
  // dump_iter == 0 means "dump every iteration"; otherwise only the matching one is dumped.
  if (dump_conf->dump_iter() != 0 && cur_iter != dump_conf->dump_iter()) {
    return true;
  }
  MS_LOG(INFO) << "Cur iter is " << cur_iter;
  std::string net_name = dump_conf->dump_net_name();
  std::string iterator = std::to_string(cur_iter);
  std::string dump_path = dump_conf->dump_path();
  // Calling back() on an empty string is undefined behaviour, so an empty path is treated
  // like a path that lacks the trailing '/'.
  if (dump_path.empty() || dump_path.back() != '/') {
    dump_path += '/';
  }
  dump_path += net_name + '/' + iterator;
  // dump output
  DumpOutput(graph, dump_path, dump_conf, debugger);
  // dump parameters
  DumpParameters(graph, dump_path, dump_conf, debugger);
  return true;
}
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
namespace { namespace {
void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,

View File

@ -43,9 +43,6 @@ class GPUKernelRuntime : public KernelRuntime {
const std::vector<CNodePtr> &execution_order) override; const std::vector<CNodePtr> &execution_order) override;
void AssignMemory(session::KernelGraph *graph) override; void AssignMemory(session::KernelGraph *graph) override;
bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override; bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override;
#ifdef ENABLE_DUMP_E2E
bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr) override;
#endif
protected: protected:
DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,

View File

@ -27,6 +27,7 @@
#include "backend/session/kernel_graph.h" #include "backend/session/kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h" #include "backend/optimizer/common/helper.h"
#include "debug/data_dump/dump_json_parser.h"
#include "ir/value.h" #include "ir/value.h"
#include "utils/shape_utils.h" #include "utils/shape_utils.h"
using mindspore::kernel::Address; using mindspore::kernel::Address;
@ -34,21 +35,10 @@ using mindspore::kernel::AddressPtr;
namespace mindspore { namespace mindspore {
namespace device { namespace device {
KernelRuntime::~KernelRuntime() { KernelRuntime::~KernelRuntime() {}
#ifdef ENABLE_DUMP_E2E
dump_conf_ptr_ = nullptr;
#endif
}
bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; } bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
// Base-class dump step: performs no dumping and simply reports whether a graph was supplied.
// The debugger argument is ignored.
bool KernelRuntime::DumpData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  return graph != nullptr;
}
bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; } bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; }
bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) { bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
@ -134,36 +124,21 @@ void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) {
} }
bool KernelRuntime::DumpDataEnabled() { bool KernelRuntime::DumpDataEnabled() {
bool ret = false; auto &dump_json_parser = DumpJsonParser::GetInstance();
#ifdef ENABLE_DUMP_E2E return dump_json_parser.e2e_dump_enabled();
DumpConfPtr dump_conf = GetDumpConf();
MS_EXCEPTION_IF_NULL(dump_conf);
bool dump_flag = dump_conf->dump_enable();
if (!dump_flag) {
return ret;
}
ret = true;
#endif
return ret;
} }
bool KernelRuntime::DumpDataEnabledIteration() { bool KernelRuntime::DumpDataEnabledIteration() {
bool ret = false; auto &dump_json_parser = DumpJsonParser::GetInstance();
#ifdef ENABLE_DUMP_E2E if (!dump_json_parser.e2e_dump_enabled()) {
if (!DumpDataEnabled()) { return false;
return ret;
} }
DumpConfPtr dump_conf = GetDumpConf();
MS_EXCEPTION_IF_NULL(dump_conf); auto cur_iter = dump_json_parser.cur_dump_iter() + 1;
uint32_t cur_iter = dump_conf->cur_iter() + 1; if (dump_json_parser.iteration() != 0) {
if (dump_conf->dump_iter() != 0) { return cur_iter == dump_json_parser.iteration();
if (cur_iter != dump_conf->dump_iter()) {
return ret;
} }
} return true;
ret = true;
#endif
return ret;
} }
void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
@ -858,16 +833,5 @@ DeviceAddressPtr KernelRuntime::AssignSingleOpLaunchMemory(size_t size, const st
MS_EXCEPTION_IF_NULL(base_ptr); MS_EXCEPTION_IF_NULL(base_ptr);
return device_address; return device_address;
} }
#ifdef ENABLE_DUMP_E2E
// Creates a fresh Dump configuration object, stores it in dump_conf_ptr_ and asks it to load
// its settings via SetDumpConfFromJsonFile(); that call's result is returned unchanged.
bool KernelRuntime::SetDumpConf() {
  dump_conf_ptr_ = std::make_shared<Dump>();
  MS_EXCEPTION_IF_NULL(dump_conf_ptr_);
  return dump_conf_ptr_->SetDumpConfFromJsonFile();
}
// Returns the dump configuration pointer held by this runtime (dump_conf_ptr_).
// NOTE(review): presumably null until SetDumpConf() has been called — callers in this file
// check it with MS_EXCEPTION_IF_NULL.
DumpConfPtr KernelRuntime::GetDumpConf() { return dump_conf_ptr_; }
#endif
} // namespace device } // namespace device
} // namespace mindspore } // namespace mindspore

View File

@ -24,9 +24,6 @@
#include "runtime/device/device_address.h" #include "runtime/device/device_address.h"
#include "ir/tensor.h" #include "ir/tensor.h"
#include "utils/convert_utils.h" #include "utils/convert_utils.h"
#ifdef ENABLE_DUMP_E2E
#include "debug/e2e_dump.h"
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h" #include "debug/debugger/debugger.h"
#endif #endif
@ -58,7 +55,6 @@ class KernelRuntime {
void RunOpClearMemory(const session::KernelGraph *graph); void RunOpClearMemory(const session::KernelGraph *graph);
bool DumpDataEnabled(); bool DumpDataEnabled();
bool DumpDataEnabledIteration(); bool DumpDataEnabledIteration();
virtual bool DumpData(session::KernelGraph *graph, Debugger *debugger = nullptr);
virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger); virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
virtual bool Load(session::KernelGraph *graph, bool is_task_sink); virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0; virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;
@ -77,9 +73,6 @@ class KernelRuntime {
virtual bool SyncStream() = 0; virtual bool SyncStream() = 0;
virtual void ClearGlobalIdleMem() {} virtual void ClearGlobalIdleMem() {}
#ifdef ENABLE_DUMP_E2E
DumpConfPtr GetDumpConf();
#endif
// for GPU and D to impl // for GPU and D to impl
virtual void ReleaseDeviceRes() {} virtual void ReleaseDeviceRes() {}
void set_device_id(uint32_t device_id) { device_id_ = device_id; } void set_device_id(uint32_t device_id) { device_id_ = device_id; }
@ -101,9 +94,6 @@ class KernelRuntime {
void AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node); void AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node);
void AssignCommunicationNodeInputMem(MemType type, const AnfNodePtr &node); void AssignCommunicationNodeInputMem(MemType type, const AnfNodePtr &node);
void AssignCommunicationNodeMem(MemType type, const AnfNodePtr &node); void AssignCommunicationNodeMem(MemType type, const AnfNodePtr &node);
#ifdef ENABLE_DUMP_E2E
bool SetDumpConf();
#endif
private: private:
void AssignStaticMemoryOutput(session::KernelGraph *graph); void AssignStaticMemoryOutput(session::KernelGraph *graph);
@ -121,10 +111,6 @@ class KernelRuntime {
protected: protected:
uint32_t device_id_{0}; uint32_t device_id_{0};
#ifdef ENABLE_DUMP_E2E
DumpConfPtr dump_conf_ptr_;
#endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
Debugger *debugger_; Debugger *debugger_;
#endif #endif

View File

@ -4,7 +4,6 @@ message("build ut testcases...")
project(ut) project(ut)
set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..") set(PROJECT_DIR "${PROJECT_SOURCE_DIR}/../../..")
add_compile_definitions(ENABLE_DUMP_E2E)
if(ENABLE_DUMP_IR) if(ENABLE_DUMP_IR)
add_compile_definitions(ENABLE_DUMP_IR) add_compile_definitions(ENABLE_DUMP_IR)
endif(ENABLE_DUMP_IR) endif(ENABLE_DUMP_IR)
@ -84,9 +83,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/frontend/parallel/*.cc" "../../../mindspore/ccsrc/frontend/parallel/*.cc"
"../../../mindspore/ccsrc/frontend/operator/*.cc" "../../../mindspore/ccsrc/frontend/operator/*.cc"
# dont remove the 4 lines above # dont remove the 4 lines above
"../../../mindspore/ccsrc/debug/e2e_dump.cc" "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc"
"../../../mindspore/ccsrc/debug/common.cc" "../../../mindspore/ccsrc/debug/common.cc"
"../../../mindspore/ccsrc/debug/data_dump_parser.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc" "../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc" "../../../mindspore/ccsrc/runtime/device/ascend/profiling/profiling_engine_impl.cc"
"../../../mindspore/ccsrc/runtime/device/kernel_runtime.cc" "../../../mindspore/ccsrc/runtime/device/kernel_runtime.cc"

View File

@ -20,7 +20,7 @@
#include "utils/system/file_system.h" #include "utils/system/file_system.h"
#include "utils/system/env.h" #include "utils/system/env.h"
#define private public #define private public
#include "debug/e2e_dump.h" #include "debug/data_dump/dump_json_parser.h"
#undef private #undef private
namespace mindspore { namespace mindspore {
@ -38,7 +38,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) {
int ret; int ret;
char filename[] = "/tmp/dumpToFileTestFile"; char filename[] = "/tmp/dumpToFileTestFile";
ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int));
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open(filename, O_RDONLY);
@ -70,7 +70,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) {
int ret; int ret;
char filename[] = "../../dumpToFileTestFile"; char filename[] = "../../dumpToFileTestFile";
ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int));
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open(filename, O_RDONLY);
@ -102,7 +102,7 @@ TEST_F(TestMemoryDumper, test_DumpToFileNotExistDir) {
} }
char filename[] = "./tmp/dumpToFileTestFile"; char filename[] = "./tmp/dumpToFileTestFile";
int ret = mindspore::Dump::DumpToFile(filename, data, len * sizeof(int)); int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int));
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open(filename, O_RDONLY);