Support CANN callback API for Ascend async dump

This commit is contained in:
TinaMengtingZhang 2021-11-12 17:57:36 -05:00
parent 3fc995a6ae
commit 07b653103e
16 changed files with 662 additions and 14 deletions

@ -1 +1 @@
Subproject commit fd9e9a96f97960ba46c21352b0df7719d3a0a3f7
Subproject commit 8f7df5fd1f7a70233e2aeaa6155dcd76b93e0b11

View File

@ -168,8 +168,11 @@ if(ENABLE_DEBUGGER)
ms_protobuf_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS ${DEBUGGER_PROTO_LIST})
file(GLOB_RECURSE DEBUGGER_GRPC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_grpc.proto")
ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
file(GLOB_RECURSE DUMP_DATA_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/dump_data.proto")
ms_protobuf_generate(DUMP_DATA_PROTO_SRCS DUMP_DATA_PROTO_HDRS ${DUMP_DATA_PROTO_LIST})
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_DATA_PROTO_SRCS})
endif()
if(ENABLE_DUMP_PROTO)

View File

@ -42,6 +42,7 @@ constexpr auto kTransFlag = "trans_flag";
constexpr auto kStatisticDump = "statistic";
constexpr auto kTensorDump = "tensor";
constexpr auto kFullDump = "full";
constexpr auto kFileFormat = "file_format";
constexpr auto kDumpInputAndOutput = 0;
constexpr auto kDumpInputOnly = 1;
constexpr auto kDumpOutputOnly = 2;
@ -274,6 +275,8 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) {
ParseSupportDevice(*support_device);
if (!e2e_dump_enabled_) {
ParseOpDebugMode(*op_debug_mode);
ParseFileFormat(
*common_dump_settings); // Pass in the whole json string to parse because file_format field is optional.
}
}
@ -505,6 +508,23 @@ void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) {
}
}
void DumpJsonParser::ParseFileFormat(const nlohmann::json &content) {
  // 'file_format' is an optional field: when absent, keep the legacy protobuf output.
  auto fmt_iter = content.find(kFileFormat);
  if (fmt_iter == content.end()) {
    file_format_ = JsonFileFormat::FORMAT_BIN;
    return;
  }
  CheckJsonStringType(*fmt_iter, kFileFormat);
  const std::string fmt_value = *fmt_iter;
  if (fmt_value == "bin") {
    file_format_ = JsonFileFormat::FORMAT_BIN;
  } else if (fmt_value == "npy") {
    file_format_ = JsonFileFormat::FORMAT_NPY;
  } else {
    // Any other value is a user configuration error.
    MS_LOG(EXCEPTION) << "Dump Json Parse Failed. 'file_format' should be either 'npy' or 'bin', but got: "
                      << fmt_value;
  }
}
void DumpJsonParser::JsonConfigToString() {
std::string cur_config;
cur_config.append("dump_mode:");

View File

@ -59,6 +59,7 @@ class DumpJsonParser {
bool trans_flag() const { return trans_flag_; }
uint32_t cur_dump_iter() const { return cur_dump_iter_; }
void UpdateDumpIter() { ++cur_dump_iter_; }
bool FileFormatIsNpy() const { return file_format_ == JsonFileFormat::FORMAT_NPY; }
bool GetIterDumpFlag() const;
bool InputNeedDump() const;
bool OutputNeedDump() const;
@ -70,6 +71,7 @@ class DumpJsonParser {
void SaveGraph(session::KernelGraph *graph) { (void)graphs_.emplace_back(graph); }
const std::vector<session::KernelGraph *> &graphs() const { return graphs_; }
enum JsonDumpMode { DUMP_ALL = 0, DUMP_KERNEL = 1, DUMP_KERNELS_WITH_FLAG = 2 };
enum JsonFileFormat { FORMAT_NPY = 0, FORMAT_BIN = 1 };
private:
DumpJsonParser() = default;
@ -89,6 +91,7 @@ class DumpJsonParser {
std::vector<std::string> cell_dump_kernels_;
std::set<uint32_t> support_devices_;
uint32_t op_debug_mode_{0};
JsonFileFormat file_format_;
bool trans_flag_{false};
uint32_t cur_dump_iter_{0};
bool already_parsed_{false};
@ -112,6 +115,7 @@ class DumpJsonParser {
void ParseSupportDevice(const nlohmann::json &content);
bool ParseEnable(const nlohmann::json &content);
void ParseOpDebugMode(const nlohmann::json &content);
void ParseFileFormat(const nlohmann::json &content);
void JudgeDumpEnabled();
void JsonConfigToString();

View File

@ -24,6 +24,8 @@
#include "debug/data_dump/dump_json_parser.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/utils.h"
#include "debug/common.h"
namespace mindspore {
uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
@ -137,8 +139,7 @@ uint64_t GetTimeStamp() {
return timestamp;
}
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope) {
const std::string separator("--");
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const std::string &separator) {
std::size_t found = fullname_with_scope.rfind(separator);
std::string op_name;
if (found != std::string::npos) {
@ -146,4 +147,30 @@ std::string GetOpNameWithoutScope(const std::string &fullname_with_scope) {
}
return op_name;
}
// Writes dump_str into the file at file_name. The target path is created via
// Common::CreatePrefixPath, made writable for the duration of the write, and left
// read-only (S_IRUSR) afterwards so the dump cannot be modified accidentally.
// Logs an error and returns on empty input or path failure; throws (MS_LOG(EXCEPTION))
// when the file cannot be opened or the write fails.
void DumpToFile(const std::string &file_name, const std::string &dump_str) {
  if (dump_str.empty()) {
    MS_LOG(ERROR) << "Failed to dump empty tensor data.";
    return;
  }
  auto real_path = Common::CreatePrefixPath(file_name);
  if (!real_path.has_value()) {
    MS_LOG(ERROR) << "CreatePrefixPath failed.";
    return;
  }
  const std::string &real_path_str = real_path.value();
  // Temporarily grant write permission; restored to read-only below.
  ChangeFileMode(real_path_str, S_IWUSR);
  std::ofstream file(real_path_str, std::ofstream::out | std::ofstream::trunc);
  if (!file.is_open()) {
    // Fix: the original message concatenated the path and "failed" without a space.
    MS_LOG(EXCEPTION) << "Open file " << real_path_str << " failed: " << ErrnoToString(errno);
  }
  file << dump_str;
  if (file.bad()) {
    file.close();
    MS_LOG(EXCEPTION) << "Dump string to file " << real_path_str << " failed: " << ErrnoToString(errno);
  }
  file.close();
  ChangeFileMode(real_path_str, S_IRUSR);
}
} // namespace mindspore

View File

@ -39,7 +39,10 @@ void DumpMemToFile(const std::string &file_path, const device::DeviceAddress &ad
const TypeId &type, bool trans_flag = false);
// Get time stamp since epoch in microseconds
uint64_t GetTimeStamp();
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope);
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const std::string &separator = "--");
// dump target string into file
void DumpToFile(const std::string &file_name, const std::string &dump_str);
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_UTILS_H_

View File

@ -19,6 +19,8 @@
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include "debug/data_dump/dump_json_parser.h"
#include "common/trans.h"
@ -30,6 +32,7 @@
#include "utils/config_manager.h"
#include "utils/file_utils.h"
#include "debug/data_dump/tensor_stat_dump.h"
#include "abstract/utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h"
#include "debug/tensor_load.h"
@ -37,6 +40,60 @@
#endif
namespace mindspore {
#ifdef ENABLE_D
using ProtoFormat = debugger::dump::OutputFormat;
using ProtoDataType = debugger::dump::OutputDataType;
constexpr int kDhaAtomicAddInfoSize = 128;
constexpr int kL2AtomicAddInfoSize = 128;
constexpr int kAiCoreInfoSize = 256;
constexpr int kDhaAtomicAddStatusSize = 256;
constexpr int kL2AtomicAddStatusSize = 256;
constexpr int kUint64Size = sizeof(uint64_t);
const std::set<std::pair<std::string, std::string>> kSuppTransFormatPair = {
// {device format, host format}
{kOpFormat_FRAC_Z, kOpFormat_NCHW}, {kOpFormat_FRAC_NZ, kOpFormat_NCHW},
{kOpFormat_NC1HWC0, kOpFormat_NCHW}, {kOpFormat_C1HWNCoC0, kOpFormat_NCHW},
{kOpFormat_NC1HWC0_C04, kOpFormat_NCHW}, {kOpFormat_NDC1HWC0, kOpFormat_NCHW},
{kOpFormat_FRACTAL_Z_3D, kOpFormat_NCHW}};
const std::map<ProtoFormat, std::string> kFormatToStringMap = {
{ProtoFormat::FORMAT_NCHW, kOpFormat_NCHW},
{ProtoFormat::FORMAT_NHWC, kOpFormat_NHWC},
{ProtoFormat::FORMAT_ND, kOpFormat_ND},
{ProtoFormat::FORMAT_NC1HWC0, kOpFormat_NC1HWC0},
{ProtoFormat::FORMAT_FRACTAL_Z, kOpFormat_FRAC_Z},
{ProtoFormat::FORMAT_NC1HWC0_C04, kOpFormat_NC1HWC0_C04},
{ProtoFormat::FORMAT_FRACTAL_Z_C04, kOpFormat_FRACTAL_Z_C04},
{ProtoFormat::FORMAT_NC1KHKWHWC0, kOpFormat_NC1KHKWHWC0},
{ProtoFormat::FORMAT_HWCN, kOpFormat_HWCN},
{ProtoFormat::FORMAT_NDHWC, kOpFormat_NDHWC},
{ProtoFormat::FORMAT_NCDHW, kOpFormat_NCDHW},
{ProtoFormat::FORMAT_DHWCN, kOpFormat_DHWCN},
{ProtoFormat::FORMAT_DHWNC, kOpFormat_DHWNC},
{ProtoFormat::FORMAT_NDC1HWC0, kOpFormat_NDC1HWC0},
{ProtoFormat::FORMAT_FRACTAL_Z_3D, kOpFormat_FRACTAL_Z_3D},
{ProtoFormat::FORMAT_C1HWNCoC0, kOpFormat_C1HWNCoC0},
{ProtoFormat::FORMAT_FRACTAL_NZ, kOpFormat_FRAC_NZ},
{ProtoFormat::FORMAT_FRACTAL_ZN_LSTM, kOpFormat_FRACTAL_ZN_LSTM}};
const std::map<ProtoDataType, mindspore::TypeId> kDataTypetoMSTypeMap = {
{ProtoDataType::DT_UNDEFINED, mindspore::TypeId::kTypeUnknown},
{ProtoDataType::DT_FLOAT, mindspore::TypeId::kNumberTypeFloat32},
{ProtoDataType::DT_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
{ProtoDataType::DT_INT8, mindspore::TypeId::kNumberTypeInt8},
{ProtoDataType::DT_UINT8, mindspore::TypeId::kNumberTypeUInt8},
{ProtoDataType::DT_INT16, mindspore::TypeId::kNumberTypeInt16},
{ProtoDataType::DT_UINT16, mindspore::TypeId::kNumberTypeUInt16},
{ProtoDataType::DT_INT32, mindspore::TypeId::kNumberTypeInt32},
{ProtoDataType::DT_INT64, mindspore::TypeId::kNumberTypeInt64},
{ProtoDataType::DT_UINT32, mindspore::TypeId::kNumberTypeUInt32},
{ProtoDataType::DT_UINT64, mindspore::TypeId::kNumberTypeUInt64},
{ProtoDataType::DT_BOOL, mindspore::TypeId::kNumberTypeBool},
{ProtoDataType::DT_DOUBLE, mindspore::TypeId::kNumberTypeFloat64},
{ProtoDataType::DT_STRING, mindspore::TypeId::kObjectTypeString}};
#endif
bool E2eDump::IsDeviceTargetGPU() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
@ -443,4 +500,178 @@ bool E2eDump::DumpDirExists(const std::string &dump_path) {
}
return false;
}
#ifdef ENABLE_D
// Dumps all input and output tensors of one kernel received from the ADX callback.
// Tensor payloads are laid out back to back in data_ptr: all inputs first, then all
// outputs, each occupying tensor.size() bytes; offset walks that layout.
// Fix: the original copied every proto tensor twice (into a std::vector, then again
// per element); iterate the repeated fields by const reference instead.
void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                               char *data_ptr) {
  uint64_t offset = 0;
  // dump input tensors
  const std::string in_path = dump_path + ".input.";
  for (int slot = 0; slot < dump_data.input_size(); slot++) {
    const auto &in_tensor = dump_data.input(slot);
    std::string in_slot_path = in_path + std::to_string(slot) + ".";
    auto succ = ConvertFormatForTensorAndDump(in_slot_path, in_tensor, data_ptr + offset);
    if (!succ) {
      MS_LOG(INFO) << "Failed to convert format for tensor " << in_slot_path;
    }
    offset += in_tensor.size();
  }
  // dump output tensors
  const std::string out_path = dump_path + ".output.";
  for (int slot = 0; slot < dump_data.output_size(); slot++) {
    const auto &out_tensor = dump_data.output(slot);
    std::string out_slot_path = out_path + std::to_string(slot) + ".";
    auto succ = ConvertFormatForTensorAndDump(out_slot_path, out_tensor, data_ptr + offset);
    if (!succ) {
      MS_LOG(INFO) << "Failed to convert format for tensor " << out_slot_path;
    }
    offset += out_tensor.size();
  }
}
// Converts one tensor (OpInput or OpOutput proto) from its device layout to the host
// layout (NCHW for 4-D tensors, ND otherwise) and writes it as an npy file at
// dump_path + <format name>. Falls back to dumping the raw device-format buffer when
// no conversion applies or the conversion fails. Returns true on a successful dump.
template <typename T>
bool E2eDump::ConvertFormatForTensorAndDump(std::string dump_path, const T &tensor, char *data_ptr) {
  // Resolve the device format name.
  auto iter_fmt = kFormatToStringMap.find(tensor.format());
  if (iter_fmt == kFormatToStringMap.end()) {
    // Fix: log the raw enum value; the original dereferenced the end iterator here (UB).
    MS_LOG(INFO) << "Unsupported tensor format " << tensor.format() << " for tensor " << dump_path;
    return false;
  }
  std::string device_format = iter_fmt->second;
  // Resolve the MindSpore type id.
  auto iter_dtype = kDataTypetoMSTypeMap.find(tensor.data_type());
  if (iter_dtype == kDataTypetoMSTypeMap.end()) {
    // Fix: same end-iterator dereference as above; log the proto value instead.
    MS_LOG(INFO) << "Unsupported tensor type " << tensor.data_type() << " for tensor " << dump_path;
    return false;
  }
  auto src_type = iter_dtype->second;
  // Collect device shape and the original (host) shape from the proto.
  std::vector<size_t> device_shape;
  (void)std::copy(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(device_shape));
  std::vector<size_t> host_shape;
  (void)std::copy(tensor.original_shape().dim().begin(), tensor.original_shape().dim().end(),
                  std::back_inserter(host_shape));
  ShapeVector shape_to;
  (void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape_to), SizeToLong);
  size_t data_size = static_cast<size_t>(tensor.size());
  bool trans_success = false;
  auto trans_buf = std::vector<uint8_t>(data_size);
  // Convert format to host format: NCHW for 4-dimensional tensors, ND otherwise.
  const uint8_t kNumFourDim = 4;
  std::string host_format;
  if (host_shape.size() == kNumFourDim) {
    host_format = kOpFormat_NCHW;
  } else {
    host_format = kOpFormat_ND;
  }
  if (device_format != host_format) {
    auto iter = kSuppTransFormatPair.find(std::make_pair(device_format, host_format));
    if (iter == kSuppTransFormatPair.end()) {
      MS_LOG(INFO) << "Do not support convert from format " << device_format << " to " << host_format << " for tensor "
                   << dump_path;
    } else {
      const trans::FormatArgs format_args{data_ptr, data_size, host_format, device_format,
                                          host_shape, device_shape, src_type};
      // sub_format > 1 carries the group count for grouped fractal formats.
      auto group = tensor.sub_format() > 1 ? tensor.sub_format() : 1;
      trans_success = trans::TransFormatFromDeviceToHost(format_args, trans_buf.data(), group);
      if (!trans_success) {
        MS_LOG(ERROR) << "Trans format failed.";
      }
    }
  }
  // Dump tensor data into an npy file, converted when possible, raw otherwise.
  bool dump_success = false;
  if (trans_success) {
    dump_path += host_format;
    dump_success = DumpJsonParser::DumpToFile(dump_path, trans_buf.data(), data_size, shape_to, src_type);
  } else {
    dump_path += device_format;
    dump_success = DumpJsonParser::DumpToFile(dump_path, data_ptr, data_size, shape_to, src_type);
  }
  return dump_success;
}
// Reads one little-endian uint64 from an arbitrarily aligned buffer and converts it
// to host byte order.
uint64_t UnpackUint64Value(char *ptr) {
  uint64_t value = 0;
  // memcpy avoids the unaligned access / strict-aliasing issues of casting char* to uint64_t*.
  (void)std::memcpy(&value, ptr, sizeof(uint64_t));
#if defined(__APPLE__)
  // macOS targets here are little-endian; no conversion needed (matches original behavior).
  return value;
#else
  // Fix: the original called le16toh, which converts the argument to uint16_t and
  // therefore truncated the value to its low 16 bits; le64toh is the 64-bit variant.
  return le64toh(value);
#endif
}
// Renders value as a lowercase hexadecimal string with a "0x" prefix (no padding).
std::string IntToHexString(const uint64_t value) {
  std::ostringstream hex_stream;
  hex_stream << "0x" << std::hex << value;
  return hex_stream.str();
}
// Decodes one overflow-info record from the ADX op-debug buffer into json. The record
// is six consecutive little-endian uint64 fields, each kUint64Size bytes, in this
// order: model_id, stream_id, task_id, task_type, pc_start, para_base.
// NOTE(review): the field order/layout is assumed to match the Ascend op-debug record
// format - confirm against the CANN/ADX documentation if it ever changes.
nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
  uint32_t index = 0;
  uint64_t model_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t stream_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t task_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t task_type = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t pc_start = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t para_base = UnpackUint64Value(data_ptr + index);

  nlohmann::json overflow_info;
  overflow_info["model_id"] = model_id;
  overflow_info["stream_id"] = stream_id;
  overflow_info["task_id"] = task_id;
  overflow_info["task_type"] = task_type;
  // Addresses are rendered as hex strings for readability in the dumped json.
  overflow_info["pc_start"] = IntToHexString(pc_start);
  overflow_info["para_base"] = IntToHexString(para_base);
  return overflow_info;
}
// Parses the op-debug (overflow detection) buffer received via the ADX callback and
// saves each output slot as <dump_path>.output.<slot>.json. Per slot the buffer is
// read as: DHA-atomic-add info, L2-atomic-add info, AICore info (fixed-size blocks),
// followed by the DHA/L2 status blocks and three AICore status words.
void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                                char *data_ptr) {
  std::string out_path = dump_path + ".output.";
  std::vector<debugger::dump::OpOutput> op_debug(dump_data.output().begin(), dump_data.output().end());
  for (uint32_t slot = 0; slot < op_debug.size(); slot++) {
    // NOTE(review): index restarts at 0 and data_ptr is not advanced between slots, so
    // every slot re-reads the same record; presumably op-debug dumps only ever carry a
    // single output slot - confirm whether multi-slot op-debug data can occur.
    uint32_t index = 0;
    // parse DHA Atomic Add info
    nlohmann::json dha_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kDhaAtomicAddInfoSize;
    // parse L2 Atomic Add info
    nlohmann::json l2_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kL2AtomicAddInfoSize;
    // parse AICore info
    nlohmann::json ai_core_info = ParseOverflowInfo(data_ptr + index);
    index += kAiCoreInfoSize;
    // parse DHA Atomic Add status
    dha_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kDhaAtomicAddStatusSize;
    // parse L2 Atomic Add status
    l2_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kL2AtomicAddStatusSize;
    // parse AICore status (three consecutive uint64 words)
    uint64_t kernel_code = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t block_idx = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t status = UnpackUint64Value(data_ptr + index);
    ai_core_info["kernel_code"] = IntToHexString(kernel_code);
    ai_core_info["block_idx"] = block_idx;
    ai_core_info["status"] = status;

    nlohmann::json opdebug_data;
    opdebug_data["DHA Atomic Add"] = dha_atomic_add_info;
    opdebug_data["L2 Atomic Add"] = l2_atomic_add_info;
    opdebug_data["AI Core"] = ai_core_info;
    // save json to file
    DumpToFile(out_path + std::to_string(slot) + ".json", opdebug_data.dump());
  }
}
#endif // ENABLE_D
} // namespace mindspore

View File

@ -26,6 +26,9 @@
#include "runtime/device/device_address.h"
#include "debug/data_dump/dump_json_parser.h"
#include "debug/data_dump/dump_utils.h"
#ifdef ENABLE_D
#include "proto/dump_data.pb.h"
#endif
#ifndef ENABLE_DEBUGGER
class Debugger;
@ -59,6 +62,13 @@ class E2eDump {
static bool DumpDirExists(const std::string &dump_path);
#ifdef ENABLE_D
static void DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data, char *data_ptr);
static void DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
char *data_ptr);
#endif
private:
static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger);
@ -81,6 +91,13 @@ class E2eDump {
static void UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_mode);
#ifdef ENABLE_D
static nlohmann::json ParseOverflowInfo(char *data_ptr);
template <typename T>
static bool ConvertFormatForTensorAndDump(std::string dump_path, const T &tensor, char *data_ptr);
#endif
inline static unsigned int starting_graph_id = INT32_MAX;
};
} // namespace mindspore

View File

@ -1535,4 +1535,16 @@ bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
return debug_services_->TensorExistsInCurrent(tensor_name);
}
#ifdef ENABLE_D
// Returns the DumpDataBuilder accumulating dump chunks for node_name, creating one on
// first use. Fix: the original performed up to three map lookups (find, then
// operator[] twice); a single find with emplace-on-miss does the same work once.
std::shared_ptr<DumpDataBuilder> Debugger::LoadDumpDataBuilder(const std::string &node_name) {
  auto iter = dump_data_construct_map_.find(node_name);
  if (iter == dump_data_construct_map_.end()) {
    iter = dump_data_construct_map_.emplace(node_name, std::make_shared<DumpDataBuilder>()).first;
  }
  return iter->second;
}
// Releases the per-node builder once its dump data has been written to disk.
void Debugger::ClearDumpDataBuilder(const std::string &node_name) { dump_data_construct_map_.erase(node_name); }
#endif
} // namespace mindspore

View File

@ -26,6 +26,9 @@
#include "debug/debugger/grpc_client.h"
#include "debug/debug_services.h"
#include "common/trans.h"
#ifdef ENABLE_D
#include "debug/dump_data_builder.h"
#endif
using debugger::Chunk;
using debugger::DataType;
@ -170,6 +173,12 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// check if dump using debugger backend is enabled
bool CheckDebuggerDumpEnabled() const;
#ifdef ENABLE_D
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);
void ClearDumpDataBuilder(const std::string &node_name);
#endif
private:
// private constructor for singleton
Debugger();
@ -289,6 +298,11 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// The vector of graph pointers that have been run in the current step.
std::vector<KernelGraphPtr> graph_ptr_step_vec_;
#ifdef ENABLE_D
// to construct kernel data for async dump, key is the dump path to the node
std::map<std::string, std::shared_ptr<DumpDataBuilder>> dump_data_construct_map_;
#endif
// singleton
static std::mutex instance_lock_;
static std::shared_ptr<Debugger> debugger_;

View File

@ -23,8 +23,12 @@
#include "debug/debugger/debugger.h"
#include "runtime/device/gpu/gpu_device_address.h"
#include "debug/data_dump/dump_json_parser.h"
#ifdef ENABLE_D
#include "debug/dump_data_builder.h"
#endif
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"
#include "debug/data_dump/e2e_dump.h"
using mindspore::kernel::AddressPtr;
using mindspore::kernel::KernelLaunchInfo;
@ -33,8 +37,6 @@ using KernelGraph = mindspore::session::KernelGraph;
using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
namespace mindspore {
static const size_t PARAMETER_OUTPUT_INDEX = 0;
std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
// define a vector containing real output number
std::vector<size_t> real_outputs;
@ -162,4 +164,52 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_
bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
debugger->PostExecuteNode(cnode, last_kernel);
}
#ifdef ENABLE_D
// Callback registered with the ADX data-dump server. ADX delivers each kernel's dump
// as a sequence of DumpChunk pieces; chunks are buffered per file name and, once the
// last chunk arrives, reassembled, parsed, and written to disk. The 'size' parameter
// is supplied by ADX but unused here.
// NOTE(review): the function returns 0 on both success and unrecoverable parse errors
// but 1 when buffering a chunk fails - confirm against the ADX callback contract which
// return codes (if any) request a retry.
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size) {
  MS_LOG(DEBUG) << "ADX DumpDataCallBack is called";
  string file_name = dump_chunk->fileName;
  uint32_t isLastChunk = dump_chunk->isLastChunk;

  // parse chunk header
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  // One builder per dump file accumulates chunk payloads until the file is complete.
  auto dump_data_build = debugger->LoadDumpDataBuilder(file_name);
  if (dump_data_build == nullptr) {
    MS_LOG(ERROR) << "Failed to load dump data builder for node " << file_name;
    return 0;
  }
  if (!dump_data_build->CopyDumpChunk(dump_chunk)) {
    return 1;
  }

  if (isLastChunk == 1) {
    // construct dump data object
    debugger::dump::DumpData dump_data;
    std::vector<char> data_buf;
    if (!dump_data_build->ConstructDumpData(&dump_data, &data_buf)) {
      MS_LOG(ERROR) << "Failed to parse data for node " << file_name;
      return 0;
    }

    // convert and save to files
    // Split "<dir>/<base name>" at the last '/'.
    // NOTE(review): assumes file_name always contains '/'; rfind would return npos
    // otherwise and the substr calls below would misbehave - confirm ADX always
    // reports a path with a directory component.
    auto separator = file_name.rfind("/");
    auto path_name = file_name.substr(0, separator);
    auto file_base_name = file_name.substr(separator + 1);
    if (file_base_name.rfind("Opdebug.Node_OpDebug.") == 0) {
      // save overflow data
      E2eDump::DumpOpDebugToFile(file_name, dump_data, data_buf.data());
    } else {
      // save tensor data
      auto op_type = file_base_name.substr(0, file_base_name.find("."));
      auto file_base_name_no_scope = GetOpNameWithoutScope(file_base_name, "_");
      E2eDump::DumpTensorToFile(path_name + "/" + op_type + "." + file_base_name_no_scope, dump_data, data_buf.data());
    }
    // The builder for this file is no longer needed once its data is on disk.
    debugger->ClearDumpDataBuilder(file_name);
  }
  return 0;
}
#endif
} // namespace mindspore

View File

@ -19,7 +19,11 @@
#include <string>
#include "debug/debugger/debugger.h"
#include "backend/kernel_compiler/kernel.h"
#ifdef ENABLE_D
#include "toolchain/adx_datadump_callback.h"
using Adx::DumpChunk;
#endif
using mindspore::kernel::KernelLaunchInfo;
namespace mindspore {
@ -36,4 +40,8 @@ bool CheckReadData(const CNodePtr &cnode);
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
#ifdef ENABLE_D
// Callback function to dump ascend async mode
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size);
#endif
} // namespace mindspore

View File

@ -0,0 +1,146 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package debugger.dump;
enum OutputDataType {
DT_UNDEFINED = 0;
DT_FLOAT = 1;
DT_FLOAT16 = 2;
DT_INT8 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_UINT16 = 6;
DT_INT32 = 7;
DT_INT64 = 8;
DT_UINT32 = 9;
DT_UINT64 = 10;
DT_BOOL = 11;
DT_DOUBLE = 12;
DT_STRING = 13;
DT_DUAL_SUB_INT8 = 14;
DT_DUAL_SUB_UINT8 = 15;
DT_COMPLEX64 = 16;
DT_COMPLEX128 = 17;
DT_QINT8 = 18;
DT_QINT16 = 19;
DT_QINT32 = 20;
DT_QUINT8 = 21;
DT_QUINT16 = 22;
DT_RESOURCE = 23;
DT_STRING_REF = 24;
DT_DUAL = 25;
}
enum OutputFormat {
FORMAT_NCHW = 0;
FORMAT_NHWC = 1;
FORMAT_ND = 2;
FORMAT_NC1HWC0 = 3;
FORMAT_FRACTAL_Z = 4;
FORMAT_NC1C0HWPAD = 5;
FORMAT_NHWC1C0 = 6;
FORMAT_FSR_NCHW = 7;
FORMAT_FRACTAL_DECONV = 8;
FORMAT_C1HWNC0 = 9;
FORMAT_FRACTAL_DECONV_TRANSPOSE = 10;
FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11;
FORMAT_NC1HWC0_C04 = 12;
FORMAT_FRACTAL_Z_C04 = 13;
FORMAT_CHWN = 14;
FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15;
FORMAT_HWCN = 16;
FORMAT_NC1KHKWHWC0 = 17;
FORMAT_BN_WEIGHT = 18;
FORMAT_FILTER_HWCK = 19;
FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20;
FORMAT_HASHTABLE_LOOKUP_KEYS = 21;
FORMAT_HASHTABLE_LOOKUP_VALUE = 22;
FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23;
FORMAT_HASHTABLE_LOOKUP_HITS = 24;
FORMAT_C1HWNCoC0 = 25;
FORMAT_MD = 26;
FORMAT_NDHWC = 27;
FORMAT_FRACTAL_ZZ = 28;
FORMAT_FRACTAL_NZ = 29;
FORMAT_NCDHW = 30;
FORMAT_DHWCN = 31; // 3D filter input tensor format
FORMAT_NDC1HWC0 = 32;
FORMAT_FRACTAL_Z_3D=33;
FORMAT_CN = 34;
FORMAT_NC = 35;
FORMAT_DHWNC = 36;
FORMAT_FRACTAL_Z_3D_TRANSPOSE = 37; // 3D filter(transpose) input tensor format
FORMAT_FRACTAL_ZN_LSTM = 38;
FORMAT_FRACTAL_Z_G = 39;
FORMAT_RESERVED = 40;
// Add new formats definition here
FORMAT_MAX = 0xff;
}
message OriginalOp {
string name = 1;
uint32 output_index = 2;
OutputDataType data_type = 3;
OutputFormat format = 4;
}
message Shape {
repeated uint64 dim = 1;
}
message OpOutput {
OutputDataType data_type = 1;
OutputFormat format = 2;
Shape shape = 3;
OriginalOp original_op = 4; // the original op corresponding to the output
bytes data = 5;
uint64 size = 6;
Shape original_shape = 7;
int32 sub_format = 8;
}
message OpInput {
OutputDataType data_type = 1;
OutputFormat format = 2;
Shape shape = 3;
bytes data = 4;
uint64 size = 5;
Shape original_shape = 6;
int32 sub_format = 7;
}
enum BufferType {
L1 = 0;
}
message OpBuffer {
BufferType buffer_type = 1;
bytes data = 2;
uint64 size = 3;
}
message DumpData {
string version = 1;
uint64 dump_time = 2;
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;
}

View File

@ -0,0 +1,86 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
#define MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
#include <vector>
#include <string>
#include <iostream>
#include "utils/log_adapter.h"
#ifdef ENABLE_D
#include "proto/dump_data.pb.h"
#include "toolchain/adx_datadump_callback.h"
using Adx::DumpChunk;
#endif
// This class is for building dump data receiving from adx server. Tensor Data for each kernel will be divided in pieces
// and each piece would be wrapped into DumpChunk struct. This class provides function to merge dump chunks and
// construct dump data object.
// Accumulates DumpChunk pieces delivered by the adx server for one dump file and
// reassembles them into the protobuf header plus the raw tensor payload.
class DumpDataBuilder {
 public:
  DumpDataBuilder() {}
  ~DumpDataBuilder() = default;

#ifdef ENABLE_D
  // Buffers one chunk's payload. Returns false when the copy cannot be allocated.
  bool CopyDumpChunk(const DumpChunk *dump_chunk) {
    try {
      uint32_t buf_sz = dump_chunk->bufLen;
      std::string buffer_str(reinterpret_cast<const char *>(dump_chunk->dataBuf), buf_sz);
      chunk_list_.push_back(std::move(buffer_str));
      total_sz_ += buf_sz;
    } catch (std::bad_alloc &err) {
      MS_LOG(ERROR) << "Failed to allocate memory for " << dump_chunk->fileName << ", reason: " << err.what();
      return false;
    }
    return true;
  }

  // Merges all buffered chunks and splits the result into the DumpData proto and the
  // raw tensor bytes. Expected layout: [uint64 header_len][proto, header_len bytes][data].
  bool ConstructDumpData(debugger::dump::DumpData *dump_data_proto, std::vector<char> *data_ptr) {
    if (chunk_list_.empty()) {
      return false;
    }
    // Merge the chunk pieces into one contiguous buffer; reserve avoids reallocations.
    std::string dump_proto_str;
    dump_proto_str.reserve(total_sz_);
    for (const auto &item : chunk_list_) {  // const ref: the old by-value loop copied every chunk
      dump_proto_str += item;
    }
    chunk_list_.clear();
    const uint64_t header_len_offset = 8;
    // Fix: validate sizes before substr below; a truncated transfer used to raise
    // std::out_of_range instead of failing gracefully.
    if (dump_proto_str.size() < header_len_offset) {
      MS_LOG(ERROR) << "Dump data is too short to hold the header length field.";
      return false;
    }
    uint64_t header_len = *reinterpret_cast<const uint64_t *>(dump_proto_str.c_str());
    if (dump_proto_str.size() < header_len_offset + header_len) {
      MS_LOG(ERROR) << "Dump data is shorter than the recorded proto header length.";
      return false;
    }
    std::string header = dump_proto_str.substr(header_len_offset, header_len);
    if (!dump_data_proto->ParseFromString(header)) {
      MS_LOG(ERROR) << "Failed to parse dump proto file.";
      return false;
    }
    auto data_sz = dump_proto_str.size() - header_len_offset - header_len;
    data_ptr->resize(data_sz);
    auto ret = memcpy_s(data_ptr->data(), data_sz, dump_proto_str.c_str() + header_len_offset + header_len, data_sz);
    if (ret != 0) {
      MS_LOG(ERROR) << "Failed to get data from Adx";
      return false;
    }
    return true;
  }
#endif

 private:
  std::vector<std::string> chunk_list_;  // chunk payloads in arrival order
  uint64_t total_sz_{0};                 // total bytes buffered across all chunks
};
#endif // MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_

View File

@ -118,7 +118,10 @@ def generate_dump_json(dump_path, json_file_name, test_key):
elif test_key == "test_Ascend_async_multi_root_graph_dump":
data = async_dump_dict_3
data["common_dump_settings"]["path"] = dump_path
elif test_key == "test_async_dump_file_format":
data = async_dump_dict
data["common_dump_settings"]["path"] = dump_path
data["common_dump_settings"]["file_format"] = "npy"
else:
raise ValueError(
"Failed to generate dump json file. The test name value " + test_key + " is invalid.")

View File

@ -51,17 +51,12 @@ x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
def run_async_dump(test_name):
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
dump_path = os.path.join(tmp_dir, 'async_dump')
dump_config_path = os.path.join(tmp_dir, 'async_dump.json')
generate_dump_json(dump_path, dump_config_path, 'test_async_dump')
generate_dump_json(dump_path, dump_config_path, test_name)
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
if os.path.isdir(dump_path):
@ -76,6 +71,35 @@ def test_async_dump():
del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
    """
    Feature: async dump on Ascend
    Description: test async dump with default file_format value
    Expectation: dump data are generated as protobuf file format (suffix with timestamp)
    """
    # No 'file_format' key in the dump config: must keep the legacy protobuf output.
    run_async_dump("test_async_dump")
# Skipped until the Ascend run package carrying the matching ADX callback support is
# released (per the skip reason); remove the marker once the run package is updated.
@pytest.mark.skip(reason="wait for run package updates in Dec 01")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_file_format():
    """
    Feature: async dump on Ascend in npy format
    Description: test async dump with file_format is configured as npy
    Expectation: dump data are generated as npy file format
    """
    run_async_dump("test_async_dump_file_format")
def run_e2e_dump():
if sys.platform != 'linux':
return