forked from mindspore-Ecosystem/mindspore
Support Cann callback api for ascend async dump
This commit is contained in:
parent
3fc995a6ae
commit
07b653103e
|
@ -1 +1 @@
|
|||
Subproject commit fd9e9a96f97960ba46c21352b0df7719d3a0a3f7
|
||||
Subproject commit 8f7df5fd1f7a70233e2aeaa6155dcd76b93e0b11
|
|
@ -168,8 +168,11 @@ if(ENABLE_DEBUGGER)
|
|||
ms_protobuf_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS ${DEBUGGER_PROTO_LIST})
|
||||
file(GLOB_RECURSE DEBUGGER_GRPC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_grpc.proto")
|
||||
ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
|
||||
file(GLOB_RECURSE DUMP_DATA_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/dump_data.proto")
|
||||
ms_protobuf_generate(DUMP_DATA_PROTO_SRCS DUMP_DATA_PROTO_HDRS ${DUMP_DATA_PROTO_LIST})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_DATA_PROTO_SRCS})
|
||||
endif()
|
||||
|
||||
if(ENABLE_DUMP_PROTO)
|
||||
|
|
|
@ -42,6 +42,7 @@ constexpr auto kTransFlag = "trans_flag";
|
|||
constexpr auto kStatisticDump = "statistic";
|
||||
constexpr auto kTensorDump = "tensor";
|
||||
constexpr auto kFullDump = "full";
|
||||
constexpr auto kFileFormat = "file_format";
|
||||
constexpr auto kDumpInputAndOutput = 0;
|
||||
constexpr auto kDumpInputOnly = 1;
|
||||
constexpr auto kDumpOutputOnly = 2;
|
||||
|
@ -274,6 +275,8 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) {
|
|||
ParseSupportDevice(*support_device);
|
||||
if (!e2e_dump_enabled_) {
|
||||
ParseOpDebugMode(*op_debug_mode);
|
||||
ParseFileFormat(
|
||||
*common_dump_settings); // Pass in the whole json string to parse because file_format field is optional.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -505,6 +508,23 @@ void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) {
|
|||
}
|
||||
}
|
||||
|
||||
// Parses the optional "file_format" field of the dump config json.
// Defaults to FORMAT_BIN when the field is absent; otherwise accepts only
// "bin" or "npy" and raises an exception for anything else.
void DumpJsonParser::ParseFileFormat(const nlohmann::json &content) {
  auto iter = content.find(kFileFormat);
  if (iter == content.end()) {
    // file_format is optional; keep the legacy binary format as the default.
    file_format_ = JsonFileFormat::FORMAT_BIN;
  } else {
    CheckJsonStringType(*iter, kFileFormat);
    std::string file_format = *iter;
    // static const: the string-to-enum mapping never changes, so build it once
    // instead of on every call.
    static const std::map<std::string, JsonFileFormat> str_to_fmt_enum = {{"bin", JsonFileFormat::FORMAT_BIN},
                                                                          {"npy", JsonFileFormat::FORMAT_NPY}};
    // Single lookup: reuse the iterator instead of find() followed by at().
    auto fmt_iter = str_to_fmt_enum.find(file_format);
    if (fmt_iter == str_to_fmt_enum.end()) {
      MS_LOG(EXCEPTION) << "Dump Json Parse Failed. 'file_format' should be either 'npy' or 'bin', but got: "
                        << file_format;
    }
    file_format_ = fmt_iter->second;
  }
}
|
||||
|
||||
void DumpJsonParser::JsonConfigToString() {
|
||||
std::string cur_config;
|
||||
cur_config.append("dump_mode:");
|
||||
|
|
|
@ -59,6 +59,7 @@ class DumpJsonParser {
|
|||
bool trans_flag() const { return trans_flag_; }
|
||||
uint32_t cur_dump_iter() const { return cur_dump_iter_; }
|
||||
void UpdateDumpIter() { ++cur_dump_iter_; }
|
||||
bool FileFormatIsNpy() const { return file_format_ == JsonFileFormat::FORMAT_NPY; }
|
||||
bool GetIterDumpFlag() const;
|
||||
bool InputNeedDump() const;
|
||||
bool OutputNeedDump() const;
|
||||
|
@ -70,6 +71,7 @@ class DumpJsonParser {
|
|||
void SaveGraph(session::KernelGraph *graph) { (void)graphs_.emplace_back(graph); }
|
||||
const std::vector<session::KernelGraph *> &graphs() const { return graphs_; }
|
||||
enum JsonDumpMode { DUMP_ALL = 0, DUMP_KERNEL = 1, DUMP_KERNELS_WITH_FLAG = 2 };
|
||||
enum JsonFileFormat { FORMAT_NPY = 0, FORMAT_BIN = 1 };
|
||||
|
||||
private:
|
||||
DumpJsonParser() = default;
|
||||
|
@ -89,6 +91,7 @@ class DumpJsonParser {
|
|||
std::vector<std::string> cell_dump_kernels_;
|
||||
std::set<uint32_t> support_devices_;
|
||||
uint32_t op_debug_mode_{0};
|
||||
JsonFileFormat file_format_;
|
||||
bool trans_flag_{false};
|
||||
uint32_t cur_dump_iter_{0};
|
||||
bool already_parsed_{false};
|
||||
|
@ -112,6 +115,7 @@ class DumpJsonParser {
|
|||
void ParseSupportDevice(const nlohmann::json &content);
|
||||
bool ParseEnable(const nlohmann::json &content);
|
||||
void ParseOpDebugMode(const nlohmann::json &content);
|
||||
void ParseFileFormat(const nlohmann::json &content);
|
||||
|
||||
void JudgeDumpEnabled();
|
||||
void JsonConfigToString();
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "utils/utils.h"
|
||||
#include "debug/common.h"
|
||||
|
||||
namespace mindspore {
|
||||
uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
|
||||
|
@ -137,8 +139,7 @@ uint64_t GetTimeStamp() {
|
|||
return timestamp;
|
||||
}
|
||||
|
||||
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope) {
|
||||
const std::string separator("--");
|
||||
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const std::string &separator) {
|
||||
std::size_t found = fullname_with_scope.rfind(separator);
|
||||
std::string op_name;
|
||||
if (found != std::string::npos) {
|
||||
|
@ -146,4 +147,30 @@ std::string GetOpNameWithoutScope(const std::string &fullname_with_scope) {
|
|||
}
|
||||
return op_name;
|
||||
}
|
||||
|
||||
void DumpToFile(const std::string &file_name, const std::string &dump_str) {
|
||||
if (dump_str.empty()) {
|
||||
MS_LOG(ERROR) << "Failed to dump empty tensor data.";
|
||||
return;
|
||||
}
|
||||
|
||||
auto real_path = Common::CreatePrefixPath(file_name);
|
||||
if (!real_path.has_value()) {
|
||||
MS_LOG(ERROR) << "CreatePrefixPath failed.";
|
||||
return;
|
||||
}
|
||||
std::string real_path_str = real_path.value();
|
||||
ChangeFileMode(real_path_str, S_IWUSR);
|
||||
std::ofstream file(real_path_str, std::ofstream::out | std::ofstream::trunc);
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(EXCEPTION) << "Open file " << real_path_str << "failed: " << ErrnoToString(errno);
|
||||
}
|
||||
file << dump_str;
|
||||
if (file.bad()) {
|
||||
file.close();
|
||||
MS_LOG(EXCEPTION) << "Dump string to file " << real_path_str << " failed: " << ErrnoToString(errno);
|
||||
}
|
||||
file.close();
|
||||
ChangeFileMode(real_path_str, S_IRUSR);
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -39,7 +39,10 @@ void DumpMemToFile(const std::string &file_path, const device::DeviceAddress &ad
|
|||
const TypeId &type, bool trans_flag = false);
|
||||
// Get time stamp since epoch in microseconds
|
||||
uint64_t GetTimeStamp();
|
||||
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope);
|
||||
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const std::string &separator = "--");
|
||||
|
||||
// dump target string into file
|
||||
void DumpToFile(const std::string &file_name, const std::string &dump_str);
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_UTILS_H_
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "common/trans.h"
|
||||
|
@ -30,6 +32,7 @@
|
|||
#include "utils/config_manager.h"
|
||||
#include "utils/file_utils.h"
|
||||
#include "debug/data_dump/tensor_stat_dump.h"
|
||||
#include "abstract/utils.h"
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debug_services.h"
|
||||
#include "debug/tensor_load.h"
|
||||
|
@ -37,6 +40,60 @@
|
|||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
#ifdef ENABLE_D
|
||||
using ProtoFormat = debugger::dump::OutputFormat;
|
||||
using ProtoDataType = debugger::dump::OutputDataType;
|
||||
|
||||
constexpr int kDhaAtomicAddInfoSize = 128;
|
||||
constexpr int kL2AtomicAddInfoSize = 128;
|
||||
constexpr int kAiCoreInfoSize = 256;
|
||||
constexpr int kDhaAtomicAddStatusSize = 256;
|
||||
constexpr int kL2AtomicAddStatusSize = 256;
|
||||
constexpr int kUint64Size = sizeof(uint64_t);
|
||||
const std::set<std::pair<std::string, std::string>> kSuppTransFormatPair = {
|
||||
// {device format, host format}
|
||||
{kOpFormat_FRAC_Z, kOpFormat_NCHW}, {kOpFormat_FRAC_NZ, kOpFormat_NCHW},
|
||||
{kOpFormat_NC1HWC0, kOpFormat_NCHW}, {kOpFormat_C1HWNCoC0, kOpFormat_NCHW},
|
||||
{kOpFormat_NC1HWC0_C04, kOpFormat_NCHW}, {kOpFormat_NDC1HWC0, kOpFormat_NCHW},
|
||||
{kOpFormat_FRACTAL_Z_3D, kOpFormat_NCHW}};
|
||||
|
||||
const std::map<ProtoFormat, std::string> kFormatToStringMap = {
|
||||
{ProtoFormat::FORMAT_NCHW, kOpFormat_NCHW},
|
||||
{ProtoFormat::FORMAT_NHWC, kOpFormat_NHWC},
|
||||
{ProtoFormat::FORMAT_ND, kOpFormat_ND},
|
||||
{ProtoFormat::FORMAT_NC1HWC0, kOpFormat_NC1HWC0},
|
||||
{ProtoFormat::FORMAT_FRACTAL_Z, kOpFormat_FRAC_Z},
|
||||
{ProtoFormat::FORMAT_NC1HWC0_C04, kOpFormat_NC1HWC0_C04},
|
||||
{ProtoFormat::FORMAT_FRACTAL_Z_C04, kOpFormat_FRACTAL_Z_C04},
|
||||
{ProtoFormat::FORMAT_NC1KHKWHWC0, kOpFormat_NC1KHKWHWC0},
|
||||
{ProtoFormat::FORMAT_HWCN, kOpFormat_HWCN},
|
||||
{ProtoFormat::FORMAT_NDHWC, kOpFormat_NDHWC},
|
||||
{ProtoFormat::FORMAT_NCDHW, kOpFormat_NCDHW},
|
||||
{ProtoFormat::FORMAT_DHWCN, kOpFormat_DHWCN},
|
||||
{ProtoFormat::FORMAT_DHWNC, kOpFormat_DHWNC},
|
||||
{ProtoFormat::FORMAT_NDC1HWC0, kOpFormat_NDC1HWC0},
|
||||
{ProtoFormat::FORMAT_FRACTAL_Z_3D, kOpFormat_FRACTAL_Z_3D},
|
||||
{ProtoFormat::FORMAT_C1HWNCoC0, kOpFormat_C1HWNCoC0},
|
||||
{ProtoFormat::FORMAT_FRACTAL_NZ, kOpFormat_FRAC_NZ},
|
||||
{ProtoFormat::FORMAT_FRACTAL_ZN_LSTM, kOpFormat_FRACTAL_ZN_LSTM}};
|
||||
|
||||
const std::map<ProtoDataType, mindspore::TypeId> kDataTypetoMSTypeMap = {
|
||||
{ProtoDataType::DT_UNDEFINED, mindspore::TypeId::kTypeUnknown},
|
||||
{ProtoDataType::DT_FLOAT, mindspore::TypeId::kNumberTypeFloat32},
|
||||
{ProtoDataType::DT_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
|
||||
{ProtoDataType::DT_INT8, mindspore::TypeId::kNumberTypeInt8},
|
||||
{ProtoDataType::DT_UINT8, mindspore::TypeId::kNumberTypeUInt8},
|
||||
{ProtoDataType::DT_INT16, mindspore::TypeId::kNumberTypeInt16},
|
||||
{ProtoDataType::DT_UINT16, mindspore::TypeId::kNumberTypeUInt16},
|
||||
{ProtoDataType::DT_INT32, mindspore::TypeId::kNumberTypeInt32},
|
||||
{ProtoDataType::DT_INT64, mindspore::TypeId::kNumberTypeInt64},
|
||||
{ProtoDataType::DT_UINT32, mindspore::TypeId::kNumberTypeUInt32},
|
||||
{ProtoDataType::DT_UINT64, mindspore::TypeId::kNumberTypeUInt64},
|
||||
{ProtoDataType::DT_BOOL, mindspore::TypeId::kNumberTypeBool},
|
||||
{ProtoDataType::DT_DOUBLE, mindspore::TypeId::kNumberTypeFloat64},
|
||||
{ProtoDataType::DT_STRING, mindspore::TypeId::kObjectTypeString}};
|
||||
#endif
|
||||
|
||||
bool E2eDump::IsDeviceTargetGPU() {
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
|
@ -443,4 +500,178 @@ bool E2eDump::DumpDirExists(const std::string &dump_path) {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_D
|
||||
// Dumps every input and output tensor described by dump_data to files under
// dump_path. The raw bytes in data_ptr are laid out as all inputs first, then
// all outputs; `offset` tracks the running position of the current tensor's
// payload. Conversion failures are logged per-slot and do not stop the loop.
void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                               char *data_ptr) {
  uint64_t offset = 0;
  // dump input tensors
  std::string in_path = dump_path + ".input.";
  uint32_t in_slot = 0;
  // Iterate the repeated proto fields by const reference instead of copying
  // every message into a temporary vector and again per iteration.
  for (const auto &in_tensor : dump_data.input()) {
    std::string in_slot_path = in_path + std::to_string(in_slot) + ".";
    auto succ = ConvertFormatForTensorAndDump(in_slot_path, in_tensor, data_ptr + offset);
    if (!succ) {
      MS_LOG(INFO) << "Failed to convert format for tensor " << in_slot_path;
    }
    offset += in_tensor.size();
    ++in_slot;
  }

  // dump output tensors
  std::string out_path = dump_path + ".output.";
  uint32_t out_slot = 0;
  for (const auto &out_tensor : dump_data.output()) {
    std::string out_slot_path = out_path + std::to_string(out_slot) + ".";
    auto succ = ConvertFormatForTensorAndDump(out_slot_path, out_tensor, data_ptr + offset);
    if (!succ) {
      MS_LOG(INFO) << "Failed to convert format for tensor " << out_slot_path;
    }
    offset += out_tensor.size();
    ++out_slot;
  }
}
|
||||
|
||||
template <typename T>
|
||||
bool E2eDump::ConvertFormatForTensorAndDump(std::string dump_path, const T &tensor, char *data_ptr) {
|
||||
// get format
|
||||
auto iter_fmt = kFormatToStringMap.find(tensor.format());
|
||||
if (iter_fmt == kFormatToStringMap.end()) {
|
||||
MS_LOG(INFO) << "Unsupported tensor format " << iter_fmt->second << " for tensor " << dump_path;
|
||||
return false;
|
||||
}
|
||||
std::string device_format = iter_fmt->second;
|
||||
// get data type
|
||||
auto iter_dtype = kDataTypetoMSTypeMap.find(tensor.data_type());
|
||||
if (iter_dtype == kDataTypetoMSTypeMap.end()) {
|
||||
MS_LOG(INFO) << "Unsupported tensor type " << iter_dtype->second << " for tensor " << dump_path;
|
||||
return false;
|
||||
}
|
||||
auto src_type = iter_dtype->second;
|
||||
// get host shape
|
||||
std::vector<size_t> device_shape;
|
||||
(void)std::copy(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(device_shape));
|
||||
std::vector<size_t> host_shape;
|
||||
(void)std::copy(tensor.original_shape().dim().begin(), tensor.original_shape().dim().end(),
|
||||
std::back_inserter(host_shape));
|
||||
ShapeVector shape_to;
|
||||
(void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(shape_to), SizeToLong);
|
||||
size_t data_size = (size_t)tensor.size();
|
||||
|
||||
bool trans_success = false;
|
||||
auto trans_buf = std::vector<uint8_t>(data_size);
|
||||
// convert format to host format. It can be either NCHW or ND (non 4-dimemsions).
|
||||
const uint8_t kNumFourDim = 4;
|
||||
std::string host_format;
|
||||
if (host_shape.size() == kNumFourDim) {
|
||||
host_format = kOpFormat_NCHW;
|
||||
} else {
|
||||
host_format = kOpFormat_ND;
|
||||
}
|
||||
if (device_format != host_format) {
|
||||
auto iter = kSuppTransFormatPair.find(std::make_pair(device_format, host_format));
|
||||
if (iter == kSuppTransFormatPair.end()) {
|
||||
MS_LOG(INFO) << "Do not support convert from format " << device_format << " to " << host_format << " for tensor "
|
||||
<< dump_path;
|
||||
} else {
|
||||
const trans::FormatArgs format_args{data_ptr, data_size, host_format, device_format,
|
||||
host_shape, device_shape, src_type};
|
||||
auto group = tensor.sub_format() > 1 ? tensor.sub_format() : 1;
|
||||
trans_success = trans::TransFormatFromDeviceToHost(format_args, trans_buf.data(), group);
|
||||
if (!trans_success) {
|
||||
MS_LOG(ERROR) << "Trans format failed.";
|
||||
}
|
||||
}
|
||||
}
|
||||
// dump tensor data into npy file
|
||||
bool dump_success = false;
|
||||
if (trans_success) {
|
||||
dump_path += host_format;
|
||||
dump_success = DumpJsonParser::DumpToFile(dump_path, trans_buf.data(), data_size, shape_to, src_type);
|
||||
} else {
|
||||
dump_path += device_format;
|
||||
dump_success = DumpJsonParser::DumpToFile(dump_path, data_ptr, data_size, shape_to, src_type);
|
||||
}
|
||||
return dump_success;
|
||||
}
|
||||
|
||||
// Reads a little-endian uint64 from an arbitrarily aligned buffer and converts
// it to host byte order. memcpy avoids the unaligned-access / strict-aliasing
// undefined behavior of the previous reinterpret_cast dereference.
uint64_t UnpackUint64Value(char *ptr) {
  uint64_t value = 0;
  (void)memcpy(&value, ptr, sizeof(value));
#if defined(__APPLE__)
  return value;
#else
  // Bug fix: le16toh was used on a 64-bit value, which would truncate/swap
  // only 16 bits on big-endian hosts; le64toh is the correct conversion.
  return le64toh(value);
#endif
}
|
||||
|
||||
// Formats an unsigned 64-bit value as a "0x"-prefixed lowercase hex string,
// e.g. 255 -> "0xff".
std::string IntToHexString(const uint64_t value) {
  std::ostringstream oss;
  oss << "0x" << std::hex << value;
  return oss.str();
}
|
||||
|
||||
// Parses one fixed-layout overflow info record into json: six consecutive
// little-endian uint64 fields (model_id, stream_id, task_id, task_type,
// pc_start, para_base). data_ptr must point to at least 6 * kUint64Size
// readable bytes; the field order below mirrors the on-wire layout exactly.
nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
  uint32_t index = 0;
  uint64_t model_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t stream_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t task_id = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t task_type = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t pc_start = UnpackUint64Value(data_ptr + index);
  index += kUint64Size;
  uint64_t para_base = UnpackUint64Value(data_ptr + index);

  nlohmann::json overflow_info;
  overflow_info["model_id"] = model_id;
  overflow_info["stream_id"] = stream_id;
  overflow_info["task_id"] = task_id;
  overflow_info["task_type"] = task_type;
  // Addresses are rendered as hex strings for readability in the dump file.
  overflow_info["pc_start"] = IntToHexString(pc_start);
  overflow_info["para_base"] = IntToHexString(para_base);
  return overflow_info;
}
|
||||
|
||||
// Parses the Ascend op-debug (overflow detection) record in data_ptr and
// writes it out as one json file per output slot under dump_path. The record
// layout is: DHA Atomic Add info, L2 Atomic Add info, AICore info (each a
// ParseOverflowInfo block), then DHA/L2 status blocks and three AICore status
// words (kernel_code, block_idx, status).
void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
                                char *data_ptr) {
  std::string out_path = dump_path + ".output.";
  std::vector<debugger::dump::OpOutput> op_debug(dump_data.output().begin(), dump_data.output().end());
  for (uint32_t slot = 0; slot < op_debug.size(); slot++) {
    // NOTE(review): index restarts at 0 each iteration and data_ptr is never
    // advanced per slot, so every slot parses the same bytes. This looks like
    // it assumes the op-debug node has exactly one output — confirm.
    uint32_t index = 0;
    // parse DHA Atomic Add info
    nlohmann::json dha_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kDhaAtomicAddInfoSize;
    // parse L2 Atomic Add info
    nlohmann::json l2_atomic_add_info = ParseOverflowInfo(data_ptr + index);
    index += kL2AtomicAddInfoSize;
    // parse AICore info
    nlohmann::json ai_core_info = ParseOverflowInfo(data_ptr + index);
    index += kAiCoreInfoSize;
    // parse DHA Atomic Add status
    dha_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kDhaAtomicAddStatusSize;
    // parse L2 Atomic Add status
    l2_atomic_add_info["status"] = UnpackUint64Value(data_ptr + index);
    index += kL2AtomicAddStatusSize;
    // parse AICore status: three consecutive uint64 words
    uint64_t kernel_code = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t block_idx = UnpackUint64Value(data_ptr + index);
    index += kUint64Size;
    uint64_t status = UnpackUint64Value(data_ptr + index);
    ai_core_info["kernel_code"] = IntToHexString(kernel_code);
    ai_core_info["block_idx"] = block_idx;
    ai_core_info["status"] = status;

    // Assemble the three sections into one json document per slot.
    nlohmann::json opdebug_data;
    opdebug_data["DHA Atomic Add"] = dha_atomic_add_info;
    opdebug_data["L2 Atomic Add"] = l2_atomic_add_info;
    opdebug_data["AI Core"] = ai_core_info;

    // save json to file
    DumpToFile(out_path + std::to_string(slot) + ".json", opdebug_data.dump());
  }
}
|
||||
#endif // ENABLE_D
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
#include "runtime/device/device_address.h"
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "debug/data_dump/dump_utils.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "proto/dump_data.pb.h"
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_DEBUGGER
|
||||
class Debugger;
|
||||
|
@ -59,6 +62,13 @@ class E2eDump {
|
|||
|
||||
static bool DumpDirExists(const std::string &dump_path);
|
||||
|
||||
#ifdef ENABLE_D
|
||||
static void DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data, char *data_ptr);
|
||||
|
||||
static void DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
|
||||
char *data_ptr);
|
||||
#endif
|
||||
|
||||
private:
|
||||
static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger);
|
||||
|
||||
|
@ -81,6 +91,13 @@ class E2eDump {
|
|||
|
||||
static void UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_mode);
|
||||
|
||||
#ifdef ENABLE_D
|
||||
static nlohmann::json ParseOverflowInfo(char *data_ptr);
|
||||
|
||||
template <typename T>
|
||||
static bool ConvertFormatForTensorAndDump(std::string dump_path, const T &tensor, char *data_ptr);
|
||||
#endif
|
||||
|
||||
inline static unsigned int starting_graph_id = INT32_MAX;
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -1535,4 +1535,16 @@ bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
|
|||
return debug_services_->TensorExistsInCurrent(tensor_name);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_D
|
||||
std::shared_ptr<DumpDataBuilder> Debugger::LoadDumpDataBuilder(const std::string &node_name) {
|
||||
auto iter = dump_data_construct_map_.find(node_name);
|
||||
if (iter == dump_data_construct_map_.end()) {
|
||||
dump_data_construct_map_[node_name] = std::make_shared<DumpDataBuilder>();
|
||||
}
|
||||
return dump_data_construct_map_[node_name];
|
||||
}
|
||||
|
||||
void Debugger::ClearDumpDataBuilder(const std::string &node_name) { dump_data_construct_map_.erase(node_name); }
|
||||
#endif
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
#include "debug/debugger/grpc_client.h"
|
||||
#include "debug/debug_services.h"
|
||||
#include "common/trans.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "debug/dump_data_builder.h"
|
||||
#endif
|
||||
|
||||
using debugger::Chunk;
|
||||
using debugger::DataType;
|
||||
|
@ -170,6 +173,12 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// check if dump using debugger backend is enabled
|
||||
bool CheckDebuggerDumpEnabled() const;
|
||||
|
||||
#ifdef ENABLE_D
|
||||
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);
|
||||
|
||||
void ClearDumpDataBuilder(const std::string &node_name);
|
||||
#endif
|
||||
|
||||
private:
|
||||
// private constructor for singleton
|
||||
Debugger();
|
||||
|
@ -289,6 +298,11 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// The vector of graph pointers that have been run in the current step.
|
||||
std::vector<KernelGraphPtr> graph_ptr_step_vec_;
|
||||
|
||||
#ifdef ENABLE_D
|
||||
// to construct kernel data for async dump, key is the dump path to the node
|
||||
std::map<std::string, std::shared_ptr<DumpDataBuilder>> dump_data_construct_map_;
|
||||
#endif
|
||||
|
||||
// singleton
|
||||
static std::mutex instance_lock_;
|
||||
static std::shared_ptr<Debugger> debugger_;
|
||||
|
|
|
@ -23,8 +23,12 @@
|
|||
#include "debug/debugger/debugger.h"
|
||||
#include "runtime/device/gpu/gpu_device_address.h"
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "debug/dump_data_builder.h"
|
||||
#endif
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "debug/data_dump/e2e_dump.h"
|
||||
|
||||
using mindspore::kernel::AddressPtr;
|
||||
using mindspore::kernel::KernelLaunchInfo;
|
||||
|
@ -33,8 +37,6 @@ using KernelGraph = mindspore::session::KernelGraph;
|
|||
using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
|
||||
|
||||
namespace mindspore {
|
||||
static const size_t PARAMETER_OUTPUT_INDEX = 0;
|
||||
|
||||
std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
|
||||
// define a vector containing real output number
|
||||
std::vector<size_t> real_outputs;
|
||||
|
@ -162,4 +164,52 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_
|
|||
bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
|
||||
debugger->PostExecuteNode(cnode, last_kernel);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_D
|
||||
// ADX (Ascend async dump) callback. Each tensor's dump arrives as a sequence
// of DumpChunk pieces keyed by fileName; chunks are accumulated in a
// per-node DumpDataBuilder and, when the last chunk arrives, merged, parsed
// into a DumpData proto and written to disk (overflow records vs. tensor
// data are dispatched on the file base name).
// NOTE(review): returns 0 on success AND on most errors; only a failed
// CopyDumpChunk returns 1 — confirm against the ADX callback contract.
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size) {
  MS_LOG(DEBUG) << "ADX DumpDataCallBack is called";
  string file_name = dump_chunk->fileName;
  uint32_t isLastChunk = dump_chunk->isLastChunk;

  // parse chunk header
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  // Fetch (or lazily create) the builder accumulating this node's chunks.
  auto dump_data_build = debugger->LoadDumpDataBuilder(file_name);
  if (dump_data_build == nullptr) {
    MS_LOG(ERROR) << "Failed to load dump data builder for node " << file_name;
    return 0;
  }
  if (!dump_data_build->CopyDumpChunk(dump_chunk)) {
    return 1;
  }

  if (isLastChunk == 1) {
    // construct dump data object from all accumulated chunks
    debugger::dump::DumpData dump_data;
    std::vector<char> data_buf;
    if (!dump_data_build->ConstructDumpData(&dump_data, &data_buf)) {
      MS_LOG(ERROR) << "Failed to parse data for node " << file_name;
      return 0;
    }

    // convert and save to files
    auto separator = file_name.rfind("/");
    auto path_name = file_name.substr(0, separator);
    auto file_base_name = file_name.substr(separator + 1);
    if (file_base_name.rfind("Opdebug.Node_OpDebug.") == 0) {
      // save overflow data (op-debug node has a fixed, recognizable prefix)
      E2eDump::DumpOpDebugToFile(file_name, dump_data, data_buf.data());
    } else {
      // save tensor data; "_" is the scope separator used in async dump names
      auto op_type = file_base_name.substr(0, file_base_name.find("."));
      auto file_base_name_no_scope = GetOpNameWithoutScope(file_base_name, "_");
      E2eDump::DumpTensorToFile(path_name + "/" + op_type + "." + file_base_name_no_scope, dump_data, data_buf.data());
    }

    // All data for this node has been flushed; free the builder.
    debugger->ClearDumpDataBuilder(file_name);
  }

  return 0;
}
|
||||
#endif
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -19,7 +19,11 @@
|
|||
#include <string>
|
||||
#include "debug/debugger/debugger.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "toolchain/adx_datadump_callback.h"
|
||||
|
||||
using Adx::DumpChunk;
|
||||
#endif
|
||||
using mindspore::kernel::KernelLaunchInfo;
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -36,4 +40,8 @@ bool CheckReadData(const CNodePtr &cnode);
|
|||
|
||||
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
|
||||
|
||||
#ifdef ENABLE_D
|
||||
// Callback function to dump ascend async mode
|
||||
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size);
|
||||
#endif
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package debugger.dump;
|
||||
|
||||
|
||||
enum OutputDataType {
|
||||
DT_UNDEFINED = 0;
|
||||
DT_FLOAT = 1;
|
||||
DT_FLOAT16 = 2;
|
||||
DT_INT8 = 3;
|
||||
DT_UINT8 = 4;
|
||||
DT_INT16 = 5;
|
||||
DT_UINT16 = 6;
|
||||
DT_INT32 = 7;
|
||||
DT_INT64 = 8;
|
||||
DT_UINT32 = 9;
|
||||
DT_UINT64 = 10;
|
||||
DT_BOOL = 11;
|
||||
DT_DOUBLE = 12;
|
||||
DT_STRING = 13;
|
||||
DT_DUAL_SUB_INT8 = 14;
|
||||
DT_DUAL_SUB_UINT8 = 15;
|
||||
DT_COMPLEX64 = 16;
|
||||
DT_COMPLEX128 = 17;
|
||||
DT_QINT8 = 18;
|
||||
DT_QINT16 = 19;
|
||||
DT_QINT32 = 20;
|
||||
DT_QUINT8 = 21;
|
||||
DT_QUINT16 = 22;
|
||||
DT_RESOURCE = 23;
|
||||
DT_STRING_REF = 24;
|
||||
DT_DUAL = 25;
|
||||
}
|
||||
|
||||
enum OutputFormat {
|
||||
FORMAT_NCHW = 0;
|
||||
FORMAT_NHWC = 1;
|
||||
FORMAT_ND = 2;
|
||||
FORMAT_NC1HWC0 = 3;
|
||||
FORMAT_FRACTAL_Z = 4;
|
||||
FORMAT_NC1C0HWPAD = 5;
|
||||
FORMAT_NHWC1C0 = 6;
|
||||
FORMAT_FSR_NCHW = 7;
|
||||
FORMAT_FRACTAL_DECONV = 8;
|
||||
FORMAT_C1HWNC0 = 9;
|
||||
FORMAT_FRACTAL_DECONV_TRANSPOSE = 10;
|
||||
FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11;
|
||||
FORMAT_NC1HWC0_C04 = 12;
|
||||
FORMAT_FRACTAL_Z_C04 = 13;
|
||||
FORMAT_CHWN = 14;
|
||||
FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15;
|
||||
FORMAT_HWCN = 16;
|
||||
FORMAT_NC1KHKWHWC0 = 17;
|
||||
FORMAT_BN_WEIGHT = 18;
|
||||
FORMAT_FILTER_HWCK = 19;
|
||||
FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20;
|
||||
FORMAT_HASHTABLE_LOOKUP_KEYS = 21;
|
||||
FORMAT_HASHTABLE_LOOKUP_VALUE = 22;
|
||||
FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23;
|
||||
FORMAT_HASHTABLE_LOOKUP_HITS = 24;
|
||||
FORMAT_C1HWNCoC0 = 25;
|
||||
FORMAT_MD = 26;
|
||||
FORMAT_NDHWC = 27;
|
||||
FORMAT_FRACTAL_ZZ = 28;
|
||||
FORMAT_FRACTAL_NZ = 29;
|
||||
FORMAT_NCDHW = 30;
|
||||
FORMAT_DHWCN = 31; // 3D filter input tensor format
|
||||
FORMAT_NDC1HWC0 = 32;
|
||||
FORMAT_FRACTAL_Z_3D=33;
|
||||
FORMAT_CN = 34;
|
||||
FORMAT_NC = 35;
|
||||
FORMAT_DHWNC = 36;
|
||||
FORMAT_FRACTAL_Z_3D_TRANSPOSE = 37; // 3D filter(transpose) input tensor format
|
||||
FORMAT_FRACTAL_ZN_LSTM = 38;
|
||||
FORMAT_FRACTAL_Z_G = 39;
|
||||
FORMAT_RESERVED = 40;
|
||||
// Add new formats definition here
|
||||
FORMAT_MAX = 0xff;
|
||||
}
|
||||
|
||||
message OriginalOp {
|
||||
string name = 1;
|
||||
uint32 output_index = 2;
|
||||
OutputDataType data_type = 3;
|
||||
OutputFormat format = 4;
|
||||
}
|
||||
|
||||
message Shape {
|
||||
repeated uint64 dim = 1;
|
||||
}
|
||||
|
||||
message OpOutput {
|
||||
OutputDataType data_type = 1;
|
||||
OutputFormat format = 2;
|
||||
Shape shape = 3;
|
||||
OriginalOp original_op = 4; // the original op corresponding to the output
|
||||
bytes data = 5;
|
||||
uint64 size = 6;
|
||||
Shape original_shape = 7;
|
||||
int32 sub_format = 8;
|
||||
}
|
||||
|
||||
message OpInput {
|
||||
OutputDataType data_type = 1;
|
||||
OutputFormat format = 2;
|
||||
Shape shape = 3;
|
||||
bytes data = 4;
|
||||
uint64 size = 5;
|
||||
Shape original_shape = 6;
|
||||
int32 sub_format = 7;
|
||||
}
|
||||
|
||||
enum BufferType {
|
||||
L1 = 0;
|
||||
}
|
||||
|
||||
message OpBuffer {
|
||||
BufferType buffer_type = 1;
|
||||
bytes data = 2;
|
||||
uint64 size = 3;
|
||||
}
|
||||
|
||||
message DumpData {
|
||||
string version = 1;
|
||||
uint64 dump_time = 2;
|
||||
repeated OpOutput output = 3;
|
||||
repeated OpInput input = 4;
|
||||
repeated OpBuffer buffer = 5;
|
||||
string op_name = 6;
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "utils/log_adapter.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "proto/dump_data.pb.h"
|
||||
#include "toolchain/adx_datadump_callback.h"
|
||||
|
||||
using Adx::DumpChunk;
|
||||
#endif
|
||||
// This class builds dump data received from the adx server. Tensor data for
// each kernel is divided into pieces, each wrapped in a DumpChunk struct;
// this class merges the chunks and constructs the final dump data object.
class DumpDataBuilder {
 public:
  DumpDataBuilder() {}

  ~DumpDataBuilder() = default;

#ifdef ENABLE_D
  // Appends one chunk's payload to the internal list. Returns false on
  // allocation failure so the caller can abort the transfer gracefully.
  bool CopyDumpChunk(const DumpChunk *dump_chunk) {
    try {
      uint32_t buf_sz = dump_chunk->bufLen;
      std::string buffer_str(reinterpret_cast<const char *>(dump_chunk->dataBuf), buf_sz);
      chunk_list_.push_back(buffer_str);
      total_sz_ += buf_sz;
    } catch (const std::bad_alloc &err) {  // catch exceptions by const reference
      MS_LOG(ERROR) << "Failed to allocate memory for " << dump_chunk->fileName << ", reason: " << err.what();
      return false;
    }
    return true;
  }

  // Merges all received chunks, parses the protobuf header and copies the raw
  // tensor payload into *data_ptr.
  // Merged layout: [8-byte header length][proto header][raw tensor data].
  bool ConstructDumpData(debugger::dump::DumpData *dump_data_proto, std::vector<char> *data_ptr) {
    if (chunk_list_.empty()) {
      return false;
    }
    // merge several chunks into one piece.
    std::string dump_proto_str;
    dump_proto_str.reserve(total_sz_);
    for (const auto &item : chunk_list_) {  // const ref: avoid re-copying every chunk
      dump_proto_str += item;
    }
    chunk_list_.clear();

    const int8_t header_len_offset = 8;
    uint64_t header_len = *reinterpret_cast<const uint64_t *>(dump_proto_str.c_str());
    std::string header = dump_proto_str.substr(header_len_offset, header_len);
    if (!(*dump_data_proto).ParseFromString(header)) {
      MS_LOG(ERROR) << "Failed to parse dump proto file.";
      return false;
    }
    auto data_sz = total_sz_ - header_len_offset - header_len;
    data_ptr->resize(data_sz);
    auto ret = memcpy_s(data_ptr->data(), data_sz, dump_proto_str.c_str() + header_len_offset + header_len, data_sz);
    if (ret != 0) {
      MS_LOG(ERROR) << "Failed to get data from Adx";
      return false;
    }
    return true;
  }
#endif

 private:
  std::vector<std::string> chunk_list_;  // raw chunk payloads in arrival order
  uint64_t total_sz_{0};                 // total bytes accumulated across chunk_list_
};
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
|
|
@ -118,7 +118,10 @@ def generate_dump_json(dump_path, json_file_name, test_key):
|
|||
elif test_key == "test_Ascend_async_multi_root_graph_dump":
|
||||
data = async_dump_dict_3
|
||||
data["common_dump_settings"]["path"] = dump_path
|
||||
|
||||
elif test_key == "test_async_dump_file_format":
|
||||
data = async_dump_dict
|
||||
data["common_dump_settings"]["path"] = dump_path
|
||||
data["common_dump_settings"]["file_format"] = "npy"
|
||||
else:
|
||||
raise ValueError(
|
||||
"Failed to generate dump json file. The test name value " + test_key + " is invalid.")
|
||||
|
|
|
@ -51,17 +51,12 @@ x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
|
|||
y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@security_off_wrap
|
||||
def test_async_dump():
|
||||
def run_async_dump(test_name):
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
|
||||
with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
|
||||
dump_path = os.path.join(tmp_dir, 'async_dump')
|
||||
dump_config_path = os.path.join(tmp_dir, 'async_dump.json')
|
||||
generate_dump_json(dump_path, dump_config_path, 'test_async_dump')
|
||||
generate_dump_json(dump_path, dump_config_path, test_name)
|
||||
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
|
||||
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
|
||||
if os.path.isdir(dump_path):
|
||||
|
@ -76,6 +71,35 @@ def test_async_dump():
|
|||
del os.environ['MINDSPORE_DUMP_CONFIG']
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
    """
    Feature: Async dump on Ascend.
    Description: Run async dump with the default ``file_format`` value (no ``file_format`` key in the
        dump json config).
    Expectation: Dump data files are generated in protobuf format (file names suffixed with a timestamp).
    """
    run_async_dump("test_async_dump")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="wait for run package updates in Dec 01")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_file_format():
    """
    Feature: Async dump on Ascend with ``file_format`` set to ``npy``.
    Description: Run async dump with ``file_format`` configured as ``npy`` in the dump json config.
    Expectation: Dump data files are generated in npy file format.
    """
    run_async_dump("test_async_dump_file_format")
|
||||
|
||||
|
||||
def run_e2e_dump():
|
||||
if sys.platform != 'linux':
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue