!15928 unify dump path and support npy format

From: @zhangbuxue
Reviewed-by: @guoqi1024,@zhaizhiqiang
Signed-off-by: @zhaizhiqiang
This commit is contained in:
mindspore-ci-bot 2021-05-08 11:32:24 +08:00 committed by Gitee
commit c74e66e759
19 changed files with 350 additions and 68 deletions

View File

@ -37,7 +37,7 @@ class ReduceCPUKernel : public CPUKernel {
enum ReduceType { kReduceAll, kReduceAny, kReduceMax, kReduceMin, kReduceSum, kReduceMean }; enum ReduceType { kReduceAll, kReduceAny, kReduceMax, kReduceMin, kReduceSum, kReduceMean };
std::vector<size_t> input_shape_; std::vector<size_t> input_shape_;
std::vector<int64_t> axis_; std::vector<int64_t> axis_;
ReduceType reduce_type_; ReduceType reduce_type_{kReduceAll};
std::function<void(const T *, size_t, T *)> reduce_func_; std::function<void(const T *, size_t, T *)> reduce_func_;
}; };

View File

@ -12,6 +12,7 @@ set(_DEBUG_SRC_LIST
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_json_parser.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_json_parser.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/cpu_e2e_dump.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/cpu_e2e_dump.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_utils.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_utils.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/npy_header.cc"
) )
set(_OFFLINE_SRC_LIST set(_OFFLINE_SRC_LIST

View File

@ -19,7 +19,7 @@
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
namespace mindspore { namespace mindspore {
void CPUE2eDump::DumpCNodeData(const CNodePtr &node) { void CPUE2eDump::DumpCNodeData(const CNodePtr &node, uint32_t graph_id) {
MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(node);
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string kernel_name = node->fullname_with_scope(); std::string kernel_name = node->fullname_with_scope();
@ -29,7 +29,7 @@ void CPUE2eDump::DumpCNodeData(const CNodePtr &node) {
MS_LOG(DEBUG) << "E2e dump CNode data start: " << kernel_name << ", current iteration is " MS_LOG(DEBUG) << "E2e dump CNode data start: " << kernel_name << ", current iteration is "
<< dump_json_parser.cur_dump_iter(); << dump_json_parser.cur_dump_iter();
std::string dump_path = GenerateDumpPath(); std::string dump_path = GenerateDumpPath(graph_id);
if (dump_json_parser.InputNeedDump()) { if (dump_json_parser.InputNeedDump()) {
DumpCNodeInputs(node, dump_path); DumpCNodeInputs(node, dump_path);
} }
@ -129,12 +129,12 @@ void CPUE2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t outp
DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type); DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type);
} }
void CPUE2eDump::DumpParametersAndConst(const session::KernelGraph *graph) { void CPUE2eDump::DumpParametersAndConst(const session::KernelGraph *graph, uint32_t graph_id) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
MS_LOG(INFO) << "Start e2e dump parameters and Const values"; MS_LOG(INFO) << "Start e2e dump parameters and Const values";
std::map<std::string, size_t> const_map; std::map<std::string, size_t> const_map;
GetConstantId(graph, &const_map); GetConstantId(graph, &const_map);
const std::string &dump_path = GenerateDumpPath(); const std::string &dump_path = GenerateDumpPath(graph_id);
// dump parameters // dump parameters
const auto &parameters = graph->inputs(); const auto &parameters = graph->inputs();

View File

@ -29,9 +29,9 @@ class CPUE2eDump {
CPUE2eDump() = default; CPUE2eDump() = default;
~CPUE2eDump() = default; ~CPUE2eDump() = default;
// Dump data when task error. // Dump data when task error.
static void DumpParametersAndConst(const session::KernelGraph *graph); static void DumpParametersAndConst(const session::KernelGraph *graph, uint32_t graph_id);
static void DumpCNodeData(const CNodePtr &node); static void DumpCNodeData(const CNodePtr &node, uint32_t graph_id);
private: private:
static void DumpCNodeInputs(const CNodePtr &node, const std::string &dump_path); static void DumpCNodeInputs(const CNodePtr &node, const std::string &dump_path);

View File

@ -20,12 +20,14 @@
#include "utils/ms_context.h" #include "utils/ms_context.h"
#include "utils/convert_utils_base.h" #include "utils/convert_utils_base.h"
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "debug/data_dump/npy_header.h"
namespace { namespace {
constexpr auto kCommonDumpSettings = "common_dump_settings"; constexpr auto kCommonDumpSettings = "common_dump_settings";
constexpr auto kAsyncDumpSettings = "async_dump_settings"; constexpr auto kAsyncDumpSettings = "async_dump_settings";
constexpr auto kE2eDumpSettings = "e2e_dump_settings"; constexpr auto kE2eDumpSettings = "e2e_dump_settings";
constexpr auto kDumpMode = "dump_mode"; constexpr auto kDumpMode = "dump_mode";
constexpr auto kDumpFormat = "dump_format";
constexpr auto kPath = "path"; constexpr auto kPath = "path";
constexpr auto kNetName = "net_name"; constexpr auto kNetName = "net_name";
constexpr auto kIteration = "iteration"; constexpr auto kIteration = "iteration";
@ -42,6 +44,8 @@ constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG";
} // namespace } // namespace
namespace mindspore { namespace mindspore {
uint32_t DumpJsonParser::dump_format_ = 0;
auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) { auto DumpJsonParser::CheckJsonKeyExist(const nlohmann::json &content, const std::string &key) {
auto iter = content.find(key); auto iter = content.find(key);
if (iter == content.end()) { if (iter == content.end()) {
@ -137,13 +141,15 @@ bool DumpJsonParser::GetIterDumpFlag() {
return e2e_dump_enabled_ && (iteration_ == 0 || cur_dump_iter_ == iteration_); return e2e_dump_enabled_ && (iteration_ == 0 || cur_dump_iter_ == iteration_);
} }
bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len) { bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len, const ShapeVector &shape,
TypeId type) {
if (filename.empty() || data == nullptr || len == 0) { if (filename.empty() || data == nullptr || len == 0) {
MS_LOG(ERROR) << "Incorrect parameter."; MS_LOG(ERROR) << "Incorrect parameter.";
return false; return false;
} }
auto realpath = Common::GetRealPath(filename); std::string file_format = dump_format_ == 1 ? ".npy" : ".bin";
auto realpath = Common::GetRealPath(filename + file_format);
if (!realpath.has_value()) { if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed."; MS_LOG(ERROR) << "Get real path failed.";
return false; return false;
@ -154,6 +160,10 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
return false; return false;
} }
if (dump_format_ == 1) {
std::string npy_header = GenerateNpyHeader(shape, type);
fd << npy_header;
}
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
fd.close(); fd.close();
return true; return true;
@ -176,6 +186,7 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) {
ParseInputOutput(*input_output); ParseInputOutput(*input_output);
ParseKernels(*kernels); ParseKernels(*kernels);
ParseSupportDevice(*support_device); ParseSupportDevice(*support_device);
ParseDumpFormat(*common_dump_settings);
} }
void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) { void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) {
@ -209,19 +220,19 @@ void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) {
void CheckJsonUnsignedType(const nlohmann::json &content, const std::string &key) { void CheckJsonUnsignedType(const nlohmann::json &content, const std::string &key) {
if (!content.is_number_unsigned()) { if (!content.is_number_unsigned()) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be unsigned int type"; MS_LOG(EXCEPTION) << "Dump config parse failed, " << key << " should be unsigned int type";
} }
} }
void CheckJsonStringType(const nlohmann::json &content, const std::string &key) { void CheckJsonStringType(const nlohmann::json &content, const std::string &key) {
if (!content.is_string()) { if (!content.is_string()) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be string type"; MS_LOG(EXCEPTION) << "Dump config parse failed, " << key << " should be string type";
} }
} }
void CheckJsonArrayType(const nlohmann::json &content, const std::string &key) { void CheckJsonArrayType(const nlohmann::json &content, const std::string &key) {
if (!content.is_array()) { if (!content.is_array()) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed." << key << " should be array type"; MS_LOG(EXCEPTION) << "Dump config parse failed, " << key << " should be array type";
} }
} }
@ -229,7 +240,18 @@ void DumpJsonParser::ParseDumpMode(const nlohmann::json &content) {
CheckJsonUnsignedType(content, kDumpMode); CheckJsonUnsignedType(content, kDumpMode);
dump_mode_ = content; dump_mode_ = content;
if (dump_mode_ != 0 && dump_mode_ != 1) { if (dump_mode_ != 0 && dump_mode_ != 1) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed. dump_mode should be 0 or 1"; MS_LOG(EXCEPTION) << "Dump config parse failed, dump_mode should be 0 or 1, but got " << dump_format_;
}
}
void DumpJsonParser::ParseDumpFormat(const nlohmann::json &content) {
auto iter = content.find(kDumpFormat);
if (iter == content.end()) {
return;
}
dump_format_ = *iter;
if (dump_format_ != 0 && dump_format_ != 1) {
MS_LOG(EXCEPTION) << "Dump config parse failed, dump_format should be 0(.bin) or 1(.npy), but got " << dump_format_;
} }
} }

View File

@ -33,7 +33,8 @@ class DumpJsonParser {
} }
void Parse(); void Parse();
static bool DumpToFile(const std::string &filename, const void *data, size_t len); static bool DumpToFile(const std::string &filename, const void *data, size_t len, const ShapeVector &shape,
TypeId type);
void CopyJsonToDir(); void CopyJsonToDir();
bool NeedDump(const std::string &op_full_name) const; bool NeedDump(const std::string &op_full_name) const;
void MatchKernel(const std::string &kernel_name); void MatchKernel(const std::string &kernel_name);
@ -62,6 +63,7 @@ class DumpJsonParser {
DISABLE_COPY_AND_ASSIGN(DumpJsonParser) DISABLE_COPY_AND_ASSIGN(DumpJsonParser)
std::mutex lock_; std::mutex lock_;
static uint32_t dump_format_;
bool async_dump_enabled_{false}; bool async_dump_enabled_{false};
bool e2e_dump_enabled_{false}; bool e2e_dump_enabled_{false};
uint32_t dump_mode_{0}; uint32_t dump_mode_{0};
@ -84,6 +86,7 @@ class DumpJsonParser {
auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key); auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key);
void ParseDumpMode(const nlohmann::json &content); void ParseDumpMode(const nlohmann::json &content);
void ParseDumpFormat(const nlohmann::json &content);
void ParseDumpPath(const nlohmann::json &content); void ParseDumpPath(const nlohmann::json &content);
void ParseNetName(const nlohmann::json &content); void ParseNetName(const nlohmann::json &content);
void ParseIteration(const nlohmann::json &content); void ParseIteration(const nlohmann::json &content);

View File

@ -34,7 +34,7 @@ uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
return kernel_runtime->device_id(); return kernel_runtime->device_id();
} }
std::string GenerateDumpPath(const uint32_t *device_id) { std::string GenerateDumpPath(uint32_t graph_id, const uint32_t *device_id) {
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string net_name = dump_json_parser.net_name(); std::string net_name = dump_json_parser.net_name();
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter()); std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
@ -42,12 +42,9 @@ std::string GenerateDumpPath(const uint32_t *device_id) {
if (dump_path.back() != '/') { if (dump_path.back() != '/') {
dump_path += "/"; dump_path += "/";
} }
if (device_id == nullptr) { uint32_t physical_device = device_id == nullptr ? 0 : ConvertPhysicalDeviceId(*device_id);
dump_path += (net_name + "/iteration_" + iterator); dump_path += ("rank_" + std::to_string(physical_device) + "/" + net_name + "/graph_" + std::to_string(graph_id) +
} else { "/iteration_" + iterator);
auto physical_device = ConvertPhysicalDeviceId(*device_id);
dump_path += (net_name + "/device_" + std::to_string(physical_device) + "/iteration_" + iterator);
}
return dump_path; return dump_path;
} }

View File

@ -27,7 +27,7 @@ namespace mindspore {
static const size_t PARAMETER_OUTPUT_INDEX = 0; static const size_t PARAMETER_OUTPUT_INDEX = 0;
static const size_t VALUE_NODE_OUTPUT_INDEX = 0; static const size_t VALUE_NODE_OUTPUT_INDEX = 0;
std::string GenerateDumpPath(const uint32_t *device_id = nullptr); std::string GenerateDumpPath(uint32_t graph_id, const uint32_t *device_id = nullptr);
void GetFileKernelName(NotNull<std::string *> kernel_name); void GetFileKernelName(NotNull<std::string *> kernel_name);

View File

@ -223,18 +223,19 @@ void E2eDump::DumpParametersAndConst(const session::KernelGraph *graph, const st
bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t device_id, const Debugger *debugger) { bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t device_id, const Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
uint32_t graph_id = graph->graph_id();
if (starting_graph_id == INT32_MAX) { if (starting_graph_id == INT32_MAX) {
starting_graph_id = graph->graph_id(); starting_graph_id = graph_id;
} }
if (starting_graph_id == graph->graph_id()) { if (starting_graph_id == graph_id) {
dump_json_parser.UpdateDumpIter(); dump_json_parser.UpdateDumpIter();
} }
if (!dump_json_parser.GetIterDumpFlag()) { if (!dump_json_parser.GetIterDumpFlag()) {
return true; return true;
} }
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter(); MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
MS_LOG(INFO) << "Current graph id is " << graph->graph_id(); MS_LOG(INFO) << "Current graph id is " << graph_id;
std::string dump_path = GenerateDumpPath(&device_id); std::string dump_path = GenerateDumpPath(graph_id, &device_id);
DumpInput(graph, dump_path, debugger); DumpInput(graph, dump_path, debugger);
DumpOutput(graph, dump_path, debugger); DumpOutput(graph, dump_path, debugger);

View File

@ -0,0 +1,145 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump/npy_header.h"
#include <unordered_map>
#include <utility>
#include <sstream>
#include <cmath>
#include "mindspore/core/utils/log_adapter.h"
#include "mindspore/core/ir/dtype.h"
namespace mindspore {
namespace {
// Leading magic bytes that identify a npy file.
const char kMagicPrefix[] = "\x93NUMPY";
// Bytes occupied by the magic string (6) plus the major/minor version pair (2).
const size_t kMagicLen = 8;
// The complete header is padded so the array data starts on a 64-byte boundary.
const size_t kArrayAlign = 64;

// first: header_length_type, second: encoding_type
// header_length_type: 1 represents 2 bytes; 2 and 3 represents 4 bytes
// encoding_type: 1 and 2 represents 'latin1'; 3 represents 'utf8'
using version_type = std::pair<int, int>;

// Element-type descriptor, rendered e.g. as '<f4':
//   byteorder: '<' little endian; '>' big endian; '|' ignore (no byte order)
//   type:      'b' bool; 'u' uint; 'i' int; 'f' float
//   length:    element size in bytes
struct DtypeDescr {
  char byteorder;
  char type;
  size_t length;

  std::string str() const;
};

// Python-dict metadata written into the npy header: dtype description,
// memory order (fortran_order=true means Fortran-contiguous, false means
// C-contiguous) and the array shape.
struct NpyHeader {
 public:
  DtypeDescr dtype_descr;
  bool fortran_order;
  ShapeVector shape;

  std::string str() const;

 private:
  std::string fortran_order_to_str() const;
  std::string shape_to_str() const;
};

std::string DtypeDescr::str() const {
  std::ostringstream out;
  out << "\'" << byteorder << type << length << "\'";
  return out.str();
}

std::string NpyHeader::str() const {
  std::ostringstream out;
  out << "{"
      << "'descr': " << dtype_descr.str() << ", "
      << "'fortran_order': " << fortran_order_to_str() << ", "
      << "'shape': " << shape_to_str() << ", }";
  return out.str();
}

std::string NpyHeader::fortran_order_to_str() const {
  if (fortran_order) {
    return "True";
  }
  return "False";
}

std::string NpyHeader::shape_to_str() const {
  std::ostringstream out;
  out << "(";
  // A trailing comma after every dimension (including "(2,)" for rank 1)
  // matches the Python tuple repr that numpy expects.
  for (const auto dim : shape) {
    out << dim << ",";
  }
  out << ")";
  return out.str();
}
}  // namespace
// Serializes the low `length` bytes of `number` into `byte` in
// little-endian order (least significant byte first), as required by the
// npy header-length field.
void int_to_byte(size_t number, char *byte, size_t length) {
  size_t remaining = number;
  for (size_t pos = 0; pos < length; ++pos) {
    byte[pos] = static_cast<char>(remaining & 0xff);
    remaining >>= 8;
  }
}
std::string GenerateNpyHeader(const ShapeVector &shape, TypeId type_id, bool fortran_order) {
static std::unordered_map<TypeId, DtypeDescr> type_desc_map = {
{kNumberTypeBool, DtypeDescr{'|', 'b', 1}}, {kNumberTypeInt8, DtypeDescr{'|', 'i', 1}},
{kNumberTypeInt16, DtypeDescr{'<', 'i', 2}}, {kNumberTypeInt32, DtypeDescr{'<', 'i', 4}},
{kNumberTypeInt64, DtypeDescr{'<', 'i', 8}}, {kNumberTypeUInt8, DtypeDescr{'|', 'u', 1}},
{kNumberTypeUInt16, DtypeDescr{'<', 'u', 2}}, {kNumberTypeUInt32, DtypeDescr{'<', 'u', 4}},
{kNumberTypeUInt64, DtypeDescr{'<', 'u', 8}}, {kNumberTypeFloat16, DtypeDescr{'<', 'f', 2}},
{kNumberTypeFloat32, DtypeDescr{'<', 'f', 4}}, {kNumberTypeFloat64, DtypeDescr{'<', 'f', 8}},
};
auto type_desc = type_desc_map.find(type_id);
if (type_desc == type_desc_map.end()) {
MS_LOG(EXCEPTION) << "Not support dump the " << TypeIdToType(type_id)->ToString() << " data to npy file.";
}
NpyHeader npy_header{type_desc->second, fortran_order, shape};
std::string header_str = npy_header.str();
size_t header_len = header_str.length();
version_type version{1, 0};
size_t total_len = kMagicLen + 2 + header_len + 1;
if (total_len > std::pow(2, 16)) {
version = {2, 0};
total_len = kMagicLen + 4 + header_len + 1;
}
std::ostringstream out;
out << kMagicPrefix;
out.put(version.first);
out.put(version.second);
size_t pad_len = kArrayAlign - total_len % kArrayAlign;
size_t padding_header_len = header_len + pad_len + 1;
if (version == version_type{1, 0}) {
char length_byte[2];
int_to_byte(padding_header_len, length_byte, 2);
out.write(length_byte, 2);
} else {
char length_byte[4];
int_to_byte(padding_header_len, length_byte, 4);
out.write(length_byte, 4);
}
std::string padding(pad_len, ' ');
out << header_str << padding << "\n";
return out.str();
}
} // namespace mindspore

View File

@ -0,0 +1,28 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_NPY_HEADER_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_NPY_HEADER_H_
#include <string>
#include "mindspore/core/utils/shape_utils.h"
#include "mindspore/core/ir/dtype/type_id.h"
namespace mindspore {
// Generates the npy file header describing an array of the given shape and
// element type. fortran_order=true marks the data as Fortran-contiguous,
// false (default) as C-contiguous. The returned bytes are written in front of
// the raw tensor data so the dump file can be opened with numpy.load.
// Throws (via MS_LOG(EXCEPTION) in the implementation) for type ids that have
// no npy dtype equivalent.
std::string GenerateNpyHeader(const ShapeVector &shape, TypeId type_id, bool fortran_order = false);
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_NPY_HEADER_H_

View File

@ -172,12 +172,11 @@ class TensorLoader {
} else { } else {
shape = shape + "_0"; shape = shape + "_0";
} }
std::string file_extension = ".bin";
std::string path = ""; std::string path = "";
if (trans_flag) { if (trans_flag) {
path = filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt + file_extension; path = filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt;
} else { } else {
path = filepath + '_' + shape + '_' + TypeIdToType(device_type)->ToString() + '_' + addr_format + file_extension; path = filepath + '_' + shape + '_' + TypeIdToType(device_type)->ToString() + '_' + addr_format;
} }
MS_LOG(INFO) << "Dump path is " << path; MS_LOG(INFO) << "Dump path is " << path;
@ -188,7 +187,7 @@ class TensorLoader {
std::shared_ptr<TensorData> node = iter->second; std::shared_ptr<TensorData> node = iter->second;
size_t host_size = node->GetByteSize(); size_t host_size = node->GetByteSize();
return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size); return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size, host_shape, host_type);
} }
MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map"; MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map";
return true; return true;

View File

@ -664,10 +664,8 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
} else { } else {
shape = shape + "_0"; shape = shape + "_0";
} }
std::string file_extension = ".bin";
if (trans_flag) { if (trans_flag) {
std::string path = std::string path = filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt;
filepath + '_' + shape + '_' + TypeIdToType(host_type)->ToString() + '_' + host_fmt + file_extension;
MS_LOG(INFO) << "E2E Dump path is " << path; MS_LOG(INFO) << "E2E Dump path is " << path;
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape); mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
size_t host_size = out_tensor->data().nbytes(); size_t host_size = out_tensor->data().nbytes();
@ -676,17 +674,16 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
MS_LOG(ERROR) << "Copy device mem to host failed"; MS_LOG(ERROR) << "Copy device mem to host failed";
return ret; return ret;
} }
ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size); ret = DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size, host_shape, host_type);
} else { } else {
auto host_tmp = std::vector<uint8_t>(size_); auto host_tmp = std::vector<uint8_t>(size_);
auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); auto ret_rt_memcpy = rtMemcpy(host_tmp.data(), size_, ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
if (ret_rt_memcpy != RT_ERROR_NONE) { if (ret_rt_memcpy != RT_ERROR_NONE) {
MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]"; MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
} }
std::string path = std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_;
filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension;
MS_LOG(INFO) << "E2E Dump path is " << path; MS_LOG(INFO) << "E2E Dump path is " << path;
ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_); ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_, host_shape_, type_id_);
} }
return ret; return ret;

View File

@ -37,10 +37,9 @@ bool CPUDeviceAddress::DumpMemToFile(const std::string &filepath, const std::str
shape += '_' + std::to_string(value); shape += '_' + std::to_string(value);
} }
} }
std::string file_extension = ".bin"; std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_;
std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension;
MS_LOG(DEBUG) << "E2E Dump path is " << path; MS_LOG(DEBUG) << "E2E Dump path is " << path;
ret = DumpJsonParser::DumpToFile(path, ptr_, size_); ret = DumpJsonParser::DumpToFile(path, ptr_, size_, host_shape, host_type);
return ret; return ret;
} }

View File

@ -380,6 +380,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
dump_json_parser.UpdateDumpIter(); dump_json_parser.UpdateDumpIter();
bool iter_dump_flag = dump_json_parser.GetIterDumpFlag(); bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
uint32_t graph_id = kernel_graph->graph_id();
for (const auto &kernel : kernels) { for (const auto &kernel : kernels) {
#ifdef ENABLE_PROFILE #ifdef ENABLE_PROFILE
@ -421,7 +422,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel); MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel);
} }
if (iter_dump_flag) { if (iter_dump_flag) {
CPUE2eDump::DumpCNodeData(kernel); CPUE2eDump::DumpCNodeData(kernel, graph_id);
} }
if (profiler_inst->GetEnableFlag()) { if (profiler_inst->GetEnableFlag()) {
profiler_inst->OpDataProducerEnd(); profiler_inst->OpDataProducerEnd();
@ -439,7 +440,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
#endif #endif
} }
if (iter_dump_flag) { if (iter_dump_flag) {
CPUE2eDump::DumpParametersAndConst(kernel_graph); CPUE2eDump::DumpParametersAndConst(kernel_graph, graph_id);
} }
return true; return true;
} }

View File

@ -0,0 +1,16 @@
{
"common_dump_settings": {
"dump_mode": 0,
"path": "/test",
"net_name": "Net",
"iteration": 0,
"input_output": 0,
"kernels": ["Default/Conv-op12"],
"support_device": [0,1,2,3,4,5,6,7],
"dump_format": 1
},
"e2e_dump_settings": {
"enable": true,
"trans_flag": false
}
}

View File

@ -14,8 +14,10 @@
# ============================================================================ # ============================================================================
import os import os
import json import json
import sys
import time import time
import shutil import shutil
import numpy as np import numpy as np
import pytest import pytest
import mindspore.context as context import mindspore.context as context
@ -29,7 +31,6 @@ from mindspore.nn import Momentum
from mindspore.nn import TrainOneStepCell from mindspore.nn import TrainOneStepCell
from mindspore.nn import WithLossCell from mindspore.nn import WithLossCell
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
class Net(nn.Cell): class Net(nn.Cell):
def __init__(self): def __init__(self):
@ -39,8 +40,10 @@ class Net(nn.Cell):
def construct(self, x_, y_): def construct(self, x_, y_):
return self.add(x_, y_) return self.add(x_, y_)
x = np.random.randn(1, 3, 3, 4).astype(np.float32)
y = np.random.randn(1, 3, 3, 4).astype(np.float32) x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)
def change_current_dump_json(file_name, dump_path): def change_current_dump_json(file_name, dump_path):
with open(file_name, 'r+') as f: with open(file_name, 'r+') as f:
@ -50,6 +53,7 @@ def change_current_dump_json(file_name, dump_path):
with open(file_name, 'w') as f: with open(file_name, 'w') as f:
json.dump(data, f) json.dump(data, f)
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training
@ -61,7 +65,7 @@ def test_async_dump():
change_current_dump_json('async_dump.json', dump_path) change_current_dump_json('async_dump.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/async_dump.json" os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/async_dump.json"
device_id = context.get_context("device_id") device_id = context.get_context("device_id")
dump_file_path = pwd + '/async_dump/device_{}/Net_graph_0/0/0/'.format(device_id) dump_file_path = dump_path + '/device_{}/Net_graph_0/0/0/'.format(device_id)
if os.path.isdir(dump_path): if os.path.isdir(dump_path):
shutil.rmtree(dump_path) shutil.rmtree(dump_path)
add = Net() add = Net()
@ -69,24 +73,90 @@ def test_async_dump():
time.sleep(5) time.sleep(5)
assert len(os.listdir(dump_file_path)) == 1 assert len(os.listdir(dump_file_path)) == 1
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training def run_e2e_dump_bin():
@pytest.mark.platform_x86_ascend_training if sys.platform != 'linux':
@pytest.mark.env_onecard return
def test_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
pwd = os.getcwd() pwd = os.getcwd()
dump_path = pwd + "/e2e_dump" dump_path = pwd + '/e2e_dump'
change_current_dump_json('e2e_dump.json', dump_path) change_current_dump_json('e2e_dump_bin.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/e2e_dump.json" os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump_bin.json'
device_id = context.get_context("device_id") device_id = context.get_context("device_id")
dump_file_path = pwd + '/e2e_dump/Net/device_{}/iteration_1/'.format(device_id) dump_file_path = dump_path + '/rank_{}/Net/graph_0/iteration_1/'.format(device_id)
if os.path.isdir(dump_path): if os.path.isdir(dump_path):
shutil.rmtree(dump_path) shutil.rmtree(dump_path)
add = Net() add = Net()
add(Tensor(x), Tensor(y)) add(Tensor(x), Tensor(y))
time.sleep(5) if context.get_context("device_target") == "Ascend":
assert len(os.listdir(dump_file_path)) == 5 output_name = "Default--Add-op1_output_0_shape_2_3_Float32_DefaultFormat.bin"
else:
output_name = "Default--Add-op3_output_0_shape_2_3_Float32_DefaultFormat.bin"
output_path = dump_file_path + output_name
real_path = os.path.realpath(output_path)
output = np.fromfile(real_path, dtype=np.float32)
expect = np.array([8, 10, 12, 14, 16, 18], np.float32)
assert output.dtype == expect.dtype
assert np.array_equal(output, expect)
def run_e2e_dump_npy():
if sys.platform != 'linux':
return
pwd = os.getcwd()
dump_path = pwd + '/e2e_dump'
change_current_dump_json('e2e_dump_npy.json', dump_path)
os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump_npy.json'
device_id = context.get_context("device_id")
dump_file_path = dump_path + '/rank_{}/Net/graph_0/iteration_1/'.format(device_id)
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
add = Net()
add(Tensor(x), Tensor(y))
if context.get_context("device_target") == "Ascend":
output_name = "Default--Add-op1_output_0_shape_2_3_Float32_DefaultFormat.npy"
else:
output_name = "Default--Add-op3_output_0_shape_2_3_Float32_DefaultFormat.npy"
output_path = dump_file_path + output_name
real_path = os.path.realpath(output_path)
output = np.load(real_path)
expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
assert output.dtype == expect.dtype
assert np.array_equal(output, expect)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_e2e_dump_bin():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
run_e2e_dump_bin()
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_e2e_dump_npy():
    """Ascend graph-mode e2e dump in npy format."""
    context.set_context(device_target="Ascend", mode=context.GRAPH_MODE)
    run_e2e_dump_npy()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cpu_e2e_dump_bin():
    """CPU graph-mode e2e dump in bin format."""
    context.set_context(device_target="CPU", mode=context.GRAPH_MODE)
    run_e2e_dump_bin()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cpu_e2e_dump_npy():
    """CPU graph-mode e2e dump in npy format.

    NOTE(review): dropped save_graphs=True — none of the sibling dump tests
    set it, and it only litters the working directory with graph IR files;
    it looked like a debugging leftover rather than part of the test.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    run_e2e_dump_npy()
class ReluReduceMeanDenseRelu(Cell): class ReluReduceMeanDenseRelu(Cell):
def __init__(self, kernel, bias, in_channel, num_class): def __init__(self, kernel, bias, in_channel, num_class):
@ -116,11 +186,13 @@ def search_path(path, keyword):
search_path(each_path, keyword) search_path(each_path, keyword)
return None return None
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_async_dump_net_multi_layer_mode1(): def test_async_dump_net_multi_layer_mode1():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
test_name = "test_async_dump_net_multi_layer_mode1" test_name = "test_async_dump_net_multi_layer_mode1"
json_file = os.path.join(os.getcwd(), "{}.json".format(test_name)) json_file = os.path.join(os.getcwd(), "{}.json".format(test_name))
device_id = context.get_context("device_id") device_id = context.get_context("device_id")
@ -131,7 +203,8 @@ def test_async_dump_net_multi_layer_mode1():
bias = Tensor(np.ones((1000,)).astype(np.float32)) bias = Tensor(np.ones((1000,)).astype(np.float32))
net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000) net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000)
criterion = SoftmaxCrossEntropyWithLogits(sparse=False) criterion = SoftmaxCrossEntropyWithLogits(sparse=False)
optimizer = Momentum(learning_rate=0.1, momentum=0.1, params=filter(lambda x: x.requires_grad, net.get_parameters())) optimizer = Momentum(learning_rate=0.1, momentum=0.1,
params=filter(lambda x: x.requires_grad, net.get_parameters()))
net_with_criterion = WithLossCell(net, criterion) net_with_criterion = WithLossCell(net, criterion)
train_network = TrainOneStepCell(net_with_criterion, optimizer) train_network = TrainOneStepCell(net_with_criterion, optimizer)
train_network.set_train() train_network.set_train()

View File

@ -37,11 +37,11 @@ TEST_F(TestMemoryDumper, test_DumpToFileAbsPath) {
} }
int ret; int ret;
char filename[] = "/tmp/dumpToFileTestFile"; const std::string filename = "/tmp/dumpToFileTestFile";
ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {10, 100}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open((filename + ".bin").c_str(), O_RDONLY);
int readBack[1000] = {0}; int readBack[1000] = {0};
int readSize = read(fd, readBack, len * sizeof(int)); int readSize = read(fd, readBack, len * sizeof(int));
(void)close(fd); (void)close(fd);
@ -69,11 +69,11 @@ TEST_F(TestMemoryDumper, test_DumpToFileRelativePath) {
} }
int ret; int ret;
char filename[] = "../../dumpToFileTestFile"; const std::string filename = "../../dumpToFileTestFile";
ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector{100, 10}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open((filename + ".bin").c_str(), O_RDONLY);
int readBack[1000] = {0}; int readBack[1000] = {0};
int readSize = read(fd, readBack, len * sizeof(int)); int readSize = read(fd, readBack, len * sizeof(int));
(void)close(fd); (void)close(fd);
@ -101,11 +101,11 @@ TEST_F(TestMemoryDumper, test_DumpToFileNotExistDir) {
data[i] = i % 10; data[i] = i % 10;
} }
char filename[] = "./tmp/dumpToFileTestFile"; const std::string filename = "./tmp/dumpToFileTestFile";
int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int)); int ret = DumpJsonParser::DumpToFile(filename, data, len * sizeof(int), ShapeVector {1,}, kNumberTypeInt32);
ASSERT_EQ(ret, true); ASSERT_EQ(ret, true);
int fd = open(filename, O_RDONLY); int fd = open((filename + ".bin").c_str(), O_RDONLY);
int readBack[1000] = {0}; int readBack[1000] = {0};
int readSize = read(fd, readBack, len * sizeof(int)); int readSize = read(fd, readBack, len * sizeof(int));
(void)close(fd); (void)close(fd);