!14667 add dump data function for CPU

From: @zhangbuxue
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2021-04-08 20:37:30 +08:00 committed by Gitee
commit 33edd67261
21 changed files with 487 additions and 172 deletions

View File

@ -57,7 +57,7 @@
#include "backend/optimizer/graph_kernel/shape_ops_splitter.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
#include "backend/session/ascend_auto_monad.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/data_dump/e2e_dump.h"
#include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h"
#ifdef ENABLE_DEBUGGER
@ -991,7 +991,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph);
E2eDumpUtil::DumpData(kernel_graph.get(), device_id_);
E2eDump::DumpData(kernel_graph.get(), device_id_);
MS_LOG(INFO) << "Finish!";
}

View File

@ -32,6 +32,7 @@
#include "backend/optimizer/pass/erase_visit_attr.h"
#include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h"
#include "debug/data_dump/dump_json_parser.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/util.h"
#include "ps/ps_context.h"
@ -39,6 +40,12 @@
namespace mindspore {
namespace session {
// Initializes the CPU session. Before creating the executor it asks the dump
// json parser to copy the dump configuration file into the dump directory
// (the parser itself decides whether dump is enabled — see CopyJsonToDir).
void CPUSession::Init(uint32_t device_id) {
  // Dump json config file if dump is enabled
  DumpJsonParser::GetInstance().CopyJsonToDir();
  InitExecutor(kCPUDevice, device_id);
}
ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf, KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(anf);
MS_EXCEPTION_IF_NULL(graph);

View File

@ -29,7 +29,7 @@ class CPUSession : public SessionBasic {
public:
CPUSession() = default;
~CPUSession() override = default;
void Init(uint32_t device_id) override { InitExecutor(kCPUDevice, device_id); }
void Init(uint32_t device_id) override;
protected:
void UnifyMindIR(const KernelGraphPtr &graph) override { return; }

View File

@ -49,7 +49,7 @@
#include "backend/optimizer/pass/getitem_tuple.h"
#include "common/trans.h"
#include "debug/anf_ir_dump.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/data_dump/e2e_dump.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/proto_exporter.h"
#else
@ -511,7 +511,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
if (debugger_->DebuggerBackendEnabled()) {
MS_EXCEPTION_IF_NULL(kernel_graph);
E2eDumpUtil::DumpData(kernel_graph.get(), device_id_, debugger_.get());
E2eDump::DumpData(kernel_graph.get(), device_id_, debugger_.get());
} else {
DumpJsonParser::GetInstance().UpdateDumpIter();
}

View File

@ -9,6 +9,9 @@ set(_DEBUG_SRC_LIST
"${CMAKE_CURRENT_SOURCE_DIR}/trace.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/env_config_parser.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_json_parser.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/cpu_e2e_dump.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_utils.cc"
)
set(_OFFLINE_SRC_LIST
@ -42,8 +45,7 @@ if(ENABLE_DEBUGGER)
endif()
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
list(APPEND _DEBUG_SRC_LIST "data_dump/dump_json_parser.cc")
list(APPEND _DEBUG_SRC_LIST "data_dump/e2e_dump_util.cc")
list(APPEND _DEBUG_SRC_LIST "data_dump/e2e_dump.cc")
endif()
set_property(SOURCE ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST} PROPERTY COMPILE_DEFINITIONS

View File

@ -0,0 +1,151 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump/cpu_e2e_dump.h"
#include <map>
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
// Dumps the tensors of a single CNode for CPU e2e dump.
// The kernel is skipped unless selected by the dump json config; inputs and
// outputs are dumped independently, controlled by the config switches.
void CPUE2eDump::DumpCNodeData(const CNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string kernel_name = node->fullname_with_scope();
  // Honor the kernel filter from the dump configuration.
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  MS_LOG(DEBUG) << "E2e dump CNode data start: " << kernel_name << ", current iteration is "
                << dump_json_parser.cur_dump_iter();
  // No device id: CPU dump path has no "device_N" component (see GenerateDumpPath).
  std::string dump_path = GenerateDumpPath();
  if (dump_json_parser.InputNeedDump()) {
    DumpCNodeInputs(node, dump_path);
  }
  if (dump_json_parser.OutputNeedDump()) {
    DumpCNodeOutputs(node, dump_path);
  }
}
// Records the kernel as matched in the dump config, then dumps all of the
// node's input tensors under dump_path.
void CPUE2eDump::DumpCNodeInputs(const CNodePtr &node, const std::string &dump_path) {
  MS_EXCEPTION_IF_NULL(node);
  std::string scoped_name = node->fullname_with_scope();
  MS_LOG(DEBUG) << "Start e2e dump CNode inputs data: " << scoped_name;
  DumpJsonParser::GetInstance().MatchKernel(scoped_name);
  DumpInputImpl(node, dump_path, &scoped_name);
}
// Records the kernel as matched in the dump config, then dumps all of the
// node's output tensors under dump_path.
void CPUE2eDump::DumpCNodeOutputs(const CNodePtr &node, const std::string &dump_path) {
  MS_EXCEPTION_IF_NULL(node);
  std::string scoped_name = node->fullname_with_scope();
  MS_LOG(DEBUG) << "Start e2e dump CNode outputs data: " << scoped_name;
  DumpJsonParser::GetInstance().MatchKernel(scoped_name);
  DumpOutputImpl(node, dump_path, &scoped_name);
}
// Dumps every input tensor of `node` to a file named
// "<kernel_name>_input_<j>" under dump_path.
// kernel_name is sanitized in place (scope separators replaced) so it can be
// used as a file-name component.
// Fix over original: removed the unused local `tensor_name`, which was
// computed from node->fullname_with_scope() and never read.
void CPUE2eDump::DumpInputImpl(const CNodePtr &node, const std::string &dump_path, std::string *kernel_name) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    // Each input of this node is an output slot of some producer node.
    auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    // Skip inputs whose producer has no materialized output address.
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes));
    auto type = AnfAlgo::GetOutputInferDataType(input, index);
    std::string file_path = dump_path + '/' + *kernel_name + '_' + "input_" + std::to_string(j);
    DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type);
  }
}
void CPUE2eDump::DumpOutputImpl(const CNodePtr &node, const std::string &dump_path, std::string *kernel_name) {
MS_EXCEPTION_IF_NULL(node);
GetFileKernelName(NOT_NULL(kernel_name));
auto output_size = AnfAlgo::GetOutputTensorNum(node);
for (size_t j = 0; j < output_size; ++j) {
if (!AnfAlgo::OutputAddrExist(node, j)) {
continue;
}
auto addr = AnfAlgo::GetOutputAddr(node, j);
ShapeVector int_shapes;
GetDumpIntShape(node, j, NOT_NULL(&int_shapes));
auto type = AnfAlgo::GetOutputInferDataType(node, j);
std::string file_path = dump_path + '/' + *kernel_name + '_' + "output_" + std::to_string(j);
DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type);
}
}
// Dumps one Parameter or ValueNode tensor to "<dump_name>_output_0" under
// dump_path. For ValueNodes the file name uses the constant id assigned by
// GetConstantId ("cst<N>"); nodes absent from const_map are skipped.
// Fix over original: the original sanitized `node_name` (which is never used
// after that point) while building the file path from the unsanitized
// `dump_name` — a parameter whose scoped name contains '/' would produce a
// path with spurious directory components. Sanitize `dump_name` instead
// (a "cst<N>" name contains no separators, so this is a no-op for constants).
void CPUE2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                   std::map<std::string, size_t> *const_map) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
    return;
  }
  std::string node_name = anf_node->fullname_with_scope();
  std::string dump_name = node_name;
  if (anf_node->isa<ValueNode>()) {
    auto iter = const_map->find(node_name);
    if (iter == const_map->end()) {
      return;
    }
    dump_name = std::string("cst") + std::to_string(iter->second);
  }
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&dump_name));
  // check if output address exists, if not, return;
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes));
  auto type = AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  std::string file_path = dump_path + '/' + dump_name + '_' + "output_0";
  DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type);
}
// Dumps all graph inputs (parameters) and all graph value nodes (constants)
// of `graph` into the current iteration's dump directory. Constant ids are
// assigned first so ValueNode files get stable "cst<N>" names.
void CPUE2eDump::DumpParametersAndConst(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_LOG(INFO) << "Start e2e dump parameters and Const values";
  std::map<std::string, size_t> const_map;
  GetConstantId(graph, &const_map);
  const std::string &dump_path = GenerateDumpPath();
  // dump parameters
  for (const auto &input : graph->inputs()) {
    DumpSingleAnfNode(input, PARAMETER_OUTPUT_INDEX, dump_path, &const_map);
  }
  // dump const values
  for (const auto &value_node : graph->graph_value_nodes()) {
    DumpSingleAnfNode(value_node, VALUE_NODE_OUTPUT_INDEX, dump_path, &const_map);
  }
}
} // namespace mindspore

View File

@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_CPU_E_2_E_DUMP_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_CPU_E_2_E_DUMP_H_
#include <map>
#include <string>
#include "debug/data_dump/dump_json_parser.h"
#include "debug/data_dump/dump_utils.h"
namespace mindspore {
// End-to-end (e2e) data dump helper for the CPU backend: writes kernel
// input/output tensors and graph parameters/constants to files, driven by
// the dump json configuration (see debug/data_dump/dump_json_parser.h).
// All members are static; instances carry no state.
class CPUE2eDump {
 public:
  CPUE2eDump() = default;
  ~CPUE2eDump() = default;
  // Dump data when task error.
  // NOTE(review): the comment above is inherited from E2eDump — on CPU these
  // entry points appear to be called during normal execution too; confirm.
  // Dumps all graph parameters and constant values of `graph`.
  static void DumpParametersAndConst(const session::KernelGraph *graph);
  // Dumps inputs/outputs of one CNode, subject to the dump json filters.
  static void DumpCNodeData(const CNodePtr &node);

 private:
  static void DumpCNodeInputs(const CNodePtr &node, const std::string &dump_path);
  static void DumpCNodeOutputs(const CNodePtr &node, const std::string &dump_path);
  static void DumpSingleAnfNode(const AnfNodePtr &anf_node, size_t output_index, const std::string &dump_path,
                                std::map<std::string, size_t> *const_map);
  // Both Impl helpers sanitize *kernel_name in place before using it as a
  // file-name component.
  static void DumpInputImpl(const CNodePtr &node, const std::string &dump_path, std::string *kernel_name);
  static void DumpOutputImpl(const CNodePtr &node, const std::string &dump_path, std::string *kernel_name);
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_CPU_E_2_E_DUMP_H_

View File

@ -133,6 +133,9 @@ void DumpJsonParser::CopyJsonToDir() {
ChangeFileMode(realpath.value(), S_IRUSR);
}
}
// Returns true when e2e dump is enabled and the current iteration should be
// dumped: either every iteration (configured iteration_ == 0) or exactly the
// configured one.
bool DumpJsonParser::GetIterDumpFlag() {
  if (!e2e_dump_enabled_) {
    return false;
  }
  return iteration_ == 0 || cur_dump_iter_ == iteration_;
}
bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len) {
if (filename.empty() || data == nullptr || len == 0) {

View File

@ -50,6 +50,7 @@ class DumpJsonParser {
bool trans_flag() const { return trans_flag_; }
uint32_t cur_dump_iter() const { return cur_dump_iter_; }
void UpdateDumpIter() { ++cur_dump_iter_; }
bool GetIterDumpFlag();
bool InputNeedDump() const;
bool OutputNeedDump() const;
std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;

View File

@ -0,0 +1,131 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump/dump_utils.h"
#include <map>
#include <vector>
#include <algorithm>
#include "common/trans.h"
#include "utils/ms_context.h"
#include "debug/data_dump/dump_json_parser.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h"
namespace mindspore {
// Maps a logical device id to the physical device id reported by the kernel
// runtime for the current device target (read from MsContext).
uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  auto device_target = context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  auto kernel_runtime = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(device_target, device_id);
  MS_EXCEPTION_IF_NULL(kernel_runtime);
  return kernel_runtime->device_id();
}
// Builds the dump directory for the current iteration:
//   <path>/<net_name>/iteration_<N>                      (device_id == nullptr, e.g. CPU)
//   <path>/<net_name>/device_<phys>/iteration_<N>        (device_id given)
// where <path> and <net_name> come from the dump json config and <phys> is
// the physical id from ConvertPhysicalDeviceId.
// Fix over original: calling back() on an empty configured path was
// undefined behavior; an empty path now simply gets the '/' separator.
std::string GenerateDumpPath(uint32_t *device_id) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string net_name = dump_json_parser.net_name();
  std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
  std::string dump_path = dump_json_parser.path();
  if (dump_path.empty() || dump_path.back() != '/') {
    dump_path += "/";
  }
  if (device_id == nullptr) {
    dump_path += (net_name + "/iteration_" + iterator);
  } else {
    auto physical_device = ConvertPhysicalDeviceId(*device_id);
    dump_path += (net_name + "/device_" + std::to_string(physical_device) + "/iteration_" + iterator);
  }
  return dump_path;
}
// Replaces every scope separator '/' in *kernel_name with "--" so the
// scoped kernel name can be used as a single file-name component.
void GetFileKernelName(NotNull<std::string *> kernel_name) {
  const std::string strsrc = "/";
  const std::string strdst = "--";
  for (std::string::size_type at = kernel_name->find(strsrc); at != std::string::npos;
       at = kernel_name->find(strsrc, at + strdst.size())) {
    kernel_name->replace(at, strsrc.size(), strdst);
  }
}
// Assigns the next 1-based constant id to `node` in *const_map if it is a
// ValueNode that has not been numbered yet; non-ValueNodes and already-known
// names are left untouched.
void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const_map) {
  if (!node->isa<ValueNode>()) {
    return;
  }
  const auto next_id = const_map->size() + 1;
  // emplace is a no-op when the key already exists, matching the original
  // find-then-insert behavior.
  (void)const_map->emplace(node->fullname_with_scope(), next_id);
}
// Assigns constant ids (via SetConstNodeId) to the ValueNode operands of a
// single CNode. Nodes whose first input is not a primitive (a CNode, a
// FuncGraph value, or a Parameter) only produce a warning and are skipped.
// NOTE(review): the `graph` parameter is unused in this body — presumably
// kept for signature symmetry with GetConstantId; confirm before removing.
void GetCNodeConstantId(const session::KernelGraph *graph, const CNodePtr &node,
                        std::map<std::string, size_t> *const_map) {
  auto &inputs = node->inputs();
  if (inputs.empty()) {
    MS_LOG(EXCEPTION) << "Inputs of apply node is empty";
  }
  // inputs[0] is the operator; inputs[1..] are its operands.
  AnfNodePtr op = inputs[0];
  // CNode/ConstGraph/Const/Parameter
  if (op->isa<CNode>() || IsValueNode<FuncGraph>(op) || op->isa<Parameter>()) {
    MS_LOG(WARNING) << "Operator must be a primitive.";
  } else {
    // process OP inputs
    for (size_t i = 1; i < inputs.size(); ++i) {
      SetConstNodeId(inputs[i], const_map);
    }
  }
}
// Walks `graph` in topological order and numbers every ValueNode operand,
// filling *const_map with name -> 1-based constant id. Ids depend on the
// traversal order, so the same graph always yields the same numbering.
void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size_t> *const_map) {
  std::vector<AnfNodePtr> nodes = TopoSort(graph->get_return(), SuccIncoming, AlwaysInclude);
  for (const AnfNodePtr &node : nodes) {
    MS_EXCEPTION_IF_NULL(node);
    if (!node->isa<CNode>()) {
      continue;
    }
    auto cnode = node->cast<CNodePtr>();
    if (cnode != graph->get_return()) {
      GetCNodeConstantId(graph, cnode, const_map);
    } else {
      // The return node itself: number its single operand if it is a constant.
      SetConstNodeId(cnode->input(1), const_map);
    }
  }
}
void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull<ShapeVector *> int_shapes, bool trans_flag) {
if (trans_flag) {
*int_shapes = trans::GetRuntimePaddingShape(node, index);
} else {
auto shape = AnfAlgo::GetOutputDeviceShape(node, index);
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(*int_shapes),
[](size_t inner_item) { return SizeToInt(inner_item); });
}
}
// Writes the tensor behind `addr` to `file_path` in the default format,
// delegating to DeviceAddress::DumpMemToFile. Failure is logged as an error
// but not propagated — dump is best-effort.
void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr,
                   const ShapeVector &int_shapes, const TypeId &type, bool trans_flag) {
  auto format = kOpFormat_DEFAULT;
  auto ret = addr->DumpMemToFile(file_path, format, int_shapes, type, trans_flag);
  if (!ret) {
    MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << file_path << ", host_format:" << format
                  << ".!";
  }
}
} // namespace mindspore

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_UTILS_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_UTILS_H_
#include <map>
#include <string>
#include "backend/session/kernel_graph.h"
#include "runtime/device/device_address.h"
namespace mindspore {
// Output slot used when dumping a Parameter / ValueNode tensor.
// constexpr (instead of `static const`) avoids a separate object per
// translation unit including this header while keeping internal linkage.
constexpr size_t PARAMETER_OUTPUT_INDEX = 0;
constexpr size_t VALUE_NODE_OUTPUT_INDEX = 0;

// Builds the dump directory for the current iteration; with a device id the
// path contains a "device_<phys>" component, without one (CPU) it does not.
std::string GenerateDumpPath(uint32_t *device_id = nullptr);
// Replaces '/' with "--" in *kernel_name so it is a valid file-name component.
void GetFileKernelName(NotNull<std::string *> kernel_name);
// Numbers every ValueNode operand of `graph` into name -> 1-based id.
void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size_t> *const_map);
// Fills *int_shapes with the dump shape of output `index` of `node`.
void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull<ShapeVector *> int_shapes, bool trans_flag = false);
// Best-effort write of the tensor behind `addr` to `file_path`.
void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr,
                   const ShapeVector &int_shapes, const TypeId &type, bool trans_flag = false);
}  // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_UTILS_H_

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/data_dump/e2e_dump.h"
#include <algorithm>
#include <map>
@ -31,44 +31,17 @@
#include "debug/debugger/debugger.h"
#endif
namespace {
const size_t PRAMATER_OUTPUT_INDEX = 0;
const size_t VALUE_NODE_OUTPUT_INDEX = 0;
} // namespace
namespace mindspore {
void E2eDumpUtil::GetFileKernelName(NotNull<std::string *> kernel_name) {
const std::string strsrc = "/";
const std::string strdst = "--";
std::string::size_type pos = 0;
std::string::size_type srclen = strsrc.size();
std::string::size_type dstlen = strdst.size();
while ((pos = kernel_name->find(strsrc, pos)) != std::string::npos) {
kernel_name->replace(pos, srclen, strdst);
pos += dstlen;
}
}
bool E2eDumpUtil::IsDeviceTargetGPU() {
bool E2eDump::IsDeviceTargetGPU() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
}
void E2eDumpUtil::DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr,
bool trans_flag, const ShapeVector &int_shapes, const TypeId &type) {
auto format = kOpFormat_DEFAULT;
auto ret = addr->DumpMemToFile(trans_flag, file_path, format, int_shapes, type);
if (!ret) {
MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << file_path << ", host_format:" << format
<< ".!";
}
}
void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
NotNull<const device::DeviceAddress *> addr, bool trans_flag,
const ShapeVector &int_shapes, const TypeId &type, size_t slot,
const Debugger *debugger) {
void E2eDump::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
NotNull<const device::DeviceAddress *> addr, const ShapeVector &int_shapes,
const TypeId &type, bool trans_flag, size_t slot, const Debugger *debugger) {
#ifdef ENABLE_DEBUGGER
auto format = kOpFormat_DEFAULT;
MS_EXCEPTION_IF_NULL(debugger);
@ -81,18 +54,7 @@ void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::stri
#endif
}
void E2eDumpUtil::GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag,
NotNull<ShapeVector *> int_shapes) {
if (trans_flag) {
*int_shapes = trans::GetRuntimePaddingShape(node, index);
} else {
auto shape = AnfAlgo::GetOutputDeviceShape(node, index);
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(*int_shapes),
[](size_t inner_item) { return SizeToInt(inner_item); });
}
}
void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (!dump_json_parser.OutputNeedDump()) {
@ -103,7 +65,6 @@ void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::strin
const auto &apply_kernels = graph->execution_order();
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
auto node_name = AnfAlgo::GetCNodeName(node);
std::string kernel_name = node->fullname_with_scope();
if (!dump_json_parser.NeedDump(kernel_name)) {
continue;
@ -113,7 +74,7 @@ void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::strin
}
}
void E2eDumpUtil::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
std::string *kernel_name, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(node);
GetFileKernelName(NOT_NULL(kernel_name));
@ -124,19 +85,19 @@ void E2eDumpUtil::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const st
}
auto addr = AnfAlgo::GetOutputAddr(node, j);
ShapeVector int_shapes;
GetDumpIntShape(node, j, trans_flag, NOT_NULL(&int_shapes));
GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(node, j);
std::string file_path = dump_path + '/' + *kernel_name + '_' + "output_" + std::to_string(j);
if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j,
DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), int_shapes, type, trans_flag, j,
debugger);
} else {
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type, trans_flag);
}
}
}
void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (!dump_json_parser.InputNeedDump()) {
@ -147,7 +108,6 @@ void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string
const auto &apply_kernels = graph->execution_order();
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
auto node_name = AnfAlgo::GetCNodeName(node);
std::string kernel_name = node->fullname_with_scope();
if (!dump_json_parser.NeedDump(kernel_name)) {
continue;
@ -157,7 +117,7 @@ void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string
}
}
void E2eDumpUtil::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
std::string *kernel_name, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(node);
GetFileKernelName(NOT_NULL(kernel_name));
@ -184,64 +144,18 @@ void E2eDumpUtil::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std
}
ShapeVector int_shapes;
GetDumpIntShape(input, index, trans_flag, NOT_NULL(&int_shapes));
GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(input, index);
std::string file_path = dump_path + '/' + *kernel_name + '_' + "input_" + std::to_string(j);
if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, tensor_name, NOT_NULL(addr), trans_flag, int_shapes, type, slot, debugger);
DumpGPUMemToFile(file_path, tensor_name, NOT_NULL(addr), int_shapes, type, trans_flag, slot, debugger);
} else {
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type, trans_flag);
}
}
}
void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const_map) {
if (!node->isa<ValueNode>()) {
return;
}
auto iter = const_map->find(node->fullname_with_scope());
if (iter == const_map->end()) {
auto const_idx = const_map->size() + 1;
(*const_map)[node->fullname_with_scope()] = const_idx;
}
}
void GetCNodeConstantId(const session::KernelGraph *graph, const CNodePtr &node,
std::map<std::string, size_t> *const_map) {
auto &inputs = node->inputs();
if (inputs.size() < 1) {
MS_LOG(EXCEPTION) << "Inputs of apply node is empty";
}
AnfNodePtr op = inputs[0];
// CNode/ConstGraph/Const/Parameter
if (op->isa<CNode>() || IsValueNode<FuncGraph>(op) || op->isa<Parameter>()) {
MS_LOG(WARNING) << "Operator must be a primitive.";
} else {
// process OP inputs
for (size_t i = 1; i < inputs.size(); ++i) {
SetConstNodeId(inputs[i], const_map);
}
}
}
void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size_t> *const_map) {
std::vector<AnfNodePtr> nodes = TopoSort(graph->get_return(), SuccIncoming, AlwaysInclude);
for (const AnfNodePtr &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
continue;
}
auto cnode = node->cast<CNodePtr>();
if (cnode != graph->get_return()) {
GetCNodeConstantId(graph, cnode, const_map);
} else {
SetConstNodeId(cnode->input(1), const_map);
}
}
}
void E2eDumpUtil::DumpSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
bool trans_flag, std::map<std::string, size_t> *const_map, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(anf_node);
auto &dump_json_parser = DumpJsonParser::GetInstance();
@ -270,18 +184,18 @@ void E2eDumpUtil::DumpSingleAnfnode(const AnfNodePtr &anf_node, const size_t out
auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
MS_EXCEPTION_IF_NULL(addr);
ShapeVector int_shapes;
GetDumpIntShape(anf_node, output_index, trans_flag, NOT_NULL(&int_shapes));
GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
auto type = AnfAlgo::GetOutputInferDataType(anf_node, output_index);
std::string file_path = dump_path + '/' + dump_name + '_' + "output_0";
if (IsDeviceTargetGPU()) {
DumpGPUMemToFile(file_path, node_name, NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger);
DumpGPUMemToFile(file_path, node_name, NOT_NULL(addr), int_shapes, type, trans_flag, 0, debugger);
} else {
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
DumpMemToFile(file_path, NOT_NULL(addr), int_shapes, type, trans_flag);
}
}
void E2eDumpUtil::DumpParametersAndConst(const session::KernelGraph *graph, const std::string &dump_path,
void E2eDump::DumpParametersAndConst(const session::KernelGraph *graph, const std::string &dump_path,
Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance();
@ -293,49 +207,25 @@ void E2eDumpUtil::DumpParametersAndConst(const session::KernelGraph *graph, cons
// dump parameters
const auto &parameters = graph->inputs();
for (auto &item : parameters) {
DumpSingleAnfnode(item, PRAMATER_OUTPUT_INDEX, dump_path, trans_flag, &const_map, debugger);
DumpSingleAnfNode(item, PARAMETER_OUTPUT_INDEX, dump_path, trans_flag, &const_map, debugger);
}
// dump const values
auto value_nodes = graph->graph_value_nodes();
for (const auto &value_node : value_nodes) {
DumpSingleAnfnode(value_node, VALUE_NODE_OUTPUT_INDEX, dump_path, trans_flag, &const_map, debugger);
DumpSingleAnfNode(value_node, VALUE_NODE_OUTPUT_INDEX, dump_path, trans_flag, &const_map, debugger);
}
}
uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto device_target = context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
auto kernel_runtime = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(device_target, device_id);
MS_EXCEPTION_IF_NULL(kernel_runtime);
return kernel_runtime->device_id();
}
bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, uint32_t device_id, Debugger *debugger) {
bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t device_id, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
auto &dump_json_parser = DumpJsonParser::GetInstance();
dump_json_parser.UpdateDumpIter();
auto dump_flag = dump_json_parser.e2e_dump_enabled();
if (!dump_flag) {
if (!dump_json_parser.GetIterDumpFlag()) {
return true;
}
MS_LOG(INFO) << "E2e dump data start";
if (dump_json_parser.iteration() != 0) {
if (dump_json_parser.cur_dump_iter() != dump_json_parser.iteration()) {
return true;
}
}
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
auto physical_device = ConvertPhysicalDeviceId(device_id);
std::string dump_path = GenerateDumpPath(&device_id);
std::string net_name = dump_json_parser.net_name();
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
std::string dump_path = dump_json_parser.path();
if (dump_path.back() != '/') {
dump_path += "/";
}
dump_path += (net_name + "/device_" + std::to_string(physical_device) + "/iteration_" + iterator);
DumpInput(graph, dump_path, debugger);
DumpOutput(graph, dump_path, debugger);
DumpParametersAndConst(graph, dump_path, debugger);

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_H_
#include <map>
#include <string>
@ -23,17 +23,17 @@
#include "backend/session/kernel_graph.h"
#include "runtime/device/device_address.h"
#include "debug/data_dump/dump_json_parser.h"
#include "debug/data_dump/dump_utils.h"
#ifndef ENABLE_DEBUGGER
class Debugger;
#endif
namespace mindspore {
class E2eDumpUtil {
class E2eDump {
public:
E2eDumpUtil() = default;
~E2eDumpUtil() = default;
E2eDump() = default;
~E2eDump() = default;
static bool DumpData(const session::KernelGraph *graph, uint32_t device_id, Debugger *debugger = nullptr);
static void GetFileKernelName(NotNull<std::string *> kernel_name);
// Dump data when task error.
static void DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
std::string *kernel_name, Debugger *debugger);
@ -43,18 +43,15 @@ class E2eDumpUtil {
private:
static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
static void DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
static void DumpParametersAndConst(const session::KernelGraph *graph, const std::string &dump_path,
Debugger *debugger);
static void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr, bool trans_flag,
const ShapeVector &int_shapes, const TypeId &type);
static void DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
NotNull<const device::DeviceAddress *> addr, bool trans_flag,
const ShapeVector &int_shapes, const TypeId &type, size_t slot,
const Debugger *debugger);
static void GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, NotNull<ShapeVector *> int_shapes);
NotNull<const device::DeviceAddress *> addr, const ShapeVector &int_shapes,
const TypeId &type, bool trans_flag, size_t slot, const Debugger *debugger);
static bool IsDeviceTargetGPU();
static void DumpSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
static void DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
bool trans_flag, std::map<std::string, size_t> *const_map, Debugger *debugger);
};
} // namespace mindspore

View File

@ -31,7 +31,7 @@
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/device/kernel_runtime.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/data_dump/e2e_dump.h"
#include "utils/config_manager.h"
using debugger::Chunk;
@ -52,8 +52,6 @@ namespace mindspore {
DebuggerPtr Debugger::debugger_ = nullptr;
std::mutex Debugger::instance_lock_;
static const size_t PARAMETER_OUTPUT_INDEX = 0;
static const size_t VALUE_NODE_OUTPUT_INDEX = 0;
Debugger::Debugger()
: grpc_client_(nullptr),
@ -1133,7 +1131,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
// for parameters and value nodes, set its execution order to be 0;
int exec_order = 0;
std::string node_name = anf_node->fullname_with_scope();
E2eDumpUtil::GetFileKernelName(NOT_NULL(&node_name));
GetFileKernelName(NOT_NULL(&node_name));
// check if output adde exists, if not, return;
if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
return;

View File

@ -649,8 +649,8 @@ void AscendDeviceAddress::ClearDeviceMemory() {
AscendDeviceAddress::~AscendDeviceAddress() { ClearDeviceMemory(); }
bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const {
bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, bool trans_flag) const {
bool ret = false;
if (filepath.empty()) {
MS_LOG(ERROR) << "Dump file path is null!";

View File

@ -43,8 +43,8 @@ class AscendDeviceAddress : public DeviceAddress {
bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override;
void ClearDeviceMemory() override;
DeviceAddressType DeviceType() const override { return DeviceAddressType::kAscend; }
bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const override;
bool DumpMemToFile(const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape,
TypeId host_type, bool trans_flag) const override;
#ifdef ENABLE_DEBUGGER
bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;

View File

@ -21,7 +21,7 @@
#include <utility>
#include <algorithm>
#include "runtime/device/ascend/signal_util.h"
#include "debug/data_dump/e2e_dump_util.h"
#include "debug/data_dump/e2e_dump.h"
#include "runtime/device/ascend/ascend_device_address.h"
#include "utils/ms_context.h"
#include "utils/context/context_extends.h"
@ -642,8 +642,8 @@ void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *grap
auto full_scope_name = node->fullname_with_scope();
MS_LOG(ERROR) << "Dump node (" << full_scope_name << ") task error input/output data to: " << local_path
<< " trace: " << trace::DumpSourceLines(node);
E2eDumpUtil::DumpInputImpl(node, false, local_path, &full_scope_name, nullptr);
E2eDumpUtil::DumpOutputImpl(node, false, local_path, &full_scope_name, nullptr);
E2eDump::DumpInputImpl(node, false, local_path, &full_scope_name, nullptr);
E2eDump::DumpOutputImpl(node, false, local_path, &full_scope_name, nullptr);
}
}

View File

@ -15,11 +15,41 @@
*/
#include "runtime/device/cpu/cpu_device_address.h"
#include <vector>
#include <memory>
#include "runtime/device/convert_tensor_utils.h"
#include "debug/data_dump/dump_json_parser.h"
namespace mindspore {
namespace device {
namespace cpu {
bool CPUDeviceAddress::DumpMemToFile(const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, bool trans_flag) const {
bool ret = false;
if (filepath.empty()) {
MS_LOG(ERROR) << "Dump file path is null!";
return ret;
}
std::string shape = "shape";
if (host_shape.empty()) {
shape += "_0";
} else {
for (auto &value : host_shape) {
shape += '_' + std::to_string(value);
}
}
std::string file_extension = ".bin";
std::string path = filepath + '_' + shape + '_' + TypeIdLabel(type_id_) + '_' + format_ + file_extension;
MS_LOG(DEBUG) << "E2E Dump path is " << path;
auto host_tmp = std::vector<uint8_t>(size_);
auto ret_code = memcpy_s(host_tmp.data(), size_, ptr_, size_);
if (ret_code != EOK) {
MS_LOG(ERROR) << "Failed to copy tensor!";
return ret;
}
ret = DumpJsonParser::DumpToFile(path, host_tmp.data(), size_);
return ret;
}
bool CPUDeviceAddress::SyncDeviceToHost(const ShapeVector & /*shape*/, size_t size, TypeId type, void *host_ptr) const {
if (ptr_ == nullptr) {
MS_LOG(ERROR) << "The pointer ptr_ is null!";

View File

@ -35,6 +35,8 @@ class CPUDeviceAddress : public DeviceAddress {
bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override;
bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override;
bool DumpMemToFile(const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape,
TypeId host_type, bool trans_flag) const override;
void ClearDeviceMemory() override {}
DeviceAddressType DeviceType() const override { return DeviceAddressType::kCPU; }
};

View File

@ -34,6 +34,7 @@
#include "utils/shape_utils.h"
#include "utils/profile.h"
#include "utils/trace_base.h"
#include "debug/data_dump/cpu_e2e_dump.h"
#ifdef MEM_REUSE_DEBUG
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
#endif
@ -373,6 +374,14 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
auto kernels = kernel_graph->execution_order();
auto profiler_inst = profiler::cpu::CPUProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
auto &dump_json_parser = DumpJsonParser::GetInstance();
dump_json_parser.UpdateDumpIter();
bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
if (iter_dump_flag) {
CPUE2eDump::DumpParametersAndConst(kernel_graph);
}
for (const auto &kernel : kernels) {
#ifdef ENABLE_PROFILE
double start_time = GetTime();
@ -412,6 +421,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
} catch (std::exception &e) {
MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel);
}
if (iter_dump_flag) {
CPUE2eDump::DumpCNodeData(kernel);
}
if (profiler_inst->GetEnableFlag()) {
profiler_inst->OpDataProducerEnd();
}

View File

@ -74,8 +74,8 @@ class DeviceAddress : public mindspore::DeviceSync {
void DecreaseRefCountUsed() { ref_count_dynamic_used_--; }
void ResetRefCountUsed() { ref_count_dynamic_used_ = ref_count_; }
size_t ref_count_dynamic_used() const { return ref_count_dynamic_used_; }
virtual bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type) const {
virtual bool DumpMemToFile(const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape,
TypeId host_type, bool trans_flag) const {
return true;
}
#ifdef ENABLE_DEBUGGER