forked from mindspore-Ecosystem/mindspore
!31482 Fix load tensor into mem twice for ascend kernel by kernel dump
Merge pull request !31482 from TinaMengtingZhang/kernel_dump
This commit is contained in: commit d40dc4f997
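In brief: before this change, the Ascend kernel-by-kernel (A+M) dump read tensors out of device memory a second time (through launch_info addresses) even though the debugger had already loaded them into its tensor loader; after it, LoadMemToHost is the single load and the dump is served from that cache. A minimal standalone sketch of the new read path, using hypothetical mock types rather than the real MindSpore classes:

#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Hypothetical, simplified stand-ins for TensorData / TensorLoader; the real classes live in
// mindspore/ccsrc/debug and also carry shape, type and slot metadata.
struct TensorDataMock {
  std::vector<char> bytes;
  std::string format;  // recorded at LoadMemToHost time (host format or raw device format)
};

class TensorLoaderMock {
 public:
  // One load per tensor: this is the only device-to-host copy.
  void LoadNewTensor(const std::string &name, size_t slot, TensorDataMock data) {
    tensor_list_map_[name + ":" + std::to_string(slot)] = std::make_shared<TensorDataMock>(std::move(data));
  }
  // Kernel-by-kernel dump reads from the cache instead of touching device memory again.
  bool DumpTensorToFile(const std::string &filepath, const std::string &name, size_t slot) const {
    auto iter = tensor_list_map_.find(name + ":" + std::to_string(slot));
    if (iter == tensor_list_map_.end()) {
      return false;
    }
    std::ofstream fd(filepath + "." + iter->second->format, std::ios::binary);
    fd.write(iter->second->bytes.data(), static_cast<std::streamsize>(iter->second->bytes.size()));
    return true;
  }

 private:
  std::map<std::string, std::shared_ptr<TensorDataMock>> tensor_list_map_;
};

int main() {
  TensorLoaderMock loader;
  loader.LoadNewTensor("Default/Conv2D-op1", 0, {{1, 2, 3, 4}, "DefaultFormat"});
  std::cout << loader.DumpTensorToFile("Conv2D.output.0", "Default/Conv2D-op1", 0) << '\n';
  return 0;
}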
@@ -263,6 +263,7 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
     return false;
   }
   const std::string file_path_str = file_path.value();
+  MS_LOG(INFO) << "Dump path is " << file_path_str;
   ChangeFileMode(file_path_str, S_IWUSR);
   std::ofstream fd(file_path_str, std::ios::out | std::ios::trunc | std::ios::binary);
   if (!fd.is_open()) {
@@ -118,23 +118,23 @@ bool E2eDump::IsDeviceTargetGPU() {
   return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
 }
 
+bool E2eDump::IsMindRTKernelByKernel() {
+  return IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag();
+}
+
 /*
  * Feature group: Dump.
- * Target device group: GPU.
+ * Target device group: GPU, Ascend.
  * Runtime category: Old runtime, MindRT.
- * Description: This function is for dumping tensor in memory to disk in GPU machine.
+ * Description: This function is for dumping tensor loaded to tensor_loader in memory to disk in GPU and Ascend machine.
  */
-void E2eDump::DumpGPUMemToFile(const Debugger *debugger, const std::string &file_path, bool trans_flag,
-                               const device::DeviceAddress &addr, const std::string &original_kernel_name, size_t slot,
-                               const ShapeVector &int_shapes, const TypeId &host_type) {
+void E2eDump::DumpMemFromTensorLoaderToFile(const Debugger *debugger, const std::string &file_path,
+                                            const std::string &original_kernel_name, size_t slot) {
 #ifdef ENABLE_DEBUGGER
-  auto format = kOpFormat_DEFAULT;
   MS_EXCEPTION_IF_NULL(debugger);
-  auto ret = debugger->DumpTensorToFile(file_path, trans_flag, format, addr.format(), original_kernel_name, slot,
-                                        int_shapes, host_type);
+  auto ret = debugger->DumpTensorToFile(file_path, original_kernel_name, slot);
   if (!ret) {
-    MS_LOG(INFO) << "DumpTensorToFile Failed: flag:" << trans_flag << ", path:" << file_path
-                 << ", host_format:" << format;
+    MS_LOG(INFO) << "DumpTensorToFile Failed: path:" << file_path;
   }
 #endif
 }
@@ -184,6 +184,7 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
       continue;
     }
     auto addr = AnfAlgo::GetOutputAddr(node, j);
+    std::string node_name = GetKernelNodeName(node);
     MS_EXCEPTION_IF_NULL(addr);
     ShapeVector int_shapes;
     GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
@@ -196,14 +197,13 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
     std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                             std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
                             std::to_string(j);
-    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
-        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
+    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
       TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
-      (void)stat_dump.DumpTensorStatsToFile(GetKernelNodeName(node), dump_path, debugger);
+      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
     }
     if (DumpJsonParser::GetInstance().IsTensorDump()) {
-      if (IsDeviceTargetGPU()) {
-        DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, GetKernelNodeName(node), j, int_shapes, type);
+      if (IsMindRTKernelByKernel()) {
+        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, j);
       } else {
         DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
       }
@@ -213,10 +213,8 @@ void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::s
 
 void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                              std::string *kernel_name) {
-  auto debugger = Debugger::GetInstance();
-  MS_EXCEPTION_IF_NULL(debugger);
-  if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
-    MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
+  if (IsMindRTKernelByKernel()) {
+    MS_LOG(INFO) << "DumpOutputData is only for graph mode on Ascend";
     return;
   }
   MS_EXCEPTION_IF_NULL(node);
@@ -256,8 +254,7 @@ void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &du
   }
 }
 
-void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger,
-                                  const KernelLaunchInfo *launch_info) {
+void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   if (!dump_json_parser.InputNeedDump()) {
     return;
@@ -269,25 +266,11 @@ void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_
     return;
   }
   DumpJsonParser::GetInstance().MatchKernel(kernel_name);
-  DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger, launch_info);
-}
-
-std::shared_ptr<device::DeviceAddress> CreateAscendDeviceAddress(const KernelLaunchInfo *launch_info, size_t index,
-                                                                 TypeId type) {
-  MS_EXCEPTION_IF_NULL(launch_info);
-  auto addr_ptr = launch_info->inputs_[index];
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
-  auto device_context =
-    device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({kAscendDevice, device_id});
-  auto format = kOpFormat_DEFAULT;
-  MS_EXCEPTION_IF_NULL(addr_ptr);
-  return device_context->CreateDeviceAddress(addr_ptr->addr, addr_ptr->size, format, type, ShapeVector());
+  DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
 }
 
 void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
-                            std::string *kernel_name, const Debugger *debugger, const KernelLaunchInfo *launch_info) {
+                            std::string *kernel_name, const Debugger *debugger) {
   MS_EXCEPTION_IF_NULL(node);
   GetFileKernelName(NOT_NULL(kernel_name));
   auto input_size = common::AnfAlgo::GetInputTensorNum(node);
@@ -298,12 +281,12 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
     if (!AnfAlgo::OutputAddrExist(input, index)) {
       continue;
     }
-    std::string tensor_name = GetKernelNodeName(node);
+    std::string node_name = GetKernelNodeName(node);
     size_t slot = j;
-    if (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
+    if (IsMindRTKernelByKernel()) {
       auto input_kernel = node->input(j + 1);
       std::string input_kernel_name = GetKernelNodeName(input_kernel);
-      tensor_name = input_kernel_name;
+      node_name = input_kernel_name;
       slot = 0;
     }
     ShapeVector int_shapes;
@@ -318,18 +301,13 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
                             std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
     auto addr = AnfAlgo::GetOutputAddr(input, index);
     MS_EXCEPTION_IF_NULL(addr);
-    if (DumpJsonParser::GetInstance().IsStatisticDump() &&
-        (IsDeviceTargetGPU() || Debugger::GetInstance()->GetAscendKernelByKernelFlag())) {
+    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
       TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
-      (void)stat_dump.DumpTensorStatsToFile(tensor_name, dump_path, debugger);
+      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
     }
     if (DumpJsonParser::GetInstance().IsTensorDump()) {
-      if (IsDeviceTargetGPU()) {
-        DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, tensor_name, slot, int_shapes, type);
-      } else if (Debugger::GetInstance()->GetAscendKernelByKernelFlag()) {
-        // load address from launch_info when it's Ascend Kernel by kernel mode.
-        auto ascend_device_addr = CreateAscendDeviceAddress(launch_info, j, type);
-        DumpMemToFile(file_path, *ascend_device_addr, int_shapes, type, trans_flag);
+      if (IsMindRTKernelByKernel()) {
+        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, slot);
       } else {
         DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
       }
@@ -339,9 +317,7 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st
 
 void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name) {
-  auto debugger = Debugger::GetInstance();
-  MS_EXCEPTION_IF_NULL(debugger);
-  if (IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag()) {
+  if (IsMindRTKernelByKernel()) {
     MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
     return;
   }
@@ -409,7 +385,7 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_
       (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
     }
     if (dump_json_parser.IsTensorDump()) {
-      DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, node_name, 0, int_shapes, type);
+      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
     }
   } else {
     DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
@@ -452,7 +428,7 @@ void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::str
       (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
     }
     if (dump_json_parser.IsTensorDump()) {
-      DumpGPUMemToFile(debugger, file_path, trans_flag, *addr, node_name, 0, int_shapes, type);
+      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
    }
   } else {
     DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
@@ -662,13 +638,12 @@ void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons
  * Runtime category: MindRT.
  * Description: This function is for dumping a single node. It is used for mindrt in GPU and Ascend kernel-by-kernel.
  */
-bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger,
-                                 const KernelLaunchInfo *launch_info) {
+bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger) {
   bool success = false;
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   if (dump_json_parser.DumpEnabledForIter()) {
     std::string dump_path = GenerateDumpPath(graph_id, rank_id);
-    DumpInputSingleNode(node, dump_path, debugger, launch_info);
+    DumpInputSingleNode(node, dump_path, debugger);
     DumpOutputSingleNode(node, dump_path, debugger);
     success = true;
   }
@@ -761,9 +736,10 @@ void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dum
   if (dump_tensor_vec.empty()) {
     return;
   }
+  // The maximum tensor size to allow convert format in single thread to 1 MB.
   constexpr int kMaxTensorSize = 1048576;
   if (offset <= kMaxTensorSize) {
-    // If the total tensor size is less than 1Mb, do it in single thread.
+    // If the total tensor size is less than 1MB, do it in single thread.
     ConvertFormatForTensors(&dump_tensor_vec, 0, dump_tensor_vec.size() - 1);
   } else {
     // In multi_thread process, we only use 1/4 of the total concurrent threads.
@@ -775,7 +751,7 @@ void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dum
     std::vector<std::thread> threads;
     threads.reserve(num_threads);
     MS_LOG(INFO) << "Number of threads used for A+M dump: " << num_threads;
-    for (size_t t = 0; t < threads.capacity(); t++) {
+    for (size_t t = 0; t < num_threads; t++) {
       uint32_t start_idx = t * task_size;
       uint32_t end_idx = start_idx + task_size - 1;
       if (t == num_threads - 1) {
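Why the loop bound changed from threads.capacity() to num_threads: std::vector::reserve(n) only guarantees capacity() >= n, so iterating up to capacity() can launch more worker threads than task_size was computed for. A small standalone illustration (not MindSpore code):

#include <iostream>
#include <vector>

int main() {
  std::vector<int> threads;
  const size_t num_threads = 6;
  threads.reserve(num_threads);
  // capacity() is only guaranteed to be at least num_threads; an implementation may round it up.
  std::cout << "requested: " << num_threads << ", capacity: " << threads.capacity() << '\n';
  // Looping to capacity() could therefore run extra iterations with out-of-range start indices,
  // which is why the dump loop now iterates to num_threads instead.
  return 0;
}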
@@ -32,7 +32,6 @@
 #endif
 #include "include/backend/visible.h"
 
-using mindspore::kernel::KernelLaunchInfo;
 #ifndef ENABLE_DEBUGGER
 class Debugger;
 #endif
@@ -71,12 +70,11 @@ class E2eDump {
   static void DumpParametersData(uint32_t rank_id, const Debugger *debugger);
 
   static bool DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id,
-                                 const Debugger *debugger = nullptr, const KernelLaunchInfo *launch_info = nullptr);
+                                 const Debugger *debugger = nullptr);
 
   // Dump data when task error.
   static void DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
-                            std::string *kernel_name, const Debugger *debugger,
-                            const KernelLaunchInfo *launch_info = nullptr);
+                            std::string *kernel_name, const Debugger *debugger);
 
   static void DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                              std::string *kernel_name, const Debugger *debugger);
@@ -93,6 +91,10 @@ class E2eDump {
                                 char *data_ptr);
 #endif
 
+  static bool IsDeviceTargetGPU();
+
+  static bool IsMindRTKernelByKernel();
+
  private:
   static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger);
 
@@ -100,15 +102,13 @@ class E2eDump {
 
   static void DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger);
 
-  static void DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger,
-                                  const KernelLaunchInfo *launch_info = nullptr);
+  static void DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger);
 
   static void DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger);
 
-  static void DumpGPUMemToFile(const Debugger *debugger, const std::string &file_path, bool trans_flag,
-                               const device::DeviceAddress &addr, const std::string &original_kernel_name, size_t slot,
-                               const ShapeVector &int_shapes, const TypeId &host_type);
-  static bool IsDeviceTargetGPU();
+  static void DumpMemFromTensorLoaderToFile(const Debugger *debugger, const std::string &file_path,
+                                            const std::string &original_kernel_name, size_t slot);
+
   static void DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                 bool trans_flag, const Debugger *debugger);
 
@@ -33,11 +33,6 @@ constexpr auto kCsvFileName = "statistic.csv";
 }  // namespace
 
 namespace mindspore {
-const std::map<DbgDataType, std::string> kDbgDataTypeToStringMap = {
-  {DT_BOOL, "bool"}, {DT_INT8, "int8"}, {DT_INT16, "int16"}, {DT_INT32, "int32"},
-  {DT_INT64, "int64"}, {DT_UINT8, "uint8"}, {DT_UINT16, "uint16"}, {DT_UINT32, "uint32"},
-  {DT_UINT64, "uint64"}, {DT_FLOAT16, "float16"}, {DT_FLOAT32, "float32"}, {DT_FLOAT64, "float64"}};
-
 bool CsvWriter::OpenFile(const std::string &path, const std::string &header) {
   if (file_.is_open() && path == file_path_str_) {
     return true;
@@ -162,13 +157,10 @@ bool TensorStatDump::DumpTensorStatsToFile(const std::string &dump_path, const s
     MS_LOG(INFO) << "Tensor data is empty, skipping current statistics";
     return false;
   }
-  std::string type;
-  auto iter_type = kDbgDataTypeToStringMap.find(data->GetType());
-  if (iter_type == kDbgDataTypeToStringMap.end()) {
+  std::string type = data->GetTypeString();
+  if (type.empty()) {
     type = "unsupported(" + std::to_string(data->GetType()) + ")";
     MS_LOG(INFO) << "Unsupported tensor data_type " << type << " for tensor " << data->GetName();
-  } else {
-    type = iter_type->second;
   }
   if (!OpenStatisticsFile(dump_path)) {
     return false;
@@ -1836,11 +1836,8 @@ std::shared_ptr<TensorData> DebugServices::GetTensor(const std::string &tensor_n
 void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); }
 
 #ifdef ONLINE_DBG_MODE
-bool DebugServices::DumpTensorToFile(const std::string &filepath, bool trans_flag, const std::string &host_fmt,
-                                     const std::string &addr_format, const std::string &tensor_name, size_t slot,
-                                     const std::vector<int64_t> &host_shape, TypeId host_type) const {
-  return tensor_loader_->DumpTensorToFile(filepath, trans_flag, host_fmt, addr_format, tensor_name, slot, host_shape,
-                                          host_type);
+bool DebugServices::DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) const {
+  return tensor_loader_->DumpTensorToFile(filepath, tensor_name, slot);
 }
 #endif
 
@@ -461,9 +461,7 @@ class DebugServices {
   void EmptyCurrentTensor();
 
 #ifdef ONLINE_DBG_MODE
-  bool DumpTensorToFile(const std::string &filepath, bool trans_flag, const std::string &host_fmt,
-                        const std::string &addr_format, const std::string &tensor_name, size_t slot,
-                        const std::vector<int64_t> &host_shape, TypeId host_type) const;
+  bool DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) const;
 #endif
 
   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);
@@ -527,10 +527,10 @@ void Debugger::DumpConstantDataAscend(const KernelGraphPtr &graph) {
  * Runtime category: MindRT.
  * Description: Dumps a single node for given graph_id.
  */
-void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id, const KernelLaunchInfo *launch_info) {
+void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id) {
   if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     uint32_t rank_id = GetRankID();
-    (void)E2eDump::DumpSingleNodeData(node, graph_id, rank_id, debugger_.get(), launch_info);
+    (void)E2eDump::DumpSingleNodeData(node, graph_id, rank_id, debugger_.get());
   }
 }
 
@@ -1335,11 +1335,8 @@ void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) {
   }
 }
 
-bool Debugger::DumpTensorToFile(const std::string &filepath, bool trans_flag, const std::string &host_fmt,
-                                const std::string &addr_format, const std::string &tensor_name, size_t slot,
-                                const std::vector<int64_t> &host_shape, TypeId host_type) const {
-  return debug_services_.get()->DumpTensorToFile(filepath, trans_flag, host_fmt, addr_format, tensor_name, slot,
-                                                 host_shape, host_type);
+bool Debugger::DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) const {
+  return debug_services_.get()->DumpTensorToFile(filepath, tensor_name, slot);
 }
 
 bool Debugger::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
@@ -1541,7 +1538,8 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
   } else {
     keep_prev = false;
   }
-  bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev, root_graph_id, false);
+  bool ret =
+    addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev, root_graph_id, false, true);
   if (!ret) {
     MS_LOG(ERROR) << "LoadMemToHost:"
                   << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -1572,7 +1570,7 @@ void Debugger::LoadSingleParameterMindRT(const AnfNodePtr &node) {
   }
   // Keep_prev is True for parameters.
   // force update for parameters.
-  bool ret = device_addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, true);
+  bool ret = device_addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, true, true);
   if (!ret) {
     MS_LOG(ERROR) << "LoadMemToHost:"
                   << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -1702,7 +1700,8 @@ void Debugger::LoadGraphOutputs() {
       auto format = kOpFormat_DEFAULT;
       string tensor_name = kernel_name + ':' + std::to_string(j);
       ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j);
-      auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id, false);
+      auto ret =
+        addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id, false, true);
       if (!ret) {
         MS_LOG(ERROR) << "LoadMemToHost:"
                       << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -107,7 +107,7 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
 
   void DumpConstantDataAscend(const KernelGraphPtr &graph);
 
-  void DumpSingleNode(const CNodePtr &node, uint32_t graph_id, const KernelLaunchInfo *launch_info = nullptr);
+  void DumpSingleNode(const CNodePtr &node, uint32_t graph_id);
 
   void DumpInGraphCompiler(const KernelGraphPtr &kernel_graph);
 
@@ -117,9 +117,7 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
 
   void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);
 
-  bool DumpTensorToFile(const std::string &filepath, bool trans_flag, const std::string &host_fmt,
-                        const std::string &addr_format, const std::string &tensor_name, size_t slot,
-                        const std::vector<int64_t> &host_shape, TypeId host_type) const;
+  bool DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) const;
 
   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);
 
@@ -66,12 +66,12 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
 
 /*
  * Feature group: Dump, Online debugger.
- * Target device group: GPU.
+ * Target device group: GPU, Ascend.
  * Runtime category: MindRT.
 * Description: Get kernel inputs from launch_info and load the inputs from device to host.
 */
 void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
-                const DeviceContext *device_context) {
+                const DeviceContext *device_context, const bool trans_flag) {
   // get inputs
   auto kernel_inputs = launch_info->inputs_;
   auto input_size = common::AnfAlgo::GetInputTensorNum(cnode);
@@ -79,33 +79,40 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
     auto input_kernel = cnode->input(j + 1);
     std::string input_kernel_name = GetKernelNodeName(input_kernel);
     auto addr = kernel_inputs[j];
-    auto type = common::AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
+    auto device_type = AnfAlgo::GetOutputDeviceDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
+    auto host_type = common::AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
+    auto type = trans_flag ? host_type : device_type;
     // For example, this happens with the Depend op
     if (type == kMetaTypeNone) {
       continue;
     }
 
-    auto format = kOpFormat_DEFAULT;
-    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type, ShapeVector());
+    auto host_format = kOpFormat_DEFAULT;
+    auto device_format =
+      E2eDump::IsDeviceTargetGPU() ? kOpFormat_DEFAULT : AnfAlgo::GetOutputFormat(input_kernel, PARAMETER_OUTPUT_INDEX);
+    auto device_addr =
+      device_context->CreateDeviceAddress(addr->addr, addr->size, device_format, device_type, ShapeVector());
     string input_tensor_name = input_kernel_name + ':' + "0";
-    ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
-    auto ret = device_addr->LoadMemToHost(input_tensor_name, UintToInt(exec_order), format, int_shapes, type, 0, true,
-                                          root_graph_id, false);
+    ShapeVector int_shapes;
+    GetDumpIntShape(input_kernel, PARAMETER_OUTPUT_INDEX, NOT_NULL(&int_shapes), trans_flag);
+    auto ret = device_addr->LoadMemToHost(input_tensor_name, UintToInt(exec_order), host_format, int_shapes, type, 0,
+                                          true, root_graph_id, false, trans_flag);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
-                    << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
+                    << ", tensor_name:" << input_tensor_name << ", host_format:" << host_format
+                    << ", device_format:" << device_format << ".";
     }
   }
 }
 
 /*
  * Feature group: Dump, Online debugger.
- * Target device group: GPU.
+ * Target device group: GPU, Ascend.
 * Runtime category: MindRT.
 * Description: Get kernel outputs from launch_info and load the inputs from device to host.
 */
 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id, const DeviceContext *device_context) {
+                 uint32_t root_graph_id, const DeviceContext *device_context, const bool trans_flag) {
   // get outputs
   auto kernel_outputs = launch_info->outputs_;
   auto output_size = common::AnfAlgo::GetOutputTensorNum(cnode);
@@ -115,21 +122,27 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
 
   for (size_t j : real_outputs) {
     auto addr = kernel_outputs[j];
-    auto type = common::AnfAlgo::GetOutputInferDataType(cnode, j);
+    auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, j);
+    auto host_type = common::AnfAlgo::GetOutputInferDataType(cnode, j);
+    auto type = trans_flag ? host_type : device_type;
     // For example, this happens with the Depend op
     if (type == kMetaTypeNone) {
      continue;
    }
 
-    auto format = kOpFormat_DEFAULT;
-    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type, ShapeVector());
+    auto host_format = kOpFormat_DEFAULT;
+    auto device_format = E2eDump::IsDeviceTargetGPU() ? kOpFormat_DEFAULT : AnfAlgo::GetOutputFormat(cnode, j);
+    auto device_addr =
+      device_context->CreateDeviceAddress(addr->addr, addr->size, device_format, device_type, ShapeVector());
     string tensor_name = kernel_name + ':' + std::to_string(j);
-    ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
-    auto ret = device_addr->LoadMemToHost(tensor_name, UintToInt(exec_order), format, int_shapes, type, j, false,
-                                          root_graph_id, false);
+    ShapeVector int_shapes;
+    GetDumpIntShape(cnode, j, NOT_NULL(&int_shapes), trans_flag);
+    auto ret = device_addr->LoadMemToHost(tensor_name, UintToInt(exec_order), host_format, int_shapes, type, j, false,
+                                          root_graph_id, false, trans_flag);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
-                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
+                    << ", tensor_name:" << tensor_name << ", host_format:" << host_format
+                    << ", device_format:" << device_format << ".!";
     }
   }
 }
@@ -168,6 +181,13 @@ bool IsDeviceTargetGPU() {
   return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
 }
 
+bool GetTransFlag() {
+  if (Debugger::GetInstance()->debugger_enabled() || IsDeviceTargetGPU()) {
+    return true;
+  }
+  return DumpJsonParser::GetInstance().trans_flag();
+}
+
 /*
  * Feature group: Dump, Online debugger.
  * Target device group: Ascend, GPU.
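How the new trans_flag reaches the device-address load, reduced to a hedged standalone sketch; the mock types below are hypothetical and the real functions take CNodePtr, KernelLaunchInfo and DeviceContext arguments:

#include <iostream>
#include <string>

// Simplified stand-ins for the real configuration objects (hypothetical mocks).
struct DumpConfig { bool trans_flag = false; };
struct DebuggerMock { bool debugger_enabled = false; };

bool GetTransFlag(const DebuggerMock &debugger, bool is_gpu, const DumpConfig &cfg) {
  // Online debugger and GPU always convert to host format; otherwise honor the dump json setting.
  if (debugger.debugger_enabled || is_gpu) {
    return true;
  }
  return cfg.trans_flag;
}

void LoadMemToHost(const std::string &tensor_name, bool trans_flag) {
  // trans_flag == true  -> convert device format/shape to host format while copying
  // trans_flag == false -> copy raw device-format bytes and record the device format
  std::cout << tensor_name << (trans_flag ? " converted" : " raw") << '\n';
}

int main() {
  DumpConfig cfg{false};
  DebuggerMock dbg{false};
  bool trans_flag = GetTransFlag(dbg, /*is_gpu=*/false, cfg);  // Ascend kernel-by-kernel dump
  LoadMemToHost("Conv2D-op1:0", trans_flag);
  return 0;
}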
@@ -187,11 +207,12 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
   auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto root_graph_id = kernel_graph->root_graph_id();
+  bool trans_flag = GetTransFlag();
   if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
-    LoadInputs(cnode, launch_info, exec_order, root_graph_id, device_context);
+    LoadInputs(cnode, launch_info, exec_order, root_graph_id, device_context, trans_flag);
   }
   if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
-    LoadOutputs(cnode, launch_info, exec_order, root_graph_id, device_context);
+    LoadOutputs(cnode, launch_info, exec_order, root_graph_id, device_context, trans_flag);
   }
   // Dump kernel
   if (dump_enabled) {
@@ -202,7 +223,7 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
       debugger->DumpSingleNode(cnode, graph_id);
     } else {
       // for Ascend, node are dumped in root_graph_id directory.
-      debugger->DumpSingleNode(cnode, root_graph_id, launch_info);
+      debugger->DumpSingleNode(cnode, root_graph_id);
     }
     // Clear Dumped data when online debugger is not enabled
     if (!debugger->debugger_enabled()) {
@@ -33,10 +33,10 @@ namespace mindspore {
 std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);
 
 void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
-                const DeviceContext *device_context);
+                const DeviceContext *device_context, const bool trans_flag);
 
 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id, const DeviceContext *device_context);
+                 uint32_t root_graph_id, const DeviceContext *device_context, const bool trans_flag);
 
 bool CheckReadData(const CNodePtr &cnode);
 
@@ -17,6 +17,7 @@
 #define MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
 
 #include <algorithm>
+#include <map>
 #include <vector>
 #include <string>
 #include <iostream>
@@ -203,6 +204,10 @@ class TensorData {
 
 #ifdef ONLINE_DBG_MODE
   void SetTensor(const mindspore::tensor::TensorPtr &out_tensor) { this->tensor_ptr_ = out_tensor; }
+
+  void SetFormat(const std::string &format) { this->format_ = format; }
+
+  std::string GetFormat() { return this->format_; }
 #endif
 
   void SetSlot(size_t slot) { this->slot_ = slot; }
@@ -239,6 +244,19 @@ class TensorData {
 
   DbgDataType GetType() const { return this->data_type_; }
 
+  std::string GetTypeString() const {
+    const std::map<DbgDataType, std::string> kDbgDataTypeToStringMap = {
+      {DT_BOOL, "bool"}, {DT_INT8, "int8"}, {DT_INT16, "int16"}, {DT_INT32, "int32"},
+      {DT_INT64, "int64"}, {DT_UINT8, "uint8"}, {DT_UINT16, "uint16"}, {DT_UINT32, "uint32"},
+      {DT_UINT64, "uint64"}, {DT_FLOAT16, "float16"}, {DT_FLOAT32, "float32"}, {DT_FLOAT64, "float64"}};
+    auto iter_type = kDbgDataTypeToStringMap.find(data_type_);
+    if (iter_type == kDbgDataTypeToStringMap.end()) {
+      return std::string();
+    } else {
+      return iter_type->second;
+    }
+  }
+
   void SetType(unsigned int type) { ConvertMsToDbgType(type); }
 
   void SetType(const std::string &type_name) { ConvertStringToDbgType(type_name); }
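With the type-name map moved into TensorData, callers such as TensorStatDump::DumpTensorStatsToFile ask the tensor for its type string and only fall back to an "unsupported(...)" label when the lookup fails. A hedged standalone sketch of that pattern, using a reduced enum and a mock class rather than the real TensorData:

#include <iostream>
#include <map>
#include <string>

enum DbgDataType { DT_BOOL, DT_INT32, DT_FLOAT32 };  // reduced subset for illustration

class TensorDataMock {
 public:
  explicit TensorDataMock(DbgDataType type) : data_type_(type) {}
  std::string GetTypeString() const {
    const std::map<DbgDataType, std::string> kDbgDataTypeToStringMap = {
      {DT_BOOL, "bool"}, {DT_INT32, "int32"}, {DT_FLOAT32, "float32"}};
    auto iter = kDbgDataTypeToStringMap.find(data_type_);
    return iter == kDbgDataTypeToStringMap.end() ? std::string() : iter->second;
  }

 private:
  DbgDataType data_type_;
};

int main() {
  TensorDataMock data(DT_FLOAT32);
  std::string type = data.GetTypeString();
  if (type.empty()) {
    type = "unsupported";  // caller-side fallback, mirroring DumpTensorStatsToFile
  }
  std::cout << type << '\n';
  return 0;
}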
@@ -438,6 +456,7 @@ class TensorData {
   std::string time_stamp_;
 
 #ifdef ONLINE_DBG_MODE
+  std::string format_{""};
   mindspore::tensor::TensorPtr tensor_ptr_{nullptr};
 #endif
 };
@@ -244,29 +244,20 @@ class TensorLoader {
    * Runtime category: Old runtime, MindRT.
    * Description: Load tensor data from debugger backend cache (tensor_list_map_) and dump to file in npy format.
    */
-  bool DumpTensorToFile(const std::string &filepath, bool trans_flag, const std::string &host_fmt,
-                        const std::string &addr_format, const std::string &tensor_name, size_t slot,
-                        const std::vector<int64_t> &host_shape, TypeId host_type) {
+  bool DumpTensorToFile(const std::string &filepath, const std::string &tensor_name, size_t slot) {
     if (filepath.empty()) {
       MS_LOG(ERROR) << "Dump file path is null!";
       return false;
     }
-    std::string path = "";
-    if (trans_flag) {
-      path = filepath + '.' + host_fmt;
-    } else {
-      path = filepath + '.' + addr_format;
-    }
-
-    MS_LOG(INFO) << "Dump path is " << path;
 
     std::string tensor_loader_name = tensor_name + ":" + std::to_string(slot);
     auto iter = tensor_list_map_.find(tensor_loader_name);
     if (iter != tensor_list_map_.end()) {
       std::shared_ptr<TensorData> node = iter->second;
-      size_t host_size = node->GetByteSize();
+      std::string path = filepath + '.' + node->GetFormat();
 
-      return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size, host_shape, host_type);
+      return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), node->GetByteSize(), node->GetShape(),
+                                        StringToTypeId(node->GetTypeString()));
     }
     MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map_";
     return false;
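The npy suffix is now taken from the format recorded on the cached tensor instead of being derived from trans_flag/host_fmt arguments. A minimal sketch of the naming rule, assuming a hypothetical helper name:

#include <iostream>
#include <string>

// Hypothetical helper mirroring the new naming in TensorLoader::DumpTensorToFile: the cached
// tensor already knows its format, so the caller no longer passes trans_flag or host_fmt.
std::string MakeDumpPath(const std::string &filepath, const std::string &recorded_format) {
  return filepath + '.' + recorded_format;
}

int main() {
  // trans_flag=true loads record the host format; raw loads record the device format.
  std::cout << MakeDumpPath("/dump/Conv2D.output.0", "DefaultFormat") << '\n';
  std::cout << MakeDumpPath("/dump/Conv2D.output.0", "FRACTAL_Z") << '\n';
  return 0;
}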
@@ -647,9 +647,10 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
  * Runtime category: Old runtime, MindRT.
  * Description: Load tensor to host and create tensor_data object for the loaded tensor.
  */
-bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &,
-                                        const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
-                                        uint32_t root_graph_id, bool force_update) const {
+bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order,
+                                        const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
+                                        size_t slot, bool keep_prev, uint32_t root_graph_id, bool force_update,
+                                        bool trans_flag) const {
   bool ret = false;
   auto debugger = Debugger::GetInstance();
   MS_EXCEPTION_IF_NULL(debugger);
@@ -671,9 +672,14 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
   mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
   MS_EXCEPTION_IF_NULL(out_tensor);
   size_t host_size = out_tensor->data().nbytes();
-  auto ret_sync = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
+  bool ret_sync = false;
+  if (trans_flag) {
+    ret_sync = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
+  } else {
+    ret_sync = SyncDeviceToHost(host_size, out_tensor->data_c());
+  }
   if (!ret_sync) {
-    MS_LOG(ERROR) << "Copy device mem to host failed";
+    MS_LOG(ERROR) << "Convert format or Copy device mem to host failed";
     return ret;
   }
   MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
@@ -683,7 +689,11 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
   tensor_data->SetType((unsigned int)host_type);
   tensor_data->SetShape(out_tensor->shape());
   tensor_data->SetRootGraphId(root_graph_id);
+  std::string tensor_format = trans_flag ? host_fmt : format_;
+  tensor_data->SetFormat(tensor_format);
   ret = debugger->LoadNewTensor(tensor_data, keep_prev);
+  MS_LOG(INFO) << "Load tensor '" << tensor_name << "' into debugger tensor loader successfully: format("
+               << tensor_format << ")";
   return ret;
 }
 #endif
@@ -62,7 +62,7 @@ class AscendDeviceAddress : public DeviceAddress {
 #ifdef ENABLE_DEBUGGER
   bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                      const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
-                     uint32_t root_graph_id, bool force_update) const override;
+                     uint32_t root_graph_id, bool force_update, bool trans_flag) const override;
 #endif
 
  private:
@@ -185,7 +185,7 @@ GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); }
 #ifdef ENABLE_DEBUGGER
 bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                                      const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
-                                     uint32_t root_graph_id, bool force_update) const {
+                                     uint32_t root_graph_id, bool force_update, bool) const {
   bool ret = false;
   if (size_ == 0) {
     return true;
@@ -219,6 +219,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
   tensor_data->SetType((unsigned int)host_type);
   tensor_data->SetShape(out_tensor->shape());
   tensor_data->SetRootGraphId(root_graph_id);
+  tensor_data->SetFormat(host_fmt);
   ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev);
   MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
   return ret;
@@ -56,7 +56,7 @@ class GPUDeviceAddress : public DeviceAddress {
 #ifdef ENABLE_DEBUGGER
   bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                      const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
-                     uint32_t root_graph_id, bool force_update) const override;
+                     uint32_t root_graph_id, bool force_update, bool trans_flag) const override;
 #endif
 
  private:
@@ -183,7 +183,8 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
       auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
       string input_tensor_name = input_kernel_name + ':' + "0";
       ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
-      auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, 0, false);
+      auto ret =
+        gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, 0, false, true);
       if (!ret) {
         MS_LOG(ERROR) << "LoadMemToHost:"
                       << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
@@ -210,7 +211,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
       auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
       string tensor_name = kernel_name + ':' + std::to_string(j);
       ShapeVector int_shapes = trans::GetRuntimePaddingShape(kernel, j);
-      auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, 0, false);
+      auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, 0, false, true);
       if (!ret) {
         MS_LOG(ERROR) << "LoadMemToHost:"
                       << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -141,7 +141,7 @@ class DeviceAddress : public mindspore::DeviceSync {
 #ifdef ENABLE_DEBUGGER
   virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
                              const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
-                             uint32_t root_graph_id, bool force_update) const {
+                             uint32_t root_graph_id, bool force_update, bool trans_flag) const {
     return true;
   }
 #endif