Improve performance, keep tensor state consistent, fix recheck, and check weights at step end

This commit is contained in:
Harshvardhan Gupta 2020-12-08 12:00:31 -05:00
parent 5a35e9c56e
commit dd0084c52b
12 changed files with 158 additions and 116 deletions

View File

@ -1003,18 +1003,9 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs)
void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph);
#ifdef ENABLE_DEBUGGER
if (debugger_->DebuggerBackendEnabled()) {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
// TensorData will be freed up here
debugger_->EmptyTensor();
uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
debugger_->SetTensorLoaderIterNum(++iter_num);
(void)runtime_instance->LoadData(kernel_graph.get());
debugger_->EmptyPrevTensor();
}
#endif
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
(void)runtime_instance->LoadData(kernel_graph.get());
MS_LOG(INFO) << "Finish!";
}

View File

@ -360,7 +360,9 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
SyncValueNodeDeviceAddr(kernel_graph);
// Load input data from user input
LoadInputData(kernel_graph, inputs);
PreIterationDbg(kernel_graph);
if (debugger_) {
debugger_->PreExecute(kernel_graph, graph_sum_);
}
#if ENABLE_CPU && ENABLE_GPU
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
@ -372,7 +374,6 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
for (int64_t i = 0; i < loopsize; i++) {
Execute(kernel_graph);
}
PostLoadTensor(kernel_graph);
// In pynative mode, device addresses of tensors in value nodes need be clean.
CleanValueNodeDeviceAddr(kernel_graph);
// Summary
@ -443,13 +444,6 @@ bool GPUSession::DumpDataEnabledIteration() const {
return runtime_instance->DumpDataEnabledIteration();
}
void GPUSession::PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
if (debugger_) {
debugger_->PreExecute(kernel_graph, graph_sum_);
}
PreLoadTensor(kernel_graph);
}
void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
bool dump_enabled = DumpDataEnabledIteration();
// debug used for dump
@ -463,30 +457,6 @@ void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_gra
}
}
void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
bool dump_enabled = DumpDataEnabledIteration();
if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
return;
}
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
debugger_->EmptyTensor();
uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
debugger_->SetTensorLoaderIterNum(++iter_num);
}
void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
bool dump_enabled = DumpDataEnabledIteration();
if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
return;
}
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
debugger_->EmptyPrevTensor();
}
void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);

View File

@ -75,14 +75,8 @@ class GPUSession : public SessionBasic {
bool DumpDataEnabledIteration() const;
void PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void CleanValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;

View File

@ -66,7 +66,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
std::vector<std::vector<parameter_t>> *parameters,
std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows,
const std::vector<std::shared_ptr<TensorData>> &tensor_list,
const bool init_dbg_suspend) {
const bool init_dbg_suspend, const bool step_end, const bool recheck) {
std::lock_guard<std::mutex> lg(lock_);
if (watchpoint_table.empty()) return;
@ -75,13 +75,26 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':'));
const auto tensor_slot = std::to_string(tensor->GetSlot());
mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
// no elements to analyze
if (tensor_ptr->DataSize() == 0) continue;
int tensor_dtype = tensor_ptr->data_type_c();
std::vector<watchpoint_t> watchpoints_to_check;
std::string qualified_tensor_name;
for (auto w_table_item : watchpoint_table) {
auto wp = std::get<1>(w_table_item);
if (wp.condition.type == INIT && !init_dbg_suspend) continue;
// check ONLY init conditions on initial suspended state.
// skip other conditions on initial suspended state
// skip init condition on all the other states
if ((wp.condition.type == INIT) ^ init_dbg_suspend) continue;
if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue;
// check change conditions only on step end.
if (wp.change_condition() && !step_end) continue;
// if recheck, ignore the cache results and reanalyze everything.
// if not a recheck, check only unanalyzed tensors
if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
if (!found.empty()) {
qualified_tensor_name = found;
@ -174,6 +187,10 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
error_code = std::get<1>(item);
parameter_list = std::get<2>(item);
}
// add analyzed tensor to cache
if (!recheck) {
wp_id_cache[tensor_name].insert(wp.id);
}
if (is_hit || error_code) {
name->push_back(qualified_tensor_name);
@ -238,28 +255,6 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
}
}
void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list,
const CNodePtr &kernel) {
if (kernel) {
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = kernel->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
auto found_dot = input_kernel_name.find_last_of('.');
if (found_dot != std::string::npos &&
(input_kernel_name.substr(found_dot + 1) == "weight" || input_kernel_name.substr(found_dot + 1) == "bias")) {
std::string locate_tensor = input_kernel_name + ":0";
std::map<std::string, std::shared_ptr<TensorData>> tensor_map = tensor_loader_->GetTensorMap();
std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
iter = tensor_map.find(locate_tensor);
if (iter != tensor_map.end()) {
tensor_list->push_back(iter->second);
}
}
}
}
}
void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }
std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }
@ -292,4 +287,32 @@ std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::Get
return watchpoint_table;
}
void DebugServices::ResetLoadedTensors() {
wp_id_cache.clear();
MS_LOG(INFO) << "Resetting loaded tensors";
tensor_loader_->MoveParametersCurrentToPrev();
tensor_loader_->EmptyCurrentTensor();
// will move parameters from previous to current map
tensor_loader_->SwapCurrentPrev();
}
std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) {
MS_EXCEPTION_IF_NULL(kernel);
std::vector<std::shared_ptr<TensorData>> result;
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
auto kernel_name = kernel->fullname_with_scope();
for (size_t j = 0; j < output_size; ++j) {
auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
if (tensor) result.push_back(tensor);
}
return result;
}
bool DebugServices::TensorExistsInCurrent(std::string tensor_name) {
return tensor_loader_->TensorExistsInCurrent(tensor_name);
}
void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) {
tensor_loader_->MoveTensorCurrentToPrev(tensor_name);
}
} // namespace mindspore

View File

@ -22,6 +22,7 @@
#include <memory>
#include <tuple>
#include <unordered_map>
#include <set>
#include <mutex>
#include <map>
#include <limits>
@ -160,6 +161,10 @@ class DebugServices {
bool range_enabled() const {
return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled);
}
bool change_condition() const {
return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL || condition.type == NOT_CHANGED;
}
} watchpoint_t;
void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
@ -171,7 +176,8 @@ class DebugServices {
void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows,
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend);
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend,
const bool step_end, const bool recheck);
void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
@ -181,8 +187,6 @@ class DebugServices {
bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel);
void EmptyTensor();
std::vector<std::shared_ptr<TensorData>> GetTensor() const;
@ -205,9 +209,19 @@ class DebugServices {
std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
void ResetLoadedTensors();
std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);
bool TensorExistsInCurrent(std::string tensor_name);
void MoveTensorCurrentToPrev(std::string tensor_name);
private:
std::mutex lock_;
// to keep track of watchpoints that have been checked already for a tensor in current step
std::unordered_map<std::string, std::set<int32_t>> wp_id_cache;
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
TensorLoader *tensor_loader_;

View File

@ -313,20 +313,16 @@ void Debugger::PostExecute() {
}
if (debugger_->DebuggerBackendEnabled()) {
// analyze tensor data and send the watchpoints been hit
if (run_level_ == "node") {
MS_LOG(INFO) << "Debugger is in node level mode ";
return;
}
if (debugger_enabled_ && !is_dataset_graph_) {
if (device_target_ != kGPUDevice) {
num_step_++;
MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
SendWatchpoints(CheckWatchpoints());
CommandLoop();
} else {
CommandLoop();
}
MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
SendWatchpoints(CheckWatchpoints());
CommandLoop();
}
// Only keep parameters in the current map
debug_services_->ResetLoadedTensors();
}
}
@ -596,7 +592,7 @@ void Debugger::CommandLoop() {
MS_LOG(INFO) << "RunCMD";
if (GetRunLevel(reply) == "recheck") {
MS_LOG(INFO) << "rechecking all watchpoints";
SendWatchpoints(CheckWatchpoints());
SendWatchpoints(CheckWatchpoints("", nullptr, true));
} else {
// no longer the initial suspension.
initial_suspend_ = false;
@ -705,9 +701,6 @@ void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCon
return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()};
});
debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list);
if (initial_suspend_ &&
static_cast<DebugServices::CONDITION_TYPE>(condition.condition()) == DebugServices::CONDITION_TYPE::INIT)
SendWatchpoints(CheckWatchpoints());
}
void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }
@ -780,7 +773,8 @@ void Debugger::Exit() {
}
}
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) {
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,
bool recheck) {
std::vector<std::string> name;
std::vector<std::string> slot;
std::vector<int> condition;
@ -795,11 +789,10 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
if (watchnode.empty()) {
tensor_list = debug_services_->GetTensor();
} else {
tensor_list = debug_services_->GetNodeTensorMap(watchnode);
debug_services_->AddWeightsBiasInputs(&tensor_list, kernel);
tensor_list = debug_services_->GetNodeTensor(kernel);
}
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, &parameters, &error_codes, overflow_ops,
tensor_list, initial_suspend_);
tensor_list, initial_suspend_, watchnode.empty(), recheck);
std::list<WatchpointHit> hits;
for (unsigned int i = 0; i < name.size(); i++) {
WatchpointHit hit;
@ -1045,7 +1038,7 @@ std::vector<std::string> Debugger::CheckOpOverflow() {
}
closedir(d);
if (op_names.size()) {
if (!op_names.empty()) {
MS_LOG(ERROR) << "These operation overflows are detected " << op_names;
}
@ -1091,12 +1084,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
return;
}
bool keep_prev;
if (anf_node->isa<Parameter>()) {
keep_prev = true;
} else {
keep_prev = false;
}
// for parameters and value nodes, set its execution order to be 0;
int exec_order = 0;
std::string node_name = anf_node->fullname_with_scope();
@ -1114,6 +1101,13 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
[](size_t inner_item) { return SizeToInt(inner_item); });
bool keep_prev;
if (anf_node->isa<Parameter>()) {
keep_prev = true;
debug_services_->MoveTensorCurrentToPrev(tensor_name);
} else {
keep_prev = false;
}
bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
@ -1123,9 +1117,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
void Debugger::LoadParametersAndConst() {
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
(device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
return;
MS_EXCEPTION_IF_NULL(graph_ptr_);
// load parameters
MS_LOG(INFO) << "Start to load Parameters!";
@ -1199,5 +1190,8 @@ void Debugger::ClearCurrentData() {
if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
debug_services_->EmptyCurrentTensor();
}
bool Debugger::TensorExistsInCurrent(std::string tensor_name) {
return debug_services_->TensorExistsInCurrent(tensor_name);
}
} // namespace mindspore

View File

@ -145,6 +145,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
std::list<KernelGraphPtr> GetGraphPtrList() { return graph_ptr_list_; }
bool TensorExistsInCurrent(std::string tensor_name);
private:
// private constructor for singleton
Debugger();
@ -197,7 +199,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// analyze tensors and check watchpoint conditions
// return names of tensors and what condition they hit
std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(),
const CNodePtr &kernel = NULL);
const CNodePtr &kernel = nullptr, bool recheck = false);
// send watchpoints that hit
void SendWatchpoints(const std::list<WatchpointHit> &points);

View File

@ -33,6 +33,44 @@ class TensorLoader {
~TensorLoader() { EmptyTensor(); }
void MoveTensorCurrentToPrev(std::string tensor_name) {
auto handle = tensor_list_map.extract(tensor_name);
if (!handle.empty()) {
MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
prev_tensor_list_map.insert(std::move(handle));
}
}
void SwapCurrentPrev() { tensor_list_map.swap(prev_tensor_list_map); }
bool TensorExistsInCurrent(std::string tensor_name) {
return tensor_list_map.find(tensor_name) != tensor_list_map.end();
}
// only parameters will return true
bool PrevTensorExistsInCurrent(std::string tensor_name) { return TensorExistsInCurrent(tensor_name + ":prev"); }
void MoveParametersCurrentToPrev() {
MS_LOG(INFO) << "Moving parameters from current map to previous map";
auto iter = tensor_list_map.begin();
while (iter != tensor_list_map.end()) {
auto key = iter->first;
if (PrevTensorExistsInCurrent(key)) {
// :prev tensor only exists for parameter. Move it to prev
++iter;
MoveTensorCurrentToPrev(key);
} else {
++iter;
}
}
}
bool IsPrevTensor(std::string tensor_name) {
const std::string suffix = ":prev";
if (tensor_name.length() <= suffix.length()) return false;
return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
}
bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
std::lock_guard<std::mutex> lg(lock_);
if (keep_prev) {
@ -43,20 +81,32 @@ class TensorLoader {
tensor_list_map.insert(std::move(handle));
}
}
tensor_list.push_back(tensor);
tensor_list_map[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value
auto node_name = tensor->GetName();
node_name = node_name.substr(0, node_name.find_first_of(":"));
node_tensor_map.insert({node_name, tensor});
return true;
}
std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; }
std::vector<std::shared_ptr<TensorData>> GetTensor() {
std::vector<std::shared_ptr<TensorData>> tensor_list;
for (auto &it : tensor_list_map) {
if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second);
}
return tensor_list;
}
std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) {
auto iter = tensor_list_map.find(tensor_name);
if (iter != tensor_list_map.end()) return iter->second;
return nullptr;
}
uint32_t GetIterNum() { return iter_num; }
std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }
std::shared_ptr<TensorData> GetPrevTensor(std::string tensor_name) {
std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) {
return tensor_list_map[tensor_name + ":prev"];
}
@ -91,14 +141,13 @@ class TensorLoader {
prev_tensor_list_map.clear();
node_tensor_map.clear();
tensor_list_map.swap(prev_tensor_list_map);
tensor_list.clear();
}
void EmptyPrevTensor() { prev_tensor_list_map.clear(); }
void EmptyCurrentTensor() {
tensor_list_map.clear();
tensor_list.clear();
node_tensor_map.clear();
}
void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }
@ -142,7 +191,6 @@ class TensorLoader {
}
private:
std::vector<std::shared_ptr<TensorData>> tensor_list;
std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;
std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map;
std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map;

View File

@ -674,6 +674,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
size_t slot, bool keep_prev) const {
bool ret = false;
if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
return true;
}
// TensorData is freed up in AscendSession class
auto tensor_data = std::make_shared<mindspore::TensorData>();
tensor_data->SetName(tensor_name);

View File

@ -296,8 +296,6 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_DEBUGGER
MS_LOG(INFO) << "Start load step";
uint32_t cur_iter = 0;
MS_LOG(INFO) << "Cur iter is " << cur_iter;
for (auto graph_ptr : debugger_->GetGraphPtrList()) {
debugger_->SetGraphPtr(graph_ptr);
// load output

View File

@ -87,6 +87,11 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
return true;
}
if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
return true;
}
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
size_t host_size = out_tensor->data().nbytes();
auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());

View File

@ -154,8 +154,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
std::vector<int> real_outputs;
real_outputs = CheckRealOutput(node_name, output_size);
for (std::vector<int>::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) {
auto j = *it;
for (int j : real_outputs) {
auto addr = kernel_outputs[j];
auto type = AnfAlgo::GetOutputInferDataType(kernel, j);
auto format = kOpFormat_DEFAULT;