forked from mindspore-Ecosystem/mindspore
improve perf, keep consistent tensor state, fix recheck, check weights at step end
This commit is contained in:
parent
5a35e9c56e
commit
dd0084c52b
|
@ -1003,18 +1003,9 @@ void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs)
|
|||
void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
MS_LOG(INFO) << "Start!";
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
if (debugger_->DebuggerBackendEnabled()) {
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
// TensorData will be freed up here
|
||||
debugger_->EmptyTensor();
|
||||
uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
|
||||
debugger_->SetTensorLoaderIterNum(++iter_num);
|
||||
(void)runtime_instance->LoadData(kernel_graph.get());
|
||||
debugger_->EmptyPrevTensor();
|
||||
}
|
||||
#endif
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
(void)runtime_instance->LoadData(kernel_graph.get());
|
||||
MS_LOG(INFO) << "Finish!";
|
||||
}
|
||||
|
||||
|
|
|
@ -360,7 +360,9 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
|
|||
SyncValueNodeDeviceAddr(kernel_graph);
|
||||
// Load input data from user input
|
||||
LoadInputData(kernel_graph, inputs);
|
||||
PreIterationDbg(kernel_graph);
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(kernel_graph, graph_sum_);
|
||||
}
|
||||
#if ENABLE_CPU && ENABLE_GPU
|
||||
// Initialize parameter server
|
||||
InitPSParamAndOptim(kernel_graph, inputs);
|
||||
|
@ -372,7 +374,6 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
|
|||
for (int64_t i = 0; i < loopsize; i++) {
|
||||
Execute(kernel_graph);
|
||||
}
|
||||
PostLoadTensor(kernel_graph);
|
||||
// In pynative mode, device addresses of tensors in value nodes need to be cleaned.
|
||||
CleanValueNodeDeviceAddr(kernel_graph);
|
||||
// Summary
|
||||
|
@ -443,13 +444,6 @@ bool GPUSession::DumpDataEnabledIteration() const {
|
|||
return runtime_instance->DumpDataEnabledIteration();
|
||||
}
|
||||
|
||||
void GPUSession::PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(kernel_graph, graph_sum_);
|
||||
}
|
||||
PreLoadTensor(kernel_graph);
|
||||
}
|
||||
|
||||
void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
bool dump_enabled = DumpDataEnabledIteration();
|
||||
// debug used for dump
|
||||
|
@ -463,30 +457,6 @@ void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_gra
|
|||
}
|
||||
}
|
||||
|
||||
void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
bool dump_enabled = DumpDataEnabledIteration();
|
||||
if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
|
||||
return;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
debugger_->EmptyTensor();
|
||||
uint32_t iter_num = debugger_->GetTensorLoaderIterNum();
|
||||
debugger_->SetTensorLoaderIterNum(++iter_num);
|
||||
}
|
||||
|
||||
void GPUSession::PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
bool dump_enabled = DumpDataEnabledIteration();
|
||||
if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
|
||||
return;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
debugger_->EmptyPrevTensor();
|
||||
}
|
||||
|
||||
void GPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
|
|
|
@ -75,14 +75,8 @@ class GPUSession : public SessionBasic {
|
|||
|
||||
bool DumpDataEnabledIteration() const;
|
||||
|
||||
void PreIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
||||
void PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
||||
void PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
||||
void PostLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
||||
void SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
||||
void CleanValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||
|
|
|
@ -66,7 +66,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
std::vector<std::vector<parameter_t>> *parameters,
|
||||
std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::shared_ptr<TensorData>> &tensor_list,
|
||||
const bool init_dbg_suspend) {
|
||||
const bool init_dbg_suspend, const bool step_end, const bool recheck) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
if (watchpoint_table.empty()) return;
|
||||
|
||||
|
@ -75,13 +75,26 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':'));
|
||||
const auto tensor_slot = std::to_string(tensor->GetSlot());
|
||||
mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
|
||||
// no elements to analyze
|
||||
if (tensor_ptr->DataSize() == 0) continue;
|
||||
int tensor_dtype = tensor_ptr->data_type_c();
|
||||
std::vector<watchpoint_t> watchpoints_to_check;
|
||||
std::string qualified_tensor_name;
|
||||
for (auto w_table_item : watchpoint_table) {
|
||||
auto wp = std::get<1>(w_table_item);
|
||||
if (wp.condition.type == INIT && !init_dbg_suspend) continue;
|
||||
// check ONLY init conditions on initial suspended state.
|
||||
// skip other conditions on initial suspended state
|
||||
// skip init condition on all the other states
|
||||
if ((wp.condition.type == INIT) ^ init_dbg_suspend) continue;
|
||||
|
||||
if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue;
|
||||
|
||||
// check change conditions only on step end.
|
||||
if (wp.change_condition() && !step_end) continue;
|
||||
|
||||
// if recheck, ignore the cache results and reanalyze everything.
|
||||
// if not a recheck, check only unanalyzed tensors
|
||||
if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue;
|
||||
std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot);
|
||||
if (!found.empty()) {
|
||||
qualified_tensor_name = found;
|
||||
|
@ -174,6 +187,10 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
error_code = std::get<1>(item);
|
||||
parameter_list = std::get<2>(item);
|
||||
}
|
||||
// add analyzed tensor to cache
|
||||
if (!recheck) {
|
||||
wp_id_cache[tensor_name].insert(wp.id);
|
||||
}
|
||||
|
||||
if (is_hit || error_code) {
|
||||
name->push_back(qualified_tensor_name);
|
||||
|
@ -238,28 +255,6 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
|
|||
}
|
||||
}
|
||||
|
||||
void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list,
|
||||
const CNodePtr &kernel) {
|
||||
if (kernel) {
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
|
||||
for (size_t j = 0; j < input_size; ++j) {
|
||||
auto input_kernel = kernel->input(j + 1);
|
||||
std::string input_kernel_name = input_kernel->fullname_with_scope();
|
||||
auto found_dot = input_kernel_name.find_last_of('.');
|
||||
if (found_dot != std::string::npos &&
|
||||
(input_kernel_name.substr(found_dot + 1) == "weight" || input_kernel_name.substr(found_dot + 1) == "bias")) {
|
||||
std::string locate_tensor = input_kernel_name + ":0";
|
||||
std::map<std::string, std::shared_ptr<TensorData>> tensor_map = tensor_loader_->GetTensorMap();
|
||||
std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
|
||||
iter = tensor_map.find(locate_tensor);
|
||||
if (iter != tensor_map.end()) {
|
||||
tensor_list->push_back(iter->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }
|
||||
|
@ -292,4 +287,32 @@ std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::Get
|
|||
return watchpoint_table;
|
||||
}
|
||||
|
||||
// Resets per-step debugger state: forgets which watchpoints were already
// evaluated and rebuilds the current tensor map so that it holds only the
// tensors MoveParametersCurrentToPrev selected.
void DebugServices::ResetLoadedTensors() {
  MS_LOG(INFO) << "Resetting loaded tensors";
  wp_id_cache.clear();

  // Order matters: stash the parameters in the previous map, drop whatever is
  // left in the current map, then swap so the parameters end up in current.
  tensor_loader_->MoveParametersCurrentToPrev();
  tensor_loader_->EmptyCurrentTensor();
  tensor_loader_->SwapCurrentPrev();
}
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) {
|
||||
MS_EXCEPTION_IF_NULL(kernel);
|
||||
std::vector<std::shared_ptr<TensorData>> result;
|
||||
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
|
||||
auto kernel_name = kernel->fullname_with_scope();
|
||||
for (size_t j = 0; j < output_size; ++j) {
|
||||
auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
|
||||
auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
|
||||
if (tensor) result.push_back(tensor);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
bool DebugServices::TensorExistsInCurrent(std::string tensor_name) {
|
||||
return tensor_loader_->TensorExistsInCurrent(tensor_name);
|
||||
}
|
||||
// Forwards to the tensor loader: moves the entry named `tensor_name` from the
// loader's current tensor map to its previous tensor map.
void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) {
  tensor_loader_->MoveTensorCurrentToPrev(tensor_name);
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
|
@ -160,6 +161,10 @@ class DebugServices {
|
|||
bool range_enabled() const {
|
||||
return condition.type == RANGE && (!parameter_list[0].disabled || !parameter_list[1].disabled);
|
||||
}
|
||||
|
||||
bool change_condition() const {
|
||||
return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL || condition.type == NOT_CHANGED;
|
||||
}
|
||||
} watchpoint_t;
|
||||
|
||||
void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
|
||||
|
@ -171,7 +176,8 @@ class DebugServices {
|
|||
void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
|
||||
std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
|
||||
std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows,
|
||||
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend);
|
||||
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend,
|
||||
const bool step_end, const bool recheck);
|
||||
|
||||
void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
|
||||
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
|
||||
|
@ -181,8 +187,6 @@ class DebugServices {
|
|||
|
||||
bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
|
||||
|
||||
void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel);
|
||||
|
||||
void EmptyTensor();
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> GetTensor() const;
|
||||
|
@ -205,9 +209,19 @@ class DebugServices {
|
|||
|
||||
std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
|
||||
|
||||
void ResetLoadedTensors();
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);
|
||||
|
||||
bool TensorExistsInCurrent(std::string tensor_name);
|
||||
|
||||
void MoveTensorCurrentToPrev(std::string tensor_name);
|
||||
|
||||
private:
|
||||
std::mutex lock_;
|
||||
|
||||
// to keep track of watchpoints that have been checked already for a tensor in current step
|
||||
std::unordered_map<std::string, std::set<int32_t>> wp_id_cache;
|
||||
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
|
||||
|
||||
TensorLoader *tensor_loader_;
|
||||
|
|
|
@ -313,20 +313,16 @@ void Debugger::PostExecute() {
|
|||
}
|
||||
if (debugger_->DebuggerBackendEnabled()) {
|
||||
// analyze tensor data and send the watchpoints that have been hit
|
||||
if (run_level_ == "node") {
|
||||
MS_LOG(INFO) << "Debugger is in node level mode ";
|
||||
return;
|
||||
}
|
||||
if (debugger_enabled_ && !is_dataset_graph_) {
|
||||
if (device_target_ != kGPUDevice) {
|
||||
num_step_++;
|
||||
MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
|
||||
SendWatchpoints(CheckWatchpoints());
|
||||
CommandLoop();
|
||||
} else {
|
||||
CommandLoop();
|
||||
}
|
||||
MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
|
||||
SendWatchpoints(CheckWatchpoints());
|
||||
CommandLoop();
|
||||
}
|
||||
// Only keep parameters in the current map
|
||||
debug_services_->ResetLoadedTensors();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -596,7 +592,7 @@ void Debugger::CommandLoop() {
|
|||
MS_LOG(INFO) << "RunCMD";
|
||||
if (GetRunLevel(reply) == "recheck") {
|
||||
MS_LOG(INFO) << "rechecking all watchpoints";
|
||||
SendWatchpoints(CheckWatchpoints());
|
||||
SendWatchpoints(CheckWatchpoints("", nullptr, true));
|
||||
} else {
|
||||
// no longer the initial suspension.
|
||||
initial_suspend_ = false;
|
||||
|
@ -705,9 +701,6 @@ void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCon
|
|||
return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()};
|
||||
});
|
||||
debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list);
|
||||
if (initial_suspend_ &&
|
||||
static_cast<DebugServices::CONDITION_TYPE>(condition.condition()) == DebugServices::CONDITION_TYPE::INIT)
|
||||
SendWatchpoints(CheckWatchpoints());
|
||||
}
|
||||
|
||||
void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }
|
||||
|
@ -780,7 +773,8 @@ void Debugger::Exit() {
|
|||
}
|
||||
}
|
||||
|
||||
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) {
|
||||
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,
|
||||
bool recheck) {
|
||||
std::vector<std::string> name;
|
||||
std::vector<std::string> slot;
|
||||
std::vector<int> condition;
|
||||
|
@ -795,11 +789,10 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
|
|||
if (watchnode.empty()) {
|
||||
tensor_list = debug_services_->GetTensor();
|
||||
} else {
|
||||
tensor_list = debug_services_->GetNodeTensorMap(watchnode);
|
||||
debug_services_->AddWeightsBiasInputs(&tensor_list, kernel);
|
||||
tensor_list = debug_services_->GetNodeTensor(kernel);
|
||||
}
|
||||
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops,
|
||||
tensor_list, initial_suspend_);
|
||||
tensor_list, initial_suspend_, watchnode.empty(), recheck);
|
||||
std::list<WatchpointHit> hits;
|
||||
for (unsigned int i = 0; i < name.size(); i++) {
|
||||
WatchpointHit hit;
|
||||
|
@ -1045,7 +1038,7 @@ std::vector<std::string> Debugger::CheckOpOverflow() {
|
|||
}
|
||||
closedir(d);
|
||||
|
||||
if (op_names.size()) {
|
||||
if (!op_names.empty()) {
|
||||
MS_LOG(ERROR) << "These operation overflows are detected " << op_names;
|
||||
}
|
||||
|
||||
|
@ -1091,12 +1084,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
|
|||
if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
|
||||
return;
|
||||
}
|
||||
bool keep_prev;
|
||||
if (anf_node->isa<Parameter>()) {
|
||||
keep_prev = true;
|
||||
} else {
|
||||
keep_prev = false;
|
||||
}
|
||||
// for parameters and value nodes, set its execution order to be 0;
|
||||
int exec_order = 0;
|
||||
std::string node_name = anf_node->fullname_with_scope();
|
||||
|
@ -1114,6 +1101,13 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
|
|||
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
bool keep_prev;
|
||||
if (anf_node->isa<Parameter>()) {
|
||||
keep_prev = true;
|
||||
debug_services_->MoveTensorCurrentToPrev(tensor_name);
|
||||
} else {
|
||||
keep_prev = false;
|
||||
}
|
||||
bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "LoadMemToHost:"
|
||||
|
@ -1123,9 +1117,6 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
|
|||
|
||||
void Debugger::LoadParametersAndConst() {
|
||||
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
|
||||
if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
|
||||
(device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
|
||||
return;
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr_);
|
||||
// load parameters
|
||||
MS_LOG(INFO) << "Start to load Parameters!";
|
||||
|
@ -1199,5 +1190,8 @@ void Debugger::ClearCurrentData() {
|
|||
if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
|
||||
debug_services_->EmptyCurrentTensor();
|
||||
}
|
||||
bool Debugger::TensorExistsInCurrent(std::string tensor_name) {
|
||||
return debug_services_->TensorExistsInCurrent(tensor_name);
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -145,6 +145,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
|
||||
std::list<KernelGraphPtr> GetGraphPtrList() { return graph_ptr_list_; }
|
||||
|
||||
bool TensorExistsInCurrent(std::string tensor_name);
|
||||
|
||||
private:
|
||||
// private constructor for singleton
|
||||
Debugger();
|
||||
|
@ -197,7 +199,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// analyze tensors and check watchpoint conditions
|
||||
// return names of tensors and what condition they hit
|
||||
std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(),
|
||||
const CNodePtr &kernel = NULL);
|
||||
const CNodePtr &kernel = nullptr, bool recheck = false);
|
||||
|
||||
// send watchpoints that hit
|
||||
void SendWatchpoints(const std::list<WatchpointHit> &points);
|
||||
|
|
|
@ -33,6 +33,44 @@ class TensorLoader {
|
|||
|
||||
~TensorLoader() { EmptyTensor(); }
|
||||
|
||||
void MoveTensorCurrentToPrev(std::string tensor_name) {
|
||||
auto handle = tensor_list_map.extract(tensor_name);
|
||||
if (!handle.empty()) {
|
||||
MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
|
||||
prev_tensor_list_map.insert(std::move(handle));
|
||||
}
|
||||
}
|
||||
|
||||
// Exchanges the contents of the current and previous tensor maps.
void SwapCurrentPrev() {
  tensor_list_map.swap(prev_tensor_list_map);
}
|
||||
|
||||
bool TensorExistsInCurrent(std::string tensor_name) {
|
||||
return tensor_list_map.find(tensor_name) != tensor_list_map.end();
|
||||
}
|
||||
|
||||
// only parameters will return true
|
||||
bool PrevTensorExistsInCurrent(std::string tensor_name) { return TensorExistsInCurrent(tensor_name + ":prev"); }
|
||||
|
||||
void MoveParametersCurrentToPrev() {
|
||||
MS_LOG(INFO) << "Moving parameters from current map to previous map";
|
||||
auto iter = tensor_list_map.begin();
|
||||
while (iter != tensor_list_map.end()) {
|
||||
auto key = iter->first;
|
||||
if (PrevTensorExistsInCurrent(key)) {
|
||||
// :prev tensor only exists for parameter. Move it to prev
|
||||
++iter;
|
||||
MoveTensorCurrentToPrev(key);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when `tensor_name` ends with the ":prev" suffix and has at least
// one character before it. A name that is exactly ":prev", or shorter, is not
// treated as a previous-iteration tensor.
// The name is taken by const reference: this is called once per tensor when
// listing the current tensors, so a by-value parameter copied every name.
bool IsPrevTensor(const std::string &tensor_name) {
  const std::string suffix = ":prev";
  if (tensor_name.length() <= suffix.length()) return false;
  const auto suffix_start = tensor_name.length() - suffix.length();
  return tensor_name.compare(suffix_start, suffix.length(), suffix) == 0;
}
|
||||
|
||||
bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
if (keep_prev) {
|
||||
|
@ -43,20 +81,32 @@ class TensorLoader {
|
|||
tensor_list_map.insert(std::move(handle));
|
||||
}
|
||||
}
|
||||
tensor_list.push_back(tensor);
|
||||
tensor_list_map[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value
|
||||
auto node_name = tensor->GetName();
|
||||
node_name = node_name.substr(0, node_name.find_first_of(":"));
|
||||
node_tensor_map.insert({node_name, tensor});
|
||||
return true;
|
||||
}
|
||||
std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; }
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> GetTensor() {
|
||||
std::vector<std::shared_ptr<TensorData>> tensor_list;
|
||||
for (auto &it : tensor_list_map) {
|
||||
if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second);
|
||||
}
|
||||
return tensor_list;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorData> GetTensor(const std::string &tensor_name) {
|
||||
auto iter = tensor_list_map.find(tensor_name);
|
||||
if (iter != tensor_list_map.end()) return iter->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
uint32_t GetIterNum() { return iter_num; }
|
||||
|
||||
std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }
|
||||
|
||||
std::shared_ptr<TensorData> GetPrevTensor(std::string tensor_name) {
|
||||
std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
|
||||
if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) {
|
||||
return tensor_list_map[tensor_name + ":prev"];
|
||||
}
|
||||
|
@ -91,14 +141,13 @@ class TensorLoader {
|
|||
prev_tensor_list_map.clear();
|
||||
node_tensor_map.clear();
|
||||
tensor_list_map.swap(prev_tensor_list_map);
|
||||
tensor_list.clear();
|
||||
}
|
||||
|
||||
void EmptyPrevTensor() { prev_tensor_list_map.clear(); }
|
||||
|
||||
void EmptyCurrentTensor() {
|
||||
tensor_list_map.clear();
|
||||
tensor_list.clear();
|
||||
node_tensor_map.clear();
|
||||
}
|
||||
|
||||
void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }
|
||||
|
@ -142,7 +191,6 @@ class TensorLoader {
|
|||
}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorData>> tensor_list;
|
||||
std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;
|
||||
std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map;
|
||||
std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map;
|
||||
|
|
|
@ -674,6 +674,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
|
|||
const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
|
||||
size_t slot, bool keep_prev) const {
|
||||
bool ret = false;
|
||||
if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
|
||||
MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
|
||||
return true;
|
||||
}
|
||||
// TensorData is freed up in AscendSession class
|
||||
auto tensor_data = std::make_shared<mindspore::TensorData>();
|
||||
tensor_data->SetName(tensor_name);
|
||||
|
|
|
@ -296,8 +296,6 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
|
|||
MS_EXCEPTION_IF_NULL(graph);
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
MS_LOG(INFO) << "Start load step";
|
||||
uint32_t cur_iter = 0;
|
||||
MS_LOG(INFO) << "Cur iter is " << cur_iter;
|
||||
for (auto graph_ptr : debugger_->GetGraphPtrList()) {
|
||||
debugger_->SetGraphPtr(graph_ptr);
|
||||
// load output
|
||||
|
|
|
@ -87,6 +87,11 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
|
|||
return true;
|
||||
}
|
||||
|
||||
if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
|
||||
MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
|
||||
return true;
|
||||
}
|
||||
|
||||
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
auto ret_rt_memcpy = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
|
||||
|
|
|
@ -154,8 +154,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
std::vector<int> real_outputs;
|
||||
real_outputs = CheckRealOutput(node_name, output_size);
|
||||
|
||||
for (std::vector<int>::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) {
|
||||
auto j = *it;
|
||||
for (int j : real_outputs) {
|
||||
auto addr = kernel_outputs[j];
|
||||
auto type = AnfAlgo::GetOutputInferDataType(kernel, j);
|
||||
auto format = kOpFormat_DEFAULT;
|
||||
|
|
Loading…
Reference in New Issue