From 8f8c7d2e0e7de295a926dcd21d58d7b054bfbd2b Mon Sep 17 00:00:00 2001 From: louei5 Date: Fri, 12 Mar 2021 16:32:17 +0800 Subject: [PATCH] optimize record gpu memory information --- .../ccsrc/backend/session/gpu_session.cc | 3 ++ .../ccsrc/debug/rdr/mem_address_recorder.cc | 45 +++++++++---------- .../ccsrc/debug/rdr/mem_address_recorder.h | 26 ++++++----- mindspore/ccsrc/debug/rdr/recorder_manager.cc | 23 +++++----- mindspore/ccsrc/debug/rdr/recorder_manager.h | 3 +- .../ccsrc/debug/rdr/running_data_recorder.cc | 25 ++++++++--- .../ccsrc/debug/rdr/running_data_recorder.h | 5 ++- .../runtime/device/gpu/gpu_kernel_runtime.cc | 9 ++-- 8 files changed, 81 insertions(+), 58 deletions(-) diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 79087148550..f3ac8f63372 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -375,6 +375,9 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) { std::string name = "graph_build"; DumpGraphParams dump_params = {true, static_cast(kWholeStack)}; mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb"); + auto &kernels = graph->execution_order(); + std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); + mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_SESSION, exec_order_name, kernels); #endif // Get summary nodes. SetSummaryNodes(graph.get()); diff --git a/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc b/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc index 4badeae0714..23d96f4845e 100644 --- a/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/mem_address_recorder.cc @@ -33,23 +33,19 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info) } } // namespace -void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info) { - std::lock_guard lock(mtx_); - std::ostringstream mem_info_stream; - auto inputs = mem_info.inputs_; - mem_info_stream << op_name << std::endl; - mem_info_stream << MemInfo2String("kernel_inputs", *inputs); - auto workspaces = mem_info.workspaces_; - mem_info_stream << MemInfo2String("kernel_workspaces", *workspaces); - auto outputs = mem_info.outputs_; - mem_info_stream << MemInfo2String("kernel_outputs", *outputs); - mem_info_stream << std::endl; +void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) { + if (op_names_.size() <= id) { + return; + } - std::string mem_info_str = mem_info_stream.str(); - mem_info_container_[op_name] = mem_info_str; + std::lock_guard lock(mtx_); + op_names_[id] = op_name; + mem_info_inputs_[id] = *(mem_info.inputs_); + mem_info_workspaces_[id] = *(mem_info.workspaces_); + mem_info_outputs_[id] = *(mem_info.outputs_); } -void MemAddressRecorder::Export() { +void GPUMemAddressRecorder::Export() { auto realpath = GetFileRealPath(); if (!realpath.has_value()) { return; @@ -62,18 +58,19 @@ void MemAddressRecorder::Export() { MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'."; return; } - for (auto &info : mem_info_container_) { - fout << info.second; + std::ostringstream mem_info_stream; + for (size_t i = 0; i < op_names_.size(); i++) { + mem_info_stream << op_names_[i] << std::endl; + auto inputs = mem_info_inputs_[i]; + mem_info_stream << MemInfo2String("kernel_inputs", inputs); + auto workspaces = mem_info_workspaces_[i]; + mem_info_stream << MemInfo2String("kernel_workspaces", workspaces); + auto outputs = mem_info_outputs_[i]; + mem_info_stream << MemInfo2String("kernel_outputs", outputs); + mem_info_stream << std::endl; } + fout << mem_info_stream.str(); fout.close(); ChangeFileMode(file_path, S_IRUSR); } - -void MemAddressRecorder::UpdateInfo(const BaseRecorder &recorder) { - const MemAddressRecorder *mem_recorder = reinterpret_cast(&recorder); - std::map mem_info = mem_recorder->MemInfo(); - for (const auto &info : mem_info) { - mem_info_container_[info.first] = info.second; - } -} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/rdr/mem_address_recorder.h b/mindspore/ccsrc/debug/rdr/mem_address_recorder.h index ec9824c80a1..dfc2cdcb4ce 100644 --- a/mindspore/ccsrc/debug/rdr/mem_address_recorder.h +++ b/mindspore/ccsrc/debug/rdr/mem_address_recorder.h @@ -34,22 +34,28 @@ struct GPUMemInfo { AddressPtrList *workspaces_; AddressPtrList *outputs_; }; -class MemAddressRecorder : public BaseRecorder { +class GPUMemAddressRecorder : public BaseRecorder { public: - MemAddressRecorder() {} - MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} - ~MemAddressRecorder() {} + GPUMemAddressRecorder() {} + GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {} + ~GPUMemAddressRecorder() {} virtual void Export(); - void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info); - void UpdateInfo(const BaseRecorder &recorder); - std::map MemInfo() const { return mem_info_container_; } + void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id); + void Reset(size_t nsize) { + op_names_.resize(nsize); + mem_info_inputs_.resize(nsize); + mem_info_workspaces_.resize(nsize); + mem_info_outputs_.resize(nsize); + } private: mutable std::mutex mtx_; - - std::map mem_info_container_; + std::vector op_names_; + std::vector mem_info_inputs_; + std::vector mem_info_workspaces_; + std::vector mem_info_outputs_; }; -using MemAddressRecorderPtr = std::shared_ptr; +using GPUMemAddressRecorderPtr = std::shared_ptr; } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_ diff --git a/mindspore/ccsrc/debug/rdr/recorder_manager.cc b/mindspore/ccsrc/debug/rdr/recorder_manager.cc index 80e15ee3cbc..4e068ca7c38 100644 --- a/mindspore/ccsrc/debug/rdr/recorder_manager.cc +++ b/mindspore/ccsrc/debug/rdr/recorder_manager.cc @@ -42,7 +42,7 @@ void RecorderManager::UpdateRdrEnable() { updated = true; } -bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &replace) { +bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder) { if (!rdr_enable_) { return false; } @@ -55,20 +55,19 @@ bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool & std::string name = recorder->GetName(); std::pair recorder_key(module, name); std::lock_guard lock(mtx_); - if (replace) { - recorder_container_[recorder_key] = recorder; - return true; - } - std::unordered_map, BaseRecorderPtr, pair_hash>::iterator item = - recorder_container_.find(recorder_key); - if (item == recorder_container_.end()) { - recorder_container_[recorder_key] = recorder; - } else { - recorder_container_[recorder_key]->UpdateInfo(*recorder); - } + recorder_container_[recorder_key] = recorder; return true; } +BaseRecorderPtr RecorderManager::GetRecorder(std::string module, std::string name) { + std::pair recorder_key(module, name); + auto item = recorder_container_.find(recorder_key); + if (item != recorder_container_.end()) { + return item->second; + } + return nullptr; +} + void RecorderManager::TriggerAll() { if (!rdr_enable_) { return; diff --git a/mindspore/ccsrc/debug/rdr/recorder_manager.h b/mindspore/ccsrc/debug/rdr/recorder_manager.h index b3a13ffaba8..2d8b24a06f6 100644 --- a/mindspore/ccsrc/debug/rdr/recorder_manager.h +++ b/mindspore/ccsrc/debug/rdr/recorder_manager.h @@ -62,7 +62,8 @@ class RecorderManager { void UpdateRdrEnable(); bool RdrEnable() const { return rdr_enable_; } - bool RecordObject(const BaseRecorderPtr &recorder, const bool &replace = true); + bool RecordObject(const BaseRecorderPtr &recorder); + BaseRecorderPtr GetRecorder(std::string module, std::string name); void TriggerAll(); void ClearAll(); diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc index d28d08ab6e1..b9f8997358a 100644 --- a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc @@ -123,15 +123,30 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co return ans; } -bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, - const GPUMemInfo &mem_info) { +bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) { if (!mindspore::RecorderManager::Instance().RdrEnable()) { return false; } std::string submodule_name = std::string(GetSubModuleName(module)); - MemAddressRecorderPtr mem_info_recorder = std::make_shared(submodule_name, name); - mem_info_recorder->SaveMemInfo(op_name, mem_info); - bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder), false); + GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared(submodule_name, name); + mem_info_recorder->Reset(nsize); + bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder)); + return ans; +} + +bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, + const GPUMemInfo &mem_info, size_t id) { + if (!mindspore::RecorderManager::Instance().RdrEnable()) { + return false; + } + std::string submodule_name = std::string(GetSubModuleName(module)); + auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name); + bool ans = false; + if (recorder != nullptr) { + auto mem_recorder = std::dynamic_pointer_cast(recorder); + mem_recorder->SaveMemInfo(op_name, mem_info, id); + ans = true; + } return ans; } diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.h b/mindspore/ccsrc/debug/rdr/running_data_recorder.h index 3d76c570c71..b42b6f0e2f4 100644 --- a/mindspore/ccsrc/debug/rdr/running_data_recorder.h +++ b/mindspore/ccsrc/debug/rdr/running_data_recorder.h @@ -52,8 +52,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name, const std::vector &final_exec_order); bool RecordString(SubModuleId module, const std::string &name, const std::string &data); bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector &exec_order); -bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, - const GPUMemInfo &mem_info); +bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize); +bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name, + const GPUMemInfo &mem_info, size_t id); #ifdef ENABLE_D bool RecordTaskDebugInfo(SubModuleId module, const std::string &name, const std::vector &task_debug_info_list); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index 5bf13e92d92..d77c0949da5 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -42,6 +42,7 @@ #endif #ifdef ENABLE_DUMP_IR #include "debug/rdr/running_data_recorder.h" +#include "debug/rdr/recorder_manager.h" #include "debug/rdr/mem_address_recorder.h" #endif @@ -650,8 +651,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo auto &kernels = graph->execution_order(); int exec_order = 1; #ifdef ENABLE_DUMP_IR - std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id()); - mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_KERNEL, exec_order_name, kernels); + std::string name = "mem_address_list"; + mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size()); + size_t id = 0; #endif auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); MS_EXCEPTION_IF_NULL(profiler_inst); @@ -695,9 +697,8 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo } #ifdef ENABLE_DUMP_IR GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs}; - std::string name = "mem_address_list"; std::string op_name = kernel->fullname_with_scope(); - mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info); + mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++); #endif if (!mock) { if (!profiling) {