Optimize recording of GPU memory information

This commit is contained in:
louei5 2021-03-12 16:32:17 +08:00
parent 67b68c1bd2
commit 8f8c7d2e0e
8 changed files with 81 additions and 58 deletions

View File

@ -375,6 +375,9 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
std::string name = "graph_build";
DumpGraphParams dump_params = {true, static_cast<int>(kWholeStack)};
mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb");
auto &kernels = graph->execution_order();
std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id());
mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_SESSION, exec_order_name, kernels);
#endif
// Get summary nodes.
SetSummaryNodes(graph.get());

View File

@ -33,23 +33,19 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info)
}
} // namespace
// NOTE(review): this hunk shows two SaveMemInfo definitions interleaved without +/- markers; presumably the
// MemAddressRecorder lines are the removed version and the GPUMemAddressRecorder lines the added one — confirm
// against the commit.
// MemAddressRecorder variant: takes mtx_, then formats op_name followed by the "kernel_inputs",
// "kernel_workspaces" and "kernel_outputs" address lists (via MemInfo2String) into one string stream.
void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info) {
std::lock_guard<std::mutex> lock(mtx_);
std::ostringstream mem_info_stream;
auto inputs = mem_info.inputs_;
mem_info_stream << op_name << std::endl;
mem_info_stream << MemInfo2String("kernel_inputs", *inputs);
auto workspaces = mem_info.workspaces_;
mem_info_stream << MemInfo2String("kernel_workspaces", *workspaces);
auto outputs = mem_info.outputs_;
mem_info_stream << MemInfo2String("kernel_outputs", *outputs);
mem_info_stream << std::endl;
// GPUMemAddressRecorder variant: stores op_name and copies of the three address lists in slot `id` of the
// per-kernel vectors. A call whose id is not below op_names_.size() returns without recording anything.
void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) {
if (op_names_.size() <= id) {
return;
}
// The two statements below use mem_info_stream and mem_info_container_, i.e. they belong to the
// MemAddressRecorder variant: the formatted text is stored keyed by op_name.
std::string mem_info_str = mem_info_stream.str();
mem_info_container_[op_name] = mem_info_str;
// NOTE(review): in the GPUMemAddressRecorder variant the size check above runs before mtx_ is taken, and
// inputs_/workspaces_/outputs_ are dereferenced without a null check — confirm callers guarantee both.
std::lock_guard<std::mutex> lock(mtx_);
op_names_[id] = op_name;
mem_info_inputs_[id] = *(mem_info.inputs_);
mem_info_workspaces_[id] = *(mem_info.workspaces_);
mem_info_outputs_[id] = *(mem_info.outputs_);
}
void MemAddressRecorder::Export() {
void GPUMemAddressRecorder::Export() {
auto realpath = GetFileRealPath();
if (!realpath.has_value()) {
return;
@ -62,18 +58,19 @@ void MemAddressRecorder::Export() {
MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'.";
return;
}
for (auto &info : mem_info_container_) {
fout << info.second;
std::ostringstream mem_info_stream;
for (size_t i = 0; i < op_names_.size(); i++) {
mem_info_stream << op_names_[i] << std::endl;
auto inputs = mem_info_inputs_[i];
mem_info_stream << MemInfo2String("kernel_inputs", inputs);
auto workspaces = mem_info_workspaces_[i];
mem_info_stream << MemInfo2String("kernel_workspaces", workspaces);
auto outputs = mem_info_outputs_[i];
mem_info_stream << MemInfo2String("kernel_outputs", outputs);
mem_info_stream << std::endl;
}
fout << mem_info_stream.str();
fout.close();
ChangeFileMode(file_path, S_IRUSR);
}
void MemAddressRecorder::UpdateInfo(const BaseRecorder &recorder) {
const MemAddressRecorder *mem_recorder = reinterpret_cast<const MemAddressRecorder *>(&recorder);
std::map<std::string, std::string> mem_info = mem_recorder->MemInfo();
for (const auto &info : mem_info) {
mem_info_container_[info.first] = info.second;
}
}
} // namespace mindspore

View File

@ -34,22 +34,28 @@ struct GPUMemInfo {
AddressPtrList *workspaces_;
AddressPtrList *outputs_;
};
// NOTE(review): this hunk shows the MemAddressRecorder and GPUMemAddressRecorder class lines interleaved without
// +/- markers; presumably the MemAddressRecorder lines are the removed version — confirm against the commit.
// Recorder that keeps the memory address lists of GPU kernels and writes them out through Export().
class MemAddressRecorder : public BaseRecorder {
class GPUMemAddressRecorder : public BaseRecorder {
public:
MemAddressRecorder() {}
MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
~MemAddressRecorder() {}
GPUMemAddressRecorder() {}
// Forwards the module and recorder name to BaseRecorder.
GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
~GPUMemAddressRecorder() {}
virtual void Export();
void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info);
void UpdateInfo(const BaseRecorder &recorder);
// Returns a copy of the op-name -> formatted-text container.
std::map<std::string, std::string> MemInfo() const { return mem_info_container_; }
// Records op_name and the address lists of mem_info at index id.
void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id);
// Resizes the op-name vector and the three address-list vectors to nsize entries.
// NOTE(review): resize keeps entries already stored below nsize and does not take mtx_ — confirm this is only
// called on a freshly constructed recorder.
void Reset(size_t nsize) {
op_names_.resize(nsize);
mem_info_inputs_.resize(nsize);
mem_info_workspaces_.resize(nsize);
mem_info_outputs_.resize(nsize);
}
private:
mutable std::mutex mtx_;
std::map<std::string, std::string> mem_info_container_;
// The four vectors below are resized together by Reset(); entry i of each belongs to the same kernel.
std::vector<std::string> op_names_;
std::vector<AddressPtrList> mem_info_inputs_;
std::vector<AddressPtrList> mem_info_workspaces_;
std::vector<AddressPtrList> mem_info_outputs_;
};
using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>;
using GPUMemAddressRecorderPtr = std::shared_ptr<GPUMemAddressRecorder>;
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_

View File

@ -42,7 +42,7 @@ void RecorderManager::UpdateRdrEnable() {
updated = true;
}
bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &replace) {
bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder) {
if (!rdr_enable_) {
return false;
}
@ -55,20 +55,19 @@ bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &
std::string name = recorder->GetName();
std::pair<std::string, std::string> recorder_key(module, name);
std::lock_guard<std::mutex> lock(mtx_);
if (replace) {
recorder_container_[recorder_key] = recorder;
return true;
}
std::unordered_map<std::pair<std::string, std::string>, BaseRecorderPtr, pair_hash>::iterator item =
recorder_container_.find(recorder_key);
if (item == recorder_container_.end()) {
recorder_container_[recorder_key] = recorder;
} else {
recorder_container_[recorder_key]->UpdateInfo(*recorder);
}
recorder_container_[recorder_key] = recorder;
return true;
}
// Looks up the recorder stored under the (module, name) key.
// Returns the stored recorder, or nullptr when nothing is stored under that key.
BaseRecorderPtr RecorderManager::GetRecorder(std::string module, std::string name) {
  std::pair<std::string, std::string> recorder_key(module, name);
  // RecordObject modifies recorder_container_ while holding mtx_; take the same mutex here so a lookup
  // cannot run concurrently with an insertion into the unordered_map.
  std::lock_guard<std::mutex> lock(mtx_);
  auto item = recorder_container_.find(recorder_key);
  if (item != recorder_container_.end()) {
    return item->second;
  }
  return nullptr;
}
void RecorderManager::TriggerAll() {
if (!rdr_enable_) {
return;

View File

@ -62,7 +62,8 @@ class RecorderManager {
void UpdateRdrEnable();
bool RdrEnable() const { return rdr_enable_; }
bool RecordObject(const BaseRecorderPtr &recorder, const bool &replace = true);
bool RecordObject(const BaseRecorderPtr &recorder);
BaseRecorderPtr GetRecorder(std::string module, std::string name);
void TriggerAll();
void ClearAll();

View File

@ -123,15 +123,30 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co
return ans;
}
// NOTE(review): this hunk shows RecordMemAddressInfo and RecordGPUMemAddressInfo interleaved without +/- markers;
// presumably the RecordMemAddressInfo / MemAddressRecorder lines are the removed version — confirm against the
// commit.
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
const GPUMemInfo &mem_info) {
// RecordGPUMemAddressInfo: creates a GPUMemAddressRecorder named `name` for the given submodule, sizes it for
// nsize entries and hands it to the RecorderManager.
// Returns false when RDR is disabled, otherwise the result of RecordObject.
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) {
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
return false;
}
std::string submodule_name = std::string(GetSubModuleName(module));
MemAddressRecorderPtr mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name);
mem_info_recorder->SaveMemInfo(op_name, mem_info);
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder), false);
GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name);
// Size the recorder's per-kernel storage before it is registered.
mem_info_recorder->Reset(nsize);
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder));
return ans;
}
// Stores the memory addresses of one kernel in the GPU memory recorder registered under (module, name).
//   module   - submodule whose name forms the first half of the recorder key.
//   name     - recorder name, the second half of the key.
//   op_name  - name of the kernel the addresses belong to.
//   mem_info - pointers to the kernel's input, workspace and output address lists.
//   id       - index of the kernel's entry inside the recorder.
// Returns true when a GPUMemAddressRecorder was found and SaveMemInfo was called on it; false when RDR is
// disabled, no recorder is registered under the key, or the registered recorder has a different type.
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
                             const GPUMemInfo &mem_info, size_t id) {
  if (!mindspore::RecorderManager::Instance().RdrEnable()) {
    return false;
  }
  std::string submodule_name = std::string(GetSubModuleName(module));
  auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name);
  // dynamic_pointer_cast yields nullptr for a missing recorder as well as for a recorder of another type,
  // so this single check guards the dereference below in both cases.
  auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder);
  if (mem_recorder == nullptr) {
    return false;
  }
  mem_recorder->SaveMemInfo(op_name, mem_info, id);
  return true;
}

View File

@ -52,8 +52,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name,
const std::vector<CNodePtr> &final_exec_order);
bool RecordString(SubModuleId module, const std::string &name, const std::string &data);
bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order);
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
const GPUMemInfo &mem_info);
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize);
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
const GPUMemInfo &mem_info, size_t id);
#ifdef ENABLE_D
bool RecordTaskDebugInfo(SubModuleId module, const std::string &name,
const std::vector<TaskDebugInfoPtr> &task_debug_info_list);

View File

@ -42,6 +42,7 @@
#endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#include "debug/rdr/recorder_manager.h"
#include "debug/rdr/mem_address_recorder.h"
#endif
@ -650,8 +651,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
auto &kernels = graph->execution_order();
int exec_order = 1;
#ifdef ENABLE_DUMP_IR
std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id());
mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_KERNEL, exec_order_name, kernels);
std::string name = "mem_address_list";
mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
size_t id = 0;
#endif
auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
@ -695,9 +697,8 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
}
#ifdef ENABLE_DUMP_IR
GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs};
std::string name = "mem_address_list";
std::string op_name = kernel->fullname_with_scope();
mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info);
mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
#endif
if (!mock) {
if (!profiling) {