Optimize recording of GPU memory information
This commit is contained in:
parent
67b68c1bd2
commit
8f8c7d2e0e
|
@ -375,6 +375,9 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
|
|||
std::string name = "graph_build";
|
||||
DumpGraphParams dump_params = {true, static_cast<int>(kWholeStack)};
|
||||
mindspore::RDR::RecordAnfGraph(SubModuleId::SM_SESSION, name, graph, dump_params, ".ir,.pb");
|
||||
auto &kernels = graph->execution_order();
|
||||
std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id());
|
||||
mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_SESSION, exec_order_name, kernels);
|
||||
#endif
|
||||
// Get summary nodes.
|
||||
SetSummaryNodes(graph.get());
|
||||
|
|
|
@ -33,23 +33,19 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info)
|
|||
}
|
||||
} // namespace
|
||||
|
||||
void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info) {
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
std::ostringstream mem_info_stream;
|
||||
auto inputs = mem_info.inputs_;
|
||||
mem_info_stream << op_name << std::endl;
|
||||
mem_info_stream << MemInfo2String("kernel_inputs", *inputs);
|
||||
auto workspaces = mem_info.workspaces_;
|
||||
mem_info_stream << MemInfo2String("kernel_workspaces", *workspaces);
|
||||
auto outputs = mem_info.outputs_;
|
||||
mem_info_stream << MemInfo2String("kernel_outputs", *outputs);
|
||||
mem_info_stream << std::endl;
|
||||
void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) {
|
||||
if (op_names_.size() <= id) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string mem_info_str = mem_info_stream.str();
|
||||
mem_info_container_[op_name] = mem_info_str;
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
op_names_[id] = op_name;
|
||||
mem_info_inputs_[id] = *(mem_info.inputs_);
|
||||
mem_info_workspaces_[id] = *(mem_info.workspaces_);
|
||||
mem_info_outputs_[id] = *(mem_info.outputs_);
|
||||
}
|
||||
|
||||
void MemAddressRecorder::Export() {
|
||||
void GPUMemAddressRecorder::Export() {
|
||||
auto realpath = GetFileRealPath();
|
||||
if (!realpath.has_value()) {
|
||||
return;
|
||||
|
@ -62,18 +58,19 @@ void MemAddressRecorder::Export() {
|
|||
MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'.";
|
||||
return;
|
||||
}
|
||||
for (auto &info : mem_info_container_) {
|
||||
fout << info.second;
|
||||
std::ostringstream mem_info_stream;
|
||||
for (size_t i = 0; i < op_names_.size(); i++) {
|
||||
mem_info_stream << op_names_[i] << std::endl;
|
||||
auto inputs = mem_info_inputs_[i];
|
||||
mem_info_stream << MemInfo2String("kernel_inputs", inputs);
|
||||
auto workspaces = mem_info_workspaces_[i];
|
||||
mem_info_stream << MemInfo2String("kernel_workspaces", workspaces);
|
||||
auto outputs = mem_info_outputs_[i];
|
||||
mem_info_stream << MemInfo2String("kernel_outputs", outputs);
|
||||
mem_info_stream << std::endl;
|
||||
}
|
||||
fout << mem_info_stream.str();
|
||||
fout.close();
|
||||
ChangeFileMode(file_path, S_IRUSR);
|
||||
}
|
||||
|
||||
void MemAddressRecorder::UpdateInfo(const BaseRecorder &recorder) {
|
||||
const MemAddressRecorder *mem_recorder = reinterpret_cast<const MemAddressRecorder *>(&recorder);
|
||||
std::map<std::string, std::string> mem_info = mem_recorder->MemInfo();
|
||||
for (const auto &info : mem_info) {
|
||||
mem_info_container_[info.first] = info.second;
|
||||
}
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -34,22 +34,28 @@ struct GPUMemInfo {
|
|||
AddressPtrList *workspaces_;
|
||||
AddressPtrList *outputs_;
|
||||
};
|
||||
class MemAddressRecorder : public BaseRecorder {
|
||||
class GPUMemAddressRecorder : public BaseRecorder {
|
||||
public:
|
||||
MemAddressRecorder() {}
|
||||
MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
|
||||
~MemAddressRecorder() {}
|
||||
GPUMemAddressRecorder() {}
|
||||
GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
|
||||
~GPUMemAddressRecorder() {}
|
||||
|
||||
virtual void Export();
|
||||
void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info);
|
||||
void UpdateInfo(const BaseRecorder &recorder);
|
||||
std::map<std::string, std::string> MemInfo() const { return mem_info_container_; }
|
||||
void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id);
|
||||
void Reset(size_t nsize) {
|
||||
op_names_.resize(nsize);
|
||||
mem_info_inputs_.resize(nsize);
|
||||
mem_info_workspaces_.resize(nsize);
|
||||
mem_info_outputs_.resize(nsize);
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::mutex mtx_;
|
||||
|
||||
std::map<std::string, std::string> mem_info_container_;
|
||||
std::vector<std::string> op_names_;
|
||||
std::vector<AddressPtrList> mem_info_inputs_;
|
||||
std::vector<AddressPtrList> mem_info_workspaces_;
|
||||
std::vector<AddressPtrList> mem_info_outputs_;
|
||||
};
|
||||
using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>;
|
||||
using GPUMemAddressRecorderPtr = std::shared_ptr<GPUMemAddressRecorder>;
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_
|
||||
|
|
|
@ -42,7 +42,7 @@ void RecorderManager::UpdateRdrEnable() {
|
|||
updated = true;
|
||||
}
|
||||
|
||||
bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &replace) {
|
||||
bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder) {
|
||||
if (!rdr_enable_) {
|
||||
return false;
|
||||
}
|
||||
|
@ -55,20 +55,19 @@ bool RecorderManager::RecordObject(const BaseRecorderPtr &recorder, const bool &
|
|||
std::string name = recorder->GetName();
|
||||
std::pair<std::string, std::string> recorder_key(module, name);
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
if (replace) {
|
||||
recorder_container_[recorder_key] = recorder;
|
||||
return true;
|
||||
}
|
||||
std::unordered_map<std::pair<std::string, std::string>, BaseRecorderPtr, pair_hash>::iterator item =
|
||||
recorder_container_.find(recorder_key);
|
||||
if (item == recorder_container_.end()) {
|
||||
recorder_container_[recorder_key] = recorder;
|
||||
} else {
|
||||
recorder_container_[recorder_key]->UpdateInfo(*recorder);
|
||||
}
|
||||
recorder_container_[recorder_key] = recorder;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up the recorder registered under the (module, name) key.
// Returns the stored recorder, or nullptr when nothing is registered under that key.
BaseRecorderPtr RecorderManager::GetRecorder(std::string module, std::string name) {
  std::pair<std::string, std::string> recorder_key(module, name);
  // RecordObject() modifies recorder_container_ while holding mtx_; take the same lock here so that a lookup
  // never runs concurrently with an insertion into the container.
  std::lock_guard<std::mutex> lock(mtx_);
  auto item = recorder_container_.find(recorder_key);
  if (item != recorder_container_.end()) {
    return item->second;
  }
  return nullptr;
}
|
||||
|
||||
void RecorderManager::TriggerAll() {
|
||||
if (!rdr_enable_) {
|
||||
return;
|
||||
|
|
|
@ -62,7 +62,8 @@ class RecorderManager {
|
|||
|
||||
void UpdateRdrEnable();
|
||||
bool RdrEnable() const { return rdr_enable_; }
|
||||
bool RecordObject(const BaseRecorderPtr &recorder, const bool &replace = true);
|
||||
bool RecordObject(const BaseRecorderPtr &recorder);
|
||||
BaseRecorderPtr GetRecorder(std::string module, std::string name);
|
||||
void TriggerAll();
|
||||
void ClearAll();
|
||||
|
||||
|
|
|
@ -123,15 +123,30 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co
|
|||
return ans;
|
||||
}
|
||||
|
||||
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info) {
|
||||
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) {
|
||||
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
return false;
|
||||
}
|
||||
std::string submodule_name = std::string(GetSubModuleName(module));
|
||||
MemAddressRecorderPtr mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name);
|
||||
mem_info_recorder->SaveMemInfo(op_name, mem_info);
|
||||
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder), false);
|
||||
GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name);
|
||||
mem_info_recorder->Reset(nsize);
|
||||
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder));
|
||||
return ans;
|
||||
}
|
||||
|
||||
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info, size_t id) {
|
||||
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
return false;
|
||||
}
|
||||
std::string submodule_name = std::string(GetSubModuleName(module));
|
||||
auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name);
|
||||
bool ans = false;
|
||||
if (recorder != nullptr) {
|
||||
auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder);
|
||||
mem_recorder->SaveMemInfo(op_name, mem_info, id);
|
||||
ans = true;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
|
|
@ -52,8 +52,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name,
|
|||
const std::vector<CNodePtr> &final_exec_order);
|
||||
bool RecordString(SubModuleId module, const std::string &name, const std::string &data);
|
||||
bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order);
|
||||
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info);
|
||||
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize);
|
||||
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info, size_t id);
|
||||
#ifdef ENABLE_D
|
||||
bool RecordTaskDebugInfo(SubModuleId module, const std::string &name,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#endif
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
#include "debug/rdr/running_data_recorder.h"
|
||||
#include "debug/rdr/recorder_manager.h"
|
||||
#include "debug/rdr/mem_address_recorder.h"
|
||||
#endif
|
||||
|
||||
|
@ -650,8 +651,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
auto &kernels = graph->execution_order();
|
||||
int exec_order = 1;
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
std::string exec_order_name = "graph_exec_order." + std::to_string(graph->graph_id());
|
||||
mindspore::RDR::RecordGraphExecOrder(SubModuleId::SM_KERNEL, exec_order_name, kernels);
|
||||
std::string name = "mem_address_list";
|
||||
mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
|
||||
size_t id = 0;
|
||||
#endif
|
||||
auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(profiler_inst);
|
||||
|
@ -695,9 +697,8 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
}
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs};
|
||||
std::string name = "mem_address_list";
|
||||
std::string op_name = kernel->fullname_with_scope();
|
||||
mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info);
|
||||
mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
|
||||
#endif
|
||||
if (!mock) {
|
||||
if (!profiling) {
|
||||
|
|
Loading…
Reference in New Issue