!22365 RDR adapts for CPU dynamic memory allocation
Merge pull request !22365 from liangyongxiong/fix
This commit is contained in:
commit
438169e0b9
|
@ -6,7 +6,6 @@ set(_DEBUG_SRC_LIST
|
|||
"${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/draw.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dump_proto.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dump_utils.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/trace.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/env_config_parser.cc"
|
||||
|
@ -53,7 +52,6 @@ if(NOT ENABLE_SECURITY)
|
|||
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
list(APPEND _DEBUG_SRC_LIST
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dump_utils.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/data_dump/e2e_dump.cc"
|
||||
)
|
||||
endif()
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debug/dump_utils.h"
|
||||
|
||||
#include <string>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/comm_manager.h"
|
||||
#include "frontend/parallel/context.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Returns the rank id of the current process for use in dump paths.
//
// The result is 0 when the parallel mode is STAND_ALONE, or when the device
// target is neither Ascend nor GPU. Otherwise the rank id is queried from
// CommManager using the HCCL (Ascend) or NCCL (GPU) world group.
//
// A failed CommManager query is only logged as a warning; the function still
// returns whatever rank_id holds at that point.
// NOTE(review): presumably GetRankID leaves rank_id at its initial 0 on
// failure — confirm against CommManager.
uint32_t DumpUtils::GetRankId() {
|
||||
// Default rank id, returned on every early-exit path below.
uint32_t rank_id = 0;
|
||||
auto parallel_context = parallel::ParallelContext::GetInstance();
|
||||
// NOTE(review): macro name suggests it raises on a null pointer — confirm.
MS_EXCEPTION_IF_NULL(parallel_context);
|
||||
auto parallel_mode = parallel_context->parallel_mode();
|
||||
// Stand-alone mode: no communication group is consulted.
if (parallel_mode == parallel::STAND_ALONE) {
|
||||
MS_LOG(INFO) << "parallel_mode is stand_alone, use 0 as default rank id.";
|
||||
return rank_id;
|
||||
}
|
||||
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
std::string world_group;
|
||||
std::string backend = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
// Select the world communication group that matches the device target.
if (backend == kAscendDevice) {
|
||||
world_group = kHcclWorldGroup;
|
||||
} else if (backend == kGPUDevice) {
|
||||
world_group = kNcclWorldGroup;
|
||||
} else {
|
||||
// Any other device target: keep the default rank id.
return rank_id;
|
||||
}
|
||||
|
||||
// Best effort: a failed lookup is logged but does not abort the caller.
if (!CommManager::GetInstance().GetRankID(world_group, &rank_id)) {
|
||||
MS_LOG(WARNING) << "Failed to get rank id.";
|
||||
}
|
||||
|
||||
return rank_id;
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -1,29 +0,0 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_DUMP_UTILS_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_DUMP_UTILS_H_
|
||||
#include <stdint.h>
|
||||
|
||||
namespace mindspore {
|
||||
// Holder for dump-related helpers. The only member declared here besides the
// defaulted constructor and destructor is the static GetRankId().
class DumpUtils {
|
||||
public:
|
||||
DumpUtils() = default;
|
||||
~DumpUtils() = default;
|
||||
// Returns the rank id of the current process as an unsigned 32-bit value.
static uint32_t GetRankId();
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_DUMP_UTILS_H_
|
|
@ -19,7 +19,6 @@
|
|||
#include "nlohmann/json.hpp"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "debug/common.h"
|
||||
#include "debug/dump_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/convert_utils_base.h"
|
||||
|
||||
|
@ -103,19 +102,17 @@ void EnvConfigParser::ParseFromEnv() {
|
|||
has_rdr_setting_ = true;
|
||||
rdr_enabled_ = rdr_enable_env.value();
|
||||
}
|
||||
std::string path = "";
|
||||
auto path_env = GetRdrPathFromEnv();
|
||||
if (path_env.has_value()) {
|
||||
has_rdr_setting_ = true;
|
||||
path = path_env.value();
|
||||
std::string path = path_env.value();
|
||||
if (!path.empty()) {
|
||||
if (path.back() != '/') {
|
||||
path += '/';
|
||||
}
|
||||
rdr_path_ = path;
|
||||
}
|
||||
}
|
||||
uint32_t rank_id = DumpUtils::GetRankId();
|
||||
rdr_path_ = path + "rank_" + std::to_string(rank_id) + "/rdr/";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -18,25 +18,10 @@
|
|||
#include <fstream>
|
||||
#include "debug/common.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/comm_manager.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Stores `directory` as this recorder's output directory if it passes
// Common::IsPathValid, appending a trailing '/' when one is missing.
// If validation fails, directory_ is left unchanged.
void BaseRecorder::SetDirectory(const std::string &directory) {
|
||||
// Message handed to the validator; identifies the recorder as "<module>:<name>".
std::string error_message = module_ + ":" + name_ + " set directory failed.";
|
||||
if (Common::IsPathValid(directory, MAX_DIRECTORY_LENGTH, error_message)) {
|
||||
directory_ = directory;
|
||||
// NOTE(review): back() on an empty string is undefined behavior; this relies
// on IsPathValid rejecting an empty directory — confirm.
if (directory_.back() != '/') {
|
||||
directory_ += "/";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores `filename` as this recorder's output file name if it passes
// Common::IsFilenameValid. If validation fails, filename_ is left unchanged.
void BaseRecorder::SetFilename(const std::string &filename) {
|
||||
// Message handed to the validator; identifies the recorder as "<module>:<name>".
std::string error_message = module_ + ":" + name_ + " set filename failed.";
|
||||
// NOTE(review): the length limit passed here is MAX_DIRECTORY_LENGTH, the same
// constant SetDirectory uses — confirm that sharing it for file names is intended.
if (Common::IsFilenameValid(filename, MAX_DIRECTORY_LENGTH, error_message)) {
|
||||
filename_ = filename;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<std::string> BaseRecorder::GetFileRealPath(const std::string &suffix) const {
|
||||
std::string filename;
|
||||
if (filename_.empty()) {
|
||||
|
@ -52,6 +37,12 @@ std::optional<std::string> BaseRecorder::GetFileRealPath(const std::string &suff
|
|||
}
|
||||
}
|
||||
std::string file_path = directory_ + filename;
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
auto config_file = context->get_param<std::string>(MS_CTX_ENV_CONFIG_PATH);
|
||||
if (config_file.empty()) {
|
||||
file_path = directory_ + "rank_" + std::to_string(GetRank()) + "/rdr/" + filename;
|
||||
}
|
||||
auto realpath = Common::GetRealPath(file_path);
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Get real path failed. "
|
||||
|
|
|
@ -59,9 +59,6 @@ class BaseRecorder {
|
|||
std::string GetTimeStamp() const { return timestamp_; }
|
||||
std::optional<std::string> GetFileRealPath(const std::string &suffix = "") const;
|
||||
|
||||
void SetDirectory(const std::string &directory);
|
||||
void SetFilename(const std::string &filename);
|
||||
void SetModule(const std::string &module) { module_ = module; }
|
||||
virtual void Export() {}
|
||||
virtual void UpdateInfo(const BaseRecorder &recorder) {}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ std::string MemInfo2String(const std::string &label, const AddressPtrList &info)
|
|||
}
|
||||
} // namespace
|
||||
|
||||
void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id) {
|
||||
void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const MemInfo &mem_info, size_t id) {
|
||||
if (op_names_.size() <= id) {
|
||||
return;
|
||||
}
|
||||
|
@ -44,10 +44,10 @@ void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const GPUMem
|
|||
mem_info_outputs_[id] = *(mem_info.outputs_);
|
||||
}
|
||||
|
||||
void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const kernel::KernelLaunchInfo *mem_info) {
|
||||
void MemAddressRecorder::SaveMemInfo(const std::string &op_name, const kernel::KernelLaunchInfo *mem_info) {
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
if (!printed) {
|
||||
MS_LOG(INFO) << "RDR update gpu mem info.";
|
||||
MS_LOG(INFO) << "RDR update mem info.";
|
||||
printed = true;
|
||||
}
|
||||
op_names_.emplace_back(op_name);
|
||||
|
@ -56,7 +56,7 @@ void GPUMemAddressRecorder::SaveMemInfo(const std::string &op_name, const kernel
|
|||
mem_info_outputs_.emplace_back(mem_info->outputs_);
|
||||
}
|
||||
|
||||
void GPUMemAddressRecorder::Export() {
|
||||
void MemAddressRecorder::Export() {
|
||||
auto realpath = GetFileRealPath();
|
||||
if (!realpath.has_value()) {
|
||||
return;
|
||||
|
@ -66,10 +66,10 @@ void GPUMemAddressRecorder::Export() {
|
|||
ChangeFileMode(file_path, S_IRWXU);
|
||||
std::ofstream fout(file_path);
|
||||
if (!fout.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file for saving gpu memory information failed. File path: '" << file_path << "'.";
|
||||
MS_LOG(WARNING) << "Open file for saving memory information failed. File path: '" << file_path << "'.";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "RDR export gpu mem info.";
|
||||
MS_LOG(INFO) << "RDR export mem info.";
|
||||
std::ostringstream mem_info_stream;
|
||||
for (size_t i = 0; i < op_names_.size(); i++) {
|
||||
mem_info_stream << op_names_[i] << std::endl;
|
||||
|
@ -86,9 +86,9 @@ void GPUMemAddressRecorder::Export() {
|
|||
ChangeFileMode(file_path, S_IRUSR);
|
||||
}
|
||||
|
||||
void GPUMemAddressRecorder::CleanUp() {
|
||||
void MemAddressRecorder::CleanUp() {
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
MS_LOG(INFO) << "RDR clean up gpu mem info, kernel size equals " << op_names_.size();
|
||||
MS_LOG(INFO) << "RDR clean up mem info, kernel size equals " << op_names_.size();
|
||||
op_names_.clear();
|
||||
mem_info_inputs_.clear();
|
||||
mem_info_workspaces_.clear();
|
||||
|
|
|
@ -30,19 +30,19 @@ struct KernelLaunchInfo;
|
|||
using AddressPtr = std::shared_ptr<Address>;
|
||||
} // namespace kernel
|
||||
using AddressPtrList = std::vector<kernel::AddressPtr>;
|
||||
struct GPUMemInfo {
|
||||
struct MemInfo {
|
||||
AddressPtrList *inputs_;
|
||||
AddressPtrList *workspaces_;
|
||||
AddressPtrList *outputs_;
|
||||
};
|
||||
class GPUMemAddressRecorder : public BaseRecorder {
|
||||
class MemAddressRecorder : public BaseRecorder {
|
||||
public:
|
||||
GPUMemAddressRecorder() {}
|
||||
GPUMemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
|
||||
~GPUMemAddressRecorder() {}
|
||||
MemAddressRecorder() {}
|
||||
MemAddressRecorder(const std::string &module, const std::string &name) : BaseRecorder(module, name) {}
|
||||
~MemAddressRecorder() {}
|
||||
|
||||
virtual void Export();
|
||||
void SaveMemInfo(const std::string &op_name, const GPUMemInfo &mem_info, size_t id);
|
||||
void SaveMemInfo(const std::string &op_name, const MemInfo &mem_info, size_t id);
|
||||
void SaveMemInfo(const std::string &op_name, const kernel::KernelLaunchInfo *mem_info);
|
||||
|
||||
void Reset(size_t nsize) {
|
||||
|
@ -61,6 +61,6 @@ class GPUMemAddressRecorder : public BaseRecorder {
|
|||
std::vector<AddressPtrList> mem_info_workspaces_;
|
||||
std::vector<AddressPtrList> mem_info_outputs_;
|
||||
};
|
||||
using GPUMemAddressRecorderPtr = std::shared_ptr<GPUMemAddressRecorder>;
|
||||
using MemAddressRecorderPtr = std::shared_ptr<MemAddressRecorder>;
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_MEM_ADDRESS_RECORDER_H_
|
||||
|
|
|
@ -76,7 +76,7 @@ bool RecorderManager::RdrEnable() const {
|
|||
return rdr_enable_;
|
||||
}
|
||||
|
||||
bool RecorderManager::CheckRdrGPUMemIsRecord() const {
|
||||
bool RecorderManager::CheckRdrMemIsRecord() const {
|
||||
if (!rdr_enable_) {
|
||||
return false;
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ bool RecorderManager::CheckRdrGPUMemIsRecord() const {
|
|||
return rdr_has_record_mem_;
|
||||
}
|
||||
|
||||
void RecorderManager::SetRdrGPUMemIsRecord(bool is_enable) {
|
||||
void RecorderManager::SetRdrMemIsRecord(bool is_enable) {
|
||||
if (!rdr_enable_) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -62,8 +62,8 @@ class RecorderManager {
|
|||
|
||||
void UpdateRdrEnable();
|
||||
bool RdrEnable() const;
|
||||
bool CheckRdrGPUMemIsRecord() const;
|
||||
void SetRdrGPUMemIsRecord(bool is_enable = true);
|
||||
bool CheckRdrMemIsRecord() const;
|
||||
void SetRdrMemIsRecord(bool is_enable = true);
|
||||
|
||||
bool RecordObject(const BaseRecorderPtr &recorder);
|
||||
BaseRecorderPtr GetRecorder(std::string module, std::string name);
|
||||
|
|
|
@ -89,19 +89,19 @@ bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, co
|
|||
return ans;
|
||||
}
|
||||
|
||||
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) {
|
||||
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize) {
|
||||
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
return false;
|
||||
}
|
||||
std::string submodule_name = std::string(GetSubModuleName(module));
|
||||
GPUMemAddressRecorderPtr mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name);
|
||||
MemAddressRecorderPtr mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name);
|
||||
mem_info_recorder->Reset(nsize);
|
||||
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(mem_info_recorder));
|
||||
return ans;
|
||||
}
|
||||
|
||||
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info, size_t id) {
|
||||
bool UpdateMemAddress(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const MemInfo &mem_info, size_t id) {
|
||||
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name,
|
|||
auto recorder = mindspore::RecorderManager::Instance().GetRecorder(submodule_name, name);
|
||||
bool ans = false;
|
||||
if (recorder != nullptr) {
|
||||
auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder);
|
||||
auto mem_recorder = std::dynamic_pointer_cast<MemAddressRecorder>(recorder);
|
||||
mem_recorder->SaveMemInfo(op_name, mem_info, id);
|
||||
ans = true;
|
||||
}
|
||||
|
@ -120,16 +120,16 @@ void TriggerAll() { mindspore::RecorderManager::Instance().TriggerAll(); }
|
|||
|
||||
// Thin wrapper: forwards to RecorderManager::Instance().ClearAll().
void ResetRecorder() { mindspore::RecorderManager::Instance().ClearAll(); }
|
||||
|
||||
void ClearGPUMemAddressInfo() {
|
||||
void ClearMemAddressInfo() {
|
||||
if (!mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
return;
|
||||
}
|
||||
if (RecorderManager::Instance().CheckRdrGPUMemIsRecord()) {
|
||||
if (RecorderManager::Instance().CheckRdrMemIsRecord()) {
|
||||
std::string name = "mem_address_list";
|
||||
std::string submodule_name = "KERNEL";
|
||||
auto recorder = RecorderManager::Instance().GetRecorder(submodule_name, name);
|
||||
if (recorder != nullptr) {
|
||||
auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder);
|
||||
auto mem_recorder = std::dynamic_pointer_cast<MemAddressRecorder>(recorder);
|
||||
mem_recorder->CleanUp();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ class Address;
|
|||
using AddressPtr = std::shared_ptr<Address>;
|
||||
} // namespace kernel
|
||||
using AddressPtrList = std::vector<kernel::AddressPtr>;
|
||||
struct GPUMemInfo;
|
||||
struct MemInfo;
|
||||
#ifdef ENABLE_D
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
|
@ -52,16 +52,16 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &name,
|
|||
const std::vector<CNodePtr> &final_exec_order);
|
||||
bool RecordString(SubModuleId module, const std::string &name, const std::string &data);
|
||||
bool RecordStreamExecOrder(const SubModuleId module, const std::string &name, const std::vector<CNodePtr> &exec_order);
|
||||
bool RecordGPUMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize);
|
||||
bool UpdateGPUMemAddressInfo(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const GPUMemInfo &mem_info, size_t id);
|
||||
bool RecordMemAddressInfo(const SubModuleId module, const std::string &name, size_t nsize);
|
||||
bool UpdateMemAddress(const SubModuleId module, const std::string &name, const std::string &op_name,
|
||||
const MemInfo &mem_info, size_t id);
|
||||
#ifdef ENABLE_D
|
||||
bool RecordTaskDebugInfo(SubModuleId module, const std::string &name,
|
||||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
|
||||
#endif // ENABLE_D
|
||||
void TriggerAll();
|
||||
void ResetRecorder();
|
||||
void ClearGPUMemAddressInfo();
|
||||
void ClearMemAddressInfo();
|
||||
} // namespace RDR
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_RUNNING_DATA_RECORDER_H_
|
||||
|
|
|
@ -41,6 +41,8 @@
|
|||
#endif
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
#include "debug/rdr/running_data_recorder.h"
|
||||
#include "debug/rdr/recorder_manager.h"
|
||||
#include "debug/rdr/mem_address_recorder.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -410,7 +412,11 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
|
|||
bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
|
||||
uint32_t graph_id = kernel_graph->graph_id();
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
std::string name = "mem_address_list";
|
||||
(void)mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
|
||||
size_t id = 0;
|
||||
#endif
|
||||
for (const auto &kernel : kernels) {
|
||||
#ifdef ENABLE_PROFILE
|
||||
double start_time = GetTime();
|
||||
|
@ -445,6 +451,11 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
|
|||
uint32_t pid = getpid();
|
||||
profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), pid);
|
||||
}
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
MemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs};
|
||||
std::string op_name = kernel->fullname_with_scope();
|
||||
(void)mindspore::RDR::UpdateMemAddress(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
|
||||
#endif
|
||||
try {
|
||||
ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0);
|
||||
} catch (std::exception &e) {
|
||||
|
|
|
@ -731,7 +731,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
int exec_order = 1;
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
std::string name = "mem_address_list";
|
||||
(void)mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
|
||||
(void)mindspore::RDR::RecordMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
|
||||
size_t id = 0;
|
||||
#endif
|
||||
CNodePtr last_kernel = GetLastKernel(graph);
|
||||
|
@ -769,9 +769,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
return false;
|
||||
}
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
GPUMemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs};
|
||||
MemInfo mem_info = {&kernel_inputs, &kernel_workspaces, &kernel_outputs};
|
||||
std::string op_name = kernel->fullname_with_scope();
|
||||
(void)mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
|
||||
(void)mindspore::RDR::UpdateMemAddress(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
|
||||
#endif
|
||||
if (!mock) {
|
||||
LaunchKernelWithoutMock(graph, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, profiling);
|
||||
|
@ -830,6 +830,9 @@ void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph
|
|||
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) {
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
mindspore::RDR::TriggerAll();
|
||||
#endif
|
||||
MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
|
||||
}
|
||||
if (profiler_inst->GetEnableFlag()) {
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
#include "mindrt/include/async/async.h"
|
||||
#include "common/trans.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
#include "debug/rdr/running_data_recorder.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace runtime {
|
||||
|
@ -165,10 +168,16 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co
|
|||
auto ret = device_contexts_[0]->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
|
||||
launch_info_.outputs_);
|
||||
if (!ret) {
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
mindspore::RDR::TriggerAll();
|
||||
#endif
|
||||
std::string error_info = "Launch kernel failed: " + data_kernel_->fullname_with_scope();
|
||||
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
|
||||
}
|
||||
} catch (const std::exception &e) {
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
mindspore::RDR::TriggerAll();
|
||||
#endif
|
||||
MsException::Instance().SetException();
|
||||
std::string error_info = "Launch kernel exception: " + data_kernel_->fullname_with_scope();
|
||||
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
|
||||
|
|
|
@ -41,23 +41,23 @@ void RecorderActor::RecordInfo(const std::string op_name, const KernelLaunchInfo
|
|||
return;
|
||||
}
|
||||
std::string name = "mem_address_list";
|
||||
if (!RecorderManager::Instance().CheckRdrGPUMemIsRecord()) {
|
||||
if (!RecorderManager::Instance().CheckRdrMemIsRecord()) {
|
||||
std::string submodule_name = "KERNEL";
|
||||
auto mem_info_recorder = std::make_shared<GPUMemAddressRecorder>(submodule_name, name);
|
||||
auto mem_info_recorder = std::make_shared<MemAddressRecorder>(submodule_name, name);
|
||||
if (mem_info_recorder == nullptr) {
|
||||
MS_LOG(ERROR) << "Make GPUMemAddressRecorder shared pointer failed.";
|
||||
MS_LOG(ERROR) << "Make MemAddressRecorder shared pointer failed.";
|
||||
return;
|
||||
}
|
||||
mem_info_recorder->SaveMemInfo(op_name, launch_info_);
|
||||
bool result = RecorderManager::Instance().RecordObject(std::move(mem_info_recorder));
|
||||
if (result) {
|
||||
RecorderManager::Instance().SetRdrGPUMemIsRecord(true);
|
||||
RecorderManager::Instance().SetRdrMemIsRecord(true);
|
||||
}
|
||||
} else {
|
||||
std::string submodule_name = "KERNEL";
|
||||
auto recorder = RecorderManager::Instance().GetRecorder(submodule_name, name);
|
||||
if (recorder != nullptr) {
|
||||
auto mem_recorder = std::dynamic_pointer_cast<GPUMemAddressRecorder>(recorder);
|
||||
auto mem_recorder = std::dynamic_pointer_cast<MemAddressRecorder>(recorder);
|
||||
mem_recorder->SaveMemInfo(op_name, launch_info_);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -464,7 +464,7 @@ bool GPUDeviceContext::SyncStream(size_t stream_id) const {
|
|||
mindspore::RDR::TriggerAll();
|
||||
}
|
||||
// clear RDR gpu memory info
|
||||
mindspore::RDR::ClearGPUMemAddressInfo();
|
||||
mindspore::RDR::ClearMemAddressInfo();
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue