From a42c936df6cfa7aa3b2b3930d91f46e950571bf8 Mon Sep 17 00:00:00 2001
From: LaiYongqiang
Date: Thu, 25 Nov 2021 11:42:08 +0800
Subject: [PATCH] log improvement

---
 .../backend/kernel_compiler/common_utils.h    |   1 -
 .../backend/kernel_compiler/tbe/tbe_utils.cc  |   9 +-
 .../ascend/ascend_backend_optimization.cc     |   2 +-
 .../ccsrc/backend/optimizer/somas/somas.cc    |  18 +--
 .../optimizer/somas/somas_solver_alg.cc       |   2 +-
 .../ccsrc/backend/session/ascend_session.cc   |   6 +
 mindspore/ccsrc/debug/common.cc               |  25 ++++
 mindspore/ccsrc/debug/common.h                |   4 +
 .../device/ascend/ascend_memory_adapter.cc    | 136 ++++++++++--------
 .../device/ascend/ascend_memory_pool.cc       |   7 +
 mindspore/core/utils/convert_utils_base.h     |   3 +
 11 files changed, 128 insertions(+), 85 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
index e971b7ba208..c49f6894e64 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
@@ -42,7 +42,6 @@ constexpr auto kProcessorUnknown = "unknown";
 constexpr auto kJsonSuffix = ".json";
 constexpr auto kInfoSuffix = ".info";
 constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600;
-constexpr auto kCOMPILER_CACHE_PATH = "MS_COMPILER_CACHE_PATH";
 
 const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"};
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc
index 8cdd9b135cf..bd6a262f252 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc
@@ -58,7 +58,6 @@ constexpr auto kJOpTuneList = "op_tune_list";
 constexpr auto kJPassList = "pass_list";
 constexpr auto kRankID = "RANK_ID";
 constexpr auto kCOMPILER_OP_LEVEL = "MS_COMPILER_OP_LEVEL";
-constexpr auto kCOMPILER_CACHE_PATH = "MS_COMPILER_CACHE_PATH";
 
 uintptr_t KernelManager::kernel_stub_gen_ = 0;
 std::unordered_map KernelManager::info_table_ = {};
@@ -109,13 +108,7 @@ std::string TbeUtils::GetOpDebugPath() {
   if (debug_path != "") {
     return debug_path;
   }
-  auto config_path = GetCompilerCachePath();
-  std::string rank_id_str = common::GetEnv(kRankID);
-  if (rank_id_str.empty()) {
-    MS_LOG(DEBUG) << "Environment variable 'RANK_ID' is empty, using the default value: 0";
-    rank_id_str = "0";
-  }
-  debug_path = config_path + "rank_" + rank_id_str + "/";
+  debug_path = Common::GetCompilerCachePath();
   return debug_path;
 }
 
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
index 123c5d3d7d7..9f1c933f1d4 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
@@ -488,7 +488,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
   if (kernel_graph->is_dynamic_shape()) {
-    MS_LOG(WARNING) << "Dynamic shape skip fusion";
+    MS_LOG(INFO) << "Dynamic shape skip fusion";
     return;
   }
   auto pre_build = common::GetEnv("MS_DISABLE_PREBUILD");
diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas.cc b/mindspore/ccsrc/backend/optimizer/somas/somas.cc
index 2fe0c791e78..e37108c10cb 100644
--- a/mindspore/ccsrc/backend/optimizer/somas/somas.cc
+++ b/mindspore/ccsrc/backend/optimizer/somas/somas.cc
@@ -125,8 +125,8 @@ bool Somas::LoadSomasCache(const session::KernelGraph *graph) {
   bool ret = CalcSomasModelHash(graph);
   if (ret) {
-    std::string filename = GetSaveGraphsPathName(
-      "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json", save_graphs_path_);
+    std::string filename = Common::GetCompilerCachePath() + "/somas_meta/somas_graph_" +
+                           std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json";
     ret = LoadSomasResult(graph, filename);
     if (ret) {
       MS_LOG(INFO) << "Load Somas Cache file " << filename << " Successfully.";
@@ -142,8 +142,8 @@ bool Somas::CalcSomasModelHash(const session::KernelGraph *graph) {
   auto model_str = SomasInfo(true);
   hash_id_ = std::to_string(std::hash<std::string>()(model_str));
   MS_LOG(INFO) << "Graph " << graph->graph_id() << "'s SOMAS Model hash id is " << hash_id_;
-  std::string filename = GetSaveGraphsPathName(
-    "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".info", save_graphs_path_);
+  std::string filename = Common::GetCompilerCachePath() + "/somas_meta/somas_graph_" +
+                         std::to_string(graph->graph_id()) + "_" + hash_id_ + ".info";
   return Common::SaveStringToFile(filename, model_str);
 }
 
@@ -179,17 +179,13 @@ bool Somas::SaveSomasResult(const session::KernelGraph *graph) {
     }
   }
   somas_json[kTensors] = tensors_json;
-  std::string filename = GetSaveGraphsPathName(
-    "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json", save_graphs_path_);
+  std::string filename = Common::GetCompilerCachePath() + "/somas_meta/somas_graph_" +
+                         std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json";
   (void)Common::SaveStringToFile(filename, somas_json.dump());
   return true;
 }
 
 bool Somas::LoadSomasResult(const session::KernelGraph *graph, const string &filename) {
-  if (filename.length() <= strlen(".json")) {
-    MS_LOG(WARNING) << "please check somas cache file path.";
-    return false;
-  }
   std::ifstream somas_json_fs(filename);
   if (!somas_json_fs.is_open()) {
     MS_LOG(INFO) << "Open json file: " << filename << " error, Somas Cache Missed.";
@@ -200,7 +196,7 @@ bool Somas::LoadSomasResult(const session::KernelGraph *graph, const string &fil
     somas_json_fs >> somas_json;
     somas_json_fs.close();
   } catch (std::exception &e) {
-    MS_LOG(WARNING) << "Parse json file error: " << filename << ", sleep 500ms and retry again.";
+    MS_LOG(INFO) << "Parse json file error: " << filename << ", sleep 500ms and retry again.";
     somas_json_fs.close();
     std::this_thread::sleep_for(std::chrono::milliseconds(kRetryIntervalSeconds));
     std::ifstream retry_tmp(filename);
diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc
index 05596983d1e..69b8ec498ec 100644
--- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc
+++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc
@@ -263,7 +263,7 @@ bool FastHeuristic::Eval(vector *block_tensors_v, const std::shared
       if (p->Next() != nullptr) {
         p = p->Next();
       } else if (bpushed == false) {  // something went wrong
-        MS_LOG(WARNING) << "Could not allocate memory for tensor: " << tensor->index_;
+        MS_LOG(WARNING) << "Internal Error: Could not allocate memory for tensor: " << tensor->index_;
         return false;
       }
     }
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index d4e3605ccac..989f3f9efeb 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -1305,6 +1305,12 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
   device::KernelAdjust::GetInstance().AssignLoopCtrlMemory(*kernel_graph);
MS_LOG(INFO) << "Status record: end memory alloc. graph id: " << kernel_graph->graph_id() << ", Memory Statistics:" << device::ascend::AscendMemAdapter::GetInstance().DevMemStatistics(); + MS_LOG(INFO) << "The dynamic memory pool total size is " + << device::ascend::AscendMemoryPool::GetInstance().total_mem_statistics() / kMBToByte + << "M, total used size is " + << device::ascend::AscendMemoryPool::GetInstance().used_mem_statistics() / kMBToByte + << "M, used peak size is " + << device::ascend::AscendMemoryPool::GetInstance().used_mem_peak_statistics() / kMBToByte << "M."; } void AscendSession::RunOpMemoryAlloc(const std::vector &input_tensors, diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc index ca40ba65610..b00e0d521fe 100644 --- a/mindspore/ccsrc/debug/common.cc +++ b/mindspore/ccsrc/debug/common.cc @@ -271,6 +271,31 @@ bool Common::FileExists(const std::string &filepath) { return cache_file_existed; } +std::string Common::GetUserDefineCachePath() { + static std::string config_path = ""; + if (config_path != "") { + return config_path; + } + const char *value = ::getenv(kCOMPILER_CACHE_PATH); + if (value == nullptr) { + config_path = "./"; + } else { + config_path = std::string(value); + FileUtils::CreateNotExistDirs(config_path); + if (config_path[config_path.length() - 1] != '/') { + config_path += "/"; + } + } + return config_path; +} + +std::string Common::GetCompilerCachePath() { + static const std::string user_defined_path = GetUserDefineCachePath(); + static uint32_t rank_id = IsStandAlone() ? 0 : GetRank(); + static const std::string compile_cache_dir = user_defined_path + "rank_" + std::to_string(rank_id) + "/"; + return compile_cache_dir; +} + struct GlogLogDirRegister { GlogLogDirRegister() { const char *logtostderr = std::getenv("GLOG_logtostderr"); diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h index 2786ec34d4a..5dafd097d0e 100644 --- a/mindspore/ccsrc/debug/common.h +++ b/mindspore/ccsrc/debug/common.h @@ -28,6 +28,8 @@ namespace mindspore { static const int MAX_DIRECTORY_LENGTH = 1024; static const int MAX_FILENAME_LENGTH = 128; static const int MAX_OS_FILENAME_LENGTH = 255; +static const char kCOMPILER_CACHE_PATH[] = "MS_COMPILER_CACHE_PATH"; + class Common { public: Common() = default; @@ -43,9 +45,11 @@ class Common { static bool SaveStringToFile(const std::string filename, const std::string string_info); static bool FileExists(const std::string &filepath); static bool CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path, std::string *value); + static std::string GetCompilerCachePath(); private: static bool IsEveryFilenameValid(const std::string &path, size_t length_limit, const std::string &error_message); + static string GetUserDefineCachePath(); }; inline std::string GetSaveGraphsPathName(const std::string &file_name, const std::string &save_path = "") { diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc index 39d073566f2..7f12e25f910 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc @@ -19,13 +19,14 @@ #include #include "runtime/mem.h" #include "utils/ms_context.h" +#include "utils/convert_utils_base.h" #include "graphengine/inc/external/runtime/rt_error_codes.h" namespace mindspore { namespace device { namespace ascend { -constexpr uint64_t kMemSizeGB = 30; -constexpr uint64_t 
kMaxAvailableMSHBMSizeThreshold = 30; +constexpr float kMSMemoryRatio = 0.9375; // 15/16 +constexpr float kReservedMemoryRatio = 0.0625; // 1/16 bool AscendMemAdapter::Initialize() { if (initialized_) { @@ -34,35 +35,44 @@ bool AscendMemAdapter::Initialize() { size_t free_hbm_size = 0; rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &device_hbm_size_); if (ret != RT_ERROR_NONE || device_hbm_size_ == 0) { - MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << device_hbm_size_; + MS_LOG(EXCEPTION) << "Internal Error: Get Device HBM memory size failed, ret = " << ret + << ", total HBM size :" << device_hbm_size_; } - // reserved memory for HCCL or other component - auto reserved_mem_size_for_others = device_hbm_size_ * 15 / 16; - reserved_mem_size_for_others = - reserved_mem_size_for_others >= (1 << kMemSizeGB) ? (1 << kMemSizeGB) : reserved_mem_size_for_others; - max_available_ms_hbm_size_ = device_hbm_size_ - reserved_mem_size_for_others; - - auto user_define_ms_size_ = GetDeviceMemSizeFromContext(); - size_t default_hbm_size; - if (max_available_ms_hbm_size_ >= (kMaxAvailableMSHBMSizeThreshold << kMemSizeGB)) { - // Some HCCL case on 32GB Ascend device will Out Of Memory when reserved only 1GB memory for HCCL, so resize - // default HBM size for MindSpore to 30GB User can use context variable_memory_max_size="31GB" to Allocate 31GB - // HBM for MindSpore in some extreme case. - default_hbm_size = static_cast(30) << kMemSizeGB; - MS_LOG(INFO) << "Default HBM size is resize to 30GB from Max available HBM Size for MindSpore " - << max_available_ms_hbm_size_; + // get user define max backend memory + auto user_define_ms_size = GetDeviceMemSizeFromContext(); + auto recommend_mem_size_for_others = FloatToSize(device_hbm_size_ * kReservedMemoryRatio); + size_t reserved_mem_size_for_others; + if (user_define_ms_size == 0) { + ms_used_hbm_size_ = FloatToSize(device_hbm_size_ * kMSMemoryRatio); + reserved_mem_size_for_others = device_hbm_size_ - user_define_ms_size; } else { - default_hbm_size = max_available_ms_hbm_size_; - } - ms_used_hbm_size_ = user_define_ms_size_ == 0 ? 
default_hbm_size : user_define_ms_size_; + if (user_define_ms_size >= device_hbm_size_) { + MS_LOG(EXCEPTION) + << "The Total Device Memory Size is " << (SizeToFloat(device_hbm_size_) / kGBToByte) + << " GB, variable_memory_max_size/max_device_memory should be in range (0-" + << (SizeToFloat(device_hbm_size_) / kGBToByte) << "]GB, but got " + << (SizeToFloat(user_define_ms_size) / kGBToByte) + << "GB, please set the context key 'variable_memory_max_size'/'max_device_memory' in valid range."; + } + ms_used_hbm_size_ = user_define_ms_size; + + reserved_mem_size_for_others = device_hbm_size_ - ms_used_hbm_size_; + if (reserved_mem_size_for_others < recommend_mem_size_for_others) { + MS_LOG(WARNING) << "Reserved memory size for other components(" << reserved_mem_size_for_others + << ") is less than recommend size(" << recommend_mem_size_for_others + << "), It may lead to Out Of Memory in HCCL or other components, Please double check context key " + "'variable_memory_max_size'/'max_device_memory'"; + } + } + + MS_LOG(INFO) << "Device HBM Size:" << device_hbm_size_ / kMBToByte + << "M, Reserved HBM size for Other Components(HCCL/rts/etc.):" + << reserved_mem_size_for_others / kMBToByte + << "M, Recommend Reserved HBM size for Other Components:" << recommend_mem_size_for_others / kMBToByte + << "M, User define MindSpore HBM Size:" << user_define_ms_size / kGBToByte + << "G, MindSpore Used HBM Size:" << ms_used_hbm_size_ / kMBToByte << "M."; - MS_LOG(INFO) << "Device HBM Size:" << device_hbm_size_ - << ", Reserved HBM size for Other Components(HCCL/rts/etc.):" << reserved_mem_size_for_others - << ", Max available HBM Size for MindSpore:" << max_available_ms_hbm_size_ - << ", User define MindSpore HBM Size:" << user_define_ms_size_ - << ", Default MindSpore HBM Size:" << default_hbm_size - << ", MindSpore Used HBM Size:" << ms_used_hbm_size_; device_mem_base_addr_ = MallocFromRts(ms_used_hbm_size_); static_mem_offset_ = ms_used_hbm_size_; cur_dynamic_mem_offset_ = 0; @@ -103,11 +113,10 @@ uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) { std::lock_guard locker(mutex_); auto new_static_offset = static_mem_offset_ - size; if (new_static_offset < max_dynamic_mem_offset_) { - MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics() - << "Please try to reduce 'batch_size' or check whether exists extra large shape. More " - "details can be found in MindSpore's FAQ with keyword 'Out of Memory'."; - MS_LOG(ERROR) << DevMemDetailInfo(); - return nullptr; + MS_LOG(INFO) << DevMemDetailInfo(); + MS_LOG(EXCEPTION) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics() + << "\nPlease try to reduce 'batch_size' or check whether exists extra large shape. For more " + "details, please refer to 'Out of Memory' at https://www.mindspore.cn ."; } auto memory_block_ptr = device_mem_base_addr_ + new_static_offset; @@ -121,11 +130,10 @@ uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) { std::lock_guard locker(mutex_); auto new_dynamic_offset = cur_dynamic_mem_offset_ + size; if (new_dynamic_offset > static_mem_offset_) { - MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics() - << "Please try to reduce 'batch_size' or check whether exists extra large shape. 
More " - "details can be found in MindSpore's FAQ with keyword 'Out of Memory'."; - MS_LOG(ERROR) << DevMemDetailInfo(); - return nullptr; + MS_LOG(INFO) << DevMemDetailInfo(); + MS_LOG(EXCEPTION) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics() + << "\nPlease try to reduce 'batch_size' or check whether exists extra large shape. For more " + "details, please refer to 'Out of Memory' at https://www.mindspore.cn ."; } auto memory_block_ptr = device_mem_base_addr_ + cur_dynamic_mem_offset_; @@ -140,12 +148,12 @@ void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; } std::string AscendMemAdapter::DevMemStatistics() { std::ostringstream oss; - oss << "\nDevice HBM memory size: " << device_hbm_size_; - oss << "\nMindSpore Used memory size: " << ms_used_hbm_size_; + oss << "\nDevice HBM memory size: " << device_hbm_size_ / kMBToByte << "M"; + oss << "\nMindSpore Used memory size: " << ms_used_hbm_size_ / kMBToByte << "M"; oss << "\nMindSpore memory base address: " << reinterpret_cast(device_mem_base_addr_); - oss << "\nTotal Static Memory size: " << ms_used_hbm_size_ - static_mem_offset_; - oss << "\nTotal Dynamic memory size: " << max_dynamic_mem_offset_; - oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_; + oss << "\nTotal Static Memory size: " << (ms_used_hbm_size_ - static_mem_offset_) / kMBToByte << "M"; + oss << "\nTotal Dynamic memory size: " << max_dynamic_mem_offset_ / kMBToByte << "M"; + oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_ / kMBToByte << "M"; oss << std::endl; return oss.str(); } @@ -170,27 +178,28 @@ std::string AscendMemAdapter::DevMemDetailInfo() { size_t AscendMemAdapter::GetDeviceMemSizeFromContext() { auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); - auto variable_memory_max_size = context->get_param(MS_CTX_VARIABLE_MEMORY_MAX_SIZE); - if (variable_memory_max_size == "0") { - return 0; + size_t size_from_context; + auto max_device_memory = context->get_param(MS_CTX_MAX_DEVICE_MEMORY); + if (max_device_memory != mindspore::kDefaultMaxDeviceMemory) { + MS_LOG(INFO) << "context max_device_memory:" << max_device_memory; + size_from_context = FloatToSize(max_device_memory * kGBToByte); + } else { + auto variable_memory_max_size = context->get_param(MS_CTX_VARIABLE_MEMORY_MAX_SIZE); + if (variable_memory_max_size == "0") { + return 0; + } + MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; + auto pos = variable_memory_max_size.find('*'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; + } + auto gb_str = variable_memory_max_size.substr(0, pos); + auto gb_var = std::stoull(gb_str); + MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; + size_from_context = gb_var * kGBToByte; } - MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; - auto pos = variable_memory_max_size.find('*'); - if (pos == std::string::npos) { - MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; - } - auto gb_str = variable_memory_max_size.substr(0, pos); - auto gb_var = std::stoull(gb_str); - MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; - auto max_hbm_size_for_ms_GB = max_available_ms_hbm_size_ >> kMemSizeGB; - if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) { - MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (device_hbm_size_ >> kMemSizeGB) - << " GB, variable_memory_max_size should be in range (0-" << 
max_hbm_size_for_ms_GB - << "]GB, but got " << gb_var - << "GB, please set the context key 'variable_memory_max_size' in valid range."; - } - return gb_var << kMemSizeGB; + return size_from_context; } uint8_t *AscendMemAdapter::MallocFromRts(size_t size) { @@ -211,8 +220,9 @@ uint8_t *AscendMemAdapter::MallocFromRts(size_t size) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << size << "] fail, ret[" << ret << "]"; } } else { - MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << size - << " bytes , address : " << reinterpret_cast(ptr); + MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size: " << size + << " bytes, address start: " << reinterpret_cast(ptr) + << " end: " << reinterpret_cast(ptr + size); } return ptr; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc index afbe70d293c..75b21f91613 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc @@ -19,6 +19,7 @@ #include "runtime/device/ascend/ascend_memory_adapter.h" #include "runtime/mem.h" #include "utils/log_adapter.h" +#include "utils/convert_utils_base.h" namespace mindspore { namespace device { @@ -31,6 +32,12 @@ static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20; size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) { auto device_free_mem_size = free_mem_size(); if (device_free_mem_size < size) { + MS_LOG(WARNING) << "The dynamic memory pool total size is " + << device::ascend::AscendMemoryPool::GetInstance().total_mem_statistics() / kMBToByte + << "M, total used size is " + << device::ascend::AscendMemoryPool::GetInstance().used_mem_statistics() / kMBToByte + << "M, used peak size is " + << device::ascend::AscendMemoryPool::GetInstance().used_mem_peak_statistics() / kMBToByte << "M."; MS_LOG(WARNING) << "Out of Memory. Request memory size: " << size << ", Memory Statistic:" << AscendMemAdapter::GetInstance().DevMemStatistics(); return 0; diff --git a/mindspore/core/utils/convert_utils_base.h b/mindspore/core/utils/convert_utils_base.h index ee024dee829..1ddd21eeff3 100644 --- a/mindspore/core/utils/convert_utils_base.h +++ b/mindspore/core/utils/convert_utils_base.h @@ -23,6 +23,9 @@ #include "utils/log_adapter.h" namespace mindspore { +const size_t kGBToByte = 1024 << 20; +const size_t kMBToByte = 1024 << 10; + inline int SizeToInt(size_t u) { if (u > static_cast((std::numeric_limits::max)())) { MS_LOG(EXCEPTION) << "The size_t value(" << u << ") exceeds the maximum value of int.";