diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h index d0d067e7c18..b1023f6195d 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h @@ -114,7 +114,6 @@ class DynamicMemPoolBestFit { virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0; virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0; virtual size_t free_mem_size() = 0; - virtual size_t total_mem_size() = 0; protected: // The real size by memory alloc aligned. diff --git a/mindspore/ccsrc/backend/session/kernel_build_client.h b/mindspore/ccsrc/backend/session/kernel_build_client.h index 9c4a15c6248..64afc489711 100644 --- a/mindspore/ccsrc/backend/session/kernel_build_client.h +++ b/mindspore/ccsrc/backend/session/kernel_build_client.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "common/duplex_pipe.h" #include "utils/log_adapter.h" @@ -88,6 +89,7 @@ class KernelBuildClient { // Send a request and fetch its response std::string SendRequest(std::string data) { + std::lock_guard locker(mutex_); Request(data); return Response(); } @@ -137,6 +139,8 @@ class KernelBuildClient { virtual ~KernelBuildClient() = default; private: + // Support multi-thread. + std::mutex mutex_; bool init_; std::shared_ptr dp_; }; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index d5021afdaeb..3c613ce1385 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -73,6 +73,9 @@ const std::set kOpNeedTransFormat = { kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D}; void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) { + if (size == 0) { + return; + } auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); auto device_id = ms_context->get_param(MS_CTX_DEVICE_ID); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index 55d0f285b1e..92919dd10ef 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -1186,7 +1186,7 @@ std::shared_ptr AscendKernelRuntime::CreateDeviceTimeEvent() { uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const { auto ascend_mem_manager = std::dynamic_pointer_cast(mem_manager_); MS_EXCEPTION_IF_NULL(ascend_mem_manager); - return ascend_mem_manager->GetDeviceMemSize(); + return ascend_mem_manager->GetMsMaxMemSize(); } bool AscendKernelRuntime::DeleteDumpDir(const std::string &path) { diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc new file mode 100644 index 00000000000..4121664de5b --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc @@ -0,0 +1,211 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/device/ascend/ascend_memory_adapter.h" + +#include +#include "runtime/mem.h" +#include "utils/ms_context.h" +#include "graphengine/inc/external/runtime/rt_error_codes.h" + +namespace mindspore { +namespace device { +namespace ascend { +constexpr uint64_t kMemSizeGB = 30; + +bool AscendMemAdapter::Initialize() { + if (initialized_) { + return true; + } + size_t free_hbm_size = 0; + rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &total_hbm_size_); + if (ret != RT_ERROR_NONE || total_hbm_size_ == 0) { + MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << total_hbm_size_; + } + + max_hbm_size_for_ms_ = total_hbm_size_ * 15 / 16; // reserved memory is 1/16 of total + auto context_mem = GetDeviceMemSizeFromContext(); + device_mem_size_ = context_mem == 0 ? max_hbm_size_for_ms_ : context_mem; + device_mem_base_ = MallocFromRts(device_mem_size_); + static_mem_offset_ = device_mem_size_; + cur_dynamic_mem_offset_ = 0; + max_dynamic_mem_offset_ = 0; + MS_LOG(INFO) << " Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics(); + initialized_ = true; + return true; +} + +bool AscendMemAdapter::DeInitialize() { + if (!initialized_) { + MS_LOG(ERROR) << " DeInitialize Ascend Memory Adapter when it is not initialize"; + return false; + } + + auto ret = FreeToRts(device_mem_base_); + if (ret) { + total_hbm_size_ = 0; + max_hbm_size_for_ms_ = 0; + device_mem_base_ = nullptr; + device_mem_size_ = 0; + + cur_dynamic_mem_offset_ = 0; + max_dynamic_mem_offset_ = 0; + dynamic_memory_block_list_.clear(); + + static_mem_offset_ = 0; + static_memory_block_list_.clear(); + + MS_LOG(INFO) << " Ascend Memory Adapter initialize success, statistics:" << DevMemStatistics(); + initialized_ = false; + } + + return ret; +} + +uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) { + std::lock_guard locker(mutex_); + auto new_static_offset = static_mem_offset_ - size; + if (new_static_offset < max_dynamic_mem_offset_) { + MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics() + << " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More " + "details can be found in MindSpore's FAQ with keyword 'Out of Memory'."; + MS_LOG(ERROR) << DevMemDetailInfo(); + return nullptr; + } + + auto memory_block_ptr = device_mem_base_ + new_static_offset; + static_mem_offset_ = new_static_offset; + static_memory_block_list_.push_back(std::make_shared(memory_block_ptr, size, tag)); + + return memory_block_ptr; +} + +uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) { + std::lock_guard locker(mutex_); + auto new_dynamic_offset = cur_dynamic_mem_offset_ + size; + if (new_dynamic_offset > static_mem_offset_) { + MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics() + << " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More " + "details can be found in MindSpore's FAQ with keyword 'Out of Memory'."; + MS_LOG(ERROR) << DevMemDetailInfo(); + return nullptr; + } + + auto memory_block_ptr = device_mem_base_ + cur_dynamic_mem_offset_; + cur_dynamic_mem_offset_ = new_dynamic_offset; + max_dynamic_mem_offset_ = std::max(cur_dynamic_mem_offset_, max_dynamic_mem_offset_); + dynamic_memory_block_list_.push_back(std::make_shared(memory_block_ptr, size, tag)); + + return memory_block_ptr; +} + +void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; } + +std::string AscendMemAdapter::DevMemStatistics() { + std::ostringstream oss; + oss << "\nHBM memory size: " << total_hbm_size_; + oss << "\nAvailable HBM memory size for MS: " << max_hbm_size_for_ms_; + oss << "\nMS memory base size: " << device_mem_size_; + oss << "\nMS memory base address: " << reinterpret_cast(device_mem_base_); + oss << "\nStatic Memory size: " << device_mem_size_ - static_mem_offset_; + oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_; + oss << "\nMAX Dynamic memory size of all graph: " << max_dynamic_mem_offset_; + oss << "\nMS Static memory offset: " << static_mem_offset_; + oss << std::endl; + return oss.str(); +} + +std::string AscendMemAdapter::DevMemDetailInfo() { + std::ostringstream oss; + oss << "\nMemory Detail Info:"; + oss << "\nStatic Memory Blocks:"; + oss << "\nAddress \t Size \t tag \t"; + for (const auto &blk : static_memory_block_list_) { + oss << "\n" << blk->mem_ptr << "\t" << blk->mem_size << "\t" << blk->mem_tag; + } + + oss << "\nDynamic Memory Blocks:"; + oss << "\nAddress \t Size \t tag \t"; + for (const auto &blk : dynamic_memory_block_list_) { + oss << "\n" << blk->mem_ptr << "\t" << blk->mem_size << "\t" << blk->mem_tag; + } + return oss.str(); +} + +size_t AscendMemAdapter::GetDeviceMemSizeFromContext() { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto variable_memory_max_size = context->get_param(MS_CTX_VARIABLE_MEMORY_MAX_SIZE); + if (variable_memory_max_size == "0") { + return 0; + } + MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; + auto pos = variable_memory_max_size.find('*'); + if (pos == std::string::npos) { + MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; + } + auto gb_str = variable_memory_max_size.substr(0, pos); + auto gb_var = std::stoull(gb_str); + MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; + + auto max_hbm_size_for_ms_GB = max_hbm_size_for_ms_ >> kMemSizeGB; + if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) { + MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (total_hbm_size_ >> kMemSizeGB) + << " GB, variable_memory_max_size should be in range (0-" << max_hbm_size_for_ms_GB + << "]GB, but got " << gb_var + << "GB, please set the context key 'variable_memory_max_size' in valid range."; + } + return gb_var << kMemSizeGB; +} + +uint8_t *AscendMemAdapter::MallocFromRts(size_t size) { + uint8_t *ptr = nullptr; + auto ret = rtMalloc(reinterpret_cast(&ptr), size, RT_MEMORY_HBM); + if (ret != ACL_RT_SUCCESS) { + if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + unsigned int device_id = context_ptr->get_param(MS_CTX_DEVICE_ID); + size_t free = 0; + size_t total = 0; + (void)rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free, &total); + MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << size << "], ret[" << ret << "], " + << "Device " << device_id << " Available HBM size:" << total << " free size:" << free + << " may be other processes occupying this card, check as: ps -ef|grep python"; + } else { + MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << size << "] fail, ret[" << ret << "]"; + } + } else { + MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << size + << " bytes , address : " << reinterpret_cast(ptr); + } + return ptr; +} + +bool AscendMemAdapter::FreeToRts(void *devPtr) { + if (devPtr != nullptr) { + auto ret = rtFree(devPtr); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "rtFree mem [" << devPtr << "] fail, ret[" << ret << "]"; + return false; + } + } + return true; +} + +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.h new file mode 100644 index 00000000000..6a2b6d74528 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.h @@ -0,0 +1,93 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ + +#include +#include +#include +#include +#include "utils/ms_context.h" + +namespace mindspore { +namespace device { +namespace ascend { + +class AscendMemAdapter { + public: + static AscendMemAdapter &GetInstance() { + static AscendMemAdapter instance; + return instance; + } + + bool Initialize(); + bool DeInitialize(); + + uint8_t *MallocStaticDevMem(size_t size, std::string tag = ""); + uint8_t *MallocDynamicDevMem(size_t size, std::string tag = ""); + bool FreeStaticDevMem(void *devPtr) { return true; } + void ResetDynamicMemory(); + + uint64_t FreeDevMemSize() { return static_mem_offset_ - max_dynamic_mem_offset_; } + uint64_t TotalDevMemSize() { return device_mem_size_; } + uint64_t MaxHbmSizeForMs() { return max_hbm_size_for_ms_; } + + std::string DevMemStatistics(); + std::string DevMemDetailInfo(); + + private: + struct MemoryBlock { + MemoryBlock(void *ptr, const size_t size, const std::string &tag) { + mem_ptr = ptr; + mem_size = size; + mem_tag = tag; + } + + void *mem_ptr{nullptr}; + size_t mem_size{0}; + std::string mem_tag; + }; + + uint8_t *MallocFromRts(size_t size); + bool FreeToRts(void *devPtr); + size_t GetDeviceMemSizeFromContext(); + + bool initialized_{false}; + + // Support multi-thread. + std::mutex mutex_; + + // rts Memory INFO + size_t total_hbm_size_{0}; + size_t max_hbm_size_for_ms_{0}; + uint8_t *device_mem_base_{nullptr}; + uint64_t device_mem_size_{0}; + + // dynamic memory info + uint64_t cur_dynamic_mem_offset_{0}; + uint64_t max_dynamic_mem_offset_{0}; + std::vector> dynamic_memory_block_list_; + + // static memory info + uint64_t static_mem_offset_{0}; + std::vector> static_memory_block_list_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc index 4f3bded6a82..9fced5ead03 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc @@ -16,6 +16,7 @@ #include #include "runtime/device/ascend/ascend_memory_manager.h" #include "runtime/device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_memory_adapter.h" #include "utils/ms_context.h" #include "runtime/mem.h" #ifndef ENABLE_SECURITY @@ -29,100 +30,17 @@ using mindspore::profiler::ascend::MemoryProfiling; namespace mindspore { namespace device { namespace ascend { -namespace { -constexpr uint64_t kAscendInitDeviceMemGB = 30; -constexpr uint64_t kMemSizeGB = 30; -constexpr uint64_t kAscendDeviceMemSize = (kAscendInitDeviceMemGB << kMemSizeGB); -uint64_t GetDeviceHBMSize() { - size_t free = 0; - size_t total = 0; - rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free, &total); - if (ret != RT_ERROR_NONE || total == 0) { - MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total = " << total; - } - return total; -} +void AscendMemoryManager::MallocDeviceMemory() { (void)AscendMemAdapter::GetInstance().Initialize(); } -uint64_t GetDefaultDeviceMemSize() { - auto total = GetDeviceHBMSize(); - auto ret = total * 15 / 16; // reserved memory is 1/16 of total - MS_LOG(INFO) << "The Device HBM memory size is " << total << ", allocate " << ret << " for backend."; - return ret; -} -} // namespace +void AscendMemoryManager::FreeDeviceMemory() { (void)AscendMemAdapter::GetInstance().DeInitialize(); } -void AscendMemoryManager::MallocDeviceMemory() { - auto context_mem = GetDeviceMemSizeFromContext(); - device_mem_size_ = context_mem == 0 ? GetDefaultDeviceMemSize() : context_mem; - auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), device_mem_size_, RT_MEMORY_HBM); - if (ret != ACL_RT_SUCCESS) { - if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - unsigned int device_id = context_ptr->get_param(MS_CTX_DEVICE_ID); - MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "], " - << "Device " << device_id - << " may be other processes occupying this card, check as: ps -ef|grep python"; - } else { - MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - } else { - MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << device_mem_size_ - << " bytes , address : " << reinterpret_cast(device_mem_base_); - } - AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_); -} - -uint64_t AscendMemoryManager::GetDeviceMemSize() { - auto mem_size = GetDeviceMemSizeFromContext(); - return mem_size == 0 ? GetDefaultDeviceMemSize() : mem_size; -} - -uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() { - auto context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context); - auto variable_memory_max_size = context->get_param(MS_CTX_VARIABLE_MEMORY_MAX_SIZE); - if (variable_memory_max_size == "0") { - return 0; - } - MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size; - auto pos = variable_memory_max_size.find('*'); - if (pos == std::string::npos) { - MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size"; - } - auto gb_str = variable_memory_max_size.substr(0, pos); - auto gb_var = std::stoull(gb_str); - MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var; - auto total_hbm_size_GB = GetDeviceHBMSize() >> kMemSizeGB; - auto backend_max_size_GB = total_hbm_size_GB - 1; // reserved 1 GB for other component - if (gb_var > backend_max_size_GB || gb_var == 0) { - MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB - << " GB, variable_memory_max_size should be in range (0-" << backend_max_size_GB - << "]GB, but got " << gb_var - << "GB, please set the context key 'variable_memory_max_size' in valid range."; - } - return gb_var << kMemSizeGB; -} - -void AscendMemoryManager::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - auto ret = rtFree(device_mem_base_); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; - } - device_mem_base_ = nullptr; - } -} - -void AscendMemoryManager::ResetDynamicMemory() { - total_dynamic_size_ = 0; - dynamic_mem_offset_ = 0; - AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); -} +void AscendMemoryManager::ResetDynamicMemory() { (void)AscendMemAdapter::GetInstance().ResetDynamicMemory(); } void AscendMemoryManager::ClearGlobalIdleMem() { AscendMemoryPool::GetInstance().ResetIdleMemBuf(); } +uint64_t AscendMemoryManager::GetMsMaxMemSize() { return AscendMemAdapter::GetInstance().MaxHbmSizeForMs(); } + void *AscendMemoryManager::MallocDevice(size_t size) { auto align_size = GetCommonAlignSize(size); return AscendMemoryPool::GetInstance().AllocTensorMem(align_size); @@ -146,12 +64,8 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me } else { align_size = GetCommonAlignSize(size); } - auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); - MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "], Memory statistics: total[" << device_mem_size_ - << "] dynamic [" << total_dynamic_size_ << "] static [" << device_mem_size_ - device_mem_pool_offset - << "], Pool statistics: pool total size [" << AscendMemoryPool::GetInstance().total_mem_statistics() - << "] used [" << AscendMemoryPool::GetInstance().used_mem_statistics() - << "] communication_mem:" << communication_mem; + MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "] communication_mem:" << communication_mem; + #ifndef ENABLE_SECURITY if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable() && graph_id != kInvalidGraphId) { auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id); @@ -163,16 +77,11 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me node->AddStaticMemorySize(SizeToUint(align_size)); } #endif - if (communication_mem) { - // create protect area [kMemAlignSize -- data -- kMemAlignSize] - uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); - MS_EXCEPTION_IF_NULL(alloc_address); - return alloc_address + kMemAlignSize; - } else { - uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); - MS_EXCEPTION_IF_NULL(alloc_address); - return alloc_address; - } + + uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + MS_EXCEPTION_IF_NULL(alloc_address); + // create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory + return communication_mem ? alloc_address + kMemAlignSize : alloc_address; } uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { @@ -182,29 +91,12 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m } else { align_size = GetCommonAlignSize(size); } + MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "] communication_mem: " << communication_mem; - auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset(); - MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "], Memory statistics: total[" << device_mem_size_ - << "] dynamic[" << total_dynamic_size_ << "] static[" << device_mem_size_ - device_mem_pool_offset - << "] communication_mem: " << communication_mem; - auto offset = dynamic_mem_offset_; - auto new_offset = dynamic_mem_offset_ + align_size; - if (new_offset >= device_mem_pool_offset) { - MS_LOG(EXCEPTION) << "Out of Memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_ - << "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])" - << " malloc [" << align_size - << "] failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More " - "details can be found in MindSpore's FAQ with keyword 'Out of Memory'."; - } - total_dynamic_size_ += align_size; - dynamic_mem_offset_ = new_offset; - AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_); - if (communication_mem) { - // create protect area [kMemAlignSize -- data -- kMemAlignSize] - return device_mem_base_ + offset + kMemAlignSize; - } else { - return device_mem_base_ + offset; - } + uint8_t *alloc_address = reinterpret_cast(AscendMemAdapter::GetInstance().MallocDynamicDevMem(align_size)); + MS_EXCEPTION_IF_NULL(alloc_address); + // create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory + return communication_mem ? alloc_address + kMemAlignSize : alloc_address; } void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) { diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h index ea95fbcb98e..a2c18a88eee 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.h @@ -19,7 +19,7 @@ #include #include "runtime/device/memory_manager.h" -#include "graphengine/inc/external/runtime/rt_error_codes.h" + namespace mindspore { namespace device { namespace ascend { @@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager { void *MallocMemFromMemPool(size_t size) override; void *MallocDevice(size_t size) override; void FreeMemFromMemPool(void *device_ptr) override; - uint64_t GetDeviceMemSize(); + uint64_t GetMsMaxMemSize(); void MallocSomasDynamicMem(const session::KernelGraph &graph) override; uint8_t *MallocCommunicationMemFromMemPool(size_t size) override; std::vector MallocContinuousMemFromMemPool(size_t total_size, std::vector size_list) override { @@ -49,12 +49,6 @@ class AscendMemoryManager : public MemoryManager { protected: uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override; uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; - - private: - uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_pool_size_{0}; - - uint64_t GetDeviceMemSizeFromContext(); }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc index 32727cbde56..eeb5e887aae 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc @@ -16,8 +16,8 @@ #include #include "runtime/device/ascend/ascend_memory_pool.h" +#include "runtime/device/ascend/ascend_memory_adapter.h" #include "runtime/mem.h" -#include "runtime/device/ascend/ascend_kernel_runtime.h" #include "utils/log_adapter.h" namespace mindspore { @@ -28,31 +28,11 @@ static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE = 256 << 20; // The minimum unit size (8MB) of memory block used for dynamic extend in graph mode. static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20; -void AscendMemoryPool::Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset) { - static bool initialized = false; - if (initialized) { - return; - } - - MS_EXCEPTION_IF_NULL(device_mem_base); - set_device_mem_pool_base(device_mem_base); - - if (dynamic_mem_offset > device_mem_size) { - MS_LOG(EXCEPTION) << "Dynamic memory offset: " << dynamic_mem_offset - << " exceed the device memory size: " << device_mem_size; - } - set_device_mem_size(device_mem_size); - set_device_mem_pool_offset(device_mem_size); - set_graph_dynamic_mem_offset(dynamic_mem_offset); - initialized = true; -} - size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) { auto device_free_mem_size = free_mem_size(); if (device_free_mem_size < size) { MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size - << "] is smaller than required size[" << size << "], dynamic offset [" << graph_dynamic_mem_offset_ - << "] memory pool offset[" << device_mem_size_ - device_mem_pool_offset_ << "])"; + << "] is smaller than required size[" << size << "]"; return 0; } auto alloc_mem_size = ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE; @@ -76,23 +56,12 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) { } size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { - MS_LOG(INFO) << "Malloc Memory: Pool, total[" << device_mem_size_ << "] (dynamic[" << graph_dynamic_mem_offset_ - << "] memory pool[" << device_mem_size_ - device_mem_pool_offset_ << "])" - << " malloc [" << size << "]"; - + MS_LOG(INFO) << "Malloc Memory for Pool, size: " << size; if (size == 0) { MS_LOG(EXCEPTION) << "Failed to alloc memory pool resource, the size is zero!"; } - if (device_mem_pool_offset_ - size < graph_dynamic_mem_offset_) { - MS_LOG(EXCEPTION) << "Out of Memory!!! Failed to alloc memory pool memory, the current device_mem_pool_offset_ [" - << device_mem_pool_offset_ << "], current graph_dynamic_mem_offset_ " << graph_dynamic_mem_offset_ - << "], need memory size [" << size - << "]. Please try to reduce 'batch_size' or check whether exists extra large shape. More details " - "can be found in MindSpore's FAQ with keyword 'Out of Memory'."; - } - device_mem_pool_offset_ -= size; - *addr = device_mem_pool_base_ + device_mem_pool_offset_; + *addr = AscendMemAdapter::GetInstance().MallocStaticDevMem(size); if (*addr == nullptr) { MS_LOG(EXCEPTION) << "Alloc device memory pool address is nullptr, failed to alloc memory pool resource!"; } @@ -101,7 +70,7 @@ size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { MS_EXCEPTION_IF_NULL(addr); - return true; + return AscendMemAdapter::GetInstance().FreeStaticDevMem(addr); } void AscendMemoryPool::ResetIdleMemBuf() { @@ -112,39 +81,7 @@ void AscendMemoryPool::ResetIdleMemBuf() { } } -size_t AscendMemoryPool::AlignMemorySize(size_t size) const { - if (size == 0) { - MS_LOG(EXCEPTION) << "The align memory size is a zero !"; - } - return size; -} - -void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) { - MS_EXCEPTION_IF_NULL(device_mem_pool_base); - device_mem_pool_base_ = device_mem_pool_base; -} - -void AscendMemoryPool::set_device_mem_size(uint64_t device_mem_size) { device_mem_size_ = device_mem_size; } - -void AscendMemoryPool::set_device_mem_pool_offset(uint64_t device_mem_pool_offset) { - device_mem_pool_offset_ = device_mem_pool_offset; -} - -void AscendMemoryPool::set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset) { - graph_dynamic_mem_offset_ = graph_dynamic_mem_offset; -} - -uint64_t AscendMemoryPool::device_mem_pool_offset() const { return device_mem_pool_offset_; } - -size_t AscendMemoryPool::free_mem_size() { - if (graph_dynamic_mem_offset_ >= device_mem_pool_offset_) { - MS_LOG(EXCEPTION) << "graph dynamic mem offset [" << graph_dynamic_mem_offset_ - << "] less than or equal to device mem pool offset [" << device_mem_pool_offset_ << "]!"; - } - return device_mem_pool_offset_ - graph_dynamic_mem_offset_; -} - -size_t AscendMemoryPool::total_mem_size() { return device_mem_size_ - graph_dynamic_mem_offset_; } +size_t AscendMemoryPool::free_mem_size() { return AscendMemAdapter::GetInstance().FreeDevMemSize(); } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h index 65162815b31..04f704e8292 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.h @@ -29,18 +29,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { AscendMemoryPool(const AscendMemoryPool &) = delete; AscendMemoryPool &operator=(const AscendMemoryPool &) = delete; - void Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset); size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; bool FreeDeviceMem(const DeviceMemPtr &addr) override; - void ResetIdleMemBuf(); - void set_device_mem_size(uint64_t device_mem_size); - void set_device_mem_pool_base(uint8_t *device_mem_pool_base); - void set_device_mem_pool_offset(uint64_t device_mem_pool_offset); - void set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset); - - uint64_t device_mem_pool_offset() const; size_t free_mem_size() override; - size_t total_mem_size() override; + + void ResetIdleMemBuf(); static AscendMemoryPool &GetInstance() { static AscendMemoryPool instance; @@ -48,17 +41,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit { } protected: - // The real size by memory alloc aligned. - size_t AlignMemorySize(size_t size) const override; // Calculate memory block required alloc size when adding the memory block. size_t CalMemBlockAllocSize(size_t size) override; private: AscendMemoryPool() = default; - uint8_t *device_mem_pool_base_{nullptr}; - uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_offset_{0}; - uint64_t graph_dynamic_mem_offset_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc index 4aa48a1f402..2726dd64bb5 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.cc @@ -27,7 +27,7 @@ namespace gpu { const size_t kGBToByte = 1024 << 20; bool GPUMemoryAllocator::Init() { - size_t total_size = total_mem_size(); + size_t total_size = CudaDriver::total_mem_size(); size_t free_size = CudaDriver::free_mem_size(); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -98,8 +98,6 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); } size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); } - -size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); } } // namespace gpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h index dd66a7d5eed..a5625213944 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_allocator.h @@ -35,7 +35,6 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit { size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; bool FreeDeviceMem(const DeviceMemPtr &addr) override; size_t free_mem_size() override; - size_t total_mem_size() override; static GPUMemoryAllocator &GetInstance() { static GPUMemoryAllocator instance; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc index dcf47324117..aad89ec8e48 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_memory_manager.cc @@ -73,61 +73,25 @@ bool GPUMemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList void GPUMemoryManager::MallocDeviceMemory() { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); - // If use the dynamic memory pool, then alloc the first memory block to init. - if (context_ptr->get_param(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) { - if (ps::ps_cache_instance.initialized_ps_cache()) { - return; - } - auto device_addr = MallocMemFromMemPool(1); - if (!device_addr) { - MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; - } - } else { - // Need to reserve 20% space for dynamic memory - const float init_gpu_mem_ratio = 0.8; - size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); - auto alloc_size = - GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast(&device_mem_base_)); - device_mem_size_ = alloc_size; - static_mem_offset_ = device_mem_size_; + if (ps::ps_cache_instance.initialized_ps_cache()) { + return; + } + auto device_addr = MallocMemFromMemPool(1); + if (!device_addr) { + MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; } } -void GPUMemoryManager::FreeDeviceMemory() { - if (device_mem_base_ != nullptr) { - if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { - MS_LOG(EXCEPTION) << "Could not free gpu device memory."; - } - } - GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); -} +void GPUMemoryManager::FreeDeviceMemory() { GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); } uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); - if (context_ptr->get_param(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) { - auto device_ptr = MallocMemFromMemPool(size); - if (device_ptr == nullptr) { - MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size; - } - return AddressOffset(device_ptr, 0); + auto device_ptr = MallocMemFromMemPool(size); + if (device_ptr == nullptr) { + MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size; } - - auto align_size = GetCommonAlignSize(size); - if (static_mem_offset_ < align_size) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - auto offset = static_mem_offset_ - align_size; - if (dynamic_mem_offset_ > offset) { - MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ - << "] static[" << total_static_size_ << "])" - << " malloc [" << align_size << "] failed!"; - } - total_static_size_ += align_size; - static_mem_offset_ = offset; - return device_mem_base_ + offset; + return AddressOffset(device_ptr, 0); } } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/runtime/device/memory_manager.h b/mindspore/ccsrc/runtime/device/memory_manager.h index d0ddbd8f73e..6eb0314ef54 100644 --- a/mindspore/ccsrc/runtime/device/memory_manager.h +++ b/mindspore/ccsrc/runtime/device/memory_manager.h @@ -39,10 +39,7 @@ class MemoryManager : public MemHandler { virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; - virtual void ResetDynamicMemory() { - total_dynamic_size_ = 0; - dynamic_mem_offset_ = 0; - } + virtual void ResetDynamicMemory() {} virtual void ClearGlobalIdleMem() {} virtual void MallocSomasDynamicMem(const session::KernelGraph &graph); @@ -110,12 +107,6 @@ class MemoryManager : public MemHandler { protected: virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) = 0; virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); - uint8_t *device_mem_base_{nullptr}; - uint64_t device_mem_size_{0}; - uint64_t dynamic_mem_offset_{0}; - uint64_t static_mem_offset_{0}; - size_t total_static_size_ = 0; - size_t total_dynamic_size_ = 0; SomasPtr somas_reuse_util_ptr_{nullptr}; std::map> cached_host_mem_; std::map>> host_mem_block_map_; diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc index c7b1a706ca1..cc3be45a940 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc @@ -90,8 +90,6 @@ bool CPUMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { } size_t CPUMemoryPool::free_mem_size() { return GetSystemMemorySize("MemAvailable"); } - -size_t CPUMemoryPool::total_mem_size() { return GetSystemMemorySize("MemTotal"); } } // namespace cpu } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.h index f100337bc87..96c1d8f8286 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.h +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.h @@ -36,7 +36,6 @@ class CPUMemoryPool : public DynamicMemPoolBestFit { size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; bool FreeDeviceMem(const DeviceMemPtr &addr) override; size_t free_mem_size() override; - size_t total_mem_size() override; private: CPUMemoryPool() = default; diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 5c0181af50e..511e13e70ec 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -130,6 +130,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/runtime/device/ascend/ascend_event.cc" "../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc" "../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc" + "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc" "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc" "../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc"