add ascend memory adapter for ascend memory management

This commit is contained in:
LaiYongqiang 2021-09-30 15:53:54 +08:00
parent bc37faad4d
commit 79599546ee
17 changed files with 354 additions and 284 deletions

View File

@ -114,7 +114,6 @@ class DynamicMemPoolBestFit {
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
virtual size_t free_mem_size() = 0;
virtual size_t total_mem_size() = 0;
protected:
// The real size by memory alloc aligned.

View File

@ -22,6 +22,7 @@
#include <cstring>
#include <cstdlib>
#include <memory>
#include <mutex>
#include "common/duplex_pipe.h"
#include "utils/log_adapter.h"
@ -88,6 +89,7 @@ class KernelBuildClient {
// Send a request and fetch its response
std::string SendRequest(std::string data) {
  // Hold the client mutex across the whole exchange so that the request
  // written by one thread is paired with the response read by that same thread.
  std::lock_guard<std::mutex> exchange_guard(mutex_);
  Request(data);
  return Response();
}
@ -137,6 +139,8 @@ class KernelBuildClient {
virtual ~KernelBuildClient() = default;
private:
// Support multi-thread.
std::mutex mutex_;
bool init_;
std::shared_ptr<DuplexPipe> dp_;
};

View File

@ -73,6 +73,9 @@ const std::set<std::string> kOpNeedTransFormat = {
kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D};
void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) {
if (size == 0) {
return;
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);

View File

@ -1186,7 +1186,7 @@ std::shared_ptr<DeviceEvent> AscendKernelRuntime::CreateDeviceTimeEvent() {
uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const {
auto ascend_mem_manager = std::dynamic_pointer_cast<AscendMemoryManager>(mem_manager_);
MS_EXCEPTION_IF_NULL(ascend_mem_manager);
return ascend_mem_manager->GetDeviceMemSize();
return ascend_mem_manager->GetMsMaxMemSize();
}
bool AscendKernelRuntime::DeleteDumpDir(const std::string &path) {

View File

@ -0,0 +1,211 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/ascend/ascend_memory_adapter.h"
#include <algorithm>
#include "runtime/mem.h"
#include "utils/ms_context.h"
#include "graphengine/inc/external/runtime/rt_error_codes.h"
namespace mindspore {
namespace device {
namespace ascend {
constexpr uint64_t kMemSizeGB = 30;
bool AscendMemAdapter::Initialize() {
if (initialized_) {
return true;
}
size_t free_hbm_size = 0;
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &total_hbm_size_);
if (ret != RT_ERROR_NONE || total_hbm_size_ == 0) {
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << total_hbm_size_;
}
max_hbm_size_for_ms_ = total_hbm_size_ * 15 / 16; // reserved memory is 1/16 of total
auto context_mem = GetDeviceMemSizeFromContext();
device_mem_size_ = context_mem == 0 ? max_hbm_size_for_ms_ : context_mem;
device_mem_base_ = MallocFromRts(device_mem_size_);
static_mem_offset_ = device_mem_size_;
cur_dynamic_mem_offset_ = 0;
max_dynamic_mem_offset_ = 0;
MS_LOG(INFO) << " Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
initialized_ = true;
return true;
}
// Release the device memory block and reset all bookkeeping.
// Returns false (and logs an error) when the adapter was never initialized or
// when the runtime refuses to free the block; in the latter case the adapter
// state is left untouched so the caller can still inspect it.
bool AscendMemAdapter::DeInitialize() {
  if (!initialized_) {
    MS_LOG(ERROR) << " DeInitialize Ascend Memory Adapter when it is not initialize";
    return false;
  }

  auto ret = FreeToRts(device_mem_base_);
  if (ret) {
    total_hbm_size_ = 0;
    max_hbm_size_for_ms_ = 0;
    device_mem_base_ = nullptr;
    device_mem_size_ = 0;

    cur_dynamic_mem_offset_ = 0;
    max_dynamic_mem_offset_ = 0;
    dynamic_memory_block_list_.clear();

    static_mem_offset_ = 0;
    static_memory_block_list_.clear();

    // The original message said "initialize success" here (copied from Initialize),
    // which made the de-initialisation path indistinguishable in the logs.
    MS_LOG(INFO) << " Ascend Memory Adapter deinitialize success, statistics:" << DevMemStatistics();
    initialized_ = false;
  }
  return ret;
}
// Carve `size` bytes of static memory from the top of the device block.
// Static memory grows downwards from static_mem_offset_ and must never reach
// the high-water mark of the dynamic area (max_dynamic_mem_offset_).
// `tag` is stored with the block record and only used for diagnostics.
// Returns the start address of the new block, or nullptr (after logging the
// memory statistics) when the request does not fit.
uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) {
  std::lock_guard<std::mutex> locker(mutex_);

  // Compare the request with the remaining gap instead of computing
  // `static_mem_offset_ - size` first: the offsets are unsigned, so a request
  // larger than the current offset would wrap around to a huge value, pass the
  // check and yield a pointer outside the device block.
  const bool out_of_memory =
    max_dynamic_mem_offset_ > static_mem_offset_ || size > static_mem_offset_ - max_dynamic_mem_offset_;
  if (out_of_memory) {
    MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
                  << " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
                     "details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
    MS_LOG(ERROR) << DevMemDetailInfo();
    return nullptr;
  }

  const auto new_static_offset = static_mem_offset_ - size;
  auto memory_block_ptr = device_mem_base_ + new_static_offset;
  static_mem_offset_ = new_static_offset;
  static_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
  return memory_block_ptr;
}
uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) {
std::lock_guard<std::mutex> locker(mutex_);
auto new_dynamic_offset = cur_dynamic_mem_offset_ + size;
if (new_dynamic_offset > static_mem_offset_) {
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
<< " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
MS_LOG(ERROR) << DevMemDetailInfo();
return nullptr;
}
auto memory_block_ptr = device_mem_base_ + cur_dynamic_mem_offset_;
cur_dynamic_mem_offset_ = new_dynamic_offset;
max_dynamic_mem_offset_ = std::max(cur_dynamic_mem_offset_, max_dynamic_mem_offset_);
dynamic_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
return memory_block_ptr;
}
void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; }
std::string AscendMemAdapter::DevMemStatistics() {
std::ostringstream oss;
oss << "\nHBM memory size: " << total_hbm_size_;
oss << "\nAvailable HBM memory size for MS: " << max_hbm_size_for_ms_;
oss << "\nMS memory base size: " << device_mem_size_;
oss << "\nMS memory base address: " << reinterpret_cast<void *>(device_mem_base_);
oss << "\nStatic Memory size: " << device_mem_size_ - static_mem_offset_;
oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_;
oss << "\nMAX Dynamic memory size of all graph: " << max_dynamic_mem_offset_;
oss << "\nMS Static memory offset: " << static_mem_offset_;
oss << std::endl;
return oss.str();
}
// Build a table listing every recorded static and dynamic memory block
// (address, size and tag), one block per line.
std::string AscendMemAdapter::DevMemDetailInfo() {
  std::ostringstream detail;

  // Emit one titled section followed by a row per recorded block.
  const auto append_section = [&detail](const char *title, const auto &block_list) {
    detail << title;
    detail << "\nAddress \t Size \t tag \t";
    for (const auto &block : block_list) {
      detail << "\n" << block->mem_ptr << "\t" << block->mem_size << "\t" << block->mem_tag;
    }
  };

  detail << "\nMemory Detail Info:";
  append_section("\nStatic Memory Blocks:", static_memory_block_list_);
  append_section("\nDynamic Memory Blocks:", dynamic_memory_block_list_);
  return detail.str();
}
size_t AscendMemAdapter::GetDeviceMemSizeFromContext() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto variable_memory_max_size = context->get_param<std::string>(MS_CTX_VARIABLE_MEMORY_MAX_SIZE);
if (variable_memory_max_size == "0") {
return 0;
}
MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size;
auto pos = variable_memory_max_size.find('*');
if (pos == std::string::npos) {
MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size";
}
auto gb_str = variable_memory_max_size.substr(0, pos);
auto gb_var = std::stoull(gb_str);
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
auto max_hbm_size_for_ms_GB = max_hbm_size_for_ms_ >> kMemSizeGB;
if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) {
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (total_hbm_size_ >> kMemSizeGB)
<< " GB, variable_memory_max_size should be in range (0-" << max_hbm_size_for_ms_GB
<< "]GB, but got " << gb_var
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
}
return gb_var << kMemSizeGB;
}
// Allocate `size` bytes of HBM from the runtime with rtMalloc.
// Returns the device address on success. On failure an exception is raised:
// for ACL_ERROR_RT_MEMORY_ALLOCATION the message also carries the device id
// and the current HBM total / free sizes; any other error code is reported
// as a DeviceProcessError.
uint8_t *AscendMemAdapter::MallocFromRts(size_t size) {
  uint8_t *device_addr = nullptr;
  const auto malloc_ret = rtMalloc(reinterpret_cast<void **>(&device_addr), size, RT_MEMORY_HBM);
  if (malloc_ret == ACL_RT_SUCCESS) {
    MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << size
                 << " bytes , address : " << reinterpret_cast<void *>(device_addr);
    return device_addr;
  }

  if (malloc_ret == ACL_ERROR_RT_MEMORY_ALLOCATION) {
    auto context_ptr = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(context_ptr);
    unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
    // Best-effort query for the error message; its return code is deliberately ignored.
    size_t hbm_free = 0;
    size_t hbm_total = 0;
    (void)rtMemGetInfoEx(RT_MEMORYINFO_HBM, &hbm_free, &hbm_total);
    MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << size << "], ret[" << malloc_ret << "], "
                      << "Device " << device_id << " Available HBM size:" << hbm_total << " free size:" << hbm_free
                      << " may be other processes occupying this card, check as: ps -ef|grep python";
  } else {
    MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << size << "] fail, ret[" << malloc_ret << "]";
  }
  return device_addr;
}
bool AscendMemAdapter::FreeToRts(void *devPtr) {
if (devPtr != nullptr) {
auto ret = rtFree(devPtr);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtFree mem [" << devPtr << "] fail, ret[" << ret << "]";
return false;
}
}
return true;
}
} // namespace ascend
} // namespace device
} // namespace mindspore

View File

@ -0,0 +1,93 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_
#include <mutex>
#include <string>
#include <memory>
#include <vector>
#include "utils/ms_context.h"
namespace mindspore {
namespace device {
namespace ascend {
// Process-wide adapter that owns one device memory block and hands out
// static and dynamic sub-ranges of it by moving two offsets.
class AscendMemAdapter {
 public:
  // Access the single shared instance (created on first use).
  static AscendMemAdapter &GetInstance() {
    static AscendMemAdapter instance;
    return instance;
  }

  // Acquire / release the underlying device memory block.
  bool Initialize();
  bool DeInitialize();

  // Allocate from the static or the dynamic area; `tag` labels the block in the detail report.
  uint8_t *MallocStaticDevMem(size_t size, std::string tag = "");
  uint8_t *MallocDynamicDevMem(size_t size, std::string tag = "");
  // Individual static blocks are not returned to the adapter; this always reports success.
  bool FreeStaticDevMem(void *devPtr) { return true; }
  void ResetDynamicMemory();

  // Gap between the static offset and the dynamic high-water mark.
  uint64_t FreeDevMemSize() { return static_mem_offset_ - max_dynamic_mem_offset_; }
  // Size of the managed device block.
  uint64_t TotalDevMemSize() { return device_mem_size_; }
  // Value of max_hbm_size_for_ms_.
  uint64_t MaxHbmSizeForMs() { return max_hbm_size_for_ms_; }

  // Human-readable reports used in log messages.
  std::string DevMemStatistics();
  std::string DevMemDetailInfo();

 private:
  // Record of one allocation, kept only for DevMemDetailInfo().
  struct MemoryBlock {
    MemoryBlock(void *ptr, const size_t size, const std::string &tag) : mem_ptr(ptr), mem_size(size), mem_tag(tag) {}

    void *mem_ptr{nullptr};
    size_t mem_size{0};
    std::string mem_tag;
  };

  uint8_t *MallocFromRts(size_t size);
  bool FreeToRts(void *devPtr);
  size_t GetDeviceMemSizeFromContext();

  bool initialized_{false};

  // Support multi-thread.
  std::mutex mutex_;

  // rts Memory INFO
  size_t total_hbm_size_{0};
  size_t max_hbm_size_for_ms_{0};
  uint8_t *device_mem_base_{nullptr};
  uint64_t device_mem_size_{0};

  // dynamic memory info
  uint64_t cur_dynamic_mem_offset_{0};
  uint64_t max_dynamic_mem_offset_{0};
  std::vector<std::shared_ptr<MemoryBlock>> dynamic_memory_block_list_;

  // static memory info
  uint64_t static_mem_offset_{0};
  std::vector<std::shared_ptr<MemoryBlock>> static_memory_block_list_;
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_

View File

@ -16,6 +16,7 @@
#include <string>
#include "runtime/device/ascend/ascend_memory_manager.h"
#include "runtime/device/ascend/ascend_memory_pool.h"
#include "runtime/device/ascend/ascend_memory_adapter.h"
#include "utils/ms_context.h"
#include "runtime/mem.h"
#ifndef ENABLE_SECURITY
@ -29,100 +30,17 @@ using mindspore::profiler::ascend::MemoryProfiling;
namespace mindspore {
namespace device {
namespace ascend {
namespace {
constexpr uint64_t kAscendInitDeviceMemGB = 30;
constexpr uint64_t kMemSizeGB = 30;
constexpr uint64_t kAscendDeviceMemSize = (kAscendInitDeviceMemGB << kMemSizeGB);
uint64_t GetDeviceHBMSize() {
size_t free = 0;
size_t total = 0;
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free, &total);
if (ret != RT_ERROR_NONE || total == 0) {
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total = " << total;
}
return total;
}
void AscendMemoryManager::MallocDeviceMemory() { (void)AscendMemAdapter::GetInstance().Initialize(); }
uint64_t GetDefaultDeviceMemSize() {
auto total = GetDeviceHBMSize();
auto ret = total * 15 / 16; // reserved memory is 1/16 of total
MS_LOG(INFO) << "The Device HBM memory size is " << total << ", allocate " << ret << " for backend.";
return ret;
}
} // namespace
void AscendMemoryManager::FreeDeviceMemory() { (void)AscendMemAdapter::GetInstance().DeInitialize(); }
void AscendMemoryManager::MallocDeviceMemory() {
auto context_mem = GetDeviceMemSizeFromContext();
device_mem_size_ = context_mem == 0 ? GetDefaultDeviceMemSize() : context_mem;
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), device_mem_size_, RT_MEMORY_HBM);
if (ret != ACL_RT_SUCCESS) {
if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "], "
<< "Device " << device_id
<< " may be other processes occupying this card, check as: ps -ef|grep python";
} else {
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
}
} else {
MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << device_mem_size_
<< " bytes , address : " << reinterpret_cast<void *>(device_mem_base_);
}
AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_);
}
uint64_t AscendMemoryManager::GetDeviceMemSize() {
auto mem_size = GetDeviceMemSizeFromContext();
return mem_size == 0 ? GetDefaultDeviceMemSize() : mem_size;
}
uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto variable_memory_max_size = context->get_param<std::string>(MS_CTX_VARIABLE_MEMORY_MAX_SIZE);
if (variable_memory_max_size == "0") {
return 0;
}
MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size;
auto pos = variable_memory_max_size.find('*');
if (pos == std::string::npos) {
MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size";
}
auto gb_str = variable_memory_max_size.substr(0, pos);
auto gb_var = std::stoull(gb_str);
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
auto total_hbm_size_GB = GetDeviceHBMSize() >> kMemSizeGB;
auto backend_max_size_GB = total_hbm_size_GB - 1; // reserved 1 GB for other component
if (gb_var > backend_max_size_GB || gb_var == 0) {
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB
<< " GB, variable_memory_max_size should be in range (0-" << backend_max_size_GB
<< "]GB, but got " << gb_var
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
}
return gb_var << kMemSizeGB;
}
void AscendMemoryManager::FreeDeviceMemory() {
if (device_mem_base_ != nullptr) {
auto ret = rtFree(device_mem_base_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
}
device_mem_base_ = nullptr;
}
}
void AscendMemoryManager::ResetDynamicMemory() {
total_dynamic_size_ = 0;
dynamic_mem_offset_ = 0;
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
}
void AscendMemoryManager::ResetDynamicMemory() { (void)AscendMemAdapter::GetInstance().ResetDynamicMemory(); }
void AscendMemoryManager::ClearGlobalIdleMem() { AscendMemoryPool::GetInstance().ResetIdleMemBuf(); }
uint64_t AscendMemoryManager::GetMsMaxMemSize() { return AscendMemAdapter::GetInstance().MaxHbmSizeForMs(); }
void *AscendMemoryManager::MallocDevice(size_t size) {
auto align_size = GetCommonAlignSize(size);
return AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
@ -146,12 +64,8 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
} else {
align_size = GetCommonAlignSize(size);
}
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
<< "] dynamic [" << total_dynamic_size_ << "] static [" << device_mem_size_ - device_mem_pool_offset
<< "], Pool statistics: pool total size [" << AscendMemoryPool::GetInstance().total_mem_statistics()
<< "] used [" << AscendMemoryPool::GetInstance().used_mem_statistics()
<< "] communication_mem:" << communication_mem;
MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "] communication_mem:" << communication_mem;
#ifndef ENABLE_SECURITY
if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable() && graph_id != kInvalidGraphId) {
auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
@ -163,16 +77,11 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
node->AddStaticMemorySize(SizeToUint(align_size));
}
#endif
if (communication_mem) {
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address);
return alloc_address + kMemAlignSize;
} else {
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address);
return alloc_address;
}
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address);
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
}
uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
@ -182,29 +91,12 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
} else {
align_size = GetCommonAlignSize(size);
}
MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "] communication_mem: " << communication_mem;
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
<< "] dynamic[" << total_dynamic_size_ << "] static[" << device_mem_size_ - device_mem_pool_offset
<< "] communication_mem: " << communication_mem;
auto offset = dynamic_mem_offset_;
auto new_offset = dynamic_mem_offset_ + align_size;
if (new_offset >= device_mem_pool_offset) {
MS_LOG(EXCEPTION) << "Out of Memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
<< "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])"
<< " malloc [" << align_size
<< "] failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
}
total_dynamic_size_ += align_size;
dynamic_mem_offset_ = new_offset;
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
if (communication_mem) {
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
return device_mem_base_ + offset + kMemAlignSize;
} else {
return device_mem_base_ + offset;
}
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemAdapter::GetInstance().MallocDynamicDevMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address);
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
}
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {

View File

@ -19,7 +19,7 @@
#include <vector>
#include "runtime/device/memory_manager.h"
#include "graphengine/inc/external/runtime/rt_error_codes.h"
namespace mindspore {
namespace device {
namespace ascend {
@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager {
void *MallocMemFromMemPool(size_t size) override;
void *MallocDevice(size_t size) override;
void FreeMemFromMemPool(void *device_ptr) override;
uint64_t GetDeviceMemSize();
uint64_t GetMsMaxMemSize();
void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
@ -49,12 +49,6 @@ class AscendMemoryManager : public MemoryManager {
protected:
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;
private:
uint8_t *device_mem_pool_base_{nullptr};
uint64_t device_mem_pool_size_{0};
uint64_t GetDeviceMemSizeFromContext();
};
} // namespace ascend
} // namespace device

View File

@ -16,8 +16,8 @@
#include <algorithm>
#include "runtime/device/ascend/ascend_memory_pool.h"
#include "runtime/device/ascend/ascend_memory_adapter.h"
#include "runtime/mem.h"
#include "runtime/device/ascend/ascend_kernel_runtime.h"
#include "utils/log_adapter.h"
namespace mindspore {
@ -28,31 +28,11 @@ static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE = 256 << 20;
// The minimum unit size (8MB) of memory block used for dynamic extend in graph mode.
static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20;
void AscendMemoryPool::Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset) {
static bool initialized = false;
if (initialized) {
return;
}
MS_EXCEPTION_IF_NULL(device_mem_base);
set_device_mem_pool_base(device_mem_base);
if (dynamic_mem_offset > device_mem_size) {
MS_LOG(EXCEPTION) << "Dynamic memory offset: " << dynamic_mem_offset
<< " exceed the device memory size: " << device_mem_size;
}
set_device_mem_size(device_mem_size);
set_device_mem_pool_offset(device_mem_size);
set_graph_dynamic_mem_offset(dynamic_mem_offset);
initialized = true;
}
size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
auto device_free_mem_size = free_mem_size();
if (device_free_mem_size < size) {
MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
<< "] is smaller than required size[" << size << "], dynamic offset [" << graph_dynamic_mem_offset_
<< "] memory pool offset[" << device_mem_size_ - device_mem_pool_offset_ << "])";
<< "] is smaller than required size[" << size << "]";
return 0;
}
auto alloc_mem_size = ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE;
@ -76,23 +56,12 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
}
size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
MS_LOG(INFO) << "Malloc Memory: Pool, total[" << device_mem_size_ << "] (dynamic[" << graph_dynamic_mem_offset_
<< "] memory pool[" << device_mem_size_ - device_mem_pool_offset_ << "])"
<< " malloc [" << size << "]";
MS_LOG(INFO) << "Malloc Memory for Pool, size: " << size;
if (size == 0) {
MS_LOG(EXCEPTION) << "Failed to alloc memory pool resource, the size is zero!";
}
if (device_mem_pool_offset_ - size < graph_dynamic_mem_offset_) {
MS_LOG(EXCEPTION) << "Out of Memory!!! Failed to alloc memory pool memory, the current device_mem_pool_offset_ ["
<< device_mem_pool_offset_ << "], current graph_dynamic_mem_offset_ " << graph_dynamic_mem_offset_
<< "], need memory size [" << size
<< "]. Please try to reduce 'batch_size' or check whether exists extra large shape. More details "
"can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
}
device_mem_pool_offset_ -= size;
*addr = device_mem_pool_base_ + device_mem_pool_offset_;
*addr = AscendMemAdapter::GetInstance().MallocStaticDevMem(size);
if (*addr == nullptr) {
MS_LOG(EXCEPTION) << "Alloc device memory pool address is nullptr, failed to alloc memory pool resource!";
}
@ -101,7 +70,7 @@ size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
MS_EXCEPTION_IF_NULL(addr);
return true;
return AscendMemAdapter::GetInstance().FreeStaticDevMem(addr);
}
void AscendMemoryPool::ResetIdleMemBuf() {
@ -112,39 +81,7 @@ void AscendMemoryPool::ResetIdleMemBuf() {
}
}
size_t AscendMemoryPool::AlignMemorySize(size_t size) const {
if (size == 0) {
MS_LOG(EXCEPTION) << "The align memory size is a zero !";
}
return size;
}
void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) {
MS_EXCEPTION_IF_NULL(device_mem_pool_base);
device_mem_pool_base_ = device_mem_pool_base;
}
void AscendMemoryPool::set_device_mem_size(uint64_t device_mem_size) { device_mem_size_ = device_mem_size; }
void AscendMemoryPool::set_device_mem_pool_offset(uint64_t device_mem_pool_offset) {
device_mem_pool_offset_ = device_mem_pool_offset;
}
void AscendMemoryPool::set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset) {
graph_dynamic_mem_offset_ = graph_dynamic_mem_offset;
}
uint64_t AscendMemoryPool::device_mem_pool_offset() const { return device_mem_pool_offset_; }
size_t AscendMemoryPool::free_mem_size() {
if (graph_dynamic_mem_offset_ >= device_mem_pool_offset_) {
MS_LOG(EXCEPTION) << "graph dynamic mem offset [" << graph_dynamic_mem_offset_
<< "] less than or equal to device mem pool offset [" << device_mem_pool_offset_ << "]!";
}
return device_mem_pool_offset_ - graph_dynamic_mem_offset_;
}
size_t AscendMemoryPool::total_mem_size() { return device_mem_size_ - graph_dynamic_mem_offset_; }
size_t AscendMemoryPool::free_mem_size() { return AscendMemAdapter::GetInstance().FreeDevMemSize(); }
} // namespace ascend
} // namespace device
} // namespace mindspore

View File

@ -29,18 +29,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
AscendMemoryPool(const AscendMemoryPool &) = delete;
AscendMemoryPool &operator=(const AscendMemoryPool &) = delete;
void Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset);
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
void ResetIdleMemBuf();
void set_device_mem_size(uint64_t device_mem_size);
void set_device_mem_pool_base(uint8_t *device_mem_pool_base);
void set_device_mem_pool_offset(uint64_t device_mem_pool_offset);
void set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset);
uint64_t device_mem_pool_offset() const;
size_t free_mem_size() override;
size_t total_mem_size() override;
void ResetIdleMemBuf();
static AscendMemoryPool &GetInstance() {
static AscendMemoryPool instance;
@ -48,17 +41,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
}
protected:
// The real size by memory alloc aligned.
size_t AlignMemorySize(size_t size) const override;
// Calculate memory block required alloc size when adding the memory block.
size_t CalMemBlockAllocSize(size_t size) override;
private:
AscendMemoryPool() = default;
uint8_t *device_mem_pool_base_{nullptr};
uint64_t device_mem_size_{0};
uint64_t device_mem_pool_offset_{0};
uint64_t graph_dynamic_mem_offset_{0};
};
} // namespace ascend
} // namespace device

View File

@ -27,7 +27,7 @@ namespace gpu {
const size_t kGBToByte = 1024 << 20;
bool GPUMemoryAllocator::Init() {
size_t total_size = total_mem_size();
size_t total_size = CudaDriver::total_mem_size();
size_t free_size = CudaDriver::free_mem_size();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
@ -98,8 +98,6 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); }
size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); }
size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); }
} // namespace gpu
} // namespace device
} // namespace mindspore

View File

@ -35,7 +35,6 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
size_t free_mem_size() override;
size_t total_mem_size() override;
static GPUMemoryAllocator &GetInstance() {
static GPUMemoryAllocator instance;

View File

@ -73,61 +73,25 @@ bool GPUMemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList
void GPUMemoryManager::MallocDeviceMemory() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
// If use the dynamic memory pool, then alloc the first memory block to init.
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) {
if (ps::ps_cache_instance.initialized_ps_cache()) {
return;
}
auto device_addr = MallocMemFromMemPool(1);
if (!device_addr) {
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
}
} else {
// Need to reserve 20% space for dynamic memory
const float init_gpu_mem_ratio = 0.8;
size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio);
auto alloc_size =
GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_));
device_mem_size_ = alloc_size;
static_mem_offset_ = device_mem_size_;
if (ps::ps_cache_instance.initialized_ps_cache()) {
return;
}
auto device_addr = MallocMemFromMemPool(1);
if (!device_addr) {
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
}
}
void GPUMemoryManager::FreeDeviceMemory() {
if (device_mem_base_ != nullptr) {
if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) {
MS_LOG(EXCEPTION) << "Could not free gpu device memory.";
}
}
GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
}
void GPUMemoryManager::FreeDeviceMemory() { GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); }
uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) {
auto device_ptr = MallocMemFromMemPool(size);
if (device_ptr == nullptr) {
MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size;
}
return AddressOffset(device_ptr, 0);
auto device_ptr = MallocMemFromMemPool(size);
if (device_ptr == nullptr) {
MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size;
}
auto align_size = GetCommonAlignSize(size);
if (static_mem_offset_ < align_size) {
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
<< "] static[" << total_static_size_ << "])"
<< " malloc [" << align_size << "] failed!";
}
auto offset = static_mem_offset_ - align_size;
if (dynamic_mem_offset_ > offset) {
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
<< "] static[" << total_static_size_ << "])"
<< " malloc [" << align_size << "] failed!";
}
total_static_size_ += align_size;
static_mem_offset_ = offset;
return device_mem_base_ + offset;
return AddressOffset(device_ptr, 0);
}
} // namespace gpu
} // namespace device

View File

@ -39,10 +39,7 @@ class MemoryManager : public MemHandler {
virtual void MallocDeviceMemory() = 0;
virtual void FreeDeviceMemory() = 0;
virtual void ResetDynamicMemory() {
total_dynamic_size_ = 0;
dynamic_mem_offset_ = 0;
}
virtual void ResetDynamicMemory() {}
virtual void ClearGlobalIdleMem() {}
virtual void MallocSomasDynamicMem(const session::KernelGraph &graph);
@ -110,12 +107,6 @@ class MemoryManager : public MemHandler {
protected:
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) = 0;
virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
uint8_t *device_mem_base_{nullptr};
uint64_t device_mem_size_{0};
uint64_t dynamic_mem_offset_{0};
uint64_t static_mem_offset_{0};
size_t total_static_size_ = 0;
size_t total_dynamic_size_ = 0;
SomasPtr somas_reuse_util_ptr_{nullptr};
std::map<size_t, std::queue<void *>> cached_host_mem_;
std::map<void *, std::shared_ptr<std::vector<uint8_t>>> host_mem_block_map_;

View File

@ -90,8 +90,6 @@ bool CPUMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
}
size_t CPUMemoryPool::free_mem_size() { return GetSystemMemorySize("MemAvailable"); }
size_t CPUMemoryPool::total_mem_size() { return GetSystemMemorySize("MemTotal"); }
} // namespace cpu
} // namespace device
} // namespace mindspore

View File

@ -36,7 +36,6 @@ class CPUMemoryPool : public DynamicMemPoolBestFit {
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
size_t free_mem_size() override;
size_t total_mem_size() override;
private:
CPUMemoryPool() = default;

View File

@ -130,6 +130,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_event.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc"