forked from mindspore-Ecosystem/mindspore
add ascend memory adapter for ascend memory management
This commit is contained in:
parent
bc37faad4d
commit
79599546ee
|
@ -114,7 +114,6 @@ class DynamicMemPoolBestFit {
|
||||||
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
|
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
|
||||||
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
|
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
|
||||||
virtual size_t free_mem_size() = 0;
|
virtual size_t free_mem_size() = 0;
|
||||||
virtual size_t total_mem_size() = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// The real size by memory alloc aligned.
|
// The real size by memory alloc aligned.
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include "common/duplex_pipe.h"
|
#include "common/duplex_pipe.h"
|
||||||
#include "utils/log_adapter.h"
|
#include "utils/log_adapter.h"
|
||||||
|
@ -88,6 +89,7 @@ class KernelBuildClient {
|
||||||
|
|
||||||
// Send a request and fetch its response
|
// Send a request and fetch its response
|
||||||
std::string SendRequest(std::string data) {
|
std::string SendRequest(std::string data) {
|
||||||
|
std::lock_guard<std::mutex> locker(mutex_);
|
||||||
Request(data);
|
Request(data);
|
||||||
return Response();
|
return Response();
|
||||||
}
|
}
|
||||||
|
@ -137,6 +139,8 @@ class KernelBuildClient {
|
||||||
virtual ~KernelBuildClient() = default;
|
virtual ~KernelBuildClient() = default;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Support multi-thread.
|
||||||
|
std::mutex mutex_;
|
||||||
bool init_;
|
bool init_;
|
||||||
std::shared_ptr<DuplexPipe> dp_;
|
std::shared_ptr<DuplexPipe> dp_;
|
||||||
};
|
};
|
||||||
|
|
|
@ -73,6 +73,9 @@ const std::set<std::string> kOpNeedTransFormat = {
|
||||||
kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D};
|
kOpFormat_FRAC_NZ, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D};
|
||||||
|
|
||||||
void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) {
|
void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) {
|
||||||
|
if (size == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
auto ms_context = MsContext::GetInstance();
|
auto ms_context = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(ms_context);
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||||
|
|
|
@ -1186,7 +1186,7 @@ std::shared_ptr<DeviceEvent> AscendKernelRuntime::CreateDeviceTimeEvent() {
|
||||||
uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const {
|
uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const {
|
||||||
auto ascend_mem_manager = std::dynamic_pointer_cast<AscendMemoryManager>(mem_manager_);
|
auto ascend_mem_manager = std::dynamic_pointer_cast<AscendMemoryManager>(mem_manager_);
|
||||||
MS_EXCEPTION_IF_NULL(ascend_mem_manager);
|
MS_EXCEPTION_IF_NULL(ascend_mem_manager);
|
||||||
return ascend_mem_manager->GetDeviceMemSize();
|
return ascend_mem_manager->GetMsMaxMemSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AscendKernelRuntime::DeleteDumpDir(const std::string &path) {
|
bool AscendKernelRuntime::DeleteDumpDir(const std::string &path) {
|
||||||
|
|
|
@ -0,0 +1,211 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "runtime/device/ascend/ascend_memory_adapter.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include "runtime/mem.h"
|
||||||
|
#include "utils/ms_context.h"
|
||||||
|
#include "graphengine/inc/external/runtime/rt_error_codes.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace device {
|
||||||
|
namespace ascend {
|
||||||
|
// Shift width (not a byte count): this file uses `x >> kMemSizeGB` to turn bytes into GB and
// `x << kMemSizeGB` to turn GB into bytes.
constexpr uint64_t kMemSizeGB = 30;
|
||||||
|
|
||||||
|
bool AscendMemAdapter::Initialize() {
|
||||||
|
if (initialized_) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
size_t free_hbm_size = 0;
|
||||||
|
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &total_hbm_size_);
|
||||||
|
if (ret != RT_ERROR_NONE || total_hbm_size_ == 0) {
|
||||||
|
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << total_hbm_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
max_hbm_size_for_ms_ = total_hbm_size_ * 15 / 16; // reserved memory is 1/16 of total
|
||||||
|
auto context_mem = GetDeviceMemSizeFromContext();
|
||||||
|
device_mem_size_ = context_mem == 0 ? max_hbm_size_for_ms_ : context_mem;
|
||||||
|
device_mem_base_ = MallocFromRts(device_mem_size_);
|
||||||
|
static_mem_offset_ = device_mem_size_;
|
||||||
|
cur_dynamic_mem_offset_ = 0;
|
||||||
|
max_dynamic_mem_offset_ = 0;
|
||||||
|
MS_LOG(INFO) << " Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
|
||||||
|
initialized_ = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AscendMemAdapter::DeInitialize() {
|
||||||
|
if (!initialized_) {
|
||||||
|
MS_LOG(ERROR) << " DeInitialize Ascend Memory Adapter when it is not initialize";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ret = FreeToRts(device_mem_base_);
|
||||||
|
if (ret) {
|
||||||
|
total_hbm_size_ = 0;
|
||||||
|
max_hbm_size_for_ms_ = 0;
|
||||||
|
device_mem_base_ = nullptr;
|
||||||
|
device_mem_size_ = 0;
|
||||||
|
|
||||||
|
cur_dynamic_mem_offset_ = 0;
|
||||||
|
max_dynamic_mem_offset_ = 0;
|
||||||
|
dynamic_memory_block_list_.clear();
|
||||||
|
|
||||||
|
static_mem_offset_ = 0;
|
||||||
|
static_memory_block_list_.clear();
|
||||||
|
|
||||||
|
MS_LOG(INFO) << " Ascend Memory Adapter initialize success, statistics:" << DevMemStatistics();
|
||||||
|
initialized_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) {
|
||||||
|
std::lock_guard<std::mutex> locker(mutex_);
|
||||||
|
auto new_static_offset = static_mem_offset_ - size;
|
||||||
|
if (new_static_offset < max_dynamic_mem_offset_) {
|
||||||
|
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
|
||||||
|
<< " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
|
||||||
|
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
|
||||||
|
MS_LOG(ERROR) << DevMemDetailInfo();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto memory_block_ptr = device_mem_base_ + new_static_offset;
|
||||||
|
static_mem_offset_ = new_static_offset;
|
||||||
|
static_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
|
||||||
|
|
||||||
|
return memory_block_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) {
|
||||||
|
std::lock_guard<std::mutex> locker(mutex_);
|
||||||
|
auto new_dynamic_offset = cur_dynamic_mem_offset_ + size;
|
||||||
|
if (new_dynamic_offset > static_mem_offset_) {
|
||||||
|
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
|
||||||
|
<< " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
|
||||||
|
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
|
||||||
|
MS_LOG(ERROR) << DevMemDetailInfo();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto memory_block_ptr = device_mem_base_ + cur_dynamic_mem_offset_;
|
||||||
|
cur_dynamic_mem_offset_ = new_dynamic_offset;
|
||||||
|
max_dynamic_mem_offset_ = std::max(cur_dynamic_mem_offset_, max_dynamic_mem_offset_);
|
||||||
|
dynamic_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
|
||||||
|
|
||||||
|
return memory_block_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; }
|
||||||
|
|
||||||
|
std::string AscendMemAdapter::DevMemStatistics() {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "\nHBM memory size: " << total_hbm_size_;
|
||||||
|
oss << "\nAvailable HBM memory size for MS: " << max_hbm_size_for_ms_;
|
||||||
|
oss << "\nMS memory base size: " << device_mem_size_;
|
||||||
|
oss << "\nMS memory base address: " << reinterpret_cast<void *>(device_mem_base_);
|
||||||
|
oss << "\nStatic Memory size: " << device_mem_size_ - static_mem_offset_;
|
||||||
|
oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_;
|
||||||
|
oss << "\nMAX Dynamic memory size of all graph: " << max_dynamic_mem_offset_;
|
||||||
|
oss << "\nMS Static memory offset: " << static_mem_offset_;
|
||||||
|
oss << std::endl;
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string AscendMemAdapter::DevMemDetailInfo() {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "\nMemory Detail Info:";
|
||||||
|
oss << "\nStatic Memory Blocks:";
|
||||||
|
oss << "\nAddress \t Size \t tag \t";
|
||||||
|
for (const auto &blk : static_memory_block_list_) {
|
||||||
|
oss << "\n" << blk->mem_ptr << "\t" << blk->mem_size << "\t" << blk->mem_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
oss << "\nDynamic Memory Blocks:";
|
||||||
|
oss << "\nAddress \t Size \t tag \t";
|
||||||
|
for (const auto &blk : dynamic_memory_block_list_) {
|
||||||
|
oss << "\n" << blk->mem_ptr << "\t" << blk->mem_size << "\t" << blk->mem_tag;
|
||||||
|
}
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t AscendMemAdapter::GetDeviceMemSizeFromContext() {
|
||||||
|
auto context = MsContext::GetInstance();
|
||||||
|
MS_EXCEPTION_IF_NULL(context);
|
||||||
|
auto variable_memory_max_size = context->get_param<std::string>(MS_CTX_VARIABLE_MEMORY_MAX_SIZE);
|
||||||
|
if (variable_memory_max_size == "0") {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size;
|
||||||
|
auto pos = variable_memory_max_size.find('*');
|
||||||
|
if (pos == std::string::npos) {
|
||||||
|
MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size";
|
||||||
|
}
|
||||||
|
auto gb_str = variable_memory_max_size.substr(0, pos);
|
||||||
|
auto gb_var = std::stoull(gb_str);
|
||||||
|
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
|
||||||
|
|
||||||
|
auto max_hbm_size_for_ms_GB = max_hbm_size_for_ms_ >> kMemSizeGB;
|
||||||
|
if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) {
|
||||||
|
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (total_hbm_size_ >> kMemSizeGB)
|
||||||
|
<< " GB, variable_memory_max_size should be in range (0-" << max_hbm_size_for_ms_GB
|
||||||
|
<< "]GB, but got " << gb_var
|
||||||
|
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
|
||||||
|
}
|
||||||
|
return gb_var << kMemSizeGB;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *AscendMemAdapter::MallocFromRts(size_t size) {
|
||||||
|
uint8_t *ptr = nullptr;
|
||||||
|
auto ret = rtMalloc(reinterpret_cast<void **>(&ptr), size, RT_MEMORY_HBM);
|
||||||
|
if (ret != ACL_RT_SUCCESS) {
|
||||||
|
if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) {
|
||||||
|
auto context_ptr = MsContext::GetInstance();
|
||||||
|
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||||
|
unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||||
|
size_t free = 0;
|
||||||
|
size_t total = 0;
|
||||||
|
(void)rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free, &total);
|
||||||
|
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << size << "], ret[" << ret << "], "
|
||||||
|
<< "Device " << device_id << " Available HBM size:" << total << " free size:" << free
|
||||||
|
<< " may be other processes occupying this card, check as: ps -ef|grep python";
|
||||||
|
} else {
|
||||||
|
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << size << "] fail, ret[" << ret << "]";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << size
|
||||||
|
<< " bytes , address : " << reinterpret_cast<void *>(ptr);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AscendMemAdapter::FreeToRts(void *devPtr) {
|
||||||
|
if (devPtr != nullptr) {
|
||||||
|
auto ret = rtFree(devPtr);
|
||||||
|
if (ret != RT_ERROR_NONE) {
|
||||||
|
MS_LOG(ERROR) << "rtFree mem [" << devPtr << "] fail, ret[" << ret << "]";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ascend
|
||||||
|
} // namespace device
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,93 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_
|
||||||
|
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "utils/ms_context.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace device {
|
||||||
|
namespace ascend {
|
||||||
|
|
||||||
|
// Process-wide adapter that owns one device memory arena and splits it between static memory
// (allocated downward from the end of the arena) and dynamic memory (allocated upward from the
// start). Access it through GetInstance().
class AscendMemAdapter {
 public:
  // Returns the single shared adapter, created on first use.
  static AscendMemAdapter &GetInstance() {
    static AscendMemAdapter adapter;
    return adapter;
  }

  // Allocate / release the arena. Both return true on success.
  bool Initialize();
  bool DeInitialize();

  // Allocate `size` bytes of static / dynamic memory from the arena; `tag` labels the block in
  // DevMemDetailInfo(). Both return nullptr when the arena cannot satisfy the request.
  uint8_t *MallocStaticDevMem(size_t size, std::string tag = "");
  uint8_t *MallocDynamicDevMem(size_t size, std::string tag = "");
  // No-op that always reports success; the pointer is not inspected.
  bool FreeStaticDevMem(void *devPtr) { return true; }
  // Rewinds the dynamic allocation cursor to the start of the arena.
  void ResetDynamicMemory();

  // Bytes between the dynamic high-water mark and the lowest static allocation.
  uint64_t FreeDevMemSize() { return static_mem_offset_ - max_dynamic_mem_offset_; }
  // Size of the arena in bytes.
  uint64_t TotalDevMemSize() { return device_mem_size_; }
  // Upper bound of HBM, in bytes, that the adapter may use.
  uint64_t MaxHbmSizeForMs() { return max_hbm_size_for_ms_; }

  // Human-readable dumps of the counters and of every recorded block.
  std::string DevMemStatistics();
  std::string DevMemDetailInfo();

 private:
  // Record of one allocation handed out by the adapter.
  struct MemoryBlock {
    MemoryBlock(void *ptr, const size_t size, const std::string &tag) : mem_ptr(ptr), mem_size(size), mem_tag(tag) {}

    void *mem_ptr{nullptr};  // start address of the block
    size_t mem_size{0};      // block size in bytes
    std::string mem_tag;     // caller-supplied label
  };

  // Thin wrappers around the runtime allocation / release calls.
  uint8_t *MallocFromRts(size_t size);
  bool FreeToRts(void *devPtr);
  // Size configured through the context, in bytes; 0 when not configured.
  size_t GetDeviceMemSizeFromContext();

  bool initialized_{false};

  // Support multi-thread.
  std::mutex mutex_;

  // rts Memory INFO
  size_t total_hbm_size_{0};
  size_t max_hbm_size_for_ms_{0};
  uint8_t *device_mem_base_{nullptr};
  uint64_t device_mem_size_{0};

  // dynamic memory info
  uint64_t cur_dynamic_mem_offset_{0};
  uint64_t max_dynamic_mem_offset_{0};
  std::vector<std::shared_ptr<MemoryBlock>> dynamic_memory_block_list_;

  // static memory info
  uint64_t static_mem_offset_{0};
  std::vector<std::shared_ptr<MemoryBlock>> static_memory_block_list_;
};
|
||||||
|
} // namespace ascend
|
||||||
|
} // namespace device
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_
|
|
@ -16,6 +16,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "runtime/device/ascend/ascend_memory_manager.h"
|
#include "runtime/device/ascend/ascend_memory_manager.h"
|
||||||
#include "runtime/device/ascend/ascend_memory_pool.h"
|
#include "runtime/device/ascend/ascend_memory_pool.h"
|
||||||
|
#include "runtime/device/ascend/ascend_memory_adapter.h"
|
||||||
#include "utils/ms_context.h"
|
#include "utils/ms_context.h"
|
||||||
#include "runtime/mem.h"
|
#include "runtime/mem.h"
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
|
@ -29,100 +30,17 @@ using mindspore::profiler::ascend::MemoryProfiling;
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace ascend {
|
namespace ascend {
|
||||||
namespace {
|
|
||||||
constexpr uint64_t kAscendInitDeviceMemGB = 30;
|
|
||||||
constexpr uint64_t kMemSizeGB = 30;
|
|
||||||
constexpr uint64_t kAscendDeviceMemSize = (kAscendInitDeviceMemGB << kMemSizeGB);
|
|
||||||
|
|
||||||
uint64_t GetDeviceHBMSize() {
|
void AscendMemoryManager::MallocDeviceMemory() { (void)AscendMemAdapter::GetInstance().Initialize(); }
|
||||||
size_t free = 0;
|
|
||||||
size_t total = 0;
|
|
||||||
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free, &total);
|
|
||||||
if (ret != RT_ERROR_NONE || total == 0) {
|
|
||||||
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total = " << total;
|
|
||||||
}
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t GetDefaultDeviceMemSize() {
|
void AscendMemoryManager::FreeDeviceMemory() { (void)AscendMemAdapter::GetInstance().DeInitialize(); }
|
||||||
auto total = GetDeviceHBMSize();
|
|
||||||
auto ret = total * 15 / 16; // reserved memory is 1/16 of total
|
|
||||||
MS_LOG(INFO) << "The Device HBM memory size is " << total << ", allocate " << ret << " for backend.";
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
void AscendMemoryManager::MallocDeviceMemory() {
|
void AscendMemoryManager::ResetDynamicMemory() { (void)AscendMemAdapter::GetInstance().ResetDynamicMemory(); }
|
||||||
auto context_mem = GetDeviceMemSizeFromContext();
|
|
||||||
device_mem_size_ = context_mem == 0 ? GetDefaultDeviceMemSize() : context_mem;
|
|
||||||
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), device_mem_size_, RT_MEMORY_HBM);
|
|
||||||
if (ret != ACL_RT_SUCCESS) {
|
|
||||||
if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) {
|
|
||||||
auto context_ptr = MsContext::GetInstance();
|
|
||||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
|
||||||
unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
|
||||||
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "], "
|
|
||||||
<< "Device " << device_id
|
|
||||||
<< " may be other processes occupying this card, check as: ps -ef|grep python";
|
|
||||||
} else {
|
|
||||||
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
MS_LOG(INFO) << "Call rtMalloc to allocate device memory Success, size : " << device_mem_size_
|
|
||||||
<< " bytes , address : " << reinterpret_cast<void *>(device_mem_base_);
|
|
||||||
}
|
|
||||||
AscendMemoryPool::GetInstance().Init(device_mem_base_, device_mem_size_, dynamic_mem_offset_);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t AscendMemoryManager::GetDeviceMemSize() {
|
|
||||||
auto mem_size = GetDeviceMemSizeFromContext();
|
|
||||||
return mem_size == 0 ? GetDefaultDeviceMemSize() : mem_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
|
|
||||||
auto context = MsContext::GetInstance();
|
|
||||||
MS_EXCEPTION_IF_NULL(context);
|
|
||||||
auto variable_memory_max_size = context->get_param<std::string>(MS_CTX_VARIABLE_MEMORY_MAX_SIZE);
|
|
||||||
if (variable_memory_max_size == "0") {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
MS_LOG(INFO) << "context variable_memory_max_size:" << variable_memory_max_size;
|
|
||||||
auto pos = variable_memory_max_size.find('*');
|
|
||||||
if (pos == std::string::npos) {
|
|
||||||
MS_LOG(EXCEPTION) << "Invalid variable_memory_max_size";
|
|
||||||
}
|
|
||||||
auto gb_str = variable_memory_max_size.substr(0, pos);
|
|
||||||
auto gb_var = std::stoull(gb_str);
|
|
||||||
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
|
|
||||||
auto total_hbm_size_GB = GetDeviceHBMSize() >> kMemSizeGB;
|
|
||||||
auto backend_max_size_GB = total_hbm_size_GB - 1; // reserved 1 GB for other component
|
|
||||||
if (gb_var > backend_max_size_GB || gb_var == 0) {
|
|
||||||
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB
|
|
||||||
<< " GB, variable_memory_max_size should be in range (0-" << backend_max_size_GB
|
|
||||||
<< "]GB, but got " << gb_var
|
|
||||||
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
|
|
||||||
}
|
|
||||||
return gb_var << kMemSizeGB;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryManager::FreeDeviceMemory() {
|
|
||||||
if (device_mem_base_ != nullptr) {
|
|
||||||
auto ret = rtFree(device_mem_base_);
|
|
||||||
if (ret != RT_ERROR_NONE) {
|
|
||||||
MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
|
|
||||||
}
|
|
||||||
device_mem_base_ = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryManager::ResetDynamicMemory() {
|
|
||||||
total_dynamic_size_ = 0;
|
|
||||||
dynamic_mem_offset_ = 0;
|
|
||||||
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryManager::ClearGlobalIdleMem() { AscendMemoryPool::GetInstance().ResetIdleMemBuf(); }
|
void AscendMemoryManager::ClearGlobalIdleMem() { AscendMemoryPool::GetInstance().ResetIdleMemBuf(); }
|
||||||
|
|
||||||
|
uint64_t AscendMemoryManager::GetMsMaxMemSize() { return AscendMemAdapter::GetInstance().MaxHbmSizeForMs(); }
|
||||||
|
|
||||||
void *AscendMemoryManager::MallocDevice(size_t size) {
|
void *AscendMemoryManager::MallocDevice(size_t size) {
|
||||||
auto align_size = GetCommonAlignSize(size);
|
auto align_size = GetCommonAlignSize(size);
|
||||||
return AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
return AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
||||||
|
@ -146,12 +64,8 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
|
||||||
} else {
|
} else {
|
||||||
align_size = GetCommonAlignSize(size);
|
align_size = GetCommonAlignSize(size);
|
||||||
}
|
}
|
||||||
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
|
MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "] communication_mem:" << communication_mem;
|
||||||
MS_LOG(INFO) << "Malloc Memory for Static: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
|
|
||||||
<< "] dynamic [" << total_dynamic_size_ << "] static [" << device_mem_size_ - device_mem_pool_offset
|
|
||||||
<< "], Pool statistics: pool total size [" << AscendMemoryPool::GetInstance().total_mem_statistics()
|
|
||||||
<< "] used [" << AscendMemoryPool::GetInstance().used_mem_statistics()
|
|
||||||
<< "] communication_mem:" << communication_mem;
|
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable() && graph_id != kInvalidGraphId) {
|
if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable() && graph_id != kInvalidGraphId) {
|
||||||
auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
|
auto node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
|
||||||
|
@ -163,16 +77,11 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
|
||||||
node->AddStaticMemorySize(SizeToUint(align_size));
|
node->AddStaticMemorySize(SizeToUint(align_size));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (communication_mem) {
|
|
||||||
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
|
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||||
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
MS_EXCEPTION_IF_NULL(alloc_address);
|
||||||
MS_EXCEPTION_IF_NULL(alloc_address);
|
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
||||||
return alloc_address + kMemAlignSize;
|
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
|
||||||
} else {
|
|
||||||
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
|
||||||
MS_EXCEPTION_IF_NULL(alloc_address);
|
|
||||||
return alloc_address;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
|
uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
|
||||||
|
@ -182,29 +91,12 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
|
||||||
} else {
|
} else {
|
||||||
align_size = GetCommonAlignSize(size);
|
align_size = GetCommonAlignSize(size);
|
||||||
}
|
}
|
||||||
|
MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "] communication_mem: " << communication_mem;
|
||||||
|
|
||||||
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
|
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemAdapter::GetInstance().MallocDynamicDevMem(align_size));
|
||||||
MS_LOG(INFO) << "Malloc Memory for Dynamic: size[" << align_size << "], Memory statistics: total[" << device_mem_size_
|
MS_EXCEPTION_IF_NULL(alloc_address);
|
||||||
<< "] dynamic[" << total_dynamic_size_ << "] static[" << device_mem_size_ - device_mem_pool_offset
|
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
||||||
<< "] communication_mem: " << communication_mem;
|
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
|
||||||
auto offset = dynamic_mem_offset_;
|
|
||||||
auto new_offset = dynamic_mem_offset_ + align_size;
|
|
||||||
if (new_offset >= device_mem_pool_offset) {
|
|
||||||
MS_LOG(EXCEPTION) << "Out of Memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
|
|
||||||
<< "] memory pool[" << device_mem_size_ - device_mem_pool_offset << "])"
|
|
||||||
<< " malloc [" << align_size
|
|
||||||
<< "] failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
|
|
||||||
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
|
|
||||||
}
|
|
||||||
total_dynamic_size_ += align_size;
|
|
||||||
dynamic_mem_offset_ = new_offset;
|
|
||||||
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
|
|
||||||
if (communication_mem) {
|
|
||||||
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
|
|
||||||
return device_mem_base_ + offset + kMemAlignSize;
|
|
||||||
} else {
|
|
||||||
return device_mem_base_ + offset;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
|
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "runtime/device/memory_manager.h"
|
#include "runtime/device/memory_manager.h"
|
||||||
#include "graphengine/inc/external/runtime/rt_error_codes.h"
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace ascend {
|
namespace ascend {
|
||||||
|
@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager {
|
||||||
void *MallocMemFromMemPool(size_t size) override;
|
void *MallocMemFromMemPool(size_t size) override;
|
||||||
void *MallocDevice(size_t size) override;
|
void *MallocDevice(size_t size) override;
|
||||||
void FreeMemFromMemPool(void *device_ptr) override;
|
void FreeMemFromMemPool(void *device_ptr) override;
|
||||||
uint64_t GetDeviceMemSize();
|
uint64_t GetMsMaxMemSize();
|
||||||
void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
|
void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
|
||||||
uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
|
uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
|
||||||
std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
|
std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
|
||||||
|
@ -49,12 +49,6 @@ class AscendMemoryManager : public MemoryManager {
|
||||||
protected:
|
protected:
|
||||||
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
|
uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) override;
|
||||||
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;
|
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;
|
||||||
|
|
||||||
private:
|
|
||||||
uint8_t *device_mem_pool_base_{nullptr};
|
|
||||||
uint64_t device_mem_pool_size_{0};
|
|
||||||
|
|
||||||
uint64_t GetDeviceMemSizeFromContext();
|
|
||||||
};
|
};
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace device
|
} // namespace device
|
||||||
|
|
|
@ -16,8 +16,8 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "runtime/device/ascend/ascend_memory_pool.h"
|
#include "runtime/device/ascend/ascend_memory_pool.h"
|
||||||
|
#include "runtime/device/ascend/ascend_memory_adapter.h"
|
||||||
#include "runtime/mem.h"
|
#include "runtime/mem.h"
|
||||||
#include "runtime/device/ascend/ascend_kernel_runtime.h"
|
|
||||||
#include "utils/log_adapter.h"
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -28,31 +28,11 @@ static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE = 256 << 20;
|
||||||
// The minimum unit size (8MB) of memory block used for dynamic extend in graph mode.
|
// The minimum unit size (8MB) of memory block used for dynamic extend in graph mode.
|
||||||
static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20;
|
static const size_t ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20;
|
||||||
|
|
||||||
void AscendMemoryPool::Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset) {
|
|
||||||
static bool initialized = false;
|
|
||||||
if (initialized) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
MS_EXCEPTION_IF_NULL(device_mem_base);
|
|
||||||
set_device_mem_pool_base(device_mem_base);
|
|
||||||
|
|
||||||
if (dynamic_mem_offset > device_mem_size) {
|
|
||||||
MS_LOG(EXCEPTION) << "Dynamic memory offset: " << dynamic_mem_offset
|
|
||||||
<< " exceed the device memory size: " << device_mem_size;
|
|
||||||
}
|
|
||||||
set_device_mem_size(device_mem_size);
|
|
||||||
set_device_mem_pool_offset(device_mem_size);
|
|
||||||
set_graph_dynamic_mem_offset(dynamic_mem_offset);
|
|
||||||
initialized = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
|
size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
|
||||||
auto device_free_mem_size = free_mem_size();
|
auto device_free_mem_size = free_mem_size();
|
||||||
if (device_free_mem_size < size) {
|
if (device_free_mem_size < size) {
|
||||||
MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
|
MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
|
||||||
<< "] is smaller than required size[" << size << "], dynamic offset [" << graph_dynamic_mem_offset_
|
<< "] is smaller than required size[" << size << "]";
|
||||||
<< "] memory pool offset[" << device_mem_size_ - device_mem_pool_offset_ << "])";
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
auto alloc_mem_size = ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE;
|
auto alloc_mem_size = ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE;
|
||||||
|
@ -76,23 +56,12 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
||||||
MS_LOG(INFO) << "Malloc Memory: Pool, total[" << device_mem_size_ << "] (dynamic[" << graph_dynamic_mem_offset_
|
MS_LOG(INFO) << "Malloc Memory for Pool, size: " << size;
|
||||||
<< "] memory pool[" << device_mem_size_ - device_mem_pool_offset_ << "])"
|
|
||||||
<< " malloc [" << size << "]";
|
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
MS_LOG(EXCEPTION) << "Failed to alloc memory pool resource, the size is zero!";
|
MS_LOG(EXCEPTION) << "Failed to alloc memory pool resource, the size is zero!";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_mem_pool_offset_ - size < graph_dynamic_mem_offset_) {
|
*addr = AscendMemAdapter::GetInstance().MallocStaticDevMem(size);
|
||||||
MS_LOG(EXCEPTION) << "Out of Memory!!! Failed to alloc memory pool memory, the current device_mem_pool_offset_ ["
|
|
||||||
<< device_mem_pool_offset_ << "], current graph_dynamic_mem_offset_ " << graph_dynamic_mem_offset_
|
|
||||||
<< "], need memory size [" << size
|
|
||||||
<< "]. Please try to reduce 'batch_size' or check whether exists extra large shape. More details "
|
|
||||||
"can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
|
|
||||||
}
|
|
||||||
device_mem_pool_offset_ -= size;
|
|
||||||
*addr = device_mem_pool_base_ + device_mem_pool_offset_;
|
|
||||||
if (*addr == nullptr) {
|
if (*addr == nullptr) {
|
||||||
MS_LOG(EXCEPTION) << "Alloc device memory pool address is nullptr, failed to alloc memory pool resource!";
|
MS_LOG(EXCEPTION) << "Alloc device memory pool address is nullptr, failed to alloc memory pool resource!";
|
||||||
}
|
}
|
||||||
|
@ -101,7 +70,7 @@ size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
||||||
|
|
||||||
bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
|
bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
|
||||||
MS_EXCEPTION_IF_NULL(addr);
|
MS_EXCEPTION_IF_NULL(addr);
|
||||||
return true;
|
return AscendMemAdapter::GetInstance().FreeStaticDevMem(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AscendMemoryPool::ResetIdleMemBuf() {
|
void AscendMemoryPool::ResetIdleMemBuf() {
|
||||||
|
@ -112,39 +81,7 @@ void AscendMemoryPool::ResetIdleMemBuf() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t AscendMemoryPool::AlignMemorySize(size_t size) const {
|
size_t AscendMemoryPool::free_mem_size() { return AscendMemAdapter::GetInstance().FreeDevMemSize(); }
|
||||||
if (size == 0) {
|
|
||||||
MS_LOG(EXCEPTION) << "The align memory size is a zero !";
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryPool::set_device_mem_pool_base(uint8_t *device_mem_pool_base) {
|
|
||||||
MS_EXCEPTION_IF_NULL(device_mem_pool_base);
|
|
||||||
device_mem_pool_base_ = device_mem_pool_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryPool::set_device_mem_size(uint64_t device_mem_size) { device_mem_size_ = device_mem_size; }
|
|
||||||
|
|
||||||
void AscendMemoryPool::set_device_mem_pool_offset(uint64_t device_mem_pool_offset) {
|
|
||||||
device_mem_pool_offset_ = device_mem_pool_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AscendMemoryPool::set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset) {
|
|
||||||
graph_dynamic_mem_offset_ = graph_dynamic_mem_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t AscendMemoryPool::device_mem_pool_offset() const { return device_mem_pool_offset_; }
|
|
||||||
|
|
||||||
size_t AscendMemoryPool::free_mem_size() {
|
|
||||||
if (graph_dynamic_mem_offset_ >= device_mem_pool_offset_) {
|
|
||||||
MS_LOG(EXCEPTION) << "graph dynamic mem offset [" << graph_dynamic_mem_offset_
|
|
||||||
<< "] less than or equal to device mem pool offset [" << device_mem_pool_offset_ << "]!";
|
|
||||||
}
|
|
||||||
return device_mem_pool_offset_ - graph_dynamic_mem_offset_;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AscendMemoryPool::total_mem_size() { return device_mem_size_ - graph_dynamic_mem_offset_; }
|
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -29,18 +29,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
|
||||||
AscendMemoryPool(const AscendMemoryPool &) = delete;
|
AscendMemoryPool(const AscendMemoryPool &) = delete;
|
||||||
AscendMemoryPool &operator=(const AscendMemoryPool &) = delete;
|
AscendMemoryPool &operator=(const AscendMemoryPool &) = delete;
|
||||||
|
|
||||||
void Init(uint8_t *device_mem_base, uint64_t device_mem_size, uint64_t dynamic_mem_offset);
|
|
||||||
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
||||||
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
||||||
void ResetIdleMemBuf();
|
|
||||||
void set_device_mem_size(uint64_t device_mem_size);
|
|
||||||
void set_device_mem_pool_base(uint8_t *device_mem_pool_base);
|
|
||||||
void set_device_mem_pool_offset(uint64_t device_mem_pool_offset);
|
|
||||||
void set_graph_dynamic_mem_offset(uint64_t graph_dynamic_mem_offset);
|
|
||||||
|
|
||||||
uint64_t device_mem_pool_offset() const;
|
|
||||||
size_t free_mem_size() override;
|
size_t free_mem_size() override;
|
||||||
size_t total_mem_size() override;
|
|
||||||
|
void ResetIdleMemBuf();
|
||||||
|
|
||||||
static AscendMemoryPool &GetInstance() {
|
static AscendMemoryPool &GetInstance() {
|
||||||
static AscendMemoryPool instance;
|
static AscendMemoryPool instance;
|
||||||
|
@ -48,17 +41,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// The real size by memory alloc aligned.
|
|
||||||
size_t AlignMemorySize(size_t size) const override;
|
|
||||||
// Calculate memory block required alloc size when adding the memory block.
|
// Calculate memory block required alloc size when adding the memory block.
|
||||||
size_t CalMemBlockAllocSize(size_t size) override;
|
size_t CalMemBlockAllocSize(size_t size) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
AscendMemoryPool() = default;
|
AscendMemoryPool() = default;
|
||||||
uint8_t *device_mem_pool_base_{nullptr};
|
|
||||||
uint64_t device_mem_size_{0};
|
|
||||||
uint64_t device_mem_pool_offset_{0};
|
|
||||||
uint64_t graph_dynamic_mem_offset_{0};
|
|
||||||
};
|
};
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace device
|
} // namespace device
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace gpu {
|
||||||
const size_t kGBToByte = 1024 << 20;
|
const size_t kGBToByte = 1024 << 20;
|
||||||
|
|
||||||
bool GPUMemoryAllocator::Init() {
|
bool GPUMemoryAllocator::Init() {
|
||||||
size_t total_size = total_mem_size();
|
size_t total_size = CudaDriver::total_mem_size();
|
||||||
size_t free_size = CudaDriver::free_mem_size();
|
size_t free_size = CudaDriver::free_mem_size();
|
||||||
auto context_ptr = MsContext::GetInstance();
|
auto context_ptr = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||||
|
@ -98,8 +98,6 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
||||||
bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); }
|
bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); }
|
||||||
|
|
||||||
size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); }
|
size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); }
|
||||||
|
|
||||||
size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); }
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -35,7 +35,6 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
|
||||||
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
||||||
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
||||||
size_t free_mem_size() override;
|
size_t free_mem_size() override;
|
||||||
size_t total_mem_size() override;
|
|
||||||
|
|
||||||
static GPUMemoryAllocator &GetInstance() {
|
static GPUMemoryAllocator &GetInstance() {
|
||||||
static GPUMemoryAllocator instance;
|
static GPUMemoryAllocator instance;
|
||||||
|
|
|
@ -73,61 +73,25 @@ bool GPUMemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList
|
||||||
void GPUMemoryManager::MallocDeviceMemory() {
|
void GPUMemoryManager::MallocDeviceMemory() {
|
||||||
auto context_ptr = MsContext::GetInstance();
|
auto context_ptr = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||||
// If use the dynamic memory pool, then alloc the first memory block to init.
|
if (ps::ps_cache_instance.initialized_ps_cache()) {
|
||||||
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) {
|
return;
|
||||||
if (ps::ps_cache_instance.initialized_ps_cache()) {
|
}
|
||||||
return;
|
auto device_addr = MallocMemFromMemPool(1);
|
||||||
}
|
if (!device_addr) {
|
||||||
auto device_addr = MallocMemFromMemPool(1);
|
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
|
||||||
if (!device_addr) {
|
|
||||||
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Need to reserve 20% space for dynamic memory
|
|
||||||
const float init_gpu_mem_ratio = 0.8;
|
|
||||||
size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio);
|
|
||||||
auto alloc_size =
|
|
||||||
GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_));
|
|
||||||
device_mem_size_ = alloc_size;
|
|
||||||
static_mem_offset_ = device_mem_size_;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUMemoryManager::FreeDeviceMemory() {
|
void GPUMemoryManager::FreeDeviceMemory() { GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); }
|
||||||
if (device_mem_base_ != nullptr) {
|
|
||||||
if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) {
|
|
||||||
MS_LOG(EXCEPTION) << "Could not free gpu device memory.";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
|
|
||||||
}
|
|
||||||
|
|
||||||
uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
|
uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
|
||||||
auto context_ptr = MsContext::GetInstance();
|
auto context_ptr = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||||
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL)) {
|
auto device_ptr = MallocMemFromMemPool(size);
|
||||||
auto device_ptr = MallocMemFromMemPool(size);
|
if (device_ptr == nullptr) {
|
||||||
if (device_ptr == nullptr) {
|
MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size;
|
||||||
MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << size;
|
|
||||||
}
|
|
||||||
return AddressOffset(device_ptr, 0);
|
|
||||||
}
|
}
|
||||||
|
return AddressOffset(device_ptr, 0);
|
||||||
auto align_size = GetCommonAlignSize(size);
|
|
||||||
if (static_mem_offset_ < align_size) {
|
|
||||||
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
|
|
||||||
<< "] static[" << total_static_size_ << "])"
|
|
||||||
<< " malloc [" << align_size << "] failed!";
|
|
||||||
}
|
|
||||||
auto offset = static_mem_offset_ - align_size;
|
|
||||||
if (dynamic_mem_offset_ > offset) {
|
|
||||||
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
|
|
||||||
<< "] static[" << total_static_size_ << "])"
|
|
||||||
<< " malloc [" << align_size << "] failed!";
|
|
||||||
}
|
|
||||||
total_static_size_ += align_size;
|
|
||||||
static_mem_offset_ = offset;
|
|
||||||
return device_mem_base_ + offset;
|
|
||||||
}
|
}
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace device
|
} // namespace device
|
||||||
|
|
|
@ -39,10 +39,7 @@ class MemoryManager : public MemHandler {
|
||||||
|
|
||||||
virtual void MallocDeviceMemory() = 0;
|
virtual void MallocDeviceMemory() = 0;
|
||||||
virtual void FreeDeviceMemory() = 0;
|
virtual void FreeDeviceMemory() = 0;
|
||||||
virtual void ResetDynamicMemory() {
|
virtual void ResetDynamicMemory() {}
|
||||||
total_dynamic_size_ = 0;
|
|
||||||
dynamic_mem_offset_ = 0;
|
|
||||||
}
|
|
||||||
virtual void ClearGlobalIdleMem() {}
|
virtual void ClearGlobalIdleMem() {}
|
||||||
|
|
||||||
virtual void MallocSomasDynamicMem(const session::KernelGraph &graph);
|
virtual void MallocSomasDynamicMem(const session::KernelGraph &graph);
|
||||||
|
@ -110,12 +107,6 @@ class MemoryManager : public MemHandler {
|
||||||
protected:
|
protected:
|
||||||
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) = 0;
|
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id = kInvalidGraphId) = 0;
|
||||||
virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
|
virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
|
||||||
uint8_t *device_mem_base_{nullptr};
|
|
||||||
uint64_t device_mem_size_{0};
|
|
||||||
uint64_t dynamic_mem_offset_{0};
|
|
||||||
uint64_t static_mem_offset_{0};
|
|
||||||
size_t total_static_size_ = 0;
|
|
||||||
size_t total_dynamic_size_ = 0;
|
|
||||||
SomasPtr somas_reuse_util_ptr_{nullptr};
|
SomasPtr somas_reuse_util_ptr_{nullptr};
|
||||||
std::map<size_t, std::queue<void *>> cached_host_mem_;
|
std::map<size_t, std::queue<void *>> cached_host_mem_;
|
||||||
std::map<void *, std::shared_ptr<std::vector<uint8_t>>> host_mem_block_map_;
|
std::map<void *, std::shared_ptr<std::vector<uint8_t>>> host_mem_block_map_;
|
||||||
|
|
|
@ -90,8 +90,6 @@ bool CPUMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CPUMemoryPool::free_mem_size() { return GetSystemMemorySize("MemAvailable"); }
|
size_t CPUMemoryPool::free_mem_size() { return GetSystemMemorySize("MemAvailable"); }
|
||||||
|
|
||||||
size_t CPUMemoryPool::total_mem_size() { return GetSystemMemorySize("MemTotal"); }
|
|
||||||
} // namespace cpu
|
} // namespace cpu
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -36,7 +36,6 @@ class CPUMemoryPool : public DynamicMemPoolBestFit {
|
||||||
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
|
||||||
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
|
||||||
size_t free_mem_size() override;
|
size_t free_mem_size() override;
|
||||||
size_t total_mem_size() override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CPUMemoryPool() = default;
|
CPUMemoryPool() = default;
|
||||||
|
|
|
@ -130,6 +130,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_event.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_event.cc"
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc"
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc"
|
||||||
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_adapter.cc"
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc"
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc"
|
||||||
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc"
|
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_memory_pool.cc"
|
||||||
|
|
Loading…
Reference in New Issue