forked from mindspore-Ecosystem/mindspore
add ascend mem pool
This commit is contained in:
parent
0565e4641e
commit
bef62db128
|
@ -262,8 +262,8 @@ AscendDeviceAddress::~AscendDeviceAddress() {
|
|||
if (ptr_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
if (mem_dynamic_alloc_) {
|
||||
AscendMemoryAllocator::GetInstance().FreeTensorMem(ptr_);
|
||||
if (from_mem_pool_) {
|
||||
AscendMemoryPool::GetInstance().FreeTensorMem(ptr_);
|
||||
ptr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include "device/device_address.h"
|
||||
#include "device/ascend/ascend_memory_allocator.h"
|
||||
#include "device/ascend/ascend_memory_pool.h"
|
||||
#include "ir/dtype.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "hccl/hcom.h"
|
||||
#include "runtime/context.h"
|
||||
#include "device/ascend/ascend_stream_assign.h"
|
||||
#include "device/ascend/ascend_memory_allocator.h"
|
||||
#include "device/ascend/ascend_memory_pool.h"
|
||||
#include "framework/ge_runtime/model_runner.h"
|
||||
#include "device/ascend/tasksink/task_generator.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
|
|
|
@ -15,29 +15,31 @@
|
|||
*/
|
||||
|
||||
#include "device/ascend/ascend_memory_manager.h"
|
||||
#include "device/ascend/ascend_memory_allocator.h"
|
||||
#include "device/ascend/ascend_memory_pool.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
#include "runtime/mem.h"
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
static const uint64_t ASCEND_MEM_SIZE = 20;
|
||||
static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30);
|
||||
const uint64_t kAscendDeviceMemGB = 20;
|
||||
const uint64_t kAscendMemPoolGB = 5;
|
||||
const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30);
|
||||
const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30);
|
||||
|
||||
void AscendMemoryManager::MallocDeviceMemory() {
|
||||
device_mem_size_ = ASCEND_MEM_SIZE_BYTE;
|
||||
static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO);
|
||||
device_mem_size_ = kAscendDeviceMemSize;
|
||||
static_mem_offset_ = device_mem_size_;
|
||||
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]";
|
||||
}
|
||||
device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));
|
||||
device_mem_pool_size_ = kAscendMemPoolSize;
|
||||
ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
|
||||
}
|
||||
AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
|
||||
AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
|
||||
AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
|
||||
AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
|
||||
}
|
||||
|
||||
void AscendMemoryManager::FreeDeviceMemory() {
|
||||
|
@ -57,8 +59,8 @@ void AscendMemoryManager::FreeDeviceMemory() {
|
|||
}
|
||||
}
|
||||
|
||||
void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) {
|
||||
return AscendMemoryAllocator::GetInstance().AllocTensorMem(size);
|
||||
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
||||
return AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
|
|
|
@ -27,7 +27,11 @@ class AscendMemoryManager : public MemoryManager {
|
|||
|
||||
void MallocDeviceMemory() override;
|
||||
void FreeDeviceMemory() override;
|
||||
void *AllocTensorMemDynamic(size_t size) override;
|
||||
void *MallocMemFromMemPool(size_t size) override;
|
||||
|
||||
private:
|
||||
uint8_t *device_mem_pool_base_{nullptr};
|
||||
uint64_t device_mem_pool_size_{0};
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
|
|
|
@ -14,24 +14,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/ascend/ascend_memory_allocator.h"
|
||||
#include "device/ascend/ascend_memory_pool.h"
|
||||
#include "device/ascend/ascend_kernel_runtime.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
const uint64_t MEM_SIZE = 20;
|
||||
const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30);
|
||||
|
||||
AscendMemoryAllocator::AscendMemoryAllocator() {
|
||||
hasMalloc_ = false;
|
||||
free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));
|
||||
total_mem_size_ = free_mem_size_;
|
||||
}
|
||||
|
||||
size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) {
|
||||
if (hasMalloc_) {
|
||||
size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr* addr) {
|
||||
if (has_malloc_) {
|
||||
MS_LOG(EXCEPTION) << "Has alloc memory pool memory !";
|
||||
}
|
||||
if (size == 0 || size > free_mem_size_) {
|
||||
|
@ -41,35 +32,35 @@ size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) {
|
|||
if (*addr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!";
|
||||
}
|
||||
hasMalloc_ = true;
|
||||
has_malloc_ = true;
|
||||
free_mem_size_ -= size;
|
||||
return size;
|
||||
}
|
||||
|
||||
bool AscendMemoryAllocator::FreeDeviceMem(const DeviceMemPtr& addr) {
|
||||
bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr& addr) {
|
||||
MS_EXCEPTION_IF_NULL(addr);
|
||||
hasMalloc_ = false;
|
||||
has_malloc_ = false;
|
||||
free_mem_size_ = total_mem_size_;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t AscendMemoryAllocator::AlignMemorySize(size_t size) const {
|
||||
size_t AscendMemoryPool::AlignMemorySize(size_t size) const {
|
||||
if (size == 0) {
|
||||
return DYNAMIC_MEM_ALIGN_SIZE;
|
||||
}
|
||||
return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE;
|
||||
}
|
||||
|
||||
size_t AscendMemoryAllocator::mem_alloc_unit_size() const { return free_mem_size_ - 512; }
|
||||
size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; }
|
||||
|
||||
void AscendMemoryAllocator::set_device_mem_pool_base(uint8_t* device_mem_pool_base) {
|
||||
void AscendMemoryPool::set_device_mem_pool_base(uint8_t* device_mem_pool_base) {
|
||||
MS_EXCEPTION_IF_NULL(device_mem_pool_base);
|
||||
device_mem_pool_base_ = device_mem_pool_base;
|
||||
}
|
||||
|
||||
size_t AscendMemoryAllocator::free_mem_size() { return free_mem_size_; }
|
||||
size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; }
|
||||
|
||||
size_t AscendMemoryAllocator::total_mem_size() { return total_mem_size_; }
|
||||
size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; }
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_
|
||||
#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_
|
||||
#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_
|
||||
#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_
|
||||
|
||||
#include <memory>
|
||||
#include "pre_activate/mem_reuse/mem_dynamic_allocator.h"
|
||||
|
@ -23,22 +23,23 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// The fraction of total ascend memory used to compute the graph.
|
||||
static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8;
|
||||
|
||||
class AscendMemoryAllocator : public DynamicMemPoolBestFit {
|
||||
class AscendMemoryPool : public DynamicMemPoolBestFit {
|
||||
public:
|
||||
~AscendMemoryAllocator() override = default;
|
||||
~AscendMemoryPool() override = default;
|
||||
|
||||
size_t AllocDeviceMem(size_t size, DeviceMemPtr* addr) override;
|
||||
bool FreeDeviceMem(const DeviceMemPtr& addr) override;
|
||||
void set_device_mem_pool_base(uint8_t* device_mem_pool_base);
|
||||
void set_device_mem_pool_size(uint64_t device_mem_pool_size) { device_mem_pool_size_ = device_mem_pool_size; }
|
||||
void set_device_mem_pool_size(uint64_t device_mem_pool_size) {
|
||||
device_mem_pool_size_ = device_mem_pool_size;
|
||||
free_mem_size_ = device_mem_pool_size_;
|
||||
total_mem_size_ = free_mem_size_;
|
||||
}
|
||||
size_t free_mem_size() override;
|
||||
size_t total_mem_size() override;
|
||||
|
||||
static AscendMemoryAllocator& GetInstance() {
|
||||
static AscendMemoryAllocator instance;
|
||||
static AscendMemoryPool& GetInstance() {
|
||||
static AscendMemoryPool instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
|
@ -49,10 +50,10 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit {
|
|||
size_t mem_alloc_unit_size() const override;
|
||||
|
||||
private:
|
||||
AscendMemoryAllocator();
|
||||
AscendMemoryAllocator(const AscendMemoryAllocator&) = delete;
|
||||
AscendMemoryAllocator& operator=(const AscendMemoryAllocator&) = delete;
|
||||
bool hasMalloc_;
|
||||
AscendMemoryPool() = default;
|
||||
AscendMemoryPool(const AscendMemoryPool&) = delete;
|
||||
AscendMemoryPool& operator=(const AscendMemoryPool&) = delete;
|
||||
bool has_malloc_{false};
|
||||
uint8_t* device_mem_pool_base_{nullptr};
|
||||
uint64_t device_mem_pool_size_{0};
|
||||
size_t free_mem_size_;
|
||||
|
@ -62,4 +63,4 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit {
|
|||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_
|
||||
#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_
|
|
@ -70,7 +70,7 @@ class DeviceAddress {
|
|||
size_t ref_count_{0};
|
||||
string format_{"DefaultFormat"};
|
||||
TypeId type_id_{kNumberTypeFloat16};
|
||||
bool mem_dynamic_alloc_{false};
|
||||
bool from_mem_pool_{false};
|
||||
friend class KernelRuntime;
|
||||
friend class MemoryManager;
|
||||
friend class mindspore::device::ascend::tasksink::TaskGenerator;
|
||||
|
|
|
@ -46,7 +46,7 @@ GPUDeviceAddress::~GPUDeviceAddress() {
|
|||
}
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (mem_dynamic_alloc_) {
|
||||
if (from_mem_pool_) {
|
||||
GPUMemoryAllocator::GetInstance().FreeTensorMem(ptr_);
|
||||
ptr_ = nullptr;
|
||||
}
|
||||
|
|
|
@ -227,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
|
|||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
auto device_ptr = device_address->ptr_;
|
||||
if (device_ptr == nullptr) {
|
||||
device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]);
|
||||
device_ptr = mem_manager_->MallocMemFromMemPool(output_sizes[i]);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
device_address->ptr_ = device_ptr;
|
||||
}
|
||||
|
@ -244,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
|
|||
kernel_workspaces->emplace_back(nullptr);
|
||||
continue;
|
||||
}
|
||||
auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]);
|
||||
auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
|
||||
MS_EXCEPTION_IF_NULL(workspace);
|
||||
|
@ -292,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN
|
|||
addr_size.emplace_back(device_address.get(), output_size);
|
||||
}
|
||||
|
||||
auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
|
||||
auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total);
|
||||
MS_EXCEPTION_IF_NULL(device_mem_ptr);
|
||||
for (const auto &iter : addr_size) {
|
||||
MS_EXCEPTION_IF_NULL(iter.first);
|
||||
|
@ -328,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
|
|||
addr_size.emplace_back(device_address.get(), output_sizes[i]);
|
||||
}
|
||||
|
||||
auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
|
||||
auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total);
|
||||
MS_EXCEPTION_IF_NULL(device_mem_ptr);
|
||||
for (const auto &iter : addr_size) {
|
||||
MS_EXCEPTION_IF_NULL(iter.first);
|
||||
|
@ -361,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
|
|||
auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
MS_EXCEPTION_IF_NULL(device_address->ptr_);
|
||||
mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
|
||||
mem_manager_->FreeMemFromMemPool(device_address->ptr_);
|
||||
device_address->ptr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -372,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
|
|||
auto workspace = kernel_workspaces[i];
|
||||
if (workspace != nullptr) {
|
||||
MS_EXCEPTION_IF_NULL(workspace->addr);
|
||||
mem_manager_->FreeTensorMemDynamic(workspace->addr);
|
||||
mem_manager_->FreeMemFromMemPool(workspace->addr);
|
||||
workspace->addr = nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -389,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
|
|||
auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
MS_EXCEPTION_IF_NULL(device_address->ptr_);
|
||||
mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
|
||||
mem_manager_->FreeMemFromMemPool(device_address->ptr_);
|
||||
device_address->ptr_ = nullptr;
|
||||
}
|
||||
*is_communication_op = true;
|
||||
|
@ -411,7 +411,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
|
|||
auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
MS_EXCEPTION_IF_NULL(device_address->ptr_);
|
||||
mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
|
||||
mem_manager_->FreeMemFromMemPool(device_address->ptr_);
|
||||
device_address->ptr_ = nullptr;
|
||||
}
|
||||
*is_communication_op = true;
|
||||
|
|
|
@ -21,11 +21,11 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace gpu {
|
||||
void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) {
|
||||
void *GPUMemoryManager::MallocMemFromMemPool(size_t size) {
|
||||
return GPUMemoryAllocator::GetInstance().AllocTensorMem(size);
|
||||
}
|
||||
|
||||
void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) {
|
||||
void GPUMemoryManager::FreeMemFromMemPool(void *device_ptr) {
|
||||
GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr);
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ void GPUMemoryManager::MallocDeviceMemory() {
|
|||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
// If use the dynamic memory pool, then alloc the first memory block to init.
|
||||
if (context_ptr->enable_dynamic_mem_pool()) {
|
||||
auto device_addr = AllocTensorMemDynamic(1);
|
||||
auto device_addr = MallocMemFromMemPool(1);
|
||||
if (!device_addr) {
|
||||
MS_LOG(ERROR) << "Dynamic memory pool init error.";
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) {
|
|||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->enable_dynamic_mem_pool()) {
|
||||
auto device_ptr = AllocTensorMemDynamic(size);
|
||||
auto device_ptr = MallocMemFromMemPool(size);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
return AddressOffset(device_ptr, 0);
|
||||
}
|
||||
|
|
|
@ -28,11 +28,11 @@ class GPUMemoryManager : public MemoryManager {
|
|||
void MallocDeviceMemory() override;
|
||||
void FreeDeviceMemory() override;
|
||||
|
||||
void *AllocTensorMemDynamic(size_t size) override;
|
||||
void FreeTensorMemDynamic(void *device_ptr) override;
|
||||
void *MallocMemFromMemPool(size_t size) override;
|
||||
void FreeMemFromMemPool(void *device_ptr) override;
|
||||
|
||||
protected:
|
||||
uint8_t *MallocStaticMem(size_t size, bool communication_mem);
|
||||
uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
|
|
|
@ -169,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
|
|||
auto device_address =
|
||||
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocOpMemory(device_address, tensor_size);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
|
||||
AnfAlgo::SetOutputAddr(device_address, index, item.get());
|
||||
}
|
||||
}
|
||||
|
@ -198,7 +198,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
|
|||
auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
|
||||
auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocOpMemory(device_address, output_sizes[i]);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
|
||||
AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
|
||||
}
|
||||
}
|
||||
|
@ -213,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
|
|||
for (size_t i = 0; i < workspace_lists.size(); ++i) {
|
||||
auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocOpMemory(device_address, workspace_lists[i]);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
|
||||
AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());
|
||||
}
|
||||
}
|
||||
|
@ -457,7 +457,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
|
|||
bool is_enable_mem_reuse = context_ptr->enable_mem_reuse();
|
||||
auto mem_flag = kDynamicMem;
|
||||
if (is_enable_mem_reuse) {
|
||||
mem_manager_->InitReuseDynamicMemory(graph);
|
||||
mem_manager_->MallocReusedDynamicMem(graph);
|
||||
mem_flag = kReuseDynamicMem;
|
||||
}
|
||||
auto &kernels = graph->execution_order();
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#include "utils/context/ms_context.h"
|
||||
#include "device/memory_manager.h"
|
||||
|
||||
// using mindspore::session::KernelGraph;
|
||||
using mindspore::tensor::Tensor;
|
||||
using TensorPtr = std::shared_ptr<Tensor>;
|
||||
using mindspore::kernel::AddressPtr;
|
||||
|
|
|
@ -21,12 +21,6 @@ using mindspore::memreuse::BestFitMemReuse;
|
|||
using mindspore::memreuse::MemReuseUtilPtr;
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
MemoryManager::~MemoryManager() {
|
||||
device_mem_base_ = nullptr;
|
||||
device_mem_pool_base_ = nullptr;
|
||||
mem_reuse_util_ptr_ = nullptr;
|
||||
}
|
||||
|
||||
size_t MemoryManager::GetCommonAlignSize(size_t input_size) const {
|
||||
return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize;
|
||||
}
|
||||
|
@ -35,7 +29,7 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const {
|
|||
return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize;
|
||||
}
|
||||
|
||||
void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) {
|
||||
void MemoryManager::MallocReusedDynamicMem(session::KernelGraph *graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
|
||||
MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
|
||||
|
@ -147,23 +141,23 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
|
|||
}
|
||||
}
|
||||
|
||||
void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) {
|
||||
auto device_ptr = AllocTensorMemDynamic(size);
|
||||
void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
|
||||
auto device_ptr = MallocMemFromMemPool(size);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
address->ptr_ = device_ptr;
|
||||
address->mem_dynamic_alloc_ = true;
|
||||
address->from_mem_pool_ = true;
|
||||
}
|
||||
|
||||
void *MemoryManager::AllocTensorMemDynamic(size_t size) {
|
||||
void *MemoryManager::MallocMemFromMemPool(size_t size) {
|
||||
if (size == 0) {
|
||||
MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0.";
|
||||
MS_LOG(ERROR) << "MallocMemFromMemPool size is 0.";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MemoryManager::FreeTensorMemDynamic(void *device_ptr) {
|
||||
void MemoryManager::FreeMemFromMemPool(void *device_ptr) {
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null.";
|
||||
MS_LOG(ERROR) << "FreeMemFromMemPool device_ptr is null.";
|
||||
}
|
||||
}
|
||||
} // namespace device
|
||||
|
|
|
@ -31,7 +31,7 @@ using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr;
|
|||
class MemoryManager {
|
||||
public:
|
||||
MemoryManager() = default;
|
||||
virtual ~MemoryManager();
|
||||
virtual ~MemoryManager() = default;
|
||||
|
||||
virtual void MallocDeviceMemory() = 0;
|
||||
virtual void FreeDeviceMemory() = 0;
|
||||
|
@ -40,16 +40,15 @@ class MemoryManager {
|
|||
dynamic_mem_offset_ = 0;
|
||||
}
|
||||
|
||||
void InitReuseDynamicMemory(session::KernelGraph *graph);
|
||||
void MallocReusedDynamicMem(session::KernelGraph *graph);
|
||||
uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
|
||||
uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
|
||||
virtual uint8_t *MallocMem(int flag, size_t size);
|
||||
|
||||
// Alloc memory use the dynamic memory pool.
|
||||
virtual void *AllocTensorMemDynamic(size_t size);
|
||||
// Free memory use the dynamic memory pool.
|
||||
virtual void FreeTensorMemDynamic(void *device_ptr);
|
||||
virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size);
|
||||
virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
|
||||
virtual void *MallocMemFromMemPool(size_t size);
|
||||
virtual void FreeMemFromMemPool(void *device_ptr);
|
||||
|
||||
size_t GetCommonAlignSize(size_t input_size) const;
|
||||
size_t GetCommunicationAlignSize(size_t input_size) const;
|
||||
|
||||
|
@ -57,9 +56,7 @@ class MemoryManager {
|
|||
virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem);
|
||||
virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
|
||||
uint8_t *device_mem_base_{nullptr};
|
||||
uint8_t *device_mem_pool_base_{nullptr};
|
||||
uint64_t device_mem_size_{0};
|
||||
uint64_t device_mem_pool_size_{0};
|
||||
uint64_t dynamic_mem_offset_{0};
|
||||
uint64_t static_mem_offset_{0};
|
||||
size_t total_static_size_ = 0;
|
||||
|
|
|
@ -95,7 +95,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
"../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc"
|
||||
"../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc"
|
||||
"../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc"
|
||||
"../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc"
|
||||
"../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc"
|
||||
"../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc"
|
||||
"../../../mindspore/ccsrc/predict/predict.cc"
|
||||
"../../../mindspore/ccsrc/predict/converter/*.cc"
|
||||
|
|
Loading…
Reference in New Issue