forked from mindspore-Ecosystem/mindspore
!842 gpu optimize some return values of dynamic memory pool
Merge pull request !842 from limingqi107/master
This commit is contained in:
commit
020ee3d532
|
@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
|
|||
MS_EXCEPTION_IF_NULL(input);
|
||||
input->addr = device_address->ptr_;
|
||||
input->size = device_address->size_;
|
||||
kernel_inputs->push_back(input);
|
||||
kernel_inputs->emplace_back(input);
|
||||
}
|
||||
|
||||
auto output_sizes = kernel_mod.GetOutputSizeList();
|
||||
for (size_t i = 0; i < output_sizes.size(); ++i) {
|
||||
auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
if (device_address->ptr_ == nullptr) {
|
||||
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
|
||||
auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
}
|
||||
kernel::AddressPtr output = std::make_shared<kernel::Address>();
|
||||
MS_EXCEPTION_IF_NULL(output);
|
||||
output->addr = device_address->ptr_;
|
||||
output->size = output_sizes[i];
|
||||
kernel_outputs->push_back(output);
|
||||
kernel_outputs->emplace_back(output);
|
||||
}
|
||||
|
||||
auto workspace_sizes = kernel_mod.GetWorkspaceSizeList();
|
||||
for (size_t i = 0; i < workspace_sizes.size(); ++i) {
|
||||
if (workspace_sizes[i] == 0) {
|
||||
|
@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
|
|||
continue;
|
||||
}
|
||||
auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
if (!device_ptr) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
|
||||
MS_EXCEPTION_IF_NULL(workspace);
|
||||
workspace->addr = device_ptr;
|
||||
workspace->size = workspace_sizes[i];
|
||||
kernel_workspaces->push_back(workspace);
|
||||
kernel_workspaces->emplace_back(workspace);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo
|
|||
}
|
||||
}
|
||||
}
|
||||
mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
|
||||
auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
}
|
||||
|
||||
void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
|
||||
|
|
|
@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() {
|
|||
if (context_ptr->enable_dynamic_mem_pool()) {
|
||||
auto device_addr = MallocMemFromMemPool(1);
|
||||
if (!device_addr) {
|
||||
MS_LOG(ERROR) << "Dynamic memory pool init error.";
|
||||
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
|
||||
}
|
||||
} else {
|
||||
// Need to reserve 20% space for dynamic memory
|
||||
|
|
|
@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
|
|||
auto device_address =
|
||||
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
|
||||
auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
AnfAlgo::SetOutputAddr(device_address, index, item.get());
|
||||
}
|
||||
}
|
||||
|
@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
|
|||
auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
|
||||
auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
|
||||
auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
|
||||
}
|
||||
}
|
||||
|
@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
|
|||
for (size_t i = 0; i < workspace_lists.size(); ++i) {
|
||||
auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
|
||||
auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
|
||||
}
|
||||
AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
|
|||
}
|
||||
}
|
||||
|
||||
void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
|
||||
bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
|
||||
auto device_ptr = MallocMemFromMemPool(size);
|
||||
MS_EXCEPTION_IF_NULL(device_ptr);
|
||||
if (!device_ptr) {
|
||||
return false;
|
||||
}
|
||||
address->ptr_ = device_ptr;
|
||||
address->from_mem_pool_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void *MemoryManager::MallocMemFromMemPool(size_t size) {
|
||||
|
@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) {
|
|||
}
|
||||
}
|
||||
|
||||
void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
|
||||
bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
|
||||
std::vector<size_t> size_list) {
|
||||
auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list);
|
||||
if (device_ptr_list.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
if (addr_list.size() != device_ptr_list.size()) {
|
||||
MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list.";
|
||||
}
|
||||
|
@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad
|
|||
addr_list[i]->ptr_ = device_ptr_list[i];
|
||||
addr_list[i]->from_mem_pool_ = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) {
|
||||
|
|
|
@ -46,11 +46,11 @@ class MemoryManager {
|
|||
uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
|
||||
virtual uint8_t *MallocMem(int flag, size_t size);
|
||||
|
||||
virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
|
||||
virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
|
||||
virtual void *MallocMemFromMemPool(size_t size);
|
||||
virtual void FreeMemFromMemPool(const DeviceAddressPtr address);
|
||||
virtual void FreeMemFromMemPool(void *device_ptr);
|
||||
virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
|
||||
virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
|
||||
std::vector<size_t> size_list);
|
||||
virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list);
|
||||
|
||||
|
|
|
@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) {
|
|||
|
||||
std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size,
|
||||
std::vector<size_t> size_list) {
|
||||
std::vector<DeviceMemPtr> device_addr_list;
|
||||
// Pre-alloc the one whole piece memory.
|
||||
auto device_addr = AllocTensorMem(total_size);
|
||||
MS_EXCEPTION_IF_NULL(device_addr);
|
||||
if (!device_addr) {
|
||||
return device_addr_list;
|
||||
}
|
||||
// Remove the pre-alloc memory.
|
||||
auto mem_block = FindMemBlock(device_addr);
|
||||
MS_EXCEPTION_IF_NULL(mem_block);
|
||||
|
@ -54,7 +57,6 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
|
|||
(void)mem_block->block_all_mem_buf_map_.erase(iter);
|
||||
// Split the pre-alloc memory into continuous memory by the size list.
|
||||
DynamicMemBufPtr continuous_mem_buf;
|
||||
std::vector<DeviceMemPtr> device_addr_list;
|
||||
auto buf_addr = device_addr;
|
||||
for (size_t i = 0; i < size_list.size(); i++) {
|
||||
continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]);
|
||||
|
@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) {
|
|||
|
||||
DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
|
||||
size_t alloc_mem_size = CalMemBlockAllocSize(size);
|
||||
|
||||
if (alloc_mem_size == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
// Add new memory block
|
||||
DeviceMemPtr device_addr = nullptr;
|
||||
auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr);
|
||||
if (real_alloc_size < size) {
|
||||
MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size["
|
||||
<< size << "].";
|
||||
MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size
|
||||
<< "].";
|
||||
return nullptr;
|
||||
}
|
||||
auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size);
|
||||
MS_EXCEPTION_IF_NULL(mem_block);
|
||||
|
@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
|
|||
size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) {
|
||||
auto device_free_mem_size = free_mem_size();
|
||||
if (device_free_mem_size < size) {
|
||||
MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size
|
||||
<< "] is smaller than required size[" << size << "].";
|
||||
MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
|
||||
<< "] is smaller than required size[" << size << "].";
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto alloc_mem_size = mem_alloc_unit_size();
|
||||
// Growing at twice of alloc size
|
||||
while (alloc_mem_size < size) {
|
||||
|
@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me
|
|||
MS_EXCEPTION_IF_NULL(mem_buf);
|
||||
auto mem_block = FindMemBlock(mem_buf->device_addr_);
|
||||
MS_EXCEPTION_IF_NULL(mem_block);
|
||||
|
||||
// Divide new memory buf
|
||||
size_t newbuf_size = mem_buf->size_ - size;
|
||||
mem_buf->size_ = size;
|
||||
|
|
Loading…
Reference in New Issue