!842 GPU: optimize some return values of the dynamic memory pool

Merge pull request !842 from limingqi107/master
This commit is contained in:
mindspore-ci-bot 2020-04-30 09:34:26 +08:00 committed by Gitee
commit 020ee3d532
6 changed files with 52 additions and 26 deletions

View File

@@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
MS_EXCEPTION_IF_NULL(input);
input->addr = device_address->ptr_;
input->size = device_address->size_;
kernel_inputs->push_back(input);
kernel_inputs->emplace_back(input);
}
auto output_sizes = kernel_mod.GetOutputSizeList();
for (size_t i = 0; i < output_sizes.size(); ++i) {
auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i);
MS_EXCEPTION_IF_NULL(device_address);
if (device_address->ptr_ == nullptr) {
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
if (!ret) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
}
kernel::AddressPtr output = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(output);
output->addr = device_address->ptr_;
output->size = output_sizes[i];
kernel_outputs->push_back(output);
kernel_outputs->emplace_back(output);
}
auto workspace_sizes = kernel_mod.GetWorkspaceSizeList();
for (size_t i = 0; i < workspace_sizes.size(); ++i) {
if (workspace_sizes[i] == 0) {
@@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
continue;
}
auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]);
MS_EXCEPTION_IF_NULL(device_ptr);
if (!device_ptr) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(workspace);
workspace->addr = device_ptr;
workspace->size = workspace_sizes[i];
kernel_workspaces->push_back(workspace);
kernel_workspaces->emplace_back(workspace);
}
}
@@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo
}
}
}
mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
if (!ret) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
}
void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,

View File

@@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() {
if (context_ptr->enable_dynamic_mem_pool()) {
auto device_addr = MallocMemFromMemPool(1);
if (!device_addr) {
MS_LOG(ERROR) << "Dynamic memory pool init error.";
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
}
} else {
// Need to reserve 20% space for dynamic memory

View File

@@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
auto device_address =
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
MS_EXCEPTION_IF_NULL(device_address);
mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
if (!ret) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
AnfAlgo::SetOutputAddr(device_address, index, item.get());
}
}
@@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
MS_EXCEPTION_IF_NULL(device_address);
mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
if (!ret) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
}
}
@@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
for (size_t i = 0; i < workspace_lists.size(); ++i) {
auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown);
MS_EXCEPTION_IF_NULL(device_address);
mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
if (!ret) {
MS_LOG(EXCEPTION) << "Malloc device memory failed.";
}
AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());
}
}

View File

@@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
}
}
void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
auto device_ptr = MallocMemFromMemPool(size);
MS_EXCEPTION_IF_NULL(device_ptr);
if (!device_ptr) {
return false;
}
address->ptr_ = device_ptr;
address->from_mem_pool_ = true;
return true;
}
void *MemoryManager::MallocMemFromMemPool(size_t size) {
@@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) {
}
}
void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
std::vector<size_t> size_list) {
auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list);
if (device_ptr_list.size() == 0) {
return false;
}
if (addr_list.size() != device_ptr_list.size()) {
MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list.";
}
@@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad
addr_list[i]->ptr_ = device_ptr_list[i];
addr_list[i]->from_mem_pool_ = true;
}
return true;
}
std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) {

View File

@@ -46,11 +46,11 @@ class MemoryManager {
uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
virtual uint8_t *MallocMem(int flag, size_t size);
virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
virtual void *MallocMemFromMemPool(size_t size);
virtual void FreeMemFromMemPool(const DeviceAddressPtr address);
virtual void FreeMemFromMemPool(void *device_ptr);
virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
std::vector<size_t> size_list);
virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list);

View File

@@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) {
std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size,
std::vector<size_t> size_list) {
std::vector<DeviceMemPtr> device_addr_list;
// Pre-alloc the one whole piece memory.
auto device_addr = AllocTensorMem(total_size);
MS_EXCEPTION_IF_NULL(device_addr);
if (!device_addr) {
return device_addr_list;
}
// Remove the pre-alloc memory.
auto mem_block = FindMemBlock(device_addr);
MS_EXCEPTION_IF_NULL(mem_block);
@@ -54,7 +57,6 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
(void)mem_block->block_all_mem_buf_map_.erase(iter);
// Split the pre-alloc memory into continuous memory by the size list.
DynamicMemBufPtr continuous_mem_buf;
std::vector<DeviceMemPtr> device_addr_list;
auto buf_addr = device_addr;
for (size_t i = 0; i < size_list.size(); i++) {
continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]);
@@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) {
DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
size_t alloc_mem_size = CalMemBlockAllocSize(size);
if (alloc_mem_size == 0) {
return nullptr;
}
// Add new memory block
DeviceMemPtr device_addr = nullptr;
auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr);
if (real_alloc_size < size) {
MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size["
<< size << "].";
MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size
<< "].";
return nullptr;
}
auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size);
MS_EXCEPTION_IF_NULL(mem_block);
@@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) {
auto device_free_mem_size = free_mem_size();
if (device_free_mem_size < size) {
MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size
<< "] is smaller than required size[" << size << "].";
MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
<< "] is smaller than required size[" << size << "].";
return 0;
}
auto alloc_mem_size = mem_alloc_unit_size();
// Growing at twice of alloc size
while (alloc_mem_size < size) {
@@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me
MS_EXCEPTION_IF_NULL(mem_buf);
auto mem_block = FindMemBlock(mem_buf->device_addr_);
MS_EXCEPTION_IF_NULL(mem_block);
// Divide new memory buf
size_t newbuf_size = mem_buf->size_ - size;
mem_buf->size_ = size;