From 55b3557c0ddd38cbb90c79a288fdb9e576a1f277 Mon Sep 17 00:00:00 2001
From: limingqi107
Date: Wed, 17 Jun 2020 11:52:41 +0800
Subject: [PATCH] gpu optimize the max device memory config

---
 .../ccsrc/device/gpu/gpu_kernel_runtime.cc    |  1 +
 .../ccsrc/device/gpu/gpu_memory_allocator.cc  | 28 +++++++++++++------
 .../ccsrc/device/gpu/gpu_memory_allocator.h   |  4 ++-
 .../mem_reuse/mem_dynamic_allocator.cc        |  2 +-
 mindspore/context.py                          |  5 +++-
 tests/st/ops/gpu/test_conv2d_op.py            |  2 +-
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index 0e90b90f2d2..9e7fb884008 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -39,6 +39,7 @@ bool GPUKernelRuntime::SyncStream() { return GPUDeviceManager::GetInstance().Syn
 
 bool GPUKernelRuntime::Init() {
   if (device_init_ == true) {
+    GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory();
     return true;
   }
   auto ret = InitDevice();
diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc
index fceb5e78b4c..91379456619 100644
--- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.cc
@@ -29,18 +29,30 @@ bool GPUMemoryAllocator::Init() {
   size_t free_size = CudaDriver::free_mem_size();
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
-  float max_device_memory = context_ptr->max_device_memory();
-  max_available_device_memory_ = FloatToSize(max_device_memory * 1024 * 1024 * 1024);
-  if (total_size > 0 && free_size > 0 && max_available_device_memory_ > 0) {
+  limited_device_memory_ = context_ptr->max_device_memory();
+  available_device_memory_ = FloatToSize(limited_device_memory_ * 1024 * 1024 * 1024);
+  if (total_size > 0 && free_size > 0 && available_device_memory_ > 0) {
     MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size
-                 << ", set max available memory size " << max_available_device_memory_;
+                 << ", set max available memory size " << available_device_memory_ << ".";
   } else {
     MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size "
-                      << free_size << ", set max available memory size " << max_available_device_memory_;
+                      << free_size << ", set max available memory size " << available_device_memory_ << ".";
   }
   return true;
 }
 
+void GPUMemoryAllocator::CheckMaxDeviceMemory() const {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  auto max_device_memory = context_ptr->max_device_memory();
+  // Modifying the max device memory at runtime is currently not supported.
+  if (limited_device_memory_ != max_device_memory) {
+    MS_LOG(EXCEPTION)
+      << "Can't change context param max_device_memory at runtime, currently effective max_device_memory("
+      << limited_device_memory_ << "GB), set new max_device_memory(" << max_device_memory << "GB) failed.";
+  }
+}
+
 bool GPUMemoryAllocator::Finalize() {
   if (buffer_q_addr_ != nullptr) {
     if (!CudaDriver::FreeDeviceMem(buffer_q_addr_)) {
@@ -73,7 +85,7 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
     MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed.";
   }
   total_used_device_memory_ += alloc_size;
-  max_available_device_memory_ -= alloc_size;
+  available_device_memory_ -= alloc_size;
   MS_LOG(INFO) << "Current free memory size[" << free_size - alloc_size << "], current alloc size[" << alloc_size
                << "], total used size[" << total_used_device_memory_ << "].";
   return alloc_size;
@@ -81,9 +93,7 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
 
 bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); }
 
-size_t GPUMemoryAllocator::free_mem_size() {
-  return std::min(CudaDriver::free_mem_size(), max_available_device_memory_);
-}
+size_t GPUMemoryAllocator::free_mem_size() { return std::min(CudaDriver::free_mem_size(), available_device_memory_); }
 
 size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); }
 }  // namespace gpu
diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h
index baaf50b641b..90d77910574 100644
--- a/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h
+++ b/mindspore/ccsrc/device/gpu/gpu_memory_allocator.h
@@ -28,6 +28,7 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
  public:
   ~GPUMemoryAllocator() override = default;
   bool Init();
+  void CheckMaxDeviceMemory() const;
   bool Finalize();
 
   bool AllocBufferQueueMem(size_t size, DeviceMemPtr *addr);
@@ -49,8 +50,9 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
   // Used to track address of data buffer queue.
   DeviceMemPtr buffer_q_addr_{nullptr};
 
+  float limited_device_memory_{0.0};
   size_t total_used_device_memory_{0};
-  size_t max_available_device_memory_{0};
+  size_t available_device_memory_{0};
 };
 }  // namespace gpu
 }  // namespace device
diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
index a2dfce22414..095f8f6495a 100644
--- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
+++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
@@ -150,7 +150,7 @@ size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) {
     alloc_mem_size = alloc_mem_size * 2;
   }
   alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size);
-  return AlignMemorySize(alloc_mem_size);
+  return alloc_mem_size;
 }
 
 bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) const {
diff --git a/mindspore/context.py b/mindspore/context.py
index ef2be246369..1e7ba3b28b7 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -342,6 +342,8 @@ class _Context:
         if not check_input_format(max_device_memory):
             raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
         max_device_memory_value = float(max_device_memory[:-2])
+        if max_device_memory_value == 0:
+            raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
         self._context_handle.set_max_device_memory(max_device_memory_value)
 
 def check_input_format(x):
@@ -523,7 +525,8 @@ def set_context(**kwargs):
         separated by colons; single operator can choose op_trace, op_trace cannot be combined with
         training_trace and task_trace. Default: "training_trace".
         check_bprop (bool): Whether to check bprop. Default: False.
-        max_device_memory (str): Sets the maximum memory available for device. Default: "1024GB".
+        max_device_memory (str): Sets the maximum memory available for the device; currently only supported on GPU.
+            The format is "xxGB". Default: "1024GB".
 
     Raises:
         ValueError: If input key is not an attribute in context.
diff --git a/tests/st/ops/gpu/test_conv2d_op.py b/tests/st/ops/gpu/test_conv2d_op.py
index a42114a106e..6af5fc39656 100644
--- a/tests/st/ops/gpu/test_conv2d_op.py
+++ b/tests/st/ops/gpu/test_conv2d_op.py
@@ -53,7 +53,7 @@ def test_conv2d():
                               [162, 174, 186],
                               [198, 210, 222]]]]).astype(np.float32)
 
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", max_device_memory="0.2GB")
     conv2d = NetConv2d()
    conv2d = NetConv2d()
     output = conv2d(x, w)
     assert (output.asnumpy() == expect).all()