forked from mindspore-Ecosystem/mindspore
!2210 gpu optimize the max device memory config
Merge pull request !2210 from limingqi107/max_device_memory_optimize
This commit is contained in:
commit
4642df207a
|
@ -39,6 +39,7 @@ bool GPUKernelRuntime::SyncStream() { return GPUDeviceManager::GetInstance().Syn
|
|||
|
||||
bool GPUKernelRuntime::Init() {
|
||||
if (device_init_ == true) {
|
||||
GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory();
|
||||
return true;
|
||||
}
|
||||
auto ret = InitDevice();
|
||||
|
|
|
@ -29,18 +29,30 @@ bool GPUMemoryAllocator::Init() {
|
|||
size_t free_size = CudaDriver::free_mem_size();
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
float max_device_memory = context_ptr->max_device_memory();
|
||||
max_available_device_memory_ = FloatToSize(max_device_memory * 1024 * 1024 * 1024);
|
||||
if (total_size > 0 && free_size > 0 && max_available_device_memory_ > 0) {
|
||||
limited_device_memory_ = context_ptr->max_device_memory();
|
||||
available_device_memory_ = FloatToSize(limited_device_memory_ * 1024 * 1024 * 1024);
|
||||
if (total_size > 0 && free_size > 0 && available_device_memory_ > 0) {
|
||||
MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size
|
||||
<< ", set max available memory size " << max_available_device_memory_;
|
||||
<< ", set max available memory size " << available_device_memory_ << ".";
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size "
|
||||
<< free_size << ", set max available memory size " << max_available_device_memory_;
|
||||
<< free_size << ", set max available memory size " << available_device_memory_ << ".";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPUMemoryAllocator::CheckMaxDeviceMemory() const {
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto max_device_memory = context_ptr->max_device_memory();
|
||||
// Currently not support modifying the max device memory.
|
||||
if (limited_device_memory_ != max_device_memory) {
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "Can't change context param max_device_memory in runtime, currently effective max_device_memory("
|
||||
<< limited_device_memory_ << "GB), set new max_device_memory(" << max_device_memory << "GB) failed.";
|
||||
}
|
||||
}
|
||||
|
||||
bool GPUMemoryAllocator::Finalize() {
|
||||
if (buffer_q_addr_ != nullptr) {
|
||||
if (!CudaDriver::FreeDeviceMem(buffer_q_addr_)) {
|
||||
|
@ -73,7 +85,7 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
|||
MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed.";
|
||||
}
|
||||
total_used_device_memory_ += alloc_size;
|
||||
max_available_device_memory_ -= alloc_size;
|
||||
available_device_memory_ -= alloc_size;
|
||||
MS_LOG(INFO) << "Current free memory size[" << free_size - alloc_size << "], current alloc size[" << alloc_size
|
||||
<< "], total used size[" << total_used_device_memory_ << "].";
|
||||
return alloc_size;
|
||||
|
@ -81,9 +93,7 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
|
|||
|
||||
// Releases the device memory at `addr` by forwarding to
// CudaDriver::FreeDeviceMem and returns that call's result unchanged.
bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) {
  const bool released = CudaDriver::FreeDeviceMem(addr);
  return released;
}
|
||||
|
||||
size_t GPUMemoryAllocator::free_mem_size() {
|
||||
return std::min(CudaDriver::free_mem_size(), max_available_device_memory_);
|
||||
}
|
||||
// Returns the smaller of the free size reported by CudaDriver and the
// remaining budget tracked in available_device_memory_.
size_t GPUMemoryAllocator::free_mem_size() {
  const size_t driver_free_size = CudaDriver::free_mem_size();
  return std::min(driver_free_size, available_device_memory_);
}
|
||||
|
||||
// Returns the total memory size as reported by CudaDriver::total_mem_size().
size_t GPUMemoryAllocator::total_mem_size() {
  const size_t driver_total_size = CudaDriver::total_mem_size();
  return driver_total_size;
}
|
||||
} // namespace gpu
|
||||
|
|
|
@ -28,6 +28,7 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
|
|||
public:
|
||||
~GPUMemoryAllocator() override = default;
|
||||
bool Init();
|
||||
void CheckMaxDeviceMemory() const;
|
||||
bool Finalize();
|
||||
bool AllocBufferQueueMem(size_t size, DeviceMemPtr *addr);
|
||||
|
||||
|
@ -49,8 +50,9 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
|
|||
// Used to track address of data buffer queue.
|
||||
DeviceMemPtr buffer_q_addr_{nullptr};
|
||||
|
||||
float limited_device_memory_{0.0};
|
||||
size_t total_used_device_memory_{0};
|
||||
size_t max_available_device_memory_{0};
|
||||
size_t available_device_memory_{0};
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
|
|
|
@ -150,7 +150,7 @@ size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) {
|
|||
alloc_mem_size = alloc_mem_size * 2;
|
||||
}
|
||||
alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size);
|
||||
return AlignMemorySize(alloc_mem_size);
|
||||
return alloc_mem_size;
|
||||
}
|
||||
|
||||
bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) const {
|
||||
|
|
|
@ -342,6 +342,8 @@ class _Context:
|
|||
if not check_input_format(max_device_memory):
|
||||
raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
|
||||
max_device_memory_value = float(max_device_memory[:-2])
|
||||
if max_device_memory_value == 0:
|
||||
raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
|
||||
self._context_handle.set_max_device_memory(max_device_memory_value)
|
||||
|
||||
def check_input_format(x):
|
||||
|
@ -523,7 +525,8 @@ def set_context(**kwargs):
|
|||
separated by colons; single operator can choose op_trace, op_trace cannot be combined with
|
||||
training_trace and task_trace. Default: "training_trace".
|
||||
check_bprop (bool): Whether to check bprop. Default: False.
|
||||
max_device_memory (str): Sets the maximum memory available for device. Default: "1024GB".
|
||||
max_device_memory (str): Sets the maximum memory available for device, currently only supported on GPU.
|
||||
The format is "xxGB". Default: "1024GB".
|
||||
|
||||
Raises:
|
||||
ValueError: If input key is not an attribute in context.
|
||||
|
|
|
@ -53,7 +53,7 @@ def test_conv2d():
|
|||
[162, 174, 186],
|
||||
[198, 210, 222]]]]).astype(np.float32)
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
|
||||
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", max_device_memory="0.2GB")
|
||||
conv2d = NetConv2d()
|
||||
output = conv2d(x, w)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
|
Loading…
Reference in New Issue