!38517 optimize ascend memory pool block size

Merge pull request !38517 from laiyongqiang/pool_size
This commit is contained in:
i-robot 2022-08-02 07:10:38 +00:00 committed by Gitee
commit 893e6a4c12
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 49 additions and 37 deletions

View File

@ -73,7 +73,7 @@ mindspore.set_context
- **device_target** (str) - 表示待运行的目标设备,支持'Ascend'、'GPU'和'CPU'。如果未设置此参数则使用MindSpore包对应的后端设备。
- **max_device_memory** (str) - 设置设备可用的最大内存。格式为"xxGB"。默认值1024GB。实际使用的内存大小是设备的可用内存和 `max_device_memory` 值中的最小值。
- **variable_memory_max_size** (str) - 此参数已弃用,将被删除。请使用 `max_device_memory`
- **mempool_block_size** (str) - 设置PyNative模式下设备内存池的块大小。格式为"xxGB"。默认值1GB。最小值是1GB。实际使用的内存池块大小是设备的可用内存和 `mempool_block_size` 值中的最小值。
- **mempool_block_size** (str) - 设置设备内存池的块大小。格式为"xxGB"。默认值1GB。最小值是1GB。实际使用的内存池块大小是设备的可用内存和 `mempool_block_size` 值中的最小值。
- **save_graphs** (bool) - 表示是否保存计算图。默认值False。当 `save_graphs` 属性设为True时 `save_graphs_path` 属性用于设置中间编译图的存储路径。默认情况下,计算图保存在当前目录下。
- **save_graphs_path** (str) - 表示保存计算图的路径。默认值:"."。如果指定的目录不存在,系统将自动创建该目录。在分布式训练中,图形将被保存到 `save_graphs_path/rank_${rank_id}/` 目录下。 `rank_id` 为集群中当前设备的ID。
- **enable_dump** (bool) - 此参数已弃用,将在下一版本中删除。

View File

@ -165,8 +165,6 @@ class DynamicMemPoolBestFit {
size_t MemAllocUnitSize(bool from_persistent_mem = false) const;
// Set the minimum memory unit size using for dynamic extend.
void SetMemAllocUintSize(size_t common_size, size_t persist_size = DYNAMIC_MEM_ALLOC_UNIT_SIZE);
// Set mem pool block size
void SetMemPoolBlockSize(size_t available_device_mem_size);
// The statistics information.
size_t TotalMemStatistics() const {
@ -188,6 +186,8 @@ class DynamicMemPoolBestFit {
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
virtual size_t free_mem_size() = 0;
// Set mem pool block size
virtual void SetMemPoolBlockSize(size_t available_device_mem_size);
protected:
const MemStatusManagerPtr &common_mem() const { return common_mem_; }

View File

@ -30,10 +30,7 @@ using mindspore::profiler::ascend::MemoryProfiling;
namespace mindspore {
namespace device {
namespace ascend {
void AscendMemoryManager::Initialize() {
(void)AscendMemAdapter::GetInstance().Initialize();
AscendMemoryPool::GetInstance().Init();
}
void AscendMemoryManager::Initialize() { (void)AscendMemAdapter::GetInstance().Initialize(); }
void AscendMemoryManager::Finalize() {
AscendMemoryPool::GetInstance().ReleaseDeviceRes();

View File

@ -24,25 +24,37 @@
namespace mindspore {
namespace device {
namespace ascend {
// The minimum unit size (8MB) of memory block used for dynamic extend in graph task sink mode.
static const size_t ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20;
constexpr float kCommonMemoryRatio = 0.9667; // 29/30
constexpr float kPersistMemoryRatio = 0.0333; // 1/30
// The minimum unit size (8MB) of memory block used for dynamic extend in graph run mode.
static const size_t ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE = 8 << 20;
void AscendMemoryPool::Init() {
void AscendMemoryPool::SetMemPoolBlockSize(size_t available_device_mem_size) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
const bool task_sink = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
auto total_size = AscendMemAdapter::GetInstance().GetMsUsedHbmSize();
if (pynative_mode) {
SetMemPoolBlockSize(total_size);
} else {
if (task_sink) {
SetMemAllocUintSize(ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH, ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH);
} else {
SetMemAllocUintSize(FloatToSize(total_size * kCommonMemoryRatio), FloatToSize(total_size * kPersistMemoryRatio));
float mem_block_size = ms_context->get_param<float>(MS_CTX_MEMPOOL_BLOCK_SIZE);
// set from context configuration
if (mem_block_size != kDefaultMempoolBlockSize) {
size_t config_size = FloatToSize(mem_block_size * kGBToByte);
if (config_size > available_device_mem_size) {
MS_LOG(WARNING) << "Memory pool block size " << config_size
<< " is bigger than currently available maximum memory " << available_device_mem_size
<< ", and the actual effective value will be " << available_device_mem_size;
}
// Reserve 1G for persistent_mem
if (available_device_mem_size > DYNAMIC_MEM_ALLOC_UNIT_SIZE) {
available_device_mem_size -= DYNAMIC_MEM_ALLOC_UNIT_SIZE;
}
size_t real_block_size = std::min(config_size, available_device_mem_size);
SetMemAllocUintSize(real_block_size, DYNAMIC_MEM_ALLOC_UNIT_SIZE);
return;
}
// set by default configuration
const bool is_graph_run_mode = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
if (is_graph_run_mode) {
SetMemAllocUintSize(ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE,
ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE);
} else {
SetMemAllocUintSize(DYNAMIC_MEM_ALLOC_UNIT_SIZE, DYNAMIC_MEM_ALLOC_UNIT_SIZE);
}
}
@ -59,27 +71,33 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size, bool from_persistent_
<< ", Memory Statistic:" << AscendMemAdapter::GetInstance().DevMemStatistics()
<< "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
AscendMemAdapter::GetInstance().DevMemStatistics();
DumpDynamicMemPoolDebugInfo();
return 0;
}
size_t alloc_mem_size;
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
SetMemPoolBlockSize(device_free_mem_size);
auto alloc_mem_unit_size = MemAllocUnitSize(from_persistent_mem);
MS_LOG(DEBUG) << "Get unit block size " << alloc_mem_unit_size;
alloc_mem_size = alloc_mem_unit_size;
if (pynative_mode) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool is_graph_run_mode = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
if (is_graph_run_mode) {
// Growing at adding alloc unit size
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size + alloc_mem_unit_size;
}
} else {
// Growing at twice of alloc unit size
constexpr size_t kDouble = 2;
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size * kDouble;
}
} else {
// Growing at adding alloc unit size
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size + alloc_mem_unit_size;
}
}
alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size);
return alloc_mem_size;
}

View File

@ -29,10 +29,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
AscendMemoryPool(const AscendMemoryPool &) = delete;
AscendMemoryPool &operator=(const AscendMemoryPool &) = delete;
void Init();
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
size_t free_mem_size() override;
// Set mem pool block size
void SetMemPoolBlockSize(size_t available_device_mem_size) override;
void ResetIdleMemBuf() const;

View File

@ -289,10 +289,6 @@ class _Context:
def set_mempool_block_size(self, mempool_block_size):
"""Set the block size of memory pool."""
if _get_mode() == GRAPH_MODE:
logger.warning("Graph mode doesn't support to set parameter 'mempool_block_size' of context currently, "
"you can use context.set_context to set pynative mode.")
return
if not Validator.check_str_by_regular(mempool_block_size, _re_pattern):
raise ValueError("For 'context.set_context', the argument 'mempool_block_size' should be in "
"correct format! Such as \"10GB\", "
@ -745,7 +741,7 @@ def set_context(**kwargs):
The actual used memory size is the minimum of the available memory of the device and max_device_memory.
variable_memory_max_size (str): This parameter is deprecated, and will be removed in a future version.
Please use parameter 'max_device_memory' instead.
mempool_block_size (str): Set the size of the memory pool block in PyNative mode for devices.
mempool_block_size (str): Set the size of the memory pool block for devices.
The format is "xxGB". Default: "1GB". Minimum size is "1GB". The actual used memory block size is the minimum
of the available memory of the device and mempool_block_size.
save_graphs (bool): Whether to save graphs. Default: False.