forked from mindspore-Ecosystem/mindspore
!38517 optimize ascend memory pool block size
Merge pull request !38517 from laiyongqiang/pool_size
commit 893e6a4c12
@@ -73,7 +73,7 @@ mindspore.set_context
- **device_target** (str) - The target device to run on; 'Ascend', 'GPU' and 'CPU' are supported. If this parameter is not set, the backend device corresponding to the MindSpore package is used.
- **max_device_memory** (str) - Sets the maximum memory available to the device. The format is "xxGB". Default: 1024GB. The actual memory used is the minimum of the device's available memory and the value of `max_device_memory`.
- **variable_memory_max_size** (str) - This parameter is deprecated and will be removed. Please use `max_device_memory` instead.
- **mempool_block_size** (str) - Sets the block size of the device memory pool in PyNative mode. The format is "xxGB". Default: 1GB. The minimum size is 1GB. The actual memory pool block size used is the minimum of the device's available memory and the value of `mempool_block_size`.
- **mempool_block_size** (str) - Sets the block size of the device memory pool. The format is "xxGB". Default: 1GB. The minimum size is 1GB. The actual memory pool block size used is the minimum of the device's available memory and the value of `mempool_block_size`.
- **save_graphs** (bool) - Whether to save the computational graph. Default: False. When `save_graphs` is set to True, `save_graphs_path` is used to set the storage path of the intermediate compilation graphs. By default, the graphs are saved in the current directory.
- **save_graphs_path** (str) - The path for saving the computational graph. Default: ".". If the specified directory does not exist, the system creates it automatically. In distributed training, graphs are saved under the `save_graphs_path/rank_${rank_id}/` directory; `rank_id` is the ID of the current device in the cluster.
- **enable_dump** (bool) - This parameter is deprecated and will be removed in the next version.
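For reference, a minimal usage sketch of the parameters documented above; the concrete sizes are illustrative values, not taken from this commit:

import mindspore as ms

# Illustrative only: configure device memory before building any network.
ms.set_context(device_target="Ascend",
               max_device_memory="30GB",     # cap on usable device memory
               mempool_block_size="2GB")     # memory pool block size, minimum "1GB"
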
@@ -165,8 +165,6 @@ class DynamicMemPoolBestFit {
size_t MemAllocUnitSize(bool from_persistent_mem = false) const;
// Set the minimum memory unit size using for dynamic extend.
void SetMemAllocUintSize(size_t common_size, size_t persist_size = DYNAMIC_MEM_ALLOC_UNIT_SIZE);
// Set mem pool block size
void SetMemPoolBlockSize(size_t available_device_mem_size);

// The statistics information.
size_t TotalMemStatistics() const {
@@ -188,6 +186,8 @@ class DynamicMemPoolBestFit {
virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) = 0;
virtual bool FreeDeviceMem(const DeviceMemPtr &addr) = 0;
virtual size_t free_mem_size() = 0;
// Set mem pool block size
virtual void SetMemPoolBlockSize(size_t available_device_mem_size);

protected:
const MemStatusManagerPtr &common_mem() const { return common_mem_; }
@@ -30,10 +30,7 @@ using mindspore::profiler::ascend::MemoryProfiling;
namespace mindspore {
namespace device {
namespace ascend {
void AscendMemoryManager::Initialize() {
(void)AscendMemAdapter::GetInstance().Initialize();
AscendMemoryPool::GetInstance().Init();
}
void AscendMemoryManager::Initialize() { (void)AscendMemAdapter::GetInstance().Initialize(); }

void AscendMemoryManager::Finalize() {
AscendMemoryPool::GetInstance().ReleaseDeviceRes();
@@ -24,25 +24,37 @@
namespace mindspore {
namespace device {
namespace ascend {
// The minimum unit size (8MB) of memory block used for dynamic extend in graph task sink mode.
static const size_t ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH = 8 << 20;
constexpr float kCommonMemoryRatio = 0.9667; // 29/30
constexpr float kPersistMemoryRatio = 0.0333; // 1/30
// The minimum unit size (8MB) of memory block used for dynamic extend in graph run mode.
static const size_t ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE = 8 << 20;

void AscendMemoryPool::Init() {
void AscendMemoryPool::SetMemPoolBlockSize(size_t available_device_mem_size) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
const bool task_sink = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
auto total_size = AscendMemAdapter::GetInstance().GetMsUsedHbmSize();
if (pynative_mode) {
SetMemPoolBlockSize(total_size);
} else {
if (task_sink) {
SetMemAllocUintSize(ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH, ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH);
} else {
SetMemAllocUintSize(FloatToSize(total_size * kCommonMemoryRatio), FloatToSize(total_size * kPersistMemoryRatio));
float mem_block_size = ms_context->get_param<float>(MS_CTX_MEMPOOL_BLOCK_SIZE);
// set from context configuration
if (mem_block_size != kDefaultMempoolBlockSize) {
size_t config_size = FloatToSize(mem_block_size * kGBToByte);
if (config_size > available_device_mem_size) {
MS_LOG(WARNING) << "Memory pool block size " << config_size
<< " is bigger than currently available maximum memory " << available_device_mem_size
<< ", and the actual effective value will be " << available_device_mem_size;
}
// Reserve 1G for persistent_mem
if (available_device_mem_size > DYNAMIC_MEM_ALLOC_UNIT_SIZE) {
available_device_mem_size -= DYNAMIC_MEM_ALLOC_UNIT_SIZE;
}
size_t real_block_size = std::min(config_size, available_device_mem_size);
SetMemAllocUintSize(real_block_size, DYNAMIC_MEM_ALLOC_UNIT_SIZE);
return;
}

// set by default configuration
const bool is_graph_run_mode = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
if (is_graph_run_mode) {
SetMemAllocUintSize(ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE,
ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE);
} else {
SetMemAllocUintSize(DYNAMIC_MEM_ALLOC_UNIT_SIZE, DYNAMIC_MEM_ALLOC_UNIT_SIZE);
}
}
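To make the new branching easier to follow, here is a rough Python restatement of the block-size selection above. It is a sketch only; DYNAMIC_MEM_ALLOC_UNIT_SIZE is taken to be 1GB as the "Reserve 1G for persistent_mem" comment implies, and all names in the sketch are illustrative:

GB = 1 << 30
DEFAULT_UNIT_SIZE = 1 * GB          # stands in for DYNAMIC_MEM_ALLOC_UNIT_SIZE
GRAPH_RUN_MODE_UNIT_SIZE = 8 << 20  # stands in for ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE

def choose_block_size(configured_gb, default_gb, available, graph_run_mode):
    """Sketch of AscendMemoryPool::SetMemPoolBlockSize."""
    if configured_gb != default_gb:
        # mempool_block_size was set in the context: clamp it to the memory
        # still available after reserving one unit (1GB) for persistent memory.
        config_size = int(configured_gb * GB)
        if available > DEFAULT_UNIT_SIZE:
            available -= DEFAULT_UNIT_SIZE
        return min(config_size, available)
    # Default behaviour: 8MB units in graph (task sink) run mode, 1GB otherwise.
    return GRAPH_RUN_MODE_UNIT_SIZE if graph_run_mode else DEFAULT_UNIT_SIZE
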
@@ -59,27 +71,33 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size, bool from_persistent_
<< ", Memory Statistic:" << AscendMemAdapter::GetInstance().DevMemStatistics()
<< "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
AscendMemAdapter::GetInstance().DevMemStatistics();
DumpDynamicMemPoolDebugInfo();
return 0;
}

size_t alloc_mem_size;
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
SetMemPoolBlockSize(device_free_mem_size);
auto alloc_mem_unit_size = MemAllocUnitSize(from_persistent_mem);
MS_LOG(DEBUG) << "Get unit block size " << alloc_mem_unit_size;
alloc_mem_size = alloc_mem_unit_size;
if (pynative_mode) {

auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const bool is_graph_run_mode = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
if (is_graph_run_mode) {
// Growing at adding alloc unit size
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size + alloc_mem_unit_size;
}
} else {
// Growing at twice of alloc unit size
constexpr size_t kDouble = 2;
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size * kDouble;
}
} else {
// Growing at adding alloc unit size
while (alloc_mem_size < size) {
alloc_mem_size = alloc_mem_size + alloc_mem_unit_size;
}
}

alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size);
return alloc_mem_size;
}
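The new growth rule above fits in a few lines; this Python sketch assumes the same semantics (grow linearly by one allocation unit in graph run mode, double otherwise, then clamp to the free device memory), and the names are illustrative:

def cal_block_alloc_size(request, unit, free_mem, graph_run_mode):
    """Sketch of the growth loop in CalMemBlockAllocSize."""
    alloc = unit
    if graph_run_mode:
        # Growing at adding alloc unit size.
        while alloc < request:
            alloc += unit
    else:
        # Growing at twice of alloc unit size.
        while alloc < request:
            alloc *= 2
    return min(alloc, free_mem)
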
@@ -29,10 +29,11 @@ class AscendMemoryPool : public DynamicMemPoolBestFit {
AscendMemoryPool(const AscendMemoryPool &) = delete;
AscendMemoryPool &operator=(const AscendMemoryPool &) = delete;

void Init();
size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
bool FreeDeviceMem(const DeviceMemPtr &addr) override;
size_t free_mem_size() override;
// Set mem pool block size
void SetMemPoolBlockSize(size_t available_device_mem_size) override;

void ResetIdleMemBuf() const;
@@ -289,10 +289,6 @@ class _Context:

def set_mempool_block_size(self, mempool_block_size):
"""Set the block size of memory pool."""
if _get_mode() == GRAPH_MODE:
logger.warning("Graph mode doesn't support to set parameter 'mempool_block_size' of context currently, "
"you can use context.set_context to set pynative mode.")
return
if not Validator.check_str_by_regular(mempool_block_size, _re_pattern):
raise ValueError("For 'context.set_context', the argument 'mempool_block_size' should be in "
"correct format! Such as \"10GB\", "
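The _re_pattern used by Validator.check_str_by_regular is not shown in this hunk. A hypothetical check along the same lines might look like this sketch (the regex is an assumption for illustration, not the pattern from the MindSpore source):

import re

# Hypothetical "xxGB" pattern, e.g. "1GB" or "10GB".
_SIZE_PATTERN = re.compile(r"^[0-9]+(\.[0-9]+)?GB$")

def check_mempool_block_size(value):
    if not _SIZE_PATTERN.match(value):
        raise ValueError("For 'context.set_context', the argument 'mempool_block_size' "
                         "should be in correct format! Such as \"10GB\".")
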
@@ -745,7 +741,7 @@ def set_context(**kwargs):
The actual used memory size is the minimum of the available memory of the device and max_device_memory.
variable_memory_max_size (str): This parameter is deprecated, and will be removed in a future version.
Please use parameter 'max_device_memory' instead.
mempool_block_size (str): Set the size of the memory pool block in PyNative mode for devices.
mempool_block_size (str): Set the size of the memory pool block for devices.
The format is "xxGB". Default: "1GB". Minimum size is "1G". The actual used memory block size is the minimum
of the available memory of the device and mempool_block_size.
save_graphs (bool): Whether to save graphs. Default: False.