forked from mindspore-Ecosystem/mindspore

add API for enable_alltoall

parent: e55ce35b0f
commit: 9f177a1cf2
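The change boils down to one new boolean keyword for set_auto_parallel_context. A minimal usage sketch follows; it is not part of the diff, and the device count and parallel mode are illustrative assumptions. Only the enable_alltoall keyword itself is what this commit introduces.

    from mindspore import context

    # Assumed setup: 8 devices in semi_auto_parallel mode (illustrative values).
    # enable_alltoall=True lets AllToAll operators be generated directly; with the
    # default False, an AllGather/Split/Concat combination is used instead.
    context.set_auto_parallel_context(device_num=8,
                                      parallel_mode="semi_auto_parallel",
                                      enable_alltoall=True)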
@@ -180,7 +180,7 @@ MindSpore context, used to configure the current execution environment, including the execution mode, execution
 parallel_mode                strategy_ckpt_load_file
 all_reduce_fusion_config     strategy_ckpt_save_file
 enable_parallel_optimizer    dataset_strategy
-\                            pipeline_stages
+enable_alltoall              pipeline_stages
 \                            grad_accumulation_step
 =========================    =========================
@@ -211,6 +211,7 @@ MindSpore context, used to configure the current execution environment, including the execution mode, execution
 - **full_batch** (bool) - If the whole batch of the dataset is loaded under auto_parallel mode, this parameter should be set to True. Default: False. This interface is no longer recommended; use dataset_strategy to replace it.
 - **dataset_strategy** (Union[str, tuple]) - The dataset sharding strategy. Default: "data_parallel". dataset_strategy="data_parallel" is equivalent to full_batch=False, and dataset_strategy="full_batch" is equivalent to full_batch=True. For a dataset loaded into the network through a model parallel strategy, such as ds_stra = ((1, 8), (1, 8)), set_auto_parallel_context(dataset_strategy=ds_stra) must be used.
 - **enable_parallel_optimizer** (bool) - A developing feature that shards the weight update computation for data parallel training to save time and memory. Currently, auto and semi-auto parallel modes support all optimizers on both Ascend and GPU; data parallel mode only supports `Lamb` and `AdamWeightDecay` on Ascend. Default: False.
+- **enable_alltoall** (bool) - A switch that allows the `AllToAll` communication operator to be generated during communication. If its value is False, a combination of communication operators such as `AllGather`, `Split` and `Concat` will be used instead of `AllToAll`. Default: False.
 - **all_reduce_fusion_config** (list) - Set the AllReduce fusion strategy by parameter indices. Only ReduceOp.SUM and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP are supported. No default value; if it is not set, operator fusion is disabled.
 - **pipeline_stages** (int) - Set the stage information for pipeline parallelism. This indicates how the devices are distributed individually across the pipeline; all devices will be divided into pipeline_stages stages. Currently, this can only be used when semi_auto_parallel mode is enabled. Default: 1.
 - **grad_accumulation_step** (int) - Set the gradient accumulation step in auto and semi-auto parallel modes. Its value should be a positive integer. Default: 1.
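The full_batch/dataset_strategy equivalence described in this hunk can be sketched as follows. This is an illustrative snippet rather than part of the diff; the 8-device layout is an assumption taken from the ds_stra example above.

    from mindspore import context

    # Equivalent configurations per the description above: shard the dataset
    # along the batch dimension (data parallel loading).
    context.set_auto_parallel_context(dataset_strategy="data_parallel")  # same effect as full_batch=False

    # For a dataset fed with a model parallel layout, pass the strategy tuple
    # instead, e.g. the ds_stra value quoted in the text:
    ds_stra = ((1, 8), (1, 8))
    context.set_auto_parallel_context(dataset_strategy=ds_stra)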
@@ -260,6 +261,7 @@ MindSpore context, used to configure the current execution environment, including the execution mode, execution
 - strategy_ckpt_save_file: ''.
 - full_batch: False.
 - enable_parallel_optimizer: False.
+- enable_alltoall: False.
 - pipeline_stages: 1.

 .. py:class:: mindspore.context.ParallelMode
@@ -398,7 +398,7 @@ def _context():

 @args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool, parallel_mode=str,
                  auto_parallel_search_mode=str, search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str,
-                 strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool,
+                 strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool, enable_alltoall=bool,
                  all_reduce_fusion_config=list, pipeline_stages=int, grad_accumulation_step=int,
                  parallel_optimizer_config=dict, comm_fusion=dict)
 def set_auto_parallel_context(**kwargs):
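Since enable_alltoall=bool is now listed in @args_type_check, the new keyword gets the same upfront type validation as the existing ones. A hedged sketch, not part of the diff; the exact exception raised on a bad type is an assumption (TypeError in the MindSpore releases I am aware of):

    from mindspore import context

    # A plain boolean passes the decorator's type check.
    context.set_auto_parallel_context(enable_alltoall=True)

    # A non-bool value is expected to be rejected before it reaches the parallel
    # context; the TypeError below is an assumption, not verified for every release.
    try:
        context.set_auto_parallel_context(enable_alltoall="yes")
    except TypeError:
        print("enable_alltoall must be a bool")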
@@ -427,7 +427,7 @@ def set_auto_parallel_context(**kwargs):
         all_reduce_fusion_config     strategy_ckpt_save_file
         enable_parallel_optimizer    dataset_strategy
         parallel_optimizer_config    pipeline_stages
-        \                            grad_accumulation_step
+        enable_alltoall              grad_accumulation_step
         \                            auto_parallel_search_mode
         \                            comm_fusion
         ===========================  ===========================
@@ -481,6 +481,9 @@ def set_auto_parallel_context(**kwargs):
             data parallel training in the benefit of time and memory saving. Currently, auto and semi auto
             parallel mode support all optimizers in both Ascend and GPU. Data parallel mode only supports
             `Lamb` and `AdamWeightDecay` in Ascend. Default: False.
+        enable_alltoall (bool): A switch that allows AllToAll operators to be generated during communication. If its
+            value is False, there will be a combination of operators such as AllGather, Split and Concat
+            instead of AllToAll. Default: False.
         all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices. Only support ReduceOp.SUM
             and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP. No Default, if it is not set, the fusion is closed.
         pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how the devices are
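To make the AllGather/Split/Concat fallback described in the new docstring lines concrete, here is a small single-process NumPy sketch. It is illustrative only and uses no MindSpore communication ops; "ranks" are simulated as list entries. It checks that the decomposition reproduces an all-to-all exchange.

    import numpy as np

    world_size = 4
    # inputs[r] is rank r's local tensor, pre-split into world_size row chunks.
    inputs = [np.arange(world_size * 2).reshape(world_size, 2) + 100 * r
              for r in range(world_size)]

    # Reference AllToAll: rank r receives chunk r from every source rank.
    alltoall = [np.concatenate([inputs[src][r:r + 1] for src in range(world_size)])
                for r in range(world_size)]

    # Fallback path: AllGather everything, Split into per-(src, dst) pieces,
    # then Concat only the pieces addressed to this rank.
    gathered = np.concatenate(inputs)                    # identical on every rank after AllGather
    chunks = np.split(gathered, world_size * world_size)
    fallback = [np.concatenate([chunks[src * world_size + r] for src in range(world_size)])
                for r in range(world_size)]

    for r in range(world_size):
        assert np.array_equal(alltoall[r], fallback[r])

A single AllToAll avoids materializing the fully gathered tensor on every device, which is presumably why the dedicated operator is preferable when the backend supports it.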
@@ -545,6 +548,7 @@ def set_auto_parallel_context(**kwargs):
         >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
         >>> context.set_auto_parallel_context(dataset_strategy=((1, 8), (1, 8)))
         >>> context.set_auto_parallel_context(enable_parallel_optimizer=False)
+        >>> context.set_auto_parallel_context(enable_alltoall=False)
         >>> context.set_auto_parallel_context(all_reduce_fusion_config=[8, 160])
         >>> context.set_auto_parallel_context(pipeline_stages=2)
         >>> parallel_config = {"gradient_accumulation_shard": True, "parallel_optimizer_threshold": 24}
@@ -592,6 +596,7 @@ def reset_auto_parallel_context():
         - strategy_ckpt_save_file: ''.
         - full_batch: False.
         - enable_parallel_optimizer: False.
+        - enable_alltoall: False.
         - pipeline_stages: 1.
         - fusion_threshold: 64.
     """