From d4cfe55c040ff3fc5998d09ef195ffc8167af756 Mon Sep 17 00:00:00 2001
From: yao_yf
Date: Fri, 4 Sep 2020 14:31:12 +0800
Subject: [PATCH] rename mirror_mean to gradients_mean

---
 mindspore/ccsrc/frontend/parallel/context.cc       |  4 +--
 mindspore/ccsrc/frontend/parallel/context.h        |  6 ++---
 .../parallel/ops_info/operator_info.cc             |  2 +-
 .../ccsrc/frontend/parallel/step_parallel.cc       |  2 +-
 mindspore/ccsrc/pipeline/jit/init.cc               |  4 +--
 mindspore/context.py                               | 10 +++----
 mindspore/nn/wrap/cell_wrapper.py                  |  4 +--
 mindspore/nn/wrap/grad_reducer.py                  |  2 +-
 mindspore/nn/wrap/loss_scale.py                    |  4 +--
 mindspore/parallel/_auto_parallel_context.py       | 26 +++++++++----------
 mindspore/parallel/_utils.py                       |  6 ++---
 model_zoo/official/cv/deeplabv3/train.py           |  2 +-
 model_zoo/official/cv/faster_rcnn/train.py         |  2 +-
 model_zoo/official/cv/googlenet/train.py           |  4 +--
 model_zoo/official/cv/inceptionv3/train.py         |  2 +-
 model_zoo/official/cv/maskrcnn/train.py            |  2 +-
 .../official/cv/mobilenetv2/src/utils.py           |  4 +--
 .../official/cv/mobilenetv2_quant/train.py         |  4 +--
 model_zoo/official/cv/mobilenetv3/train.py         |  2 +-
 .../official/cv/nasnet/src/nasnet_a_mobile.py      |  4 +--
 model_zoo/official/cv/nasnet/train.py              |  2 +-
 model_zoo/official/cv/resnet/train.py              |  4 +--
 model_zoo/official/cv/resnet50_quant/train.py      |  4 +--
 .../cv/resnet_thor/src/grad_reducer_thor.py        |  2 +-
 model_zoo/official/cv/resnet_thor/src/thor.py      |  6 ++---
 model_zoo/official/cv/resnet_thor/train.py         |  4 +--
 model_zoo/official/cv/resnext50/eval.py            |  2 +-
 model_zoo/official/cv/resnext50/train.py           |  2 +-
 model_zoo/official/cv/shufflenetv2/train.py        |  2 +-
 model_zoo/official/cv/ssd/src/ssd.py               |  2 +-
 model_zoo/official/cv/ssd/train.py                 |  2 +-
 model_zoo/official/cv/vgg16/train.py               |  2 +-
 .../cv/warpctc/src/warpctc_for_train.py            |  4 +--
 model_zoo/official/cv/warpctc/train.py             |  2 +-
 .../official/cv/yolov3_darknet53/eval.py           |  2 +-
 .../official/cv/yolov3_darknet53/src/yolo.py       |  2 +-
 .../official/cv/yolov3_darknet53/train.py          |  2 +-
 .../cv/yolov3_darknet53_quant/eval.py              |  2 +-
 .../cv/yolov3_darknet53_quant/src/yolo.py          |  2 +-
 .../cv/yolov3_darknet53_quant/train.py             |  2 +-
 .../official/cv/yolov3_resnet18/src/yolov3.py      |  2 +-
 .../official/cv/yolov3_resnet18/train.py           |  2 +-
 model_zoo/official/nlp/bert/run_pretrain.py        |  2 +-
 .../nlp/bert/src/bert_for_finetune.py              |  4 +--
 .../nlp/bert/src/bert_for_pre_training.py          |  2 +-
 .../official/nlp/bert_thor/run_pretrain.py         |  2 +-
 .../bert_thor/src/bert_for_pre_training.py         |  2 +-
 .../nlp/bert_thor/src/grad_reducer_thor.py         |  2 +-
 .../nlp/bert_thor/src/thor_for_bert_arg.py         |  4 +--
 .../src/transformer/transformer_for_train.py       |  4 +--
 model_zoo/official/nlp/mass/train.py               |  2 +-
 .../nlp/tinybert/run_general_distill.py            |  2 +-
 .../nlp/tinybert/src/tinybert_for_gd_td.py         |  4 +--
 .../transformer/src/transformer_for_train.py       |  6 ++---
 model_zoo/official/nlp/transformer/train.py        |  2 +-
 model_zoo/official/recommend/deepfm/train.py       |  4 +--
 .../wide_and_deep/src/wide_and_deep.py             |  2 +-
 .../train_and_eval_auto_parallel.py                |  4 +--
 .../train_and_eval_distribute.py                   |  2 +-
 .../train_and_eval_parameter_server.py             |  2 +-
 .../src/wide_and_deep.py                           |  2 +-
 .../train_and_eval_distribute.py                   |  2 +-
 .../st/auto_parallel/resnet50_expand_loss.py       |  2 +-
 .../train_and_test_multinpu_ci.py                  |  2 +-
 .../python_file_for_ci/wide_and_deep.py            |  4 +--
 ...rain_and_test_multinpu_ci_data_parallel.py      |  2 +-
 tests/st/model_zoo_tests/yolov3/src/yolov3.py      |  2 +-
 tests/st/nccl/test_nccl_lenet.py                   |  2 +-
 .../models/bert/src/bert_for_pre_training.py       |  2 +-
 tests/st/networks/models/bert/src/utils.py         |  2 +-
 .../resnet50/src_thor/grad_reducer_thor.py         |  2 +-
 .../networks/models/resnet50/src_thor/thor.py      |  4 +--
 .../models/resnet50/test_resnet50_imagenet.py      |  4 +--
 .../st/ps/multi_full_ps/test_multi_full_ps.py      |  3 ++-
 .../communication/test_data_parallel_dense.py      |  2 +-
 .../communication/test_data_parallel_lenet.py      |  2 +-
 tests/ut/python/model/test_mix_precision.py        |  2 +-
 tests/ut/python/parallel/test_optimizer.py         |  2 +-
 .../test_set_auto_parallel_context.py              | 16 ++++++------
 tests/ut/python/parallel/test_two_matmul.py        |  4 +--
 tests/ut/python/train/test_amp.py                  |  2 +-
 81 files changed, 135 insertions(+), 134 deletions(-)

diff --git a/mindspore/ccsrc/frontend/parallel/context.cc b/mindspore/ccsrc/frontend/parallel/context.cc
index 6624e8e5667..d4c1bf6edc5 100644
--- a/mindspore/ccsrc/frontend/parallel/context.cc
+++ b/mindspore/ccsrc/frontend/parallel/context.cc
@@ -45,7 +45,7 @@ std::shared_ptr<ParallelContext> ParallelContext::GetInstance() {
 ParallelContext::ParallelContext() { Reset(); }
 
 void ParallelContext::Reset() {
-  mirror_mean_ = false;
+  gradients_mean_ = false;
   full_batch_ = false;
   gradient_fp32_sync_ = true;
   loss_repeated_mean_ = true;
@@ -74,7 +74,7 @@ void ParallelContext::set_global_rank(int32_t global_rank) {
   global_rank_is_set_ = true;
 }
 
-void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_mean; }
+void ParallelContext::set_gradients_mean(bool gradients_mean) { gradients_mean_ = gradients_mean; }
 
 void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }
 
diff --git a/mindspore/ccsrc/frontend/parallel/context.h b/mindspore/ccsrc/frontend/parallel/context.h
index 828300af1cc..3f55f9a1528 100644
--- a/mindspore/ccsrc/frontend/parallel/context.h
+++ b/mindspore/ccsrc/frontend/parallel/context.h
@@ -52,8 +52,8 @@ class ParallelContext {
 
   static std::shared_ptr<ParallelContext> GetInstance();
 
-  void set_mirror_mean(bool mirror_mean);
-  bool mirror_mean() const { return mirror_mean_; }
+  void set_gradients_mean(bool gradients_mean);
+  bool gradients_mean() const { return gradients_mean_; }
 
   void set_full_batch(bool full_batch);
   bool full_batch() const { return full_batch_; }
@@ -107,7 +107,7 @@ class ParallelContext {
  private:
  ParallelContext();
   static std::shared_ptr<ParallelContext> inst_context_;
-  bool mirror_mean_;
+  bool gradients_mean_;
   bool full_batch_;
   bool gradient_fp32_sync_;
   bool loss_repeated_mean_;
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
index 60a1c783ecf..213b872e4b5 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
@@ -251,7 +251,7 @@ OperatorVector CreateMirrorOps(const std::string &group_name, size_t dev_num) {
     MS_LOG(EXCEPTION) << "Invalid dev num: " << dev_num;
   }
   OperatorVector op_for_weight;
-  bool mean_flag = ParallelContext::GetInstance()->mirror_mean();
+  bool mean_flag = ParallelContext::GetInstance()->gradients_mean();
 
   OperatorName operator_name = MIRROR_OPERATOR;
   ValuePtr attr0_value = MakeValue(group_name);
diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
index 9f934e429cb..ef36e912bcb 100644
--- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
@@ -2488,7 +2488,7 @@ Status ParallelInit() {
   }
 
   MS_LOG(INFO) << "The parallel context: dev num: " << device_num << ", global rank: " << global_rank
-               << ", backend: " << backend << ", mirror_mean: " << ParallelContext::GetInstance()->mirror_mean()
+               << ", backend: " << backend << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean()
                << ", gradient_fp32_sync: " << ParallelContext::GetInstance()->gradient_fp32_sync();
   return SUCCESS;
 }
diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index 54b9bac552d..cc43ec6bf22 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@@ -113,8 +113,8 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("get_global_rank", &ParallelContext::global_rank, "Get global rank.")
     .def("set_global_rank", &ParallelContext::set_global_rank, "Set global rank.")
     .def("get_global_rank_is_set", &ParallelContext::global_rank_is_set, "Get global rank is set.")
-    .def("get_mirror_mean", &ParallelContext::mirror_mean, "Get mirror mean.")
-    .def("set_mirror_mean", &ParallelContext::set_mirror_mean, "Set mirror mean.")
+    .def("get_gradients_mean", &ParallelContext::gradients_mean, "Get gradients mean.")
+    .def("set_gradients_mean", &ParallelContext::set_gradients_mean, "Set gradients mean.")
     .def("get_gradient_fp32_sync", &ParallelContext::gradient_fp32_sync, "Get cast before mirror.")
     .def("set_gradient_fp32_sync", &ParallelContext::set_gradient_fp32_sync, "Set cast before mirror.")
     .def("get_loss_repeated_mean", &ParallelContext::loss_repeated_mean, "Get loss repeated mean.")
diff --git a/mindspore/context.py b/mindspore/context.py
index 1d4b76e57c8..8b5023f957d 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -323,7 +323,7 @@ def _context():
     return _k_context
 
 
-@args_type_check(device_num=int, global_rank=int, mirror_mean=bool, gradient_fp32_sync=bool, parallel_mode=str,
+@args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool, parallel_mode=str,
                 auto_parallel_search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str,
                 strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool)
 def set_auto_parallel_context(**kwargs):
@@ -341,8 +341,8 @@ def set_auto_parallel_context(**kwargs):
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
         global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
-        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror.
-                     "stand_alone" do not support mirror_mean. Default: False.
+        gradients_mean (bool): Whether to perform the mean operator after all-reduce of gradients.
+                     "stand_alone" does not support gradients_mean. Default: False.
         gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True..
                      "stand_alone", "data_parallel" and "hybrid_parallel" do not support
                      gradient_fp32_sync. Default: True.
@@ -380,7 +380,7 @@ def set_auto_parallel_context(**kwargs):
     Examples:
         >>> context.set_auto_parallel_context(device_num=8)
         >>> context.set_auto_parallel_context(global_rank=0)
-        >>> context.set_auto_parallel_context(mirror_mean=True)
+        >>> context.set_auto_parallel_context(gradients_mean=True)
         >>> context.set_auto_parallel_context(gradient_fp32_sync=False)
         >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
         >>> context.set_auto_parallel_context(parameter_broadcast=False)
@@ -412,7 +412,7 @@ def reset_auto_parallel_context():
 
     - device_num: 1.
     - global_rank: 0.
-    - mirror_mean: False.
+    - gradients_mean: False.
    - gradient_fp32_sync: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
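
For reference, the renamed option as a caller sees it. A minimal sketch, assuming an 8-card data-parallel launch on Ascend; the device number is a placeholder:

    from mindspore import context
    from mindspore.context import ParallelMode
    from mindspore.communication.management import init

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    init()
    # The flag formerly spelled mirror_mean: average gradients across
    # devices after the all-reduce instead of leaving them summed.
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=8, gradients_mean=True)
    assert context.get_auto_parallel_context("gradients_mean")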
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index d24c166cafd..91194e8d784 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """Cell_wrapper."""
-from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean,
+from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                        _get_parallel_mode)
 from mindspore.context import ParallelMode
 from ...common import dtype as mstype
@@ -190,7 +190,7 @@ class TrainOneStepCell(Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py
index 345854a8584..47f502e7802 100644
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -279,7 +279,7 @@ class DistributedGradReducer(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py
index 999873ea6e7..19bd6b6580f 100644
--- a/mindspore/nn/wrap/loss_scale.py
+++ b/mindspore/nn/wrap/loss_scale.py
@@ -16,7 +16,7 @@
 import mindspore.context as context
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from ..cell import Cell
 from ...common import Tensor, RowTensor
 from ...common.parameter import Parameter
@@ -231,7 +231,7 @@ class TrainOneStepWithLossScaleCell(Cell):
         self.grad_reducer = F.identity
         self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py
index 0cd11d7fb8e..6756912d641 100644
--- a/mindspore/parallel/_auto_parallel_context.py
+++ b/mindspore/parallel/_auto_parallel_context.py
@@ -95,23 +95,23 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_global_rank()
 
-    def set_mirror_mean(self, mirror_mean):
+    def set_gradients_mean(self, gradients_mean):
         """
-        Set mirror_mean flag.
+        Set gradients_mean flag.
 
         Note:
-            If mirror_mean is true, it will insert a div operator after parameter gradients allreduce.
+            If gradients_mean is true, it will insert a div operator after parameter gradients allreduce.
 
         Args:
-            mirror_mean (bool): The mirror_mean flag.
+            gradients_mean (bool): The gradients_mean flag.
         """
         self.check_context_handle()
-        self._context_handle.set_mirror_mean(mirror_mean)
+        self._context_handle.set_gradients_mean(gradients_mean)
 
-    def get_mirror_mean(self):
-        """Get mirror_mean flag."""
+    def get_gradients_mean(self):
+        """Get gradients_mean flag."""
         self.check_context_handle()
-        return self._context_handle.get_mirror_mean()
+        return self._context_handle.get_gradients_mean()
 
     def set_gradient_fp32_sync(self, gradient_fp32_sync):
         """
@@ -453,7 +453,7 @@ def auto_parallel_context():
 _set_auto_parallel_context_func_map = {
     "device_num": auto_parallel_context().set_device_num,
     "global_rank": auto_parallel_context().set_global_rank,
-    "mirror_mean": auto_parallel_context().set_mirror_mean,
+    "gradients_mean": auto_parallel_context().set_gradients_mean,
     "gradient_fp32_sync": auto_parallel_context().set_gradient_fp32_sync,
     "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().set_parallel_mode,
@@ -468,7 +468,7 @@ _set_auto_parallel_context_func_map = {
 _get_auto_parallel_context_func_map = {
     "device_num": auto_parallel_context().get_device_num,
     "global_rank": auto_parallel_context().get_global_rank,
-    "mirror_mean": auto_parallel_context().get_mirror_mean,
+    "gradients_mean": auto_parallel_context().get_gradients_mean,
     "gradient_fp32_sync": auto_parallel_context().get_gradient_fp32_sync,
     "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().get_parallel_mode,
@@ -480,7 +480,7 @@ _get_auto_parallel_context_func_map = {
     "enable_parallel_optimizer": auto_parallel_context().get_enable_parallel_optimizer}
 
 
-@args_type_check(device_num=int, global_rank=int, mirror_mean=bool, gradient_fp32_sync=bool,
+@args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool,
                 loss_repeated_mean=bool, parallel_mode=str, auto_parallel_search_mode=str,
                 parameter_broadcast=bool, strategy_ckpt_load_file=str, strategy_ckpt_save_file=str,
                 full_batch=bool, enable_parallel_optimizer=bool)
@@ -495,7 +495,7 @@ def _set_auto_parallel_context(**kwargs):
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
         global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
-        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False.
+        gradients_mean (bool): Whether to perform the mean operator after all-reduce of gradients. Default: False.
         loss_repeated_mean (bool): Whether to perform mean operator in backward in the case of repeated
                         calculations. Default: True.
         gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True.
@@ -562,7 +562,7 @@ def _reset_auto_parallel_context():
 
     - device_num: 1.
     - global_rank: 0.
-    - mirror_mean: False.
+    - gradients_mean: False.
     - gradient_fp32_sync: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py
index ff1fbcc6c2b..1c93ae20029 100644
--- a/mindspore/parallel/_utils.py
+++ b/mindspore/parallel/_utils.py
@@ -88,9 +88,9 @@ def _to_full_tensor(elem, device_num, global_rank, scaling_sens=None):
         lst.append(Tensor(scaling_sens, mstype.float32))
     return tuple(lst)
 
-def _get_mirror_mean():
-    """Get if using mirror_mean."""
-    return auto_parallel_context().get_mirror_mean()
+def _get_gradients_mean():
+    """Get if using gradients_mean."""
+    return auto_parallel_context().get_gradients_mean()
 
 
 def _get_device_num():
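
Every model-zoo change below repeats one pattern: read the renamed flag, read the device count, and build a DistributedGradReducer. Shown once as a sketch; `optimizer` is a placeholder for a real mindspore.nn optimizer:

    from mindspore.context import ParallelMode
    from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
    from mindspore.ops import functional as F
    from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                           _get_parallel_mode)

    def build_grad_reducer(optimizer):
        """Return an all-reducing gradient wrapper, or identity when not distributed."""
        if _get_parallel_mode() in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
            mean = _get_gradients_mean()   # renamed from _get_mirror_mean()
            degree = _get_device_num()
            return DistributedGradReducer(optimizer.parameters, mean, degree)
        return F.identity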
diff --git a/model_zoo/official/cv/deeplabv3/train.py b/model_zoo/official/cv/deeplabv3/train.py
index 1e501105c02..da84215fd96 100644
--- a/model_zoo/official/cv/deeplabv3/train.py
+++ b/model_zoo/official/cv/deeplabv3/train.py
@@ -66,7 +66,7 @@ def model_fine_tune(flags, train_net, fix_weight_layer):
             para.requires_grad = False
 if __name__ == "__main__":
     if args_opt.distribute == "true":
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
         init()
     args_opt.base_size = config.crop_size
     args_opt.crop_size = config.crop_size
diff --git a/model_zoo/official/cv/faster_rcnn/train.py b/model_zoo/official/cv/faster_rcnn/train.py
index 2add1913c5a..53238a0dd75 100644
--- a/model_zoo/official/cv/faster_rcnn/train.py
+++ b/model_zoo/official/cv/faster_rcnn/train.py
@@ -54,7 +54,7 @@ if __name__ == '__main__':
         rank = args_opt.rank_id
         device_num = args_opt.device_num
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         init()
     else:
         rank = 0
diff --git a/model_zoo/official/cv/googlenet/train.py b/model_zoo/official/cv/googlenet/train.py
index 3e0fae74118..53de6ef9c41 100644
--- a/model_zoo/official/cv/googlenet/train.py
+++ b/model_zoo/official/cv/googlenet/train.py
@@ -78,7 +78,7 @@ if __name__ == '__main__':
         if device_num > 1:
             context.reset_auto_parallel_context()
             context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             init()
     elif device_target == "GPU":
         init()
@@ -86,7 +86,7 @@ if __name__ == '__main__':
         if device_num > 1:
             context.reset_auto_parallel_context()
             context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
     else:
         raise ValueError("Unsupported platform.")
 
diff --git a/model_zoo/official/cv/inceptionv3/train.py b/model_zoo/official/cv/inceptionv3/train.py
index b3af5beefbd..4efc558ad35 100644
--- a/model_zoo/official/cv/inceptionv3/train.py
+++ b/model_zoo/official/cv/inceptionv3/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/maskrcnn/train.py b/model_zoo/official/cv/maskrcnn/train.py
index c9a40303633..0081cec6d3c 100644
--- a/model_zoo/official/cv/maskrcnn/train.py
+++ b/model_zoo/official/cv/maskrcnn/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         rank = args_opt.rank_id
         device_num = args_opt.device_num
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         init()
     else:
         rank = 0
diff --git a/model_zoo/official/cv/mobilenetv2/src/utils.py b/model_zoo/official/cv/mobilenetv2/src/utils.py
index d0a09afb8b7..5a05f397a4a 100644
--- a/model_zoo/official/cv/mobilenetv2/src/utils.py
+++ b/model_zoo/official/cv/mobilenetv2/src/utils.py
@@ -39,7 +39,7 @@ def context_device_init(config):
         init("nccl")
         context.set_auto_parallel_context(device_num=get_group_size(),
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
 
     elif config.platform == "Ascend":
         context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, device_id=config.device_id,
@@ -47,7 +47,7 @@ def context_device_init(config):
         if config.run_distribute:
             context.set_auto_parallel_context(device_num=config.rank_size,
                                               parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              parameter_broadcast=True, mirror_mean=True)
+                                              parameter_broadcast=True, gradients_mean=True)
             auto_parallel_context().set_all_reduce_fusion_split_indices([140])
             init()
     else:
diff --git a/model_zoo/official/cv/mobilenetv2_quant/train.py b/model_zoo/official/cv/mobilenetv2_quant/train.py
index 40d9b83ae23..30455bbdd56 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/train.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/train.py
@@ -57,7 +57,7 @@ elif args_opt.device_target == "GPU":
     init()
    context.set_auto_parallel_context(device_num=get_group_size(),
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     context.set_context(mode=context.GRAPH_MODE,
                         device_target="GPU",
                         save_graphs=False)
@@ -77,7 +77,7 @@ def train_on_ascend():
     context.set_auto_parallel_context(device_num=rank_size,
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
                                       parameter_broadcast=True,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     init()
 
 # define network
diff --git a/model_zoo/official/cv/mobilenetv3/train.py b/model_zoo/official/cv/mobilenetv3/train.py
index 9f05fe4fde8..9ffa0b836f0 100644
--- a/model_zoo/official/cv/mobilenetv3/train.py
+++ b/model_zoo/official/cv/mobilenetv3/train.py
@@ -55,7 +55,7 @@ if args_opt.device_target == "GPU":
     init()
     context.set_auto_parallel_context(device_num=get_group_size(),
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
 else:
     raise ValueError("Unsupported device_target.")
 
diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
index fbf13ec9909..2f95a0dc764 100755
--- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
+++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
@@ -24,7 +24,7 @@ import mindspore.ops.composite as C
 import mindspore.common.dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.train.parallel_utils import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 
 
 GRADIENT_CLIP_TYPE = 1
@@ -921,7 +921,7 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/cv/nasnet/train.py b/model_zoo/official/cv/nasnet/train.py
index 290dc892e4d..46143c35ec5 100755
--- a/model_zoo/official/cv/nasnet/train.py
+++ b/model_zoo/official/cv/nasnet/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py
index 6d65cce48e8..a45e26bd834 100755
--- a/model_zoo/official/cv/resnet/train.py
+++ b/model_zoo/official/cv/resnet/train.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
             device_id = int(os.getenv('DEVICE_ID'))
             context.set_context(device_id=device_id, enable_auto_mixed_precision=True)
             context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             if args_opt.net == "resnet50" or args_opt.net == "se-resnet50":
                 auto_parallel_context().set_all_reduce_fusion_split_indices([85, 160])
             else:
@@ -86,7 +86,7 @@ if __name__ == '__main__':
         else:
             init()
             context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             if args_opt.net == "resnet50":
                 auto_parallel_context().set_all_reduce_fusion_split_indices([85, 160])
             ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
diff --git a/model_zoo/official/cv/resnet50_quant/train.py b/model_zoo/official/cv/resnet50_quant/train.py
index 927fe98c777..f4a3965c838 100755
--- a/model_zoo/official/cv/resnet50_quant/train.py
+++ b/model_zoo/official/cv/resnet50_quant/train.py
@@ -76,11 +76,11 @@ if __name__ == '__main__':
     context.set_auto_parallel_context(device_num=rank_size,
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
                                       parameter_broadcast=True,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     init()
 
     context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
 
 # define network
diff --git a/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py b/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
index 35cbaa1460b..dbc7b3796a6 100644
--- a/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/model_zoo/official/cv/resnet_thor/src/thor.py b/model_zoo/official/cv/resnet_thor/src/thor.py
index 7fc0d9b74ed..44b6930684b 100644
--- a/model_zoo/official/cv/resnet_thor/src/thor.py
+++ b/model_zoo/official/cv/resnet_thor/src/thor.py
@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
 from mindspore._checkparam import check_bool
 from mindspore._checkparam import Validator as validator
 from mindspore.nn.optim.optimizer import Optimizer
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from src.grad_reducer_thor import DistributedGradReducerThor
 
 _momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -85,7 +85,7 @@ class THOR_GPU(Optimizer):
         self.assign = P.Assign()
         self.mul = P.Mul()
 
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_thorA = DistributedGradReducerThor(self.parameters, 0, mean, degree)
         self.grad_reducer_thorG = DistributedGradReducerThor(self.parameters, 0, mean, degree)
@@ -191,7 +191,7 @@ class THOR(Optimizer):
                              1.0 / 196, 1.0 / 196, 1.0 / 196,
                             1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
                              1.0]
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
         self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
diff --git a/model_zoo/official/cv/resnet_thor/train.py b/model_zoo/official/cv/resnet_thor/train.py
index 1b651086075..5d8ce2f38f8 100644
--- a/model_zoo/official/cv/resnet_thor/train.py
+++ b/model_zoo/official/cv/resnet_thor/train.py
@@ -94,7 +94,7 @@ if __name__ == '__main__':
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(device_id=device_id, enable_auto_mixed_precision=True)
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107], "hccl_world_groupsum1")
         auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum2")
         auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum3")
@@ -105,7 +105,7 @@ if __name__ == '__main__':
     else:
         init()
         context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107])
         ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
 
diff --git a/model_zoo/official/cv/resnext50/eval.py b/model_zoo/official/cv/resnext50/eval.py
index 06b4acfe05a..88e7ce8e4bc 100644
--- a/model_zoo/official/cv/resnext50/eval.py
+++ b/model_zoo/official/cv/resnext50/eval.py
@@ -117,7 +117,7 @@ def test(cloud_args=None):
         args.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         args.rank = 0
         args.group_size = 1
diff --git a/model_zoo/official/cv/resnext50/train.py b/model_zoo/official/cv/resnext50/train.py
index 8e7d4b0220e..d3a3873f3a2 100644
--- a/model_zoo/official/cv/resnext50/train.py
+++ b/model_zoo/official/cv/resnext50/train.py
@@ -179,7 +179,7 @@ def train(cloud_args=None):
         args.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         args.rank = 0
         args.group_size = 1
diff --git a/model_zoo/official/cv/shufflenetv2/train.py b/model_zoo/official/cv/shufflenetv2/train.py
index ed70f9186e4..066b225d9f2 100644
--- a/model_zoo/official/cv/shufflenetv2/train.py
+++ b/model_zoo/official/cv/shufflenetv2/train.py
@@ -60,7 +60,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py
index 8fbca66c5c6..631aa44470a 100644
--- a/model_zoo/official/cv/ssd/src/ssd.py
+++ b/model_zoo/official/cv/ssd/src/ssd.py
@@ -392,7 +392,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/ssd/train.py b/model_zoo/official/cv/ssd/train.py
index 34b1e3ca2b6..08aeaec8aa3 100644
--- a/model_zoo/official/cv/ssd/train.py
+++ b/model_zoo/official/cv/ssd/train.py
@@ -60,7 +60,7 @@ def main():
     if args_opt.distribute:
         device_num = args_opt.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         init()
         rank = args_opt.device_id % device_num
diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py
index 832664eb0c1..1eeed9a0388 100644
--- a/model_zoo/official/cv/vgg16/train.py
+++ b/model_zoo/official/cv/vgg16/train.py
@@ -140,7 +140,7 @@ if __name__ == '__main__':
         device_num = args.group_size
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         context.set_context(device_id=args.device_id)
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
index 1640342fad6..d58a72b4d3f 100755
--- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
+++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
@@ -14,7 +14,7 @@
 # ============================================================================
 """Automatic differentiation with grad clip."""
 import numpy as np
-from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean,
+from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                        _get_parallel_mode)
 from mindspore.context import ParallelMode
 from mindspore.common import dtype as mstype
@@ -93,7 +93,7 @@ class TrainOneStepCellWithGradClip(Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/cv/warpctc/train.py b/model_zoo/official/cv/warpctc/train.py
index db3775a04b7..bd4e64a7257 100755
--- a/model_zoo/official/cv/warpctc/train.py
+++ b/model_zoo/official/cv/warpctc/train.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=device_num,
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
     else:
         device_num = 1
         rank = 0
diff --git a/model_zoo/official/cv/yolov3_darknet53/eval.py b/model_zoo/official/cv/yolov3_darknet53/eval.py
index 7352db7a5d8..f9d74ba1c1d 100644
--- a/model_zoo/official/cv/yolov3_darknet53/eval.py
+++ b/model_zoo/official/cv/yolov3_darknet53/eval.py
@@ -255,7 +255,7 @@ def test():
 
     context.reset_auto_parallel_context()
     parallel_mode = ParallelMode.STAND_ALONE
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=1)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
     network = YOLOV3DarkNet53(is_training=False)
diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
index eae0d9f028c..5a191b9b343 100644
--- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_darknet53/train.py b/model_zoo/official/cv/yolov3_darknet53/train.py
index a17b32d66c5..f3449908ea0 100644
--- a/model_zoo/official/cv/yolov3_darknet53/train.py
+++ b/model_zoo/official/cv/yolov3_darknet53/train.py
@@ -178,7 +178,7 @@ def train():
     else:
         parallel_mode = ParallelMode.STAND_ALONE
         degree = 1
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
 
     network = YOLOV3DarkNet53(is_training=True)
     # default is kaiming-normal
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
index 58b0067b562..a7a4297dbc2 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
@@ -254,7 +254,7 @@ def test():
 
     context.reset_auto_parallel_context()
     parallel_mode = ParallelMode.STAND_ALONE
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=1)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
     network = YOLOV3DarkNet53(is_training=False)
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
index 81b00303df0..755179cac1c 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/train.py b/model_zoo/official/cv/yolov3_darknet53_quant/train.py
index 975ea993d6c..c0a0ba5b305 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/train.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/train.py
@@ -162,7 +162,7 @@ def train():
     else:
         parallel_mode = ParallelMode.STAND_ALONE
         degree = 1
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
 
     network = YOLOV3DarkNet53(is_training=True)
     # default is kaiming-normal
diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
index 2fe4c8f07c6..1fecaba83e2 100644
--- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
+++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_resnet18/train.py b/model_zoo/official/cv/yolov3_resnet18/train.py
index 8830df1af33..19ba0c20d7d 100644
--- a/model_zoo/official/cv/yolov3_resnet18/train.py
+++ b/model_zoo/official/cv/yolov3_resnet18/train.py
@@ -92,7 +92,7 @@ def main():
     if args_opt.distribute:
         device_num = args_opt.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         init()
         rank = args_opt.device_id % device_num
diff --git a/model_zoo/official/nlp/bert/run_pretrain.py b/model_zoo/official/nlp/bert/run_pretrain.py
index b69ee52463f..6836da1d735 100644
--- a/model_zoo/official/nlp/bert/run_pretrain.py
+++ b/model_zoo/official/nlp/bert/run_pretrain.py
@@ -85,7 +85,7 @@ def run_pretrain():
         ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(rank) + '/'
 
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         from mindspore.parallel._auto_parallel_context import auto_parallel_context
         if bert_net_cfg.num_hidden_layers == 12:
diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
index 886cc15398d..1ad52ea07d9 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
@@ -66,7 +66,7 @@ class BertFinetuneCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
@@ -167,7 +167,7 @@ class BertSquadCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
index e257bedcf7c..616b4990208 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
@@ -283,7 +283,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/nlp/bert_thor/run_pretrain.py b/model_zoo/official/nlp/bert_thor/run_pretrain.py
index e9e04b67a88..42091cdafd4 100644
--- a/model_zoo/official/nlp/bert_thor/run_pretrain.py
+++ b/model_zoo/official/nlp/bert_thor/run_pretrain.py
@@ -87,7 +87,7 @@ def run_pretrain():
         ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(rank) + '/'
 
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         from mindspore.parallel._auto_parallel_context import auto_parallel_context
         if bert_net_cfg.num_hidden_layers == 12:
diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
index 81271a69920..98b1023f7cb 100644
--- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
@@ -301,7 +301,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py b/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
index cd0cc348192..dab75c99af1 100644
--- a/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py b/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
index aeb3cf309fe..0cc7e33276a 100644
--- a/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
+++ b/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from .grad_reducer_thor import DistributedGradReducerThor
 
 momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -83,7 +83,7 @@ class THOR(Optimizer):
         self.damping = damping
         self.one = Tensor(1, mstype.int32)
         self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)
 
diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
index fd5974e162c..fee7aac2b34 100644
--- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
+++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 
 from .transformer import Transformer
 from .grad_clip import GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE, ClipGradients
@@ -251,7 +251,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/mass/train.py b/model_zoo/official/nlp/mass/train.py
index 408ece34cbc..80ed331b54f 100644
--- a/model_zoo/official/nlp/mass/train.py
+++ b/model_zoo/official/nlp/mass/train.py
@@ -234,7 +234,7 @@ def _setup_parallel_env(platform):
         parallel_mode=ParallelMode.DATA_PARALLEL,
         device_num=MultiAscend.get_group_size(),
         parameter_broadcast=True,
-        mirror_mean=True
+        gradients_mean=True
     )
 
 
diff --git a/model_zoo/official/nlp/tinybert/run_general_distill.py b/model_zoo/official/nlp/tinybert/run_general_distill.py
index 7257c5883e3..cf7d876ae81 100644
--- a/model_zoo/official/nlp/tinybert/run_general_distill.py
+++ b/model_zoo/official/nlp/tinybert/run_general_distill.py
@@ -81,7 +81,7 @@ def run_general_distill():
         rank = D.get_rank()
         save_ckpt_dir = save_ckpt_dir + '_ckpt_' + str(rank)
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
     else:
         rank = 0
diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
index 4e595ec4e66..b4b927c1f9e 100644
--- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
+++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
@@ -318,7 +318,7 @@ class BertTrainCell(nn.Cell):
         self.grad_reducer = F.identity
         self.degree = 1
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             self.degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, self.degree)
         self.cast = P.Cast()
@@ -568,7 +568,7 @@ class BertEvaluationCell(nn.Cell):
         self.grad_reducer = F.identity
         self.degree = 1
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             self.degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, self.degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
index f26396d1f1b..a58a3fcce7b 100644
--- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py
+++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from mindspore.communication.management import get_group_size
 from mindspore import context
 from .transformer_model import TransformerModel
@@ -168,7 +168,7 @@ class TransformerTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
@@ -256,7 +256,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/transformer/train.py b/model_zoo/official/nlp/transformer/train.py
index 45207e19ee0..a1d040fad4c 100644
--- a/model_zoo/official/nlp/transformer/train.py
+++ b/model_zoo/official/nlp/transformer/train.py
@@ -118,7 +118,7 @@ def run_transformer_train():
     if args.distribute == "true":
         device_num = args.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           parameter_broadcast=True, device_num=device_num)
         D.init()
         rank_id = args.device_id % device_num
diff --git a/model_zoo/official/recommend/deepfm/train.py b/model_zoo/official/recommend/deepfm/train.py
index db660737ea1..f3299a42d69 100644
--- a/model_zoo/official/recommend/deepfm/train.py
+++ b/model_zoo/official/recommend/deepfm/train.py
@@ -56,7 +56,7 @@ if __name__ == '__main__':
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=device_id)
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
         init()
         rank_id = int(os.environ.get('RANK_ID'))
     elif args_opt.device_target == "GPU":
@@ -65,7 +65,7 @@ if __name__ == '__main__':
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=get_group_size(),
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         rank_id = get_rank()
     else:
         print("Unsupported device_target ", args_opt.device_target)
diff --git a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
index 6a03d6feff1..e579b26e1c8 100644
--- a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
@@ -367,7 +367,7 @@ class TrainStepWrap(nn.Cell):
         self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL)
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = context.get_auto_parallel_context("device_num")
             self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree)
             self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
index f20015c8079..24385da6b31 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
@@ -147,8 +147,8 @@ if __name__ == "__main__":
     init()
     if wide_deep_config.host_device_mix == 1:
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, mirror_mean=True)
+            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=True)
     else:
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
+            parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True)
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
index a460ec42ff2..7aed065b9ec 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
@@ -119,7 +119,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target,
                         save_graphs=True)
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
 
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
index b4f4e3c59e6..232488ceffd 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
@@ -119,7 +119,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target)
 
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
 
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py b/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
index 7d9b566a885..246fa2a82aa 100644
--- a/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
@@ -554,7 +554,7 @@ class TrainStepWrap(nn.Cell):
                                               ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = context.get_auto_parallel_context("device_num")
             self.grad_reducer_w = DistributedGradReducer(
                 self.optimizer_w.parameters, mean, degree)
diff --git a/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py b/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
index 4383a930715..0f278d16680 100644
--- a/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
+++ b/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
@@ -113,6 +113,6 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target="Davinci",
                         save_graphs=True)
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
     train_and_eval(wide_and_deep_config)
diff --git a/tests/st/auto_parallel/resnet50_expand_loss.py b/tests/st/auto_parallel/resnet50_expand_loss.py
index fc6d00ff612..de78b907288 100644
--- a/tests/st/auto_parallel/resnet50_expand_loss.py
+++ b/tests/st/auto_parallel/resnet50_expand_loss.py
@@ -34,7 +34,7 @@ from mindspore.context import ParallelMode
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 context.set_context(device_id=int(os.getenv('DEVICE_ID')))
 init()
-context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)
+context.set_auto_parallel_context(gradients_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)
 
 np.random.seed(10)
 
diff --git a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
index 857b9579c3f..1f46eed9f1a 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
@@ -31,7 +31,7 @@ from src.config import WideDeepConfig
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
-context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, mirror_mean=True)
+context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=True)
 init()
 
 
diff --git a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
index 6944b46b2d5..3043055f2a5 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
@@ -24,7 +24,7 @@ from mindspore.nn.optim import Adam, FTRL
 # from mindspore.nn.metrics import Metric
 from mindspore.common.initializer import Uniform, initializer
 # from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from mindspore.context import ParallelMode
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.communication.management import get_group_size
@@ -299,7 +299,7 @@ class TrainStepWrap(nn.Cell):
         self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL)
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree)
             self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree)
diff --git a/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py b/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
index b76f9d28ef2..77d301f556d 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
@@ -30,7 +30,7 @@ from src.config import WideDeepConfig
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
-context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
 init()
 
 
diff --git a/tests/st/model_zoo_tests/yolov3/src/yolov3.py b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
index 2fe4c8f07c6..1fecaba83e2 100644
--- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py
+++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py
index d4c08b9e2ed..0869e459c8c 100644
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@@ -78,7 +78,7 @@ def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):
 
 
 def test_lenet_nccl():
-    context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
+    context.set_auto_parallel_context(parallel_mode="data_parallel", gradients_mean=True, device_num=get_group_size())
     net = LeNet()
     net.set_train()
 
diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py
index 0a0675a8051..cd60334f746 100644
--- a/tests/st/networks/models/bert/src/bert_for_pre_training.py
+++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py
@@ -279,7 +279,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py
index bcea50dc3b4..9adda84731d 100644
--- a/tests/st/networks/models/bert/src/utils.py
+++ b/tests/st/networks/models/bert/src/utils.py
@@ -61,7 +61,7 @@ class BertFinetuneCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py b/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
index 97d2cb22702..02c37b1127d 100644
--- a/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
@@ -130,7 +130,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/tests/st/networks/models/resnet50/src_thor/thor.py b/tests/st/networks/models/resnet50/src_thor/thor.py
index d4469a58271..b5b1faa1d50 100644
--- a/tests/st/networks/models/resnet50/src_thor/thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/thor.py
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from .grad_reducer_thor import DistributedGradReducerThor
 
 
@@ -87,7 +87,7 @@ class THOR(Optimizer):
                              1.0 / 196, 1.0 / 196, 1.0 / 196,
                             1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
                              1.0]
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
         self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
index 7be7ef89a0c..28ed8b54893 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
@@ -137,7 +137,7 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
 
     os.environ['RANK_SIZE'] = str(device_num)
     if enable_hccl:
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
         init()
 
@@ -240,7 +240,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
 
     os.environ['RANK_SIZE'] = str(device_num)
     if enable_hccl:
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, - mirror_mean=True, parameter_broadcast=True) + gradients_mean=True, parameter_broadcast=True) auto_parallel_context().set_all_reduce_fusion_split_indices([107], "hccl_world_groupsum1") auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum2") auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum3") diff --git a/tests/st/ps/multi_full_ps/test_multi_full_ps.py b/tests/st/ps/multi_full_ps/test_multi_full_ps.py index e33212ce983..99e8bb2322a 100644 --- a/tests/st/ps/multi_full_ps/test_multi_full_ps.py +++ b/tests/st/ps/multi_full_ps/test_multi_full_ps.py @@ -97,7 +97,8 @@ if __name__ == "__main__": criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) if device_target == "GPU": - context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size()) + context.set_auto_parallel_context(parallel_mode="data_parallel", gradients_mean=True, + device_num=get_group_size()) net_with_criterion = WithLossCell(network, criterion) train_network = TrainOneStepCell(net_with_criterion, net_opt) train_network.set_train() diff --git a/tests/ut/python/communication/test_data_parallel_dense.py b/tests/ut/python/communication/test_data_parallel_dense.py index d2fdf2d1589..c80f4c5a2f0 100644 --- a/tests/ut/python/communication/test_data_parallel_dense.py +++ b/tests/ut/python/communication/test_data_parallel_dense.py @@ -58,7 +58,7 @@ def test_data_parallel_dense(): """test_data_parallel_dense""" context.set_context(mode=context.GRAPH_MODE) context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, device_num=8) inp = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) label = Tensor(np.zeros([32, 768]).astype(np.float32)) net = DenseMMNet() diff --git a/tests/ut/python/communication/test_data_parallel_lenet.py b/tests/ut/python/communication/test_data_parallel_lenet.py index 0897023a69e..2908f89b8f4 100755 --- a/tests/ut/python/communication/test_data_parallel_lenet.py +++ b/tests/ut/python/communication/test_data_parallel_lenet.py @@ -80,7 +80,7 @@ def test_lenet5_train_step_training_pynative(): context.set_context(mode=context.PYNATIVE_MODE) context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, - device_num=8, mirror_mean=True) + device_num=8, gradients_mean=True) predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.zeros([1, 10]).astype(np.float32)) DatasetLenet(predict, label, 2) diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py index cfed2beb27e..d311f0b40bb 100644 --- a/tests/ut/python/model/test_mix_precision.py +++ b/tests/ut/python/model/test_mix_precision.py @@ -97,7 +97,7 @@ def test_on_momentum(): def test_data_parallel_with_cast(): """test_data_parallel_with_cast""" context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, device_num=8) predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) 
label = Tensor(np.zeros([1, 10]).astype(np.float32)) net = LeNet5() diff --git a/tests/ut/python/parallel/test_optimizer.py b/tests/ut/python/parallel/test_optimizer.py index 0d6de9526a1..112069f5f19 100644 --- a/tests/ut/python/parallel/test_optimizer.py +++ b/tests/ut/python/parallel/test_optimizer.py @@ -46,7 +46,7 @@ class Net(nn.Cell): def test_dense_gen_graph(): context.set_context(mode=context.GRAPH_MODE) context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, gradients_mean=True, device_num=8) init() network = Net(512, 128) diff --git a/tests/ut/python/parallel/test_set_auto_parallel_context.py b/tests/ut/python/parallel/test_set_auto_parallel_context.py index ff69b3bee84..56957847405 100644 --- a/tests/ut/python/parallel/test_set_auto_parallel_context.py +++ b/tests/ut/python/parallel/test_set_auto_parallel_context.py @@ -20,17 +20,17 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context def test_set_auto_parallel_context(): - context.set_auto_parallel_context(device_num=4, global_rank=3, mirror_mean=True, gradient_fp32_sync=False, + context.set_auto_parallel_context(device_num=4, global_rank=3, gradients_mean=True, gradient_fp32_sync=False, parallel_mode="auto_parallel", parameter_broadcast=False) device_num = context.get_auto_parallel_context("device_num") global_rank = context.get_auto_parallel_context("global_rank") - mirror_mean = context.get_auto_parallel_context("mirror_mean") + gradients_mean = context.get_auto_parallel_context("gradients_mean") gradient_fp32_sync = context.get_auto_parallel_context("gradient_fp32_sync") parallel_mode = context.get_auto_parallel_context("parallel_mode") parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") assert device_num == 4 assert global_rank == 3 - assert mirror_mean + assert gradients_mean assert not gradient_fp32_sync assert parallel_mode == "auto_parallel" assert not parameter_broadcast @@ -45,9 +45,9 @@ def test_set_auto_parallel_context(): global_rank = auto_parallel_context().get_global_rank() assert global_rank == 4 - auto_parallel_context().set_mirror_mean(True) - mirror_mean = auto_parallel_context().get_mirror_mean() - assert mirror_mean + auto_parallel_context().set_gradients_mean(True) + gradients_mean = auto_parallel_context().get_gradients_mean() + assert gradients_mean auto_parallel_context().set_gradient_fp32_sync(False) gradient_fp32_sync = auto_parallel_context().get_gradient_fp32_sync() @@ -86,7 +86,7 @@ def test_reset_auto_parallel_context(): context.reset_auto_parallel_context() device_num = context.get_auto_parallel_context("device_num") global_rank = context.get_auto_parallel_context("global_rank") - mirror_mean = context.get_auto_parallel_context("mirror_mean") + gradients_mean = context.get_auto_parallel_context("gradients_mean") gradient_fp32_sync = context.get_auto_parallel_context("gradient_fp32_sync") parallel_mode = context.get_auto_parallel_context("parallel_mode") parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") @@ -94,7 +94,7 @@ def test_reset_auto_parallel_context(): parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() assert device_num == 1 assert global_rank == 0 - assert not mirror_mean + assert not gradients_mean assert gradient_fp32_sync assert parallel_mode == "stand_alone" assert not parameter_broadcast diff 
--git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index 854df0ca71f..cf91af463d5 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ b/tests/ut/python/parallel/test_two_matmul.py @@ -65,7 +65,7 @@ def test_two_matmul(): out = self.matmul2(out, b) return out - context.set_auto_parallel_context(device_num=8, global_rank=0, mirror_mean=True) + context.set_auto_parallel_context(device_num=8, global_rank=0, gradients_mean=True) strategy1 = ((4, 2), (2, 1)) strategy2 = ((2, 4), (4, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) @@ -90,7 +90,7 @@ def test_two_matmul_repeated_calculation1(): out = self.matmul2(out, b) return out - context.set_auto_parallel_context(device_num=64, global_rank=5, mirror_mean=True) + context.set_auto_parallel_context(device_num=64, global_rank=5, gradients_mean=True) strategy1 = ((2, 4), (4, 8)) strategy2 = ((1, 1), (1, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) diff --git a/tests/ut/python/train/test_amp.py b/tests/ut/python/train/test_amp.py index 074056f3267..03102025046 100644 --- a/tests/ut/python/train/test_amp.py +++ b/tests/ut/python/train/test_amp.py @@ -148,7 +148,7 @@ def test_compile_model_train_O2_parallel(): dataset_shapes = ((16, 16), (16, 16)) context.set_auto_parallel_context( global_rank=0, device_num=8, - mirror_mean=True, parameter_broadcast=True, + gradients_mean=True, parameter_broadcast=True, parallel_mode=ParallelMode.DATA_PARALLEL) dataset = MindDataSet(dataset_types, dataset_shapes)
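-- 
Usage note: after this rename, callers set and query gradient averaging through
the "gradients_mean" key instead of "mirror_mean". A minimal sketch, assuming a
launched multi-device job; the data-parallel mode and group-size call below are
illustrative values taken from the tests in this patch, not new API:

    from mindspore import context
    from mindspore.context import ParallelMode
    from mindspore.communication.management import init, get_group_size

    init()  # set up the communication group before configuring parallelism
    # "gradients_mean" replaces the old "mirror_mean" kwarg and context key
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      gradients_mean=True,
                                      device_num=get_group_size())
    assert context.get_auto_parallel_context("gradients_mean")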