From d4cfe55c040ff3fc5998d09ef195ffc8167af756 Mon Sep 17 00:00:00 2001
From: yao_yf
Date: Fri, 4 Sep 2020 14:31:12 +0800
Subject: [PATCH] rename mirror_mean to gradients_mean

---
 mindspore/ccsrc/frontend/parallel/context.cc       |  4 +--
 mindspore/ccsrc/frontend/parallel/context.h        |  6 ++---
 .../parallel/ops_info/operator_info.cc             |  2 +-
 .../ccsrc/frontend/parallel/step_parallel.cc       |  2 +-
 mindspore/ccsrc/pipeline/jit/init.cc               |  4 +--
 mindspore/context.py                               | 10 +++----
 mindspore/nn/wrap/cell_wrapper.py                  |  4 +--
 mindspore/nn/wrap/grad_reducer.py                  |  2 +-
 mindspore/nn/wrap/loss_scale.py                    |  4 +--
 mindspore/parallel/_auto_parallel_context.py       | 26 +++++++++----------
 mindspore/parallel/_utils.py                       |  6 ++---
 model_zoo/official/cv/deeplabv3/train.py           |  2 +-
 model_zoo/official/cv/faster_rcnn/train.py         |  2 +-
 model_zoo/official/cv/googlenet/train.py           |  4 +--
 model_zoo/official/cv/inceptionv3/train.py         |  2 +-
 model_zoo/official/cv/maskrcnn/train.py            |  2 +-
 .../official/cv/mobilenetv2/src/utils.py           |  4 +--
 .../official/cv/mobilenetv2_quant/train.py         |  4 +--
 model_zoo/official/cv/mobilenetv3/train.py         |  2 +-
 .../official/cv/nasnet/src/nasnet_a_mobile.py      |  4 +--
 model_zoo/official/cv/nasnet/train.py              |  2 +-
 model_zoo/official/cv/resnet/train.py              |  4 +--
 model_zoo/official/cv/resnet50_quant/train.py      |  4 +--
 .../cv/resnet_thor/src/grad_reducer_thor.py        |  2 +-
 model_zoo/official/cv/resnet_thor/src/thor.py      |  6 ++---
 model_zoo/official/cv/resnet_thor/train.py         |  4 +--
 model_zoo/official/cv/resnext50/eval.py            |  2 +-
 model_zoo/official/cv/resnext50/train.py           |  2 +-
 model_zoo/official/cv/shufflenetv2/train.py        |  2 +-
 model_zoo/official/cv/ssd/src/ssd.py               |  2 +-
 model_zoo/official/cv/ssd/train.py                 |  2 +-
 model_zoo/official/cv/vgg16/train.py               |  2 +-
 .../cv/warpctc/src/warpctc_for_train.py            |  4 +--
 model_zoo/official/cv/warpctc/train.py             |  2 +-
 .../official/cv/yolov3_darknet53/eval.py           |  2 +-
 .../official/cv/yolov3_darknet53/src/yolo.py       |  2 +-
 .../official/cv/yolov3_darknet53/train.py          |  2 +-
 .../cv/yolov3_darknet53_quant/eval.py              |  2 +-
 .../cv/yolov3_darknet53_quant/src/yolo.py          |  2 +-
 .../cv/yolov3_darknet53_quant/train.py             |  2 +-
 .../official/cv/yolov3_resnet18/src/yolov3.py      |  2 +-
 .../official/cv/yolov3_resnet18/train.py           |  2 +-
 model_zoo/official/nlp/bert/run_pretrain.py        |  2 +-
 .../nlp/bert/src/bert_for_finetune.py              |  4 +--
 .../nlp/bert/src/bert_for_pre_training.py          |  2 +-
 .../official/nlp/bert_thor/run_pretrain.py         |  2 +-
 .../bert_thor/src/bert_for_pre_training.py         |  2 +-
 .../nlp/bert_thor/src/grad_reducer_thor.py         |  2 +-
 .../nlp/bert_thor/src/thor_for_bert_arg.py         |  4 +--
 .../src/transformer/transformer_for_train.py       |  4 +--
 model_zoo/official/nlp/mass/train.py               |  2 +-
 .../nlp/tinybert/run_general_distill.py            |  2 +-
 .../nlp/tinybert/src/tinybert_for_gd_td.py         |  4 +--
 .../transformer/src/transformer_for_train.py       |  6 ++---
 model_zoo/official/nlp/transformer/train.py        |  2 +-
 model_zoo/official/recommend/deepfm/train.py       |  4 +--
 .../wide_and_deep/src/wide_and_deep.py             |  2 +-
 .../train_and_eval_auto_parallel.py                |  4 +--
 .../train_and_eval_distribute.py                   |  2 +-
 .../train_and_eval_parameter_server.py             |  2 +-
 .../src/wide_and_deep.py                           |  2 +-
 .../train_and_eval_distribute.py                   |  2 +-
 .../st/auto_parallel/resnet50_expand_loss.py       |  2 +-
 .../train_and_test_multinpu_ci.py                  |  2 +-
 .../python_file_for_ci/wide_and_deep.py            |  4 +--
 ...rain_and_test_multinpu_ci_data_parallel.py      |  2 +-
 tests/st/model_zoo_tests/yolov3/src/yolov3.py      |  2 +-
 tests/st/nccl/test_nccl_lenet.py                   |  2 +-
 .../models/bert/src/bert_for_pre_training.py       |  2 +-
 tests/st/networks/models/bert/src/utils.py         |  2 +-
 .../resnet50/src_thor/grad_reducer_thor.py         |  2 +-
 .../networks/models/resnet50/src_thor/thor.py      |  4 +--
 .../models/resnet50/test_resnet50_imagenet.py      |  4 +--
 .../st/ps/multi_full_ps/test_multi_full_ps.py      |  3 ++-
 .../communication/test_data_parallel_dense.py      |  2 +-
 .../communication/test_data_parallel_lenet.py      |  2 +-
 tests/ut/python/model/test_mix_precision.py        |  2 +-
 tests/ut/python/parallel/test_optimizer.py         |  2 +-
 .../test_set_auto_parallel_context.py              | 16 ++++++------
 tests/ut/python/parallel/test_two_matmul.py        |  4 +--
 tests/ut/python/train/test_amp.py                  |  2 +-
 81 files changed, 135 insertions(+), 134 deletions(-)

diff --git a/mindspore/ccsrc/frontend/parallel/context.cc b/mindspore/ccsrc/frontend/parallel/context.cc
index 6624e8e5667..d4c1bf6edc5 100644
--- a/mindspore/ccsrc/frontend/parallel/context.cc
+++ b/mindspore/ccsrc/frontend/parallel/context.cc
@@ -45,7 +45,7 @@ std::shared_ptr<ParallelContext> ParallelContext::GetInstance() {
 ParallelContext::ParallelContext() { Reset(); }
 
 void ParallelContext::Reset() {
-  mirror_mean_ = false;
+  gradients_mean_ = false;
   full_batch_ = false;
   gradient_fp32_sync_ = true;
   loss_repeated_mean_ = true;
@@ -74,7 +74,7 @@ void ParallelContext::set_global_rank(int32_t global_rank) {
   global_rank_is_set_ = true;
 }
 
-void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_mean; }
+void ParallelContext::set_gradients_mean(bool gradients_mean) { gradients_mean_ = gradients_mean; }
 
 void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }
 
diff --git a/mindspore/ccsrc/frontend/parallel/context.h b/mindspore/ccsrc/frontend/parallel/context.h
index 828300af1cc..3f55f9a1528 100644
--- a/mindspore/ccsrc/frontend/parallel/context.h
+++ b/mindspore/ccsrc/frontend/parallel/context.h
@@ -52,8 +52,8 @@ class ParallelContext {
 
   static std::shared_ptr<ParallelContext> GetInstance();
 
-  void set_mirror_mean(bool mirror_mean);
-  bool mirror_mean() const { return mirror_mean_; }
+  void set_gradients_mean(bool gradients_mean);
+  bool gradients_mean() const { return gradients_mean_; }
 
   void set_full_batch(bool full_batch);
   bool full_batch() const { return full_batch_; }
@@ -107,7 +107,7 @@ class ParallelContext {
  private:
  ParallelContext();
   static std::shared_ptr<ParallelContext> inst_context_;
-  bool mirror_mean_;
+  bool gradients_mean_;
   bool full_batch_;
   bool gradient_fp32_sync_;
   bool loss_repeated_mean_;
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
index 60a1c783ecf..213b872e4b5 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
@@ -251,7 +251,7 @@ OperatorVector CreateMirrorOps(const std::string &group_name, size_t dev_num) {
     MS_LOG(EXCEPTION) << "Invalid dev num: " << dev_num;
   }
   OperatorVector op_for_weight;
-  bool mean_flag = ParallelContext::GetInstance()->mirror_mean();
+  bool mean_flag = ParallelContext::GetInstance()->gradients_mean();
 
   OperatorName operator_name = MIRROR_OPERATOR;
   ValuePtr attr0_value = MakeValue(group_name);
diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
index 9f934e429cb..ef36e912bcb 100644
--- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
@@ -2488,7 +2488,7 @@ Status ParallelInit() {
   }
 
   MS_LOG(INFO) << "The parallel context: dev num: " << device_num << ", global rank: " << global_rank
-               << ", backend: " << backend << ", mirror_mean: " << ParallelContext::GetInstance()->mirror_mean()
+               << ", backend: " << backend << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean()
                << ", gradient_fp32_sync: " << ParallelContext::GetInstance()->gradient_fp32_sync();
   return SUCCESS;
 }
diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index 54b9bac552d..cc43ec6bf22 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@@ -113,8 +113,8 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("get_global_rank", &ParallelContext::global_rank, "Get global rank.")
     .def("set_global_rank", &ParallelContext::set_global_rank, "Set global rank.")
     .def("get_global_rank_is_set", &ParallelContext::global_rank_is_set, "Get global rank is set.")
-    .def("get_mirror_mean", &ParallelContext::mirror_mean, "Get mirror mean.")
-    .def("set_mirror_mean", &ParallelContext::set_mirror_mean, "Set mirror mean.")
+    .def("get_gradients_mean", &ParallelContext::gradients_mean, "Get gradients mean.")
+    .def("set_gradients_mean", &ParallelContext::set_gradients_mean, "Set gradients mean.")
     .def("get_gradient_fp32_sync", &ParallelContext::gradient_fp32_sync, "Get cast before mirror.")
     .def("set_gradient_fp32_sync", &ParallelContext::set_gradient_fp32_sync, "Set cast before mirror.")
     .def("get_loss_repeated_mean", &ParallelContext::loss_repeated_mean, "Get loss repeated mean.")
diff --git a/mindspore/context.py b/mindspore/context.py
index 1d4b76e57c8..8b5023f957d 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -323,7 +323,7 @@ def _context():
     return _k_context
 
 
-@args_type_check(device_num=int, global_rank=int, mirror_mean=bool, gradient_fp32_sync=bool, parallel_mode=str,
+@args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool, parallel_mode=str,
                 auto_parallel_search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str,
                 strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool)
 def set_auto_parallel_context(**kwargs):
@@ -341,8 +341,8 @@ def set_auto_parallel_context(**kwargs):
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
         global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
-        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror.
-                     "stand_alone" do not support mirror_mean. Default: False.
+        gradients_mean (bool): Whether to perform the mean operator after all-reduce of gradients.
+                     "stand_alone" does not support gradients_mean. Default: False.
         gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True..
                      "stand_alone", "data_parallel" and "hybrid_parallel" do not support
                      gradient_fp32_sync. Default: True.
@@ -380,7 +380,7 @@ def set_auto_parallel_context(**kwargs):
     Examples:
         >>> context.set_auto_parallel_context(device_num=8)
         >>> context.set_auto_parallel_context(global_rank=0)
-        >>> context.set_auto_parallel_context(mirror_mean=True)
+        >>> context.set_auto_parallel_context(gradients_mean=True)
         >>> context.set_auto_parallel_context(gradient_fp32_sync=False)
         >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
         >>> context.set_auto_parallel_context(parameter_broadcast=False)
@@ -412,7 +412,7 @@ def reset_auto_parallel_context():
 
     - device_num: 1.
     - global_rank: 0.
-    - mirror_mean: False.
+    - gradients_mean: False.
    - gradient_fp32_sync: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
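
For reference, the renamed option as a caller sees it. A minimal sketch, assuming an 8-card data-parallel launch on Ascend; the device number is a placeholder:

    from mindspore import context
    from mindspore.context import ParallelMode
    from mindspore.communication.management import init

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    init()
    # The flag formerly spelled mirror_mean: average gradients across
    # devices after the all-reduce instead of leaving them summed.
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=8, gradients_mean=True)
    assert context.get_auto_parallel_context("gradients_mean")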
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index d24c166cafd..91194e8d784 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """Cell_wrapper."""
-from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean,
+from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                        _get_parallel_mode)
 from mindspore.context import ParallelMode
 from ...common import dtype as mstype
@@ -190,7 +190,7 @@ class TrainOneStepCell(Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py
index 345854a8584..47f502e7802 100644
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -279,7 +279,7 @@ class DistributedGradReducer(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py
index 999873ea6e7..19bd6b6580f 100644
--- a/mindspore/nn/wrap/loss_scale.py
+++ b/mindspore/nn/wrap/loss_scale.py
@@ -16,7 +16,7 @@
 import mindspore.context as context
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from ..cell import Cell
 from ...common import Tensor, RowTensor
 from ...common.parameter import Parameter
@@ -231,7 +231,7 @@ class TrainOneStepWithLossScaleCell(Cell):
         self.grad_reducer = F.identity
         self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py
index 0cd11d7fb8e..6756912d641 100644
--- a/mindspore/parallel/_auto_parallel_context.py
+++ b/mindspore/parallel/_auto_parallel_context.py
@@ -95,23 +95,23 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_global_rank()
 
-    def set_mirror_mean(self, mirror_mean):
+    def set_gradients_mean(self, gradients_mean):
         """
-        Set mirror_mean flag.
+        Set gradients_mean flag.
 
         Note:
-            If mirror_mean is true, it will insert a div operator after parameter gradients allreduce.
+            If gradients_mean is true, it will insert a div operator after parameter gradients allreduce.
 
         Args:
-            mirror_mean (bool): The mirror_mean flag.
+            gradients_mean (bool): The gradients_mean flag.
         """
         self.check_context_handle()
-        self._context_handle.set_mirror_mean(mirror_mean)
+        self._context_handle.set_gradients_mean(gradients_mean)
 
-    def get_mirror_mean(self):
-        """Get mirror_mean flag."""
+    def get_gradients_mean(self):
+        """Get gradients_mean flag."""
         self.check_context_handle()
-        return self._context_handle.get_mirror_mean()
+        return self._context_handle.get_gradients_mean()
 
     def set_gradient_fp32_sync(self, gradient_fp32_sync):
         """
@@ -453,7 +453,7 @@ def auto_parallel_context():
 _set_auto_parallel_context_func_map = {
     "device_num": auto_parallel_context().set_device_num,
     "global_rank": auto_parallel_context().set_global_rank,
-    "mirror_mean": auto_parallel_context().set_mirror_mean,
+    "gradients_mean": auto_parallel_context().set_gradients_mean,
     "gradient_fp32_sync": auto_parallel_context().set_gradient_fp32_sync,
     "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().set_parallel_mode,
@@ -468,7 +468,7 @@ _set_auto_parallel_context_func_map = {
 _get_auto_parallel_context_func_map = {
     "device_num": auto_parallel_context().get_device_num,
     "global_rank": auto_parallel_context().get_global_rank,
-    "mirror_mean": auto_parallel_context().get_mirror_mean,
+    "gradients_mean": auto_parallel_context().get_gradients_mean,
     "gradient_fp32_sync": auto_parallel_context().get_gradient_fp32_sync,
     "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().get_parallel_mode,
@@ -480,7 +480,7 @@ _get_auto_parallel_context_func_map = {
     "enable_parallel_optimizer": auto_parallel_context().get_enable_parallel_optimizer}
 
 
-@args_type_check(device_num=int, global_rank=int, mirror_mean=bool, gradient_fp32_sync=bool,
+@args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool,
                 loss_repeated_mean=bool, parallel_mode=str, auto_parallel_search_mode=str,
                 parameter_broadcast=bool, strategy_ckpt_load_file=str, strategy_ckpt_save_file=str,
                 full_batch=bool, enable_parallel_optimizer=bool)
@@ -495,7 +495,7 @@ def _set_auto_parallel_context(**kwargs):
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
         global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
-        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False.
+        gradients_mean (bool): Whether to perform the mean operator after all-reduce of gradients. Default: False.
         loss_repeated_mean (bool): Whether to perform mean operator in backward in the case of repeated
                         calculations. Default: True.
         gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True.
@@ -562,7 +562,7 @@ def _reset_auto_parallel_context():
 
     - device_num: 1.
     - global_rank: 0.
-    - mirror_mean: False.
+    - gradients_mean: False.
     - gradient_fp32_sync: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py
index ff1fbcc6c2b..1c93ae20029 100644
--- a/mindspore/parallel/_utils.py
+++ b/mindspore/parallel/_utils.py
@@ -88,9 +88,9 @@ def _to_full_tensor(elem, device_num, global_rank, scaling_sens=None):
         lst.append(Tensor(scaling_sens, mstype.float32))
     return tuple(lst)
 
-def _get_mirror_mean():
-    """Get if using mirror_mean."""
-    return auto_parallel_context().get_mirror_mean()
+def _get_gradients_mean():
+    """Get if using gradients_mean."""
+    return auto_parallel_context().get_gradients_mean()
 
 
 def _get_device_num():
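
Every model-zoo change below repeats one pattern: read the renamed flag, read the device count, and build a DistributedGradReducer. Shown once as a sketch; `optimizer` is a placeholder for a real mindspore.nn optimizer:

    from mindspore.context import ParallelMode
    from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
    from mindspore.ops import functional as F
    from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                           _get_parallel_mode)

    def build_grad_reducer(optimizer):
        """Return an all-reducing gradient wrapper, or identity when not distributed."""
        if _get_parallel_mode() in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
            mean = _get_gradients_mean()   # renamed from _get_mirror_mean()
            degree = _get_device_num()
            return DistributedGradReducer(optimizer.parameters, mean, degree)
        return F.identity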
diff --git a/model_zoo/official/cv/deeplabv3/train.py b/model_zoo/official/cv/deeplabv3/train.py
index 1e501105c02..da84215fd96 100644
--- a/model_zoo/official/cv/deeplabv3/train.py
+++ b/model_zoo/official/cv/deeplabv3/train.py
@@ -66,7 +66,7 @@ def model_fine_tune(flags, train_net, fix_weight_layer):
             para.requires_grad = False
 if __name__ == "__main__":
     if args_opt.distribute == "true":
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
         init()
     args_opt.base_size = config.crop_size
     args_opt.crop_size = config.crop_size
diff --git a/model_zoo/official/cv/faster_rcnn/train.py b/model_zoo/official/cv/faster_rcnn/train.py
index 2add1913c5a..53238a0dd75 100644
--- a/model_zoo/official/cv/faster_rcnn/train.py
+++ b/model_zoo/official/cv/faster_rcnn/train.py
@@ -54,7 +54,7 @@ if __name__ == '__main__':
         rank = args_opt.rank_id
         device_num = args_opt.device_num
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         init()
     else:
         rank = 0
diff --git a/model_zoo/official/cv/googlenet/train.py b/model_zoo/official/cv/googlenet/train.py
index 3e0fae74118..53de6ef9c41 100644
--- a/model_zoo/official/cv/googlenet/train.py
+++ b/model_zoo/official/cv/googlenet/train.py
@@ -78,7 +78,7 @@ if __name__ == '__main__':
         if device_num > 1:
             context.reset_auto_parallel_context()
             context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             init()
     elif device_target == "GPU":
         init()
@@ -86,7 +86,7 @@ if __name__ == '__main__':
         if device_num > 1:
             context.reset_auto_parallel_context()
             context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
     else:
         raise ValueError("Unsupported platform.")
 
diff --git a/model_zoo/official/cv/inceptionv3/train.py b/model_zoo/official/cv/inceptionv3/train.py
index b3af5beefbd..4efc558ad35 100644
--- a/model_zoo/official/cv/inceptionv3/train.py
+++ b/model_zoo/official/cv/inceptionv3/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/maskrcnn/train.py b/model_zoo/official/cv/maskrcnn/train.py
index c9a40303633..0081cec6d3c 100644
--- a/model_zoo/official/cv/maskrcnn/train.py
+++ b/model_zoo/official/cv/maskrcnn/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         rank = args_opt.rank_id
         device_num = args_opt.device_num
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         init()
     else:
         rank = 0
diff --git a/model_zoo/official/cv/mobilenetv2/src/utils.py b/model_zoo/official/cv/mobilenetv2/src/utils.py
index d0a09afb8b7..5a05f397a4a 100644
--- a/model_zoo/official/cv/mobilenetv2/src/utils.py
+++ b/model_zoo/official/cv/mobilenetv2/src/utils.py
@@ -39,7 +39,7 @@ def context_device_init(config):
         init("nccl")
         context.set_auto_parallel_context(device_num=get_group_size(),
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
 
     elif config.platform == "Ascend":
         context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, device_id=config.device_id,
@@ -47,7 +47,7 @@ def context_device_init(config):
         if config.run_distribute:
             context.set_auto_parallel_context(device_num=config.rank_size,
                                               parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              parameter_broadcast=True, mirror_mean=True)
+                                              parameter_broadcast=True, gradients_mean=True)
             auto_parallel_context().set_all_reduce_fusion_split_indices([140])
             init()
     else:
diff --git a/model_zoo/official/cv/mobilenetv2_quant/train.py b/model_zoo/official/cv/mobilenetv2_quant/train.py
index 40d9b83ae23..30455bbdd56 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/train.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/train.py
@@ -57,7 +57,7 @@ elif args_opt.device_target == "GPU":
     init()
    context.set_auto_parallel_context(device_num=get_group_size(),
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     context.set_context(mode=context.GRAPH_MODE,
                         device_target="GPU",
                         save_graphs=False)
@@ -77,7 +77,7 @@ def train_on_ascend():
     context.set_auto_parallel_context(device_num=rank_size,
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
                                       parameter_broadcast=True,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     init()
 
 # define network
diff --git a/model_zoo/official/cv/mobilenetv3/train.py b/model_zoo/official/cv/mobilenetv3/train.py
index 9f05fe4fde8..9ffa0b836f0 100644
--- a/model_zoo/official/cv/mobilenetv3/train.py
+++ b/model_zoo/official/cv/mobilenetv3/train.py
@@ -55,7 +55,7 @@ if args_opt.device_target == "GPU":
     init()
     context.set_auto_parallel_context(device_num=get_group_size(),
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
 else:
     raise ValueError("Unsupported device_target.")
 
diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
index fbf13ec9909..2f95a0dc764 100755
--- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
+++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
@@ -24,7 +24,7 @@ import mindspore.ops.composite as C
 import mindspore.common.dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.train.parallel_utils import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 
 
 GRADIENT_CLIP_TYPE = 1
@@ -921,7 +921,7 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/cv/nasnet/train.py b/model_zoo/official/cv/nasnet/train.py
index 290dc892e4d..46143c35ec5 100755
--- a/model_zoo/official/cv/nasnet/train.py
+++ b/model_zoo/official/cv/nasnet/train.py
@@ -58,7 +58,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py
index 6d65cce48e8..a45e26bd834 100755
--- a/model_zoo/official/cv/resnet/train.py
+++ b/model_zoo/official/cv/resnet/train.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
             device_id = int(os.getenv('DEVICE_ID'))
             context.set_context(device_id=device_id, enable_auto_mixed_precision=True)
             context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             if args_opt.net == "resnet50" or args_opt.net == "se-resnet50":
                 auto_parallel_context().set_all_reduce_fusion_split_indices([85, 160])
             else:
@@ -86,7 +86,7 @@ if __name__ == '__main__':
         else:
             init()
             context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
+                                              gradients_mean=True)
             if args_opt.net == "resnet50":
                 auto_parallel_context().set_all_reduce_fusion_split_indices([85, 160])
             ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
diff --git a/model_zoo/official/cv/resnet50_quant/train.py b/model_zoo/official/cv/resnet50_quant/train.py
index 927fe98c777..f4a3965c838 100755
--- a/model_zoo/official/cv/resnet50_quant/train.py
+++ b/model_zoo/official/cv/resnet50_quant/train.py
@@ -76,11 +76,11 @@ if __name__ == '__main__':
     context.set_auto_parallel_context(device_num=rank_size,
                                       parallel_mode=ParallelMode.DATA_PARALLEL,
                                       parameter_broadcast=True,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     init()
 
     context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                      mirror_mean=True)
+                                      gradients_mean=True)
     auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
 
 # define network
diff --git a/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py b/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
index 35cbaa1460b..dbc7b3796a6 100644
--- a/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/model_zoo/official/cv/resnet_thor/src/thor.py b/model_zoo/official/cv/resnet_thor/src/thor.py
index 7fc0d9b74ed..44b6930684b 100644
--- a/model_zoo/official/cv/resnet_thor/src/thor.py
+++ b/model_zoo/official/cv/resnet_thor/src/thor.py
@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
 from mindspore._checkparam import check_bool
 from mindspore._checkparam import Validator as validator
 from mindspore.nn.optim.optimizer import Optimizer
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from src.grad_reducer_thor import DistributedGradReducerThor
 
 _momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -85,7 +85,7 @@ class THOR_GPU(Optimizer):
         self.assign = P.Assign()
         self.mul = P.Mul()
 
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_thorA = DistributedGradReducerThor(self.parameters, 0, mean, degree)
         self.grad_reducer_thorG = DistributedGradReducerThor(self.parameters, 0, mean, degree)
@@ -191,7 +191,7 @@ class THOR(Optimizer):
                              1.0 / 196, 1.0 / 196, 1.0 / 196,
                             1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
                              1.0]
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
         self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
diff --git a/model_zoo/official/cv/resnet_thor/train.py b/model_zoo/official/cv/resnet_thor/train.py
index 1b651086075..5d8ce2f38f8 100644
--- a/model_zoo/official/cv/resnet_thor/train.py
+++ b/model_zoo/official/cv/resnet_thor/train.py
@@ -94,7 +94,7 @@ if __name__ == '__main__':
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(device_id=device_id, enable_auto_mixed_precision=True)
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107], "hccl_world_groupsum1")
         auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum2")
         auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum3")
@@ -105,7 +105,7 @@ if __name__ == '__main__':
     else:
         init()
         context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107])
         ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
 
diff --git a/model_zoo/official/cv/resnext50/eval.py b/model_zoo/official/cv/resnext50/eval.py
index 06b4acfe05a..88e7ce8e4bc 100644
--- a/model_zoo/official/cv/resnext50/eval.py
+++ b/model_zoo/official/cv/resnext50/eval.py
@@ -117,7 +117,7 @@ def test(cloud_args=None):
         args.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         args.rank = 0
         args.group_size = 1
diff --git a/model_zoo/official/cv/resnext50/train.py b/model_zoo/official/cv/resnext50/train.py
index 8e7d4b0220e..d3a3873f3a2 100644
--- a/model_zoo/official/cv/resnext50/train.py
+++ b/model_zoo/official/cv/resnext50/train.py
@@ -179,7 +179,7 @@ def train(cloud_args=None):
         args.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         args.rank = 0
         args.group_size = 1
diff --git a/model_zoo/official/cv/shufflenetv2/train.py b/model_zoo/official/cv/shufflenetv2/train.py
index ed70f9186e4..066b225d9f2 100644
--- a/model_zoo/official/cv/shufflenetv2/train.py
+++ b/model_zoo/official/cv/shufflenetv2/train.py
@@ -60,7 +60,7 @@ if __name__ == '__main__':
         cfg.group_size = get_group_size()
         parallel_mode = ParallelMode.DATA_PARALLEL
         context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=cfg.group_size,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         cfg.rank = 0
         cfg.group_size = 1
diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py
index 8fbca66c5c6..631aa44470a 100644
--- a/model_zoo/official/cv/ssd/src/ssd.py
+++ b/model_zoo/official/cv/ssd/src/ssd.py
@@ -392,7 +392,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/ssd/train.py b/model_zoo/official/cv/ssd/train.py
index 34b1e3ca2b6..08aeaec8aa3 100644
--- a/model_zoo/official/cv/ssd/train.py
+++ b/model_zoo/official/cv/ssd/train.py
@@ -60,7 +60,7 @@ def main():
     if args_opt.distribute:
         device_num = args_opt.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         init()
         rank = args_opt.device_id % device_num
diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py
index 832664eb0c1..1eeed9a0388 100644
--- a/model_zoo/official/cv/vgg16/train.py
+++ b/model_zoo/official/cv/vgg16/train.py
@@ -140,7 +140,7 @@ if __name__ == '__main__':
         device_num = args.group_size
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          parameter_broadcast=True, mirror_mean=True)
+                                          parameter_broadcast=True, gradients_mean=True)
     else:
         context.set_context(device_id=args.device_id)
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
index 1640342fad6..d58a72b4d3f 100755
--- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
+++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
@@ -14,7 +14,7 @@
 # ============================================================================
 """Automatic differentiation with grad clip."""
 import numpy as np
-from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean,
+from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                        _get_parallel_mode)
 from mindspore.context import ParallelMode
 from mindspore.common import dtype as mstype
@@ -93,7 +93,7 @@ class TrainOneStepCellWithGradClip(Cell):
         if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/cv/warpctc/train.py b/model_zoo/official/cv/warpctc/train.py
index db3775a04b7..bd4e64a7257 100755
--- a/model_zoo/official/cv/warpctc/train.py
+++ b/model_zoo/official/cv/warpctc/train.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=device_num,
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
     else:
         device_num = 1
         rank = 0
diff --git a/model_zoo/official/cv/yolov3_darknet53/eval.py b/model_zoo/official/cv/yolov3_darknet53/eval.py
index 7352db7a5d8..f9d74ba1c1d 100644
--- a/model_zoo/official/cv/yolov3_darknet53/eval.py
+++ b/model_zoo/official/cv/yolov3_darknet53/eval.py
@@ -255,7 +255,7 @@ def test():
 
     context.reset_auto_parallel_context()
     parallel_mode = ParallelMode.STAND_ALONE
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=1)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
     network = YOLOV3DarkNet53(is_training=False)
diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
index eae0d9f028c..5a191b9b343 100644
--- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_darknet53/train.py b/model_zoo/official/cv/yolov3_darknet53/train.py
index a17b32d66c5..f3449908ea0 100644
--- a/model_zoo/official/cv/yolov3_darknet53/train.py
+++ b/model_zoo/official/cv/yolov3_darknet53/train.py
@@ -178,7 +178,7 @@ def train():
     else:
         parallel_mode = ParallelMode.STAND_ALONE
         degree = 1
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
 
     network = YOLOV3DarkNet53(is_training=True)
     # default is kaiming-normal
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
index 58b0067b562..a7a4297dbc2 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py
@@ -254,7 +254,7 @@ def test():
 
     context.reset_auto_parallel_context()
     parallel_mode = ParallelMode.STAND_ALONE
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=1)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
     network = YOLOV3DarkNet53(is_training=False)
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
index 81b00303df0..755179cac1c 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/train.py b/model_zoo/official/cv/yolov3_darknet53_quant/train.py
index 975ea993d6c..c0a0ba5b305 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/train.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/train.py
@@ -162,7 +162,7 @@ def train():
     else:
         parallel_mode = ParallelMode.STAND_ALONE
         degree = 1
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)
+    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
 
     network = YOLOV3DarkNet53(is_training=True)
     # default is kaiming-normal
diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
index 2fe4c8f07c6..1fecaba83e2 100644
--- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
+++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/model_zoo/official/cv/yolov3_resnet18/train.py b/model_zoo/official/cv/yolov3_resnet18/train.py
index 8830df1af33..19ba0c20d7d 100644
--- a/model_zoo/official/cv/yolov3_resnet18/train.py
+++ b/model_zoo/official/cv/yolov3_resnet18/train.py
@@ -92,7 +92,7 @@ def main():
     if args_opt.distribute:
         device_num = args_opt.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         init()
         rank = args_opt.device_id % device_num
diff --git a/model_zoo/official/nlp/bert/run_pretrain.py b/model_zoo/official/nlp/bert/run_pretrain.py
index b69ee52463f..6836da1d735 100644
--- a/model_zoo/official/nlp/bert/run_pretrain.py
+++ b/model_zoo/official/nlp/bert/run_pretrain.py
@@ -85,7 +85,7 @@ def run_pretrain():
         ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(rank) + '/'
 
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         from mindspore.parallel._auto_parallel_context import auto_parallel_context
         if bert_net_cfg.num_hidden_layers == 12:
diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
index 886cc15398d..1ad52ea07d9 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
@@ -66,7 +66,7 @@ class BertFinetuneCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
@@ -167,7 +167,7 @@ class BertSquadCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
index e257bedcf7c..616b4990208 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
@@ -283,7 +283,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/nlp/bert_thor/run_pretrain.py b/model_zoo/official/nlp/bert_thor/run_pretrain.py
index e9e04b67a88..42091cdafd4 100644
--- a/model_zoo/official/nlp/bert_thor/run_pretrain.py
+++ b/model_zoo/official/nlp/bert_thor/run_pretrain.py
@@ -87,7 +87,7 @@ def run_pretrain():
         ckpt_save_dir = args_opt.save_checkpoint_path + 'ckpt_' + str(rank) + '/'
 
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
         from mindspore.parallel._auto_parallel_context import auto_parallel_context
         if bert_net_cfg.num_hidden_layers == 12:
diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
index 81271a69920..98b1023f7cb 100644
--- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
@@ -301,7 +301,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py b/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
index cd0cc348192..dab75c99af1 100644
--- a/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
+++ b/model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py b/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
index aeb3cf309fe..0cc7e33276a 100644
--- a/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
+++ b/model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from .grad_reducer_thor import DistributedGradReducerThor
 
 momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -83,7 +83,7 @@ class THOR(Optimizer):
         self.damping = damping
         self.one = Tensor(1, mstype.int32)
         self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)
 
diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
index fd5974e162c..fee7aac2b34 100644
--- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
+++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 
 from .transformer import Transformer
 from .grad_clip import GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE, ClipGradients
@@ -251,7 +251,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/mass/train.py b/model_zoo/official/nlp/mass/train.py
index 408ece34cbc..80ed331b54f 100644
--- a/model_zoo/official/nlp/mass/train.py
+++ b/model_zoo/official/nlp/mass/train.py
@@ -234,7 +234,7 @@ def _setup_parallel_env(platform):
         parallel_mode=ParallelMode.DATA_PARALLEL,
         device_num=MultiAscend.get_group_size(),
         parameter_broadcast=True,
-        mirror_mean=True
+        gradients_mean=True
     )
 
 
diff --git a/model_zoo/official/nlp/tinybert/run_general_distill.py b/model_zoo/official/nlp/tinybert/run_general_distill.py
index 7257c5883e3..cf7d876ae81 100644
--- a/model_zoo/official/nlp/tinybert/run_general_distill.py
+++ b/model_zoo/official/nlp/tinybert/run_general_distill.py
@@ -81,7 +81,7 @@ def run_general_distill():
         rank = D.get_rank()
         save_ckpt_dir = save_ckpt_dir + '_ckpt_' + str(rank)
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           device_num=device_num)
     else:
         rank = 0
diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
index 4e595ec4e66..b4b927c1f9e 100644
--- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
+++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
@@ -318,7 +318,7 @@ class BertTrainCell(nn.Cell):
         self.grad_reducer = F.identity
         self.degree = 1
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             self.degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, self.degree)
         self.cast = P.Cast()
@@ -568,7 +568,7 @@ class BertEvaluationCell(nn.Cell):
         self.grad_reducer = F.identity
         self.degree = 1
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             self.degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, self.degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
index f26396d1f1b..a58a3fcce7b 100644
--- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py
+++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from mindspore.communication.management import get_group_size
 from mindspore import context
 from .transformer_model import TransformerModel
@@ -168,7 +168,7 @@ class TransformerTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
@@ -256,7 +256,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/model_zoo/official/nlp/transformer/train.py b/model_zoo/official/nlp/transformer/train.py
index 45207e19ee0..a1d040fad4c 100644
--- a/model_zoo/official/nlp/transformer/train.py
+++ b/model_zoo/official/nlp/transformer/train.py
@@ -118,7 +118,7 @@ def run_transformer_train():
     if args.distribute == "true":
         device_num = args.device_num
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                           parameter_broadcast=True, device_num=device_num)
         D.init()
         rank_id = args.device_id % device_num
diff --git a/model_zoo/official/recommend/deepfm/train.py b/model_zoo/official/recommend/deepfm/train.py
index db660737ea1..f3299a42d69 100644
--- a/model_zoo/official/recommend/deepfm/train.py
+++ b/model_zoo/official/recommend/deepfm/train.py
@@ -56,7 +56,7 @@ if __name__ == '__main__':
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=device_id)
         context.reset_auto_parallel_context()
-        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
         init()
         rank_id = int(os.environ.get('RANK_ID'))
     elif args_opt.device_target == "GPU":
@@ -65,7 +65,7 @@ if __name__ == '__main__':
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(device_num=get_group_size(),
                                           parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
+                                          gradients_mean=True)
         rank_id = get_rank()
     else:
         print("Unsupported device_target ", args_opt.device_target)
diff --git a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
index 6a03d6feff1..e579b26e1c8 100644
--- a/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
@@ -367,7 +367,7 @@ class TrainStepWrap(nn.Cell):
         self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL)
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = context.get_auto_parallel_context("device_num")
             self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree)
             self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
index f20015c8079..24385da6b31 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
@@ -147,8 +147,8 @@ if __name__ == "__main__":
     init()
     if wide_deep_config.host_device_mix == 1:
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, mirror_mean=True)
+            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=True)
     else:
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
+            parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True)
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
index a460ec42ff2..7aed065b9ec 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
@@ -119,7 +119,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target,
                         save_graphs=True)
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
 
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py b/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
index b4f4e3c59e6..232488ceffd 100644
--- a/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
+++ b/model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
@@ -119,7 +119,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=wide_deep_config.device_target)
 
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
 
     train_and_eval(wide_deep_config)
diff --git a/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py b/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
index 7d9b566a885..246fa2a82aa 100644
--- a/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
+++ b/model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
@@ -554,7 +554,7 @@ class TrainStepWrap(nn.Cell):
                                               ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = context.get_auto_parallel_context("device_num")
             self.grad_reducer_w = DistributedGradReducer(
                 self.optimizer_w.parameters, mean, degree)
diff --git a/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py b/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
index 4383a930715..0f278d16680 100644
--- a/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
+++ b/model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
@@ -113,6 +113,6 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target="Davinci",
                         save_graphs=True)
     init()
-    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                       device_num=get_group_size())
     train_and_eval(wide_and_deep_config)
diff --git a/tests/st/auto_parallel/resnet50_expand_loss.py b/tests/st/auto_parallel/resnet50_expand_loss.py
index fc6d00ff612..de78b907288 100644
--- a/tests/st/auto_parallel/resnet50_expand_loss.py
+++ b/tests/st/auto_parallel/resnet50_expand_loss.py
@@ -34,7 +34,7 @@ from mindspore.context import ParallelMode
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
 context.set_context(device_id=int(os.getenv('DEVICE_ID')))
 init()
-context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)
+context.set_auto_parallel_context(gradients_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)
 
 np.random.seed(10)
 
diff --git a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
index 857b9579c3f..1f46eed9f1a 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
@@ -31,7 +31,7 @@ from src.config import WideDeepConfig
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
-context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, mirror_mean=True)
+context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=True)
 init()
 
 
diff --git a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
index 6944b46b2d5..3043055f2a5 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
@@ -24,7 +24,7 @@ from mindspore.nn.optim import Adam, FTRL
 # from mindspore.nn.metrics import Metric
 from mindspore.common.initializer import Uniform, initializer
 # from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
-from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
 from mindspore.context import ParallelMode
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.communication.management import get_group_size
@@ -299,7 +299,7 @@ class TrainStepWrap(nn.Cell):
         self.reducer_flag = parallel_mode in (ParallelMode.DATA_PARALLEL,
                                               ParallelMode.HYBRID_PARALLEL)
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer_w = DistributedGradReducer(self.optimizer_w.parameters, mean, degree)
             self.grad_reducer_d = DistributedGradReducer(self.optimizer_d.parameters, mean, degree)
diff --git a/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py b/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
index b76f9d28ef2..77d301f556d 100644
--- a/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
+++ b/tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
@@ -30,7 +30,7 @@ from src.config import WideDeepConfig
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)
-context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
+context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
 init()
 
 
diff --git a/tests/st/model_zoo_tests/yolov3/src/yolov3.py b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
index 2fe4c8f07c6..1fecaba83e2 100644
--- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py
+++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             if auto_parallel_context().get_device_num_is_set():
                 degree = context.get_auto_parallel_context("device_num")
             else:
diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py
index d4c08b9e2ed..0869e459c8c 100644
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@@ -78,7 +78,7 @@ def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):
 
 
 def test_lenet_nccl():
-    context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
+    context.set_auto_parallel_context(parallel_mode="data_parallel", gradients_mean=True, device_num=get_group_size())
     net = LeNet()
     net.set_train()
 
diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py
index 0a0675a8051..cd60334f746 100644
--- a/tests/st/networks/models/bert/src/bert_for_pre_training.py
+++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py
@@ -279,7 +279,7 @@ class BertTrainOneStepCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
 
diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py
index bcea50dc3b4..9adda84731d 100644
--- a/tests/st/networks/models/bert/src/utils.py
+++ b/tests/st/networks/models/bert/src/utils.py
@@ -61,7 +61,7 @@ class BertFinetuneCell(nn.Cell):
             self.reducer_flag = True
         self.grad_reducer = None
         if self.reducer_flag:
-            mean = context.get_auto_parallel_context("mirror_mean")
+            mean = context.get_auto_parallel_context("gradients_mean")
             degree = get_group_size()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE)
diff --git a/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py b/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
index 97d2cb22702..02c37b1127d 100644
--- a/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
@@ -130,7 +130,7 @@ class DistributedGradReducerThor(Cell):
         >>>                                ParallelMode.HYBRID_PARALLEL]:
         >>>             self.reducer_flag = True
         >>>         if self.reducer_flag:
-        >>>             mean = context.get_auto_parallel_context("mirror_mean")
+        >>>             mean = context.get_auto_parallel_context("gradients_mean")
         >>>             if mean.get_device_num_is_set():
         >>>                 degree = context.get_auto_parallel_context("device_num")
         >>>             else:
diff --git a/tests/st/networks/models/resnet50/src_thor/thor.py b/tests/st/networks/models/resnet50/src_thor/thor.py
index d4469a58271..b5b1faa1d50 100644
--- a/tests/st/networks/models/resnet50/src_thor/thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/thor.py
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from mindspore.parallel._utils import _get_device_num, _get_gradients_mean
 from .grad_reducer_thor import DistributedGradReducerThor
 
 
@@ -87,7 +87,7 @@ class THOR(Optimizer):
                              1.0 / 196, 1.0 / 196, 1.0 / 196,
                             1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
                              1.0]
-        mean = _get_mirror_mean()
+        mean = _get_gradients_mean()
         degree = _get_device_num()
         self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
         self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
index 7be7ef89a0c..28ed8b54893 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
@@ -137,7 +137,7 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
 
     os.environ['RANK_SIZE'] = str(device_num)
     if enable_hccl:
         context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True, parameter_broadcast=True)
+                                          gradients_mean=True, parameter_broadcast=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
         init()
 
@@ -240,7 +240,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
 
     os.environ['RANK_SIZE'] = str(device_num)
     if enable_hccl:
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, - mirror_mean=True, parameter_broadcast=True) + gradients_mean=True, parameter_broadcast=True) auto_parallel_context().set_all_reduce_fusion_split_indices([107], "hccl_world_groupsum1") auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum2") auto_parallel_context().set_all_reduce_fusion_split_indices([27], "hccl_world_groupsum3") diff --git a/tests/st/ps/multi_full_ps/test_multi_full_ps.py b/tests/st/ps/multi_full_ps/test_multi_full_ps.py index e33212ce983..99e8bb2322a 100644 --- a/tests/st/ps/multi_full_ps/test_multi_full_ps.py +++ b/tests/st/ps/multi_full_ps/test_multi_full_ps.py @@ -97,7 +97,8 @@ if __name__ == "__main__": criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) if device_target == "GPU": - context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size()) + context.set_auto_parallel_context(parallel_mode="data_parallel", gradients_mean=True, + device_num=get_group_size()) net_with_criterion = WithLossCell(network, criterion) train_network = TrainOneStepCell(net_with_criterion, net_opt) train_network.set_train() diff --git a/tests/ut/python/communication/test_data_parallel_dense.py b/tests/ut/python/communication/test_data_parallel_dense.py index d2fdf2d1589..c80f4c5a2f0 100644 --- a/tests/ut/python/communication/test_data_parallel_dense.py +++ b/tests/ut/python/communication/test_data_parallel_dense.py @@ -58,7 +58,7 @@ def test_data_parallel_dense(): """test_data_parallel_dense""" context.set_context(mode=context.GRAPH_MODE) context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, device_num=8) inp = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) label = Tensor(np.zeros([32, 768]).astype(np.float32)) net = DenseMMNet() diff --git a/tests/ut/python/communication/test_data_parallel_lenet.py b/tests/ut/python/communication/test_data_parallel_lenet.py index 0897023a69e..2908f89b8f4 100755 --- a/tests/ut/python/communication/test_data_parallel_lenet.py +++ b/tests/ut/python/communication/test_data_parallel_lenet.py @@ -80,7 +80,7 @@ def test_lenet5_train_step_training_pynative(): context.set_context(mode=context.PYNATIVE_MODE) context.reset_auto_parallel_context() context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, - device_num=8, mirror_mean=True) + device_num=8, gradients_mean=True) predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) label = Tensor(np.zeros([1, 10]).astype(np.float32)) DatasetLenet(predict, label, 2) diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py index cfed2beb27e..d311f0b40bb 100644 --- a/tests/ut/python/model/test_mix_precision.py +++ b/tests/ut/python/model/test_mix_precision.py @@ -97,7 +97,7 @@ def test_on_momentum(): def test_data_parallel_with_cast(): """test_data_parallel_with_cast""" context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, device_num=8) predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01) 
label = Tensor(np.zeros([1, 10]).astype(np.float32)) net = LeNet5() diff --git a/tests/ut/python/parallel/test_optimizer.py b/tests/ut/python/parallel/test_optimizer.py index 0d6de9526a1..112069f5f19 100644 --- a/tests/ut/python/parallel/test_optimizer.py +++ b/tests/ut/python/parallel/test_optimizer.py @@ -46,7 +46,7 @@ class Net(nn.Cell): def test_dense_gen_graph(): context.set_context(mode=context.GRAPH_MODE) context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, mirror_mean=True, device_num=8) + context.set_auto_parallel_context(parallel_mode=ParallelMode.HYBRID_PARALLEL, gradients_mean=True, device_num=8) init() network = Net(512, 128) diff --git a/tests/ut/python/parallel/test_set_auto_parallel_context.py b/tests/ut/python/parallel/test_set_auto_parallel_context.py index ff69b3bee84..56957847405 100644 --- a/tests/ut/python/parallel/test_set_auto_parallel_context.py +++ b/tests/ut/python/parallel/test_set_auto_parallel_context.py @@ -20,17 +20,17 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context def test_set_auto_parallel_context(): - context.set_auto_parallel_context(device_num=4, global_rank=3, mirror_mean=True, gradient_fp32_sync=False, + context.set_auto_parallel_context(device_num=4, global_rank=3, gradients_mean=True, gradient_fp32_sync=False, parallel_mode="auto_parallel", parameter_broadcast=False) device_num = context.get_auto_parallel_context("device_num") global_rank = context.get_auto_parallel_context("global_rank") - mirror_mean = context.get_auto_parallel_context("mirror_mean") + gradients_mean = context.get_auto_parallel_context("gradients_mean") gradient_fp32_sync = context.get_auto_parallel_context("gradient_fp32_sync") parallel_mode = context.get_auto_parallel_context("parallel_mode") parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") assert device_num == 4 assert global_rank == 3 - assert mirror_mean + assert gradients_mean assert not gradient_fp32_sync assert parallel_mode == "auto_parallel" assert not parameter_broadcast @@ -45,9 +45,9 @@ def test_set_auto_parallel_context(): global_rank = auto_parallel_context().get_global_rank() assert global_rank == 4 - auto_parallel_context().set_mirror_mean(True) - mirror_mean = auto_parallel_context().get_mirror_mean() - assert mirror_mean + auto_parallel_context().set_gradients_mean(True) + gradients_mean = auto_parallel_context().get_gradients_mean() + assert gradients_mean auto_parallel_context().set_gradient_fp32_sync(False) gradient_fp32_sync = auto_parallel_context().get_gradient_fp32_sync() @@ -86,7 +86,7 @@ def test_reset_auto_parallel_context(): context.reset_auto_parallel_context() device_num = context.get_auto_parallel_context("device_num") global_rank = context.get_auto_parallel_context("global_rank") - mirror_mean = context.get_auto_parallel_context("mirror_mean") + gradients_mean = context.get_auto_parallel_context("gradients_mean") gradient_fp32_sync = context.get_auto_parallel_context("gradient_fp32_sync") parallel_mode = context.get_auto_parallel_context("parallel_mode") parameter_broadcast = context.get_auto_parallel_context("parameter_broadcast") @@ -94,7 +94,7 @@ def test_reset_auto_parallel_context(): parameter_broadcast_is_set = auto_parallel_context().get_parameter_broadcast_is_set() assert device_num == 1 assert global_rank == 0 - assert not mirror_mean + assert not gradients_mean assert gradient_fp32_sync assert parallel_mode == "stand_alone" assert not parameter_broadcast diff 
--git a/tests/ut/python/parallel/test_two_matmul.py b/tests/ut/python/parallel/test_two_matmul.py index 854df0ca71f..cf91af463d5 100644 --- a/tests/ut/python/parallel/test_two_matmul.py +++ b/tests/ut/python/parallel/test_two_matmul.py @@ -65,7 +65,7 @@ def test_two_matmul(): out = self.matmul2(out, b) return out - context.set_auto_parallel_context(device_num=8, global_rank=0, mirror_mean=True) + context.set_auto_parallel_context(device_num=8, global_rank=0, gradients_mean=True) strategy1 = ((4, 2), (2, 1)) strategy2 = ((2, 4), (4, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) @@ -90,7 +90,7 @@ def test_two_matmul_repeated_calculation1(): out = self.matmul2(out, b) return out - context.set_auto_parallel_context(device_num=64, global_rank=5, mirror_mean=True) + context.set_auto_parallel_context(device_num=64, global_rank=5, gradients_mean=True) strategy1 = ((2, 4), (4, 8)) strategy2 = ((1, 1), (1, 1)) net = GradWrap(NetWithLoss(Net(strategy1, strategy2))) diff --git a/tests/ut/python/train/test_amp.py b/tests/ut/python/train/test_amp.py index 074056f3267..03102025046 100644 --- a/tests/ut/python/train/test_amp.py +++ b/tests/ut/python/train/test_amp.py @@ -148,7 +148,7 @@ def test_compile_model_train_O2_parallel(): dataset_shapes = ((16, 16), (16, 16)) context.set_auto_parallel_context( global_rank=0, device_num=8, - mirror_mean=True, parameter_broadcast=True, + gradients_mean=True, parameter_broadcast=True, parallel_mode=ParallelMode.DATA_PARALLEL) dataset = MindDataSet(dataset_types, dataset_shapes)
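-- 
Usage note: after this rename, callers set and query gradient averaging through
the "gradients_mean" key instead of "mirror_mean". A minimal sketch, assuming a
launched multi-device job; the data-parallel mode and group-size call below are
illustrative values taken from the tests in this patch, not new API:

    from mindspore import context
    from mindspore.context import ParallelMode
    from mindspore.communication.management import init, get_group_size

    init()  # set up the communication group before configuring parallelism
    # "gradients_mean" replaces the old "mirror_mean" kwarg and context key
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      gradients_mean=True,
                                      device_num=get_group_size())
    assert context.get_auto_parallel_context("gradients_mean")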