From 650dad47fb0e01ec53d8f072b516b3730583b794 Mon Sep 17 00:00:00 2001
From: zhangyi
Date: Fri, 2 Sep 2022 16:46:41 +0800
Subject: [PATCH] fix the inconsistencies in files 0902

---
 .../mindspore.amp.DynamicLossScaleManager.rst    |  5 ++---
 .../amp/mindspore.amp.DynamicLossScaler.rst      | 14 +++++++-------
 .../amp/mindspore.amp.FixedLossScaleManager.rst  |  2 +-
 .../amp/mindspore.amp.LossScaleManager.rst       |  2 +-
 .../api_python/amp/mindspore.amp.LossScaler.rst  | 14 +++++++-------
 .../amp/mindspore.amp.StaticLossScaler.rst       | 16 ++++++++--------
 .../dataset/mindspore.dataset.RandomDataset.rst  |  2 ++
 docs/api/api_python/nn/mindspore.nn.Adadelta.rst |  2 +-
 .../nn/mindspore.nn.AdaptiveAvgPool3d.rst        |  4 ++--
 .../nn/mindspore.nn.AdaptiveMaxPool3d.rst        |  2 +-
 .../nn/mindspore.nn.Conv2dTranspose.rst          |  2 +-
 docs/api/api_python/nn/mindspore.nn.DiceLoss.rst |  4 ++--
 .../mindspore.nn.DynamicLossScaleUpdateCell.rst  |  2 +-
 .../nn/mindspore.nn.FixedLossScaleUpdateCell.rst |  2 +-
 docs/api/api_python/nn/mindspore.nn.GELU.rst     |  5 ++---
 docs/api/api_python/nn/mindspore.nn.RMSProp.rst  |  4 ++--
 .../nn/mindspore.nn.SampledSoftmaxLoss.rst       |  4 ++--
 ...indspore.nn.SoftmaxCrossEntropyWithLogits.rst |  4 ++--
 ...indspore.nn.TrainOneStepWithLossScaleCell.rst |  2 +-
 ...mindspore.nn.transformer.CrossEntropyLoss.rst |  2 +-
 ...spore.nn.transformer.FixedSparseAttention.rst |  2 +-
 .../mindspore.nn.transformer.VocabEmbedding.rst  |  2 +-
 .../mindspore/dataset/vision/transforms.py       |  8 ++++----
 .../python/mindspore/nn/layer/activation.py      |  6 +++---
 mindspore/python/mindspore/nn/layer/math.py      |  4 ++--
 mindspore/python/mindspore/nn/layer/padding.py   |  2 +-
 mindspore/python/mindspore/nn/loss/loss.py       |  2 +-
 .../python/mindspore/nn/transformer/layers.py    |  2 +-
 .../mindspore/nn/transformer/transformer.py      |  4 ++--
 mindspore/python/mindspore/nn/wrap/loss_scale.py |  4 ++--
 .../python/mindspore/ops/function/array_func.py  |  2 +-
 .../python/mindspore/ops/function/debug_func.py  |  2 +-
 mindspore/python/mindspore/train/amp.py          |  1 +
 .../python/mindspore/train/loss_scale_manager.py |  8 ++++----
 34 files changed, 72 insertions(+), 71 deletions(-)

diff --git a/docs/api/api_python/amp/mindspore.amp.DynamicLossScaleManager.rst b/docs/api/api_python/amp/mindspore.amp.DynamicLossScaleManager.rst
index d0deb4322e9..982d7804b1a 100644
--- a/docs/api/api_python/amp/mindspore.amp.DynamicLossScaleManager.rst
+++ b/docs/api/api_python/amp/mindspore.amp.DynamicLossScaleManager.rst
@@ -26,10 +26,9 @@ mindspore.amp.DynamicLossScaleManager

     .. py:method:: get_update_cell()

-        返回用于更新梯度放大系数的 `Cell` 实例,:class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该实例。
+        返回用于更新梯度放大系数的 :class:`mindspore.nn.Cell` 实例,:class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该实例。

         返回:
-
             :class:`mindspore.nn.DynamicLossScaleUpdateCell` 实例,用于更新梯度放大系数。

     .. py:method:: update_loss_scale(overflow)

         根据溢出状态更新梯度放大系数。如果发生溢出,减小梯度放大系数,否则增大梯度放大系数。

         参数:
-            **overflow** (bool) - 表示是否溢出。
+            - **overflow** (bool) - 表示是否溢出。
diff --git a/docs/api/api_python/amp/mindspore.amp.DynamicLossScaler.rst b/docs/api/api_python/amp/mindspore.amp.DynamicLossScaler.rst
index b0dad366ec5..35e6c587ee3 100644
--- a/docs/api/api_python/amp/mindspore.amp.DynamicLossScaler.rst
+++ b/docs/api/api_python/amp/mindspore.amp.DynamicLossScaler.rst
@@ -15,6 +15,13 @@ mindspore.amp.DynamicLossScaler
         - **scale_factor** (int) - 放大/缩小倍数。
         - **scale_window** (int) - 无溢出时的连续正常step的最大数量。

+    .. py:method:: adjust(grads_finite)
+
+        根据梯度是否为有效值(无溢出)对 `scale_value` 进行调整。
+
+        参数:
+            - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。
+
     .. py:method:: scale(inputs)

         根据 `scale_value` 放大inputs。

@@ -28,10 +35,3 @@ mindspore.amp.DynamicLossScaler

         参数:
             - **inputs** (Union(Tensor, tuple(Tensor))) - 损失值或梯度。
-
-    .. py:method:: adjust(grads_finite)
-
-        根据梯度是否为有效值(无溢出)对 `scale_value` 进行调整。
-
-        参数:
-            - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。
diff --git a/docs/api/api_python/amp/mindspore.amp.FixedLossScaleManager.rst b/docs/api/api_python/amp/mindspore.amp.FixedLossScaleManager.rst
index 0e4241f0a1a..f7e9b9e4475 100644
--- a/docs/api/api_python/amp/mindspore.amp.FixedLossScaleManager.rst
+++ b/docs/api/api_python/amp/mindspore.amp.FixedLossScaleManager.rst
@@ -25,7 +25,7 @@ mindspore.amp.FixedLossScaleManager

     .. py:method:: get_update_cell()

-        返回用于更新 `loss_scale` 值的 `Cell` 实例, :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该实例。该类使用固定的梯度放大系数,因此该实例不执行任何操作。
+        返回用于更新 `loss_scale` 值的 :class:`mindspore.nn.Cell` 实例, :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该实例。该类使用固定的梯度放大系数,因此该实例不执行任何操作。

         返回:
             None或 `Cell` 。当 `drop_overflow_update` 为True时,返回 :class:`mindspore.nn.FixedLossScaleUpdateCell` 实例,当 `drop_overflow_update` 为False时,返回None。
diff --git a/docs/api/api_python/amp/mindspore.amp.LossScaleManager.rst b/docs/api/api_python/amp/mindspore.amp.LossScaleManager.rst
index 2dd780d5acc..125deef6f43 100644
--- a/docs/api/api_python/amp/mindspore.amp.LossScaleManager.rst
+++ b/docs/api/api_python/amp/mindspore.amp.LossScaleManager.rst
@@ -15,7 +15,7 @@ mindspore.amp.LossScaleManager

     .. py:method:: get_update_cell()

-        获取用于更新梯度放大系数的Cell实例。
+        获取用于更新梯度放大系数的 :class:`mindspore.nn.Cell` 实例。

     .. py:method:: update_loss_scale(overflow)

diff --git a/docs/api/api_python/amp/mindspore.amp.LossScaler.rst b/docs/api/api_python/amp/mindspore.amp.LossScaler.rst
index 10a477f2af1..60a557c78da 100644
--- a/docs/api/api_python/amp/mindspore.amp.LossScaler.rst
+++ b/docs/api/api_python/amp/mindspore.amp.LossScaler.rst
@@ -10,6 +10,13 @@ mindspore.amp.LossScaler
     .. note::
         - 这是一个实验性接口,后续可能删除或修改。

+    .. py:method:: adjust(grads_finite)
+
+        根据梯度是否为有效值(无溢出)对 `scale_value` 进行调整。
+
+        参数:
+            - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。
+
     .. py:method:: scale(inputs)

         对inputs进行scale,`inputs \*= scale_value`。

@@ -23,10 +30,3 @@ mindspore.amp.LossScaler

         参数:
             - **inputs** (Union(Tensor, tuple(Tensor))) - 损失值或梯度。
-
-    .. py:method:: adjust(grads_finite)
-
-        根据梯度是否为有效值(无溢出)对 `scale_value` 进行调整。
-
-        参数:
-            - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。
diff --git a/docs/api/api_python/amp/mindspore.amp.StaticLossScaler.rst b/docs/api/api_python/amp/mindspore.amp.StaticLossScaler.rst
index 217d0bc39fe..fedde28cbe2 100644
--- a/docs/api/api_python/amp/mindspore.amp.StaticLossScaler.rst
+++ b/docs/api/api_python/amp/mindspore.amp.StaticLossScaler.rst
@@ -3,7 +3,7 @@ mindspore.amp.StaticLossScaler

 .. py:class:: mindspore.amp.StaticLossScaler(scale_value)

-    损失缩放系数不变的管理器。
+    Static Loss scale类。用固定的常数来scale和unscale损失或梯度。

     .. note::
         - 这是一个实验性接口,后续可能删除或修改。
@@ -11,6 +11,13 @@ mindspore.amp.StaticLossScaler
     参数:
         - **scale_value** (Union(float, int)) - 缩放系数。

+    .. py:method:: adjust(grads_finite)
+
+        `scale_value` 值固定。
+
+        参数:
+            - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。
+
     .. py:method:: scale(inputs)

         对inputs进行scale,`inputs \*= scale_value`。

@@ -24,10 +31,3 @@ mindspore.amp.StaticLossScaler

         参数:
            - **inputs** (Union(Tensor, tuple(Tensor))) - 损失值或梯度。
-
-    .. 
py:method:: adjust(grads_finite) - - `scale_value` 值固定。 - - 参数: - - **grads_finite** (Tensor) - bool类型的标量Tensor,表示梯度是否为有效值(无溢出)。 diff --git a/docs/api/api_python/dataset/mindspore.dataset.RandomDataset.rst b/docs/api/api_python/dataset/mindspore.dataset.RandomDataset.rst index 3058bd6e67f..e38f86b1dea 100644 --- a/docs/api/api_python/dataset/mindspore.dataset.RandomDataset.rst +++ b/docs/api/api_python/dataset/mindspore.dataset.RandomDataset.rst @@ -21,6 +21,8 @@ mindspore.dataset.RandomDataset .. include:: mindspore.dataset.Dataset.rst + .. include:: mindspore.dataset.Dataset.b.rst + .. include:: mindspore.dataset.Dataset.d.rst .. include:: mindspore.dataset.Dataset.e.rst diff --git a/docs/api/api_python/nn/mindspore.nn.Adadelta.rst b/docs/api/api_python/nn/mindspore.nn.Adadelta.rst index 8d43782399c..7e5e8e49283 100644 --- a/docs/api/api_python/nn/mindspore.nn.Adadelta.rst +++ b/docs/api/api_python/nn/mindspore.nn.Adadelta.rst @@ -33,7 +33,7 @@ mindspore.nn.Adadelta .. include:: mindspore.nn.optim_group_gc.rst .. include:: mindspore.nn.optim_group_order.rst - - **learning_rate** (Union[float, Tensor, Iterable, LearningRateSchedule]) - 默认值:1.0。 + - **learning_rate** (Union[float, int, Tensor, Iterable, LearningRateSchedule]) - 默认值:1.0。 .. include:: mindspore.nn.optim_arg_dynamic_lr.rst diff --git a/docs/api/api_python/nn/mindspore.nn.AdaptiveAvgPool3d.rst b/docs/api/api_python/nn/mindspore.nn.AdaptiveAvgPool3d.rst index d3c297d98d5..f734beaf8c6 100644 --- a/docs/api/api_python/nn/mindspore.nn.AdaptiveAvgPool3d.rst +++ b/docs/api/api_python/nn/mindspore.nn.AdaptiveAvgPool3d.rst @@ -5,9 +5,9 @@ mindspore.nn.AdaptiveAvgPool3d 3维自适应平均池化。 - 对输入Tensor,提供3维的自适应平均池化操作,即对于输入任何尺寸,指定输出的尺寸都为 :math:`(D, H, W)`。但是输入和输出特征的数目不会变化。 + 对输入Tensor,提供3维的自适应平均池化操作。也就是说对于输入任何尺寸,指定输出的尺寸都为 :math:`(D, H, W)`。但是输入和输出特征的数目不会变化。 - 假设输入 `x` 最后3维大小分别为 :math:`(inD, inH, inW)`,则输出的最后3维大小分别为 :math:`(outD, outH, outW)`,运算如下: + 假设输入 `x` 最后3维大小分别为 :math:`(inD, inH, inW)`,则输出的最后3维大小分别为 :math:`(outD, outH, outW)`。运算如下: .. 
math:: \begin{array}{ll} \\ diff --git a/docs/api/api_python/nn/mindspore.nn.AdaptiveMaxPool3d.rst b/docs/api/api_python/nn/mindspore.nn.AdaptiveMaxPool3d.rst index f6dcca9d661..4a670a24648 100644 --- a/docs/api/api_python/nn/mindspore.nn.AdaptiveMaxPool3d.rst +++ b/docs/api/api_python/nn/mindspore.nn.AdaptiveMaxPool3d.rst @@ -5,7 +5,7 @@ mindspore.nn.AdaptiveMaxPool3d 3维自适应最大值池化。 - 对于任何输入尺寸,输出的大小为 :math:`(D, H, W)` ,其中输出特征的数量与输入特征的数量相同。 + 对于任何输入尺寸,输出的大小为 :math:`(D, H, W)` 。输出特征的数量与输入特征的数量相同。 参数: - **output_size** (Union[int, tuple]) - 表示输出特征图的尺寸,输入可以是tuple :math:`(D, H, W)`,也可以是一个int值D来表示输出尺寸为 :math:`(D, D, D)` 。:math:`D` , :math:`H` 和 :math:`W` 可以是int型整数或者None,其中None表示输出大小与对应的输入的大小相同。 diff --git a/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst b/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst index af4aa9586f4..374c40a24f3 100644 --- a/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst +++ b/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst @@ -13,7 +13,7 @@ mindspore.nn.Conv2dTranspose 参数: - **in_channels** (int) - Conv2dTranspose层输入Tensor的空间维度。 - - **out_channels** (dict) - Conv2dTranspose层输出Tensor的空间维度。 + - **out_channels** (int) - Conv2dTranspose层输出Tensor的空间维度。 - **kernel_size** (Union[int, tuple[int]]) - 指定二维卷积核的高度和宽度。数据类型为整型或两个整型的tuple。一个整数表示卷积核的高度和宽度均为该值。两个整数的tuple分别表示卷积核的高度和宽度。 - **stride** (Union[int, tuple[int]]) - 二维卷积核的移动步长。数据类型为整型或两个整型的tuple。一个整数表示在高度和宽度方向的移动步长均为该值。两个整数的tuple分别表示在高度和宽度方向的移动步长。默认值:1。 - **pad_mode** (str) - 指定填充模式。可选值为"same"、"valid"、"pad"。默认值:"same"。 diff --git a/docs/api/api_python/nn/mindspore.nn.DiceLoss.rst b/docs/api/api_python/nn/mindspore.nn.DiceLoss.rst index 1d0d5b98ca4..772f5637071 100644 --- a/docs/api/api_python/nn/mindspore.nn.DiceLoss.rst +++ b/docs/api/api_python/nn/mindspore.nn.DiceLoss.rst @@ -3,7 +3,7 @@ mindspore.nn.DiceLoss .. py:class:: mindspore.nn.DiceLoss(smooth=1e-5) - Dice系数是一个集合相似性loss,用于计算两个样本之间的相似性。当分割结果最好时,Dice系数的值为1,当分割结果最差时,Dice系数的值为0。 + Dice系数是一个集合相似性loss,用于计算两个样本之间的相似性。当分割结果最好时,Dice系数的值为1,当分割结果最差时,Dice系数的值为0。 Dice系数表示两个对象之间的面积与总面积的比率。 函数如下: @@ -11,7 +11,7 @@ mindspore.nn.DiceLoss .. 
math:: dice = 1 - \frac{2 * |pred \bigcap true|}{|pred| + |true| + smooth} - :math:`pred` 表示 `logits` , :math:`true` 表示 `labels` 。 + :math:`pred` 表示 `logits` ,:math:`true` 表示 `labels` 。 参数: - **smooth** (float) - 将添加到分母中,以提高数值稳定性的参数。取值大于0。默认值:1e-5。 diff --git a/docs/api/api_python/nn/mindspore.nn.DynamicLossScaleUpdateCell.rst b/docs/api/api_python/nn/mindspore.nn.DynamicLossScaleUpdateCell.rst index fb2eb44c79c..b2282128ec9 100644 --- a/docs/api/api_python/nn/mindspore.nn.DynamicLossScaleUpdateCell.rst +++ b/docs/api/api_python/nn/mindspore.nn.DynamicLossScaleUpdateCell.rst @@ -7,7 +7,7 @@ mindspore.nn.DynamicLossScaleUpdateCell 使用混合精度功能进行训练时,初始损失缩放系数值为 `loss_scale_value`。在每个训练步骤中,当出现溢出时,通过计算公式 `loss_scale`/`scale_factor` 减小损失缩放系数。如果连续 `scale_window` 步(step)未溢出,则将通过 `loss_scale` * `scale_factor` 增大损失缩放系数。 - 该类是 :class:`mindspore.DynamicLossScaleManager` 的 `get_update_cell` 方法的返回值。训练过程中,类 :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该Cell来更新损失缩放系数。 + 该类是 :class:`mindspore.amp.DynamicLossScaleManager` 的 `get_update_cell` 方法的返回值。训练过程中,类 :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该Cell来更新损失缩放系数。 参数: - **loss_scale_value** (float) - 初始的损失缩放系数。 diff --git a/docs/api/api_python/nn/mindspore.nn.FixedLossScaleUpdateCell.rst b/docs/api/api_python/nn/mindspore.nn.FixedLossScaleUpdateCell.rst index fa894da54a0..636a50893c5 100644 --- a/docs/api/api_python/nn/mindspore.nn.FixedLossScaleUpdateCell.rst +++ b/docs/api/api_python/nn/mindspore.nn.FixedLossScaleUpdateCell.rst @@ -5,7 +5,7 @@ mindspore.nn.FixedLossScaleUpdateCell 固定损失缩放系数的神经元。 - 该类是 :class:`mindspore.FixedLossScaleManager` 的 `get_update_cell` 方法的返回值。训练过程中,类 :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该Cell。 + 该类是 :class:`mindspore.amp.FixedLossScaleManager` 的 `get_update_cell` 方法的返回值。训练过程中,类 :class:`mindspore.nn.TrainOneStepWithLossScaleCell` 会调用该Cell。 参数: - **loss_scale_value** (float) - 初始损失缩放系数。 diff --git a/docs/api/api_python/nn/mindspore.nn.GELU.rst b/docs/api/api_python/nn/mindspore.nn.GELU.rst index 912011c8ac3..495342e6c56 100644 --- a/docs/api/api_python/nn/mindspore.nn.GELU.rst +++ b/docs/api/api_python/nn/mindspore.nn.GELU.rst @@ -12,20 +12,19 @@ mindspore.nn.GELU .. math:: GELU(x_i) = x_i*P(X < x_i), - 其中 :math:`P` 是标准高斯分布的累积分布函数, :math:`x_i` 是输入的元素。 GELU相关图参见 `GELU `_ 。 参数: - - **approximate** (bool) - 是否启用approximation,默认值:True。如果approximate的值为True,则高斯误差线性激活函数为: + - **approximate** (bool) - 是否启用approximation,默认值:True。如果approximate的值为True,则高斯误差线性激活函数为: :math:`0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` , 否则为: :math:`x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`,其中P(X) ~ N(0, 1) 。 输入: - - **x** (Tensor) - 用于计算GELU的Tensor。数据类型为float16或float32。shape是 :math:`(N,*)` , :math:`*` 表示任意的附加维度数。 + - **x** (Tensor) - 用于计算GELU的Tensor。数据类型为float16或float32。shape是 :math:`(N,*)` , :math:`*` 表示任意的附加维度数。 输出: Tensor,具有与 `x` 相同的数据类型和shape。 diff --git a/docs/api/api_python/nn/mindspore.nn.RMSProp.rst b/docs/api/api_python/nn/mindspore.nn.RMSProp.rst index fa2fd12d92d..6b737aef15a 100644 --- a/docs/api/api_python/nn/mindspore.nn.RMSProp.rst +++ b/docs/api/api_python/nn/mindspore.nn.RMSProp.rst @@ -5,7 +5,7 @@ mindspore.nn.RMSProp 均方根传播(RMSProp)算法的实现。 - 根据RMSProp算法更新 `params`,算法详见 [http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf] 第29页。 + 根据RMSProp算法更新 `params`。算法详见 [http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf] 第29页。 公式如下: @@ -55,7 +55,7 @@ mindspore.nn.RMSProp .. include:: mindspore.nn.optim_group_gc.rst .. 
include:: mindspore.nn.optim_group_order.rst - - **learning_rate** (Union[float, Tensor, Iterable, LearningRateSchedule]) - 默认值:0.1。 + - **learning_rate** (Union[float, int, Tensor, Iterable, LearningRateSchedule]) - 默认值:0.1。 .. include:: mindspore.nn.optim_arg_dynamic_lr.rst diff --git a/docs/api/api_python/nn/mindspore.nn.SampledSoftmaxLoss.rst b/docs/api/api_python/nn/mindspore.nn.SampledSoftmaxLoss.rst index dedf9a90565..9dc3d3ccd8d 100644 --- a/docs/api/api_python/nn/mindspore.nn.SampledSoftmaxLoss.rst +++ b/docs/api/api_python/nn/mindspore.nn.SampledSoftmaxLoss.rst @@ -5,12 +5,12 @@ mindspore.nn.SampledSoftmaxLoss 抽样交叉熵损失函数。 - 一般在类别数很大时使用,可加速训练以交叉熵为损失函数的分类器。 + 一般在类别数很大时使用。可加速训练以交叉熵为损失函数的分类器。 参数: - **num_sampled** (int) - 抽样的类别数。 - **num_classes** (int) - 类别总数。 - - **num_true** (int):每个训练样本的类别数。默认值:1。 + - **num_true** (int) - 每个训练样本的类别数。默认值:1。 - **sampled_values** (Union[list, tuple]) - 抽样候选值。由 `*CandidateSampler` 函数返回的(`sampled_candidates`, `true_expected_count` , `sampled_expected_count`)的list或tuple。如果默认值为None,则应用 `UniformCandidateSampler` 。 - **remove_accidental_hits** (bool) - 是否移除抽样中的目标类等于标签的情况。默认值:True。 - **seed** (int) - 抽样的随机种子。默认值:0。 diff --git a/docs/api/api_python/nn/mindspore.nn.SoftmaxCrossEntropyWithLogits.rst b/docs/api/api_python/nn/mindspore.nn.SoftmaxCrossEntropyWithLogits.rst index 0fc0332a2ac..fcc236edda4 100644 --- a/docs/api/api_python/nn/mindspore.nn.SoftmaxCrossEntropyWithLogits.rst +++ b/docs/api/api_python/nn/mindspore.nn.SoftmaxCrossEntropyWithLogits.rst @@ -7,13 +7,13 @@ mindspore.nn.SoftmaxCrossEntropyWithLogits 使用交叉熵损失函数计算出输入概率(使用softmax函数计算)和真实值之间的误差。 - 对于每个实例 :math:`x_i` ,i的范围为0到N-1,则可得损失为: + 对于每个实例 :math:`x_i` ,i的范围为0到N-1,则可得损失为: .. math:: \ell(x_i, c) = - \log\left(\frac{\exp(x_i[c])}{\sum_j \exp(x_i[j])}\right) = -x_i[c] + \log\left(\sum_j \exp(x_i[j])\right) - 其中 :math:`x_i` 是一维的Tensor, :math:`c` 为one-hot中等于1的位置。 + 其中 :math:`x_i` 是一维的Tensor, :math:`c` 为one-hot中等于1的位置。 .. note:: 虽然目标值是互斥的,即目标值中只有一个为正,但预测的概率不为互斥。只要求输入的预测概率分布有效。 diff --git a/docs/api/api_python/nn/mindspore.nn.TrainOneStepWithLossScaleCell.rst b/docs/api/api_python/nn/mindspore.nn.TrainOneStepWithLossScaleCell.rst index 94fe736f8b7..05e02c619b0 100644 --- a/docs/api/api_python/nn/mindspore.nn.TrainOneStepWithLossScaleCell.rst +++ b/docs/api/api_python/nn/mindspore.nn.TrainOneStepWithLossScaleCell.rst @@ -57,7 +57,7 @@ mindspore.nn.TrainOneStepWithLossScaleCell 如果使用了Tensor类型的 `scale_sense` ,可调用此函数修改它的值。 参数: - - **sens** (Tensor) - 新的损失缩放系数,其shape和类型需要与原始 `scale_sense` 相同。 + - **sens** (Tensor) - 新的损失缩放系数,其shape和类型需要与原始 `scale_sense` 相同。 .. 
py:method:: start_overflow_check(pre_cond, compute_input)

diff --git a/docs/api/api_python/transformer/mindspore.nn.transformer.CrossEntropyLoss.rst b/docs/api/api_python/transformer/mindspore.nn.transformer.CrossEntropyLoss.rst
index f4234f99951..b2b91f458c5 100644
--- a/docs/api/api_python/transformer/mindspore.nn.transformer.CrossEntropyLoss.rst
+++ b/docs/api/api_python/transformer/mindspore.nn.transformer.CrossEntropyLoss.rst
@@ -3,7 +3,7 @@
     计算输入和输出之间的交叉熵损失。

     参数:
-        - **parallel_config** (OpParallelConfig, MoEParallelConfig) - 表示并行配置。默认值为 `default_dpmp_config` ,表示一个带有默认参数的 `OpParallelConfig` 实例。
+        - **parallel_config** (OpParallelConfig) - 表示并行配置。默认值为 `default_dpmp_config` ,表示一个带有默认参数的 `OpParallelConfig` 实例。

     输入:
         - **logits** (Tensor) - shape为(N, C)的Tensor。表示的输出logits。其中N表示任意大小的维度,C表示类别个数。数据类型必须为float16或float32。
diff --git a/docs/api/api_python/transformer/mindspore.nn.transformer.FixedSparseAttention.rst b/docs/api/api_python/transformer/mindspore.nn.transformer.FixedSparseAttention.rst
index fb33526ca55..12c9ea3fe4f 100644
--- a/docs/api/api_python/transformer/mindspore.nn.transformer.FixedSparseAttention.rst
+++ b/docs/api/api_python/transformer/mindspore.nn.transformer.FixedSparseAttention.rst
@@ -2,7 +2,7 @@

     固定稀疏注意力层。

-    此接口实现了Sparse Transformer中使用的稀疏注意力原语。更多详情,请见论文(https://arxiv.org/abs/1904.10509)。
+    此接口实现了Sparse Transformer中使用的稀疏注意力原语,更多详情,请见论文(https://arxiv.org/abs/1904.10509)。

     具体来说,它包括以下内容:
diff --git a/docs/api/api_python/transformer/mindspore.nn.transformer.VocabEmbedding.rst b/docs/api/api_python/transformer/mindspore.nn.transformer.VocabEmbedding.rst
index 72d12fba013..f4a6f0d0a33 100644
--- a/docs/api/api_python/transformer/mindspore.nn.transformer.VocabEmbedding.rst
+++ b/docs/api/api_python/transformer/mindspore.nn.transformer.VocabEmbedding.rst
@@ -8,7 +8,7 @@
     参数:
         - **vocab_size** (int) - 表示查找表的大小。
         - **embedding_size** (int) - 表示查找表中每个嵌入向量的大小。
-        - **param_init** (Union[Tensor, str, Initializer, numbers.Number] - 表示embedding_table的Initializer。当指定字符串时,请参见 `initializer` 类了解字符串的值。默认值:'normal'。
+        - **param_init** (Union[Tensor, str, Initializer, numbers.Number]) - 表示embedding_table的Initializer。当指定字符串时,请参见 `initializer` 类了解字符串的值。默认值:'normal'。
         - **parallel_config** (EmbeddingOpParallelConfig) - 表示网络的并行配置。默认值为 `default_embedding_parallel_config` ,表示带有默认参数的 `EmbeddingOpParallelConfig` 实例。

     输入:
diff --git a/mindspore/python/mindspore/dataset/vision/transforms.py b/mindspore/python/mindspore/dataset/vision/transforms.py
index eaeab2f2259..d01cdbf1095 100644
--- a/mindspore/python/mindspore/dataset/vision/transforms.py
+++ b/mindspore/python/mindspore/dataset/vision/transforms.py
@@ -1350,7 +1350,7 @@ class LinearTransformation(PyTensorOperation):

 class MixUp(PyTensorOperation):
     """
-    Randomly mix up a batch of images together with its labels.
+    Randomly mix up a batch of numpy.ndarray images together with their labels.

     Each image will be multiplied by a random weight lambda generated from the Beta distribution and then added
     to another image multiplied by 1 - lambda. The same transformation will be applied to their labels with the
@@ -1565,7 +1565,7 @@ class Pad(ImageTensorOperation, PyTensorOperation):

     Args:
         padding (Union[int, Sequence[int, int], Sequence[int, int, int, int]]): The number of pixels
-            to pad each border of the image.
+            to pad each border of the image.
             If a single number is provided, it pads all borders with this value.
             If a tuple or lists of 2 values are provided, it pads the (left and top)
             with the first value and (right and bottom) with the second value. 
@@ -2942,7 +2942,7 @@ class RandomResizedCropWithBBox(ImageTensorOperation): size (Union[int, Sequence[int]]): The size of the output image. The size value(s) must be positive. If size is an integer, a square crop of size (size, size) is returned. If size is a sequence of length 2, it should be (height, width). - scale (Union[list, tuple] optional): Range (min, max) of respective size of the original + scale (Union[list, tuple], optional): Range (min, max) of respective size of the original size to be cropped, which must be non-negative (default=(0.08, 1.0)). ratio (Union[list, tuple], optional): Range (min, max) of aspect ratio to be cropped, which must be non-negative (default=(3. / 4., 4. / 3.)). @@ -3576,7 +3576,7 @@ class ResizeWithBBox(ImageTensorOperation): If size is an integer, smaller edge of the image will be resized to this value with the same image aspect ratio. If size is a sequence of length 2, it should be (height, width). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). + interpolation (Inter, optional): Image interpolation mode (default=Inter.LINEAR). It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. - Inter.LINEAR, means interpolation method is bilinear interpolation. diff --git a/mindspore/python/mindspore/nn/layer/activation.py b/mindspore/python/mindspore/nn/layer/activation.py index f9eecd0f01c..70f92ee84c5 100644 --- a/mindspore/python/mindspore/nn/layer/activation.py +++ b/mindspore/python/mindspore/nn/layer/activation.py @@ -937,13 +937,13 @@ class Sigmoid(Cell): Sigmoid_function#/media/File:Logistic-curve.svg>`_. Inputs: - - **x** (Tensor) - The input of Sigmoid with data type of float16 or float32. Tensor of any dimension. + - **input_x** (Tensor) - The input of Sigmoid with data type of float16 or float32. Tensor of any dimension. Outputs: - Tensor, with the same type and shape as the `x`. + Tensor, with the same type and shape as the `input_x`. Raises: - TypeError: If dtype of `x` is neither float16 nor float32. + TypeError: If dtype of `input_x` is neither float16 nor float32. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` diff --git a/mindspore/python/mindspore/nn/layer/math.py b/mindspore/python/mindspore/nn/layer/math.py index 1c3ab10c74c..6f185ab12e9 100644 --- a/mindspore/python/mindspore/nn/layer/math.py +++ b/mindspore/python/mindspore/nn/layer/math.py @@ -887,9 +887,9 @@ class Moments(Cell): Calculate the mean and variance of the input `x` along the specified `axis`. Args: - axis (Union[int, tuple(int)]): Calculates the mean and variance along the specified axis. + axis (Union[int, tuple(int), None]): Calculates the mean and variance along the specified axis. When the value is None, it means to calculate the mean and variance of all values of `x`. Default: None. - keep_dims (bool): If True, the calculation result will retain the dimension of `axis`, + keep_dims (Union[bool, None]): If True, the calculation result will retain the dimension of `axis`, and the dimensions of the mean and variance are the same as the input. If False or None, the dimension of `axis` will be reduced. Default: None. diff --git a/mindspore/python/mindspore/nn/layer/padding.py b/mindspore/python/mindspore/nn/layer/padding.py index d934e35b495..cd6c9e55671 100644 --- a/mindspore/python/mindspore/nn/layer/padding.py +++ b/mindspore/python/mindspore/nn/layer/padding.py @@ -542,7 +542,7 @@ class ZeroPad2d(_ConstantPadNd): Pads the last two dimensions of input tensor with zero. 
Args: - padding (union[int, tuple]): The padding size to pad the last two dimensions of input tensor. + padding (Union[int, tuple]): The padding size to pad the last two dimensions of input tensor. If is int, uses the same padding in boundaries of input's last two dimensions. If is tuple and length of padding is 4 uses (padding_0, padding_1, padding_2, padding_3) to pad. If the input is `x`, the size of last dimension of output is :math:`padding\_0 + x.shape[-1] + padding\_1`. diff --git a/mindspore/python/mindspore/nn/loss/loss.py b/mindspore/python/mindspore/nn/loss/loss.py index c8772dcb62c..e7900e27c07 100644 --- a/mindspore/python/mindspore/nn/loss/loss.py +++ b/mindspore/python/mindspore/nn/loss/loss.py @@ -685,7 +685,7 @@ def _check_label_dtype(labels_dtype, cls_name): class DiceLoss(LossBase): r""" - The Dice coefficient is a set similarity loss. It is used to calculate the similarity between two samples. The + The Dice coefficient is a set similarity loss, which is used to calculate the similarity between two samples. The value of the Dice coefficient is 1 when the segmentation result is the best and is 0 when the segmentation result is the worst. The Dice coefficient indicates the ratio of the area between two objects to the total area. The function is shown as follows: diff --git a/mindspore/python/mindspore/nn/transformer/layers.py b/mindspore/python/mindspore/nn/transformer/layers.py index 903a802655f..bedd443b7ab 100644 --- a/mindspore/python/mindspore/nn/transformer/layers.py +++ b/mindspore/python/mindspore/nn/transformer/layers.py @@ -514,7 +514,7 @@ class FixedSparseAttention(nn.Cell): """ Fixed Sparse Attention Layer. - This function contains the sparse attention primitives used in Sparse Transformers (see paper). + This function contains the sparse attention primitives used in Sparse Transformers (see paper) `Generating Long Sequences with Sparse Transformers `_. Specifically, it includes the following: 1. A faster implementation of normal attention (the upper triangle is not computed, and many operations are fused). diff --git a/mindspore/python/mindspore/nn/transformer/transformer.py b/mindspore/python/mindspore/nn/transformer/transformer.py index a30e09406ee..99d43c05c9a 100644 --- a/mindspore/python/mindspore/nn/transformer/transformer.py +++ b/mindspore/python/mindspore/nn/transformer/transformer.py @@ -624,8 +624,8 @@ class VocabEmbedding(Cell): The embedding lookup table from the 0-th dim of the parameter table. When the parallel_config.vocab_emb_dp is True and in the `AUTO_PARALLEL` mode, the embedding lookup will be trained by the data parallel way, as the parameters will be repeated on each device. If false, the embedding table will be sharded into n parts at - the 0-th dimension of the embedding table, where the n is the model parallel way determined by the - parallel_config (EmbeddingOpParallelConfig). + the 0-th dimension of the embedding table, where the n is the model parallel way determined by + `parallel_config.model_parallel` (EmbeddingOpParallelConfig). Note: When `AUTO_PARALLEL` or `SEMI_AUTO_PARALLEL` mode is enabled, this layer support only 2-d dimension inputs, diff --git a/mindspore/python/mindspore/nn/wrap/loss_scale.py b/mindspore/python/mindspore/nn/wrap/loss_scale.py index 74f60d5effd..d4a7e8fb40d 100644 --- a/mindspore/python/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/python/mindspore/nn/wrap/loss_scale.py @@ -66,7 +66,7 @@ class DynamicLossScaleUpdateCell(Cell): when there is an overflow. 
And it will be increased by `loss_scale` * `scale_factor` if there is no overflow for a continuous `scale_window` steps.

-    `get_update_cell` method of :class:`mindspore.DynamicLossScaleManager` will return this class, it will be called
+    `get_update_cell` method of :class:`mindspore.amp.DynamicLossScaleManager` will return this class. It will be called
     by :class:`mindspore.nn.TrainOneStepWithLossScaleCell` during training to update loss scale.

     Args:
@@ -165,7 +165,7 @@ class FixedLossScaleUpdateCell(Cell):
     """
     Update cell with fixed loss scaling value.

-    `get_update_cell` method of :class:`mindspore.FixedLossScaleManager` will return this class, it will be called
+    `get_update_cell` method of :class:`mindspore.amp.FixedLossScaleManager` will return this class. It will be called
     by :class:`mindspore.nn.TrainOneStepWithLossScaleCell` during trainning.

     Args:
diff --git a/mindspore/python/mindspore/ops/function/array_func.py b/mindspore/python/mindspore/ops/function/array_func.py
index c9f1d263cd0..e87873e3c63 100644
--- a/mindspore/python/mindspore/ops/function/array_func.py
+++ b/mindspore/python/mindspore/ops/function/array_func.py
@@ -4039,7 +4039,7 @@ def max(x, axis=0, keep_dims=False):
     Also see: class: `mindspore.ops.ArgMaxWithValue`.

     Args:
-        x (Tensor) - The input tensor, can be any dimension. Set the shape of input tensor as
+        x (Tensor): The input tensor, can be any dimension. Set the shape of input tensor as
           :math:`(x_1, x_2, ..., x_N)`.
         axis (int): The dimension to reduce. Default: 0.
         keep_dims (bool): Whether to reduce dimension, if true, the output will keep same dimension with the input,
diff --git a/mindspore/python/mindspore/ops/function/debug_func.py b/mindspore/python/mindspore/ops/function/debug_func.py
index d3cc3c5d58b..2dded0cb532 100644
--- a/mindspore/python/mindspore/ops/function/debug_func.py
+++ b/mindspore/python/mindspore/ops/function/debug_func.py
@@ -25,7 +25,7 @@ def print_(*input_x):
     It can also be saved in a file by setting the parameter `print_file_path` in `context`. Once set, the output
     will be saved in the file specified by print_file_path. :func:`mindspore.parse_print` can be employed to reload
     the data.
-    For more information, please refer to :func:`mindspore.context.set_context` and :func:`mindspore.parse_print`.
+    For more information, please refer to :func:`mindspore.set_context` and :func:`mindspore.parse_print`.

     Note:
         In pynative mode, please use python print function.
diff --git a/mindspore/python/mindspore/train/amp.py b/mindspore/python/mindspore/train/amp.py
index 4158062b9d7..2a228c81b0e 100644
--- a/mindspore/python/mindspore/train/amp.py
+++ b/mindspore/python/mindspore/train/amp.py
@@ -109,6 +109,7 @@ def _auto_black_list(network, black_list=None):
 def auto_mixed_precision(network, amp_level="O0"):
     """
     auto mixed precision function.
+
     Args:
         network (Cell): Definition of the network.
         amp_level (str): Supports ["O0", "O1", "O2", "O3"]. Default: "O0".
diff --git a/mindspore/python/mindspore/train/loss_scale_manager.py b/mindspore/python/mindspore/train/loss_scale_manager.py
index 3c9b4c71509..2d2cff35a91 100644
--- a/mindspore/python/mindspore/train/loss_scale_manager.py
+++ b/mindspore/python/mindspore/train/loss_scale_manager.py
@@ -47,8 +47,8 @@ class LossScaleManager:

 class FixedLossScaleManager(LossScaleManager):
     """
-    Loss scale(Magnification factor of gradients when mix precision is used) manager with a fixed loss scale value,
-    inherits from :class:`mindspore.LossScaleManager`.
+    Loss scale (Magnification factor of gradients when mixed precision is used) manager with a fixed loss scale value,
+    inherits from :class:`mindspore.amp.LossScaleManager`.

     Args:
         loss_scale (float): Magnification factor of gradients. Note that if `drop_overflow_update` is set to False,
@@ -99,7 +99,7 @@
     def update_loss_scale(self, overflow):
         """
-        Update loss scale value. The interface at :class:`mindspore.FixedLossScaleManager` will do nothing.
+        Update loss scale value. The interface at :class:`mindspore.amp.FixedLossScaleManager` will do nothing.

         Args:
             overflow (bool): Whether it overflows.
@@ -124,7 +124,7 @@
 class DynamicLossScaleManager(LossScaleManager):
     """
     Loss scale(Magnification factor of gradients when mix precision is used) manager with loss scale dynamically
-    adjusted, inherits from :class:`mindspore.LossScaleManager`.
+    adjusted, inherits from :class:`mindspore.amp.LossScaleManager`.

     Args:
         init_loss_scale (float): Initialize loss scale. Default: 2**24.
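
Note on usage: below is a minimal sketch of the scale -> unscale -> adjust cycle that the amp docs touched by this patch describe, written against the functional `mindspore.amp` API. The network, loss function, optimizer and data are placeholders, and it assumes a recent MindSpore build where `mindspore.value_and_grad` and the experimental `mindspore.amp.all_finite` helper are available:

    import numpy as np
    import mindspore as ms
    from mindspore import amp, nn

    # Placeholder model pieces; any Cell/optimizer pair works the same way.
    net = nn.Dense(16, 1)
    loss_fn = nn.MAELoss()
    opt = nn.SGD(net.trainable_params(), learning_rate=0.01)

    # scale_value shrinks by scale_factor on overflow and grows again after
    # scale_window consecutive overflow-free steps, as documented above.
    loss_scaler = amp.DynamicLossScaler(scale_value=2**16, scale_factor=2, scale_window=50)

    def forward_fn(data, label):
        loss = loss_fn(net(data), label)
        # Multiply the loss by scale_value so small float16 gradients survive.
        return loss_scaler.scale(loss)

    grad_fn = ms.value_and_grad(forward_fn, None, net.trainable_params())

    def train_step(data, label):
        loss, grads = grad_fn(data, label)
        loss = loss_scaler.unscale(loss)
        grads_finite = amp.all_finite(grads)  # bool scalar Tensor, as in adjust()'s docs
        loss_scaler.adjust(grads_finite)      # update scale_value from the overflow state
        if grads_finite:
            grads = loss_scaler.unscale(grads)
            opt(grads)
        return loss

    data = ms.Tensor(np.random.rand(4, 16), ms.float32)
    label = ms.Tensor(np.random.rand(4, 1), ms.float32)
    print(train_step(data, label))

The class-based path (`FixedLossScaleManager`/`DynamicLossScaleManager` with `TrainOneStepWithLossScaleCell`) wraps the same cycle: the manager's `get_update_cell()` supplies the update Cell, and the wrapper cell calls it every step with the overflow flag.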