forked from mindspore-Ecosystem/mindspore
!18963 Front-end annotation correction
Merge pull request !18963 from wangnan39/fix_docs
commit 3550dc2918
@@ -13,13 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#include "ops/getnext.h"
+
 #include <set>
 #include <string>
 #include <vector>
 #include <memory>
 #include <algorithm>
-#include "ops/getnext.h"
+
 #include "ops/op_utils.h"
 #include "utils/check_convert_utils.h"
 #include "utils/tensor_construct_utils.h"
@@ -62,10 +62,12 @@ class Cell(Cell_):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
+        >>> import mindspore.nn as nn
+        >>> import mindspore.ops as ops
        >>> class MyCell(nn.Cell):
        ...     def __init__(self):
        ...         super(MyCell, self).__init__()
-        ...         self.relu = P.ReLU()
+        ...         self.relu = ops.ReLU()
        ...
        ...     def construct(self, x):
        ...         return self.relu(x)
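For context, a minimal sketch of how a cell defined as in the docstring example above could be exercised; the imports and tensor values are illustrative only and assume a standard MindSpore install:

>>> import numpy as np
>>> import mindspore.nn as nn
>>> import mindspore.ops as ops
>>> from mindspore import Tensor
>>> class MyCell(nn.Cell):
...     def __init__(self):
...         super(MyCell, self).__init__()
...         self.relu = ops.ReLU()
...     def construct(self, x):
...         return self.relu(x)
>>> net = MyCell()
>>> # ReLU clamps negative entries to zero
>>> print(net(Tensor(np.array([-1.0, 0.0, 2.0], np.float32))))
[0. 0. 2.]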
@@ -607,7 +609,7 @@ class Cell(Cell_):
         Compiles cell.
 
         Args:
-            inputs (tuple): Input parameters.
+            inputs (tuple): Inputs of the Cell object.
         """
         _executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode)
@@ -616,7 +618,7 @@ class Cell(Cell_):
         Compiles and runs cell.
 
         Args:
-            inputs (tuple): Input parameters.
+            inputs (tuple): Inputs of the Cell object.
 
         Returns:
             Object, the result of executing.
@@ -682,8 +684,13 @@ class Cell(Cell_):
         """
         Cast parameter according to auto mix precision level in pynative mode.
 
+        This interface is currently used in the case of auto mix precision and usually need not to be used explicitly.
+
         Args:
-            param (Parameter): The parameter to cast.
+            param (Parameter): Parameters, the type of which should be cast.
+
+        Returns:
+            Parameter, the input parameter with type automatically casted.
         """
         if hasattr(self, "_mindspore_flags"):
             if self._mindspore_flags.get('fp32'):
@@ -725,7 +732,11 @@ class Cell(Cell_):
         return None
 
     def remove_redundant_parameters(self):
-        """Remove the redundant parameters"""
+        """
+        Remove the redundant parameters.
+
+        This interface usually need not to be used explicitly.
+        """
         cells = self.cells_and_names()
         for _, cell in cells:
             params = cell._params.items()
@@ -836,7 +847,7 @@ class Cell(Cell_):
         Adds the given prefix to the names of parameters.
 
         Args:
-            prefix (str): The prefix string.
+            prefix (str): The prefix string. Default: ''.
             recurse (bool): Whether contains the parameters of subcells. Default: True.
         """
@@ -884,6 +895,9 @@ class Cell(Cell_):
             expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
                            that are direct members of this cell. Default: True.
 
+        Returns:
+            Iteration, all parameters at the Cell.
+
         Examples:
             >>> net = Net()
             >>> parameters = []
@@ -912,6 +926,9 @@ class Cell(Cell_):
             expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
                            that are direct members of this cell. Default: True.
 
+        Returns:
+            Iteration, all the names and corresponding parameters in the cell.
+
         Examples:
             >>> n = Net()
             >>> names = []
@@ -949,6 +966,9 @@ class Cell(Cell_):
             cells (str): Cells to iterate over. Default: None.
             name_prefix (str): Namespace. Default: ''.
 
+        Returns:
+            Iteration, all the child cells and corresponding names in the cell.
+
         Examples:
             >>> n = Net()
             >>> names = []
@@ -972,7 +992,12 @@ class Cell(Cell_):
             yield ele
 
     def cells(self):
-        """Returns an iterator over immediate cells."""
+        """
+        Returns an iterator over immediate cells.
+
+        Returns:
+            Iteration, all the child cells in the cell.
+        """
         return self.name_cells().values()
 
     def _set_scope(self, name):
@@ -997,7 +1022,12 @@ class Cell(Cell_):
             yield key, value
 
     def get_scope(self):
-        """Returns the scope of a cell object in one network."""
+        """
+        Returns the scope of a cell object in one network.
+
+        Returns:
+            String, scope of the cell.
+        """
         return self._scope
 
     def generate_scope(self):
@@ -1010,6 +1040,9 @@ class Cell(Cell_):
         Returns an iterator over all cells in the network.
 
         Include name of the cell and cell itself.
+
+        Returns:
+            Dict[String, Cell], all the child cells and corresponding names in the cell.
         """
         value_set = set()
         cells = OrderedDict()
@@ -1056,6 +1089,9 @@ class Cell(Cell_):
             dst_type (:class:`mindspore.dtype`): Transfer Cell to Run with dst_type.
                 dst_type can be `mindspore.dtype.float16` or `mindspore.dtype.float32`.
 
+        Returns:
+            Cell, the cell itself.
+
         Raises:
             ValueError: If dst_type is not float32 nor float16.
         """
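As a usage sketch for the `to_float` interface documented in the hunk above (a small `nn.Dense` layer stands in for a real network; float16 support on the target device is assumed):

>>> import mindspore.nn as nn
>>> from mindspore import dtype as mstype
>>> net = nn.Dense(3, 4)
>>> # to_float returns the cell itself, so the call can be chained or reassigned
>>> net = net.to_float(mstype.float16)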
@@ -1080,6 +1116,9 @@ class Cell(Cell_):
         Args:
             acc_type (str): accelerate algorithm.
 
+        Returns:
+            Cell, the cell itself.
+
         Raises:
             ValueError: If acc_type is not in the algorithm library.
         """
@@ -1098,6 +1137,9 @@ class Cell(Cell_):
         Args:
             requires_grad (bool): Specifies if the net need to grad, if it is
                 True, cell will construct backward network in pynative mode. Default: True.
+
+        Returns:
+            Cell, the cell itself.
         """
         self.requires_grad = requires_grad
         return self
@@ -1112,6 +1154,9 @@ class Cell(Cell_):
 
         Args:
             mode (bool): Specifies whether the model is training. Default: True.
+
+        Returns:
+            Cell, the cell itself.
         """
         if mode is False:
             self._phase = 'predict'
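A brief sketch of the `set_train` interface described above; `nn.Dropout` is chosen only because its behaviour differs between training and inference, and the values are illustrative:

>>> import mindspore.nn as nn
>>> net = nn.Dropout(keep_prob=0.7)
>>> # set_train(False) switches the cell to inference behaviour and returns the cell itself
>>> net = net.set_train(False)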
@@ -37,7 +37,10 @@ class LearningRateSchedule(Cell):
             The output must be a Tensor of scalar.
 
         Inputs:
-            Tensor. The current step number.
+            - **global_step** (Tensor) - The current step number.
+
+        Returns:
+            Tensor. Learning rate at current step with shape :math:`()`.
         """
         raise NotImplementedError
@@ -77,10 +80,10 @@ class ExponentialDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
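To make the Inputs/Outputs description above concrete, a small sketch of evaluating the schedule at one step; the values are arbitrary and the printed number follows from learning_rate * decay_rate^(global_step / decay_steps):

>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> learning_rate = 0.1
>>> decay_rate = 0.9
>>> decay_steps = 4
>>> global_step = Tensor(2, mstype.int32)
>>> exponential_decay_lr = nn.ExponentialDecayLR(learning_rate, decay_rate, decay_steps)
>>> print(exponential_decay_lr(global_step))
0.09486833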
@@ -144,10 +147,10 @@ class NaturalExpDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
@@ -212,10 +215,10 @@ class InverseDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate decay once every `decay_steps` times. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
@@ -269,10 +272,10 @@ class CosineDecayLR(LearningRateSchedule):
         decay_steps (int): A value used to calculate decayed learning rate.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `min_lr` or `max_lr` is not a float.
@@ -345,10 +348,10 @@ class PolynomialDecayLR(LearningRateSchedule):
         update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float.
@@ -424,10 +427,10 @@ class WarmUpLR(LearningRateSchedule):
         warmup_steps (int): The warm up steps of learning rate.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` is not a float.
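The same calling pattern applies to the warm-up schedule; a short sketch with arbitrary values, where the result is learning_rate * min(global_step, warmup_steps) / warmup_steps:

>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> warmup_lr = nn.WarmUpLR(learning_rate=0.1, warmup_steps=2)
>>> print(warmup_lr(Tensor(2, mstype.int32)))
0.1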
@@ -195,17 +195,16 @@ class Adam(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
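For reference, a minimal sketch of constructing this optimizer; an `nn.Dense` layer stands in for a real network and the hyper-parameters are illustrative:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.Adam(params=net.trainable_params(), learning_rate=0.1)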
@@ -371,9 +370,29 @@ class Adam(Optimizer):
 
 
 class AdamWeightDecay(Optimizer):
-    """
+    r"""
     Implements the Adam algorithm to fix the weight decay.
 
+    .. math::
+        \begin{array}{ll} \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
+            update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\
+            update =
+            \begin{cases}
+                update + weight\_decay * w_{t}
+                    & \text{ if } weight\_decay > 0 \\
+                update
+                    & \text{ otherwise }
+            \end{cases} \\
+            w_{t+1} = w_{t} - lr * update
+        \end{array}
+
+    :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
+    :math:`g` represents `gradients`, :math:`lr` represents `learning_rate`,
+    :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, :math:`t` represents updating step while
+    :math:`w` represents `params`.
+
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
         weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied
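A corresponding construction sketch for `AdamWeightDecay`, again with a stand-in network and illustrative hyper-parameters:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.AdamWeightDecay(params=net.trainable_params(), learning_rate=0.01, weight_decay=0.01)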
@@ -493,17 +512,16 @@ class AdamOffload(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         This optimizer only supports `GRAPH_MODE` currently.
@@ -114,17 +114,16 @@ class LazyAdam(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
@@ -52,13 +52,25 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
 
 
 class ProximalAdagrad(Optimizer):
-    """
+    r"""
     Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
 
     ProximalAdagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting
     <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.
 
+    .. math::
+        accum_{t+1} = accum_{t} + grad * grad
+
+    .. math::
+        \text{prox_v} = var_{t} - lr * grad * \frac{1}{\sqrt{accum_{t+1}}}
+
+    .. math::
+        var_{t+1} = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
+
+    Here, grad, lr, var, accum and t denote the gradients, learning_rate, params, accumulation and the current
+    step respectively.
+
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
         weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied
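A construction sketch for `ProximalAdagrad` under the same assumptions as the optimizer examples above (stand-in network, illustrative values):

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.ProximalAdagrad(params=net.trainable_params(), learning_rate=0.01)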
@@ -66,6 +66,7 @@ def _tensors_cast_datatype(datatype, param):
     return F.cast(param, datatype)
 
 
+
 class WithLossCell(Cell):
     r"""
     Cell with loss function.
@@ -82,7 +83,7 @@ class WithLossCell(Cell):
         - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
 
     Outputs:
-        Tensor, a scalar tensor with shape :math:`()`.
+        Tensor, a tensor representing the loss value, the shape of which is usually :math:`()`.
 
     Raises:
         TypeError: If dtype of `data` or `label` is neither float16 nor float32.
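To illustrate the Inputs/Outputs described above, a minimal sketch of wrapping a backbone and a loss function (both are stand-ins chosen only for brevity):

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> loss_fn = nn.SoftmaxCrossEntropyWithLogits()
>>> loss_net = nn.WithLossCell(net, loss_fn)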
@@ -114,7 +115,7 @@ class WithLossCell(Cell):
     @property
     def backbone_network(self):
         """
-        Returns the backbone network.
+        Get the backbone network.
 
         Returns:
             Cell, the backbone network.
@@ -298,7 +299,7 @@ class TrainOneStepCell(Cell):
         - **(\*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
 
     Outputs:
-        Tensor, a scalar Tensor with shape :math:`()`.
+        Tensor, a tensor representing the loss value, the shape of which is usually :math:`()`.
 
     Raises:
         TypeError: If `sens` is not a number.
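A minimal sketch of assembling the one-step training wrapper whose loss output is described above; the network, loss function and optimizer are stand-ins:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> loss_net = nn.WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> train_net = nn.TrainOneStepCell(loss_net, optim)
>>> train_net = train_net.set_train()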
@@ -408,6 +409,12 @@ class GetNextSingleOp(Cell):
 
     For detailed information, refer to `ops.operations.GetNext`.
 
+    Inputs:
+        No inputs.
+
+    Outputs:
+        tuple[Tensor], the data get from Dataset.
+
     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -635,13 +642,19 @@ class WithEvalCell(Cell):
 
 class ParameterUpdate(Cell):
     """
-    Cell that updates parameters.
+    Cell that updates a parameter.
 
     With this Cell, one can manually update `param` with the input `Tensor`.
 
     Args:
         param (Parameter): The parameter to be updated manually.
 
+    Inputs:
+        - **x** (Tensor) - A tensor whose shape and type are the same with `param`.
+
+    Outputs:
+        Tensor, the input `x`.
+
     Raises:
         KeyError: If parameter with the specified name does not exist.
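A sketch of the Inputs/Outputs contract added above for `ParameterUpdate`; the `nn.Dense` weight is only an example parameter, and the shape of the new value must match it:

>>> import numpy as np
>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> network = nn.Dense(3, 4)
>>> param = network.parameters_dict()['weight']
>>> update = nn.ParameterUpdate(param)
>>> weight = Tensor(np.arange(12).reshape((4, 3)), mstype.float32)
>>> output = update(weight)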
@@ -72,11 +72,11 @@ class DynamicLossScaleUpdateCell(Cell):
         scale_window (int): Maximum continuous training steps that do not have overflow.
 
     Inputs:
-        - **inputs** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
-        - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
+        - **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`.
+        - **overflow** (bool) - Whether the overflow occurs or not.
 
     Outputs:
-        Tensor, a scalar Tensor with shape :math:`()`.
+        bool, the input `overflow`.
 
     Raises:
         TypeError: If dtype of `inputs` or `label` is neither float16 nor float32.
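A construction sketch for the dynamic update cell described above; in practice the instance is passed as the `scale_sense` of `nn.TrainOneStepWithLossScaleCell`, and the values shown are illustrative:

>>> import mindspore.nn as nn
>>> scale_update = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)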
@@ -165,6 +165,13 @@ class FixedLossScaleUpdateCell(Cell):
     Args:
         loss_scale_value (float): Initializes loss scale.
 
+    Inputs:
+        - **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`, that will be ignored.
+        - **overflow** (bool) - Whether the overflow occurs or not.
+
+    Outputs:
+        bool, the input `overflow`.
+
     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -332,7 +339,11 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
     def set_sense_scale(self, sens):
         """
         If the user has set the sens in the training process and wants to reassign the value, he can call
-        this function again to make modification, and sens needs to be of type Tensor."""
+        this function again to make modification, and sens needs to be of type Tensor.
+
+        Inputs:
+            - **sens** (Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
+        """
         if self.scale_sense and isinstance(sens, Tensor):
             self.scale_sense.set_data(sens)
         else:
@@ -347,15 +358,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow in the process of gradient calculation. In this case, pre_cond should be the output of the loss
         function, and compute_input should be the input of gradients-computing function.
 
-        Args:
-            pre_cond(object): A precondition for starting overflow detection. It determines the executing order of
-                overflow state clearing and prior processions. It makes sure that the function 'start_overflow' clears
-                status after finishing the process of precondition.
-            compute_input(object): The input of subsequent process. Overflow detection should be performed on a certain
-                computation. Set `compute_input` as the input of the computation, to ensure overflow status is cleared
-                before executing the computation.
+        Inputs:
+            - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order
+              of overflow state clearing and prior processions. It makes sure that the function 'start_overflow'
+              clears status after finishing the process of precondition.
+            - **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a
+              certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
+              cleared before executing the computation.
 
-        Returns:
+        Outputs:
             Tuple[object, object], the first value is False for GPU backend, while it is a instance of
             NPUAllocFloatStatus for other backend. The status is used to detect overflow during overflow detection.
             The second value is the same as the input of `compute_input`, but contains some information about the
|
||||||
|
|
||||||
Get overflow results after executing the target process for overflow detection.
|
Get overflow results after executing the target process for overflow detection.
|
||||||
|
|
||||||
Args:
|
Inputs:
|
||||||
status(object): A status instance used to detect the overflow.
|
- **status** (object) - A status instance used to detect the overflow.
|
||||||
compute_output: Overflow detection should be performed on a certain computation. Set `compute_output` as
|
- **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output`
|
||||||
the output of the computation, to ensure overflow status is acquired before executing the computation.
|
as the output of the computation, to ensure overflow status is acquired before executing the
|
||||||
|
computation.
|
||||||
|
|
||||||
Returns:
|
Outputs:
|
||||||
bool, whether the overflow occurs or not.
|
bool, whether the overflow occurs or not.
|
||||||
"""
|
"""
|
||||||
if not self.gpu_target:
|
if not self.gpu_target:
|
||||||
|
@@ -409,10 +421,10 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         """
         Calculate loss scale according to the overflow.
 
-        Args:
-            overflow(bool): Whether the overflow occurs or not.
+        Inputs:
+            - **overflow** (bool) - Whether the overflow occurs or not.
 
-        Returns:
+        Outputs:
             bool, overflow value.
         """
         if self.loss_scaling_manager is not None:
@@ -266,7 +266,7 @@ class DatasetHelper:
             self.iter.release()
 
     def continue_send(self):
-        """continue send data to device at the beginning of epoch."""
+        """Continue to send data to device at the beginning of epoch."""
        self.iter.continue_send()
 
     def get_data_info(self):
@@ -67,23 +67,39 @@ class FixedLossScaleManager(LossScaleManager):
         self._drop_overflow_update = drop_overflow_update
 
     def get_loss_scale(self):
-        """Get loss scale value."""
+        """
+        Get loss scale value.
+
+        Returns:
+            float, `loss_scale` value.
+        """
         return self._loss_scale
 
     def get_drop_overflow_update(self):
-        """Get the flag whether to drop optimizer update when there is an overflow."""
+        """
+        Get the flag whether to drop optimizer update when there is an overflow.
+
+        Returns:
+            bool, `drop_overflow_update` value.
+        """
         return self._drop_overflow_update
 
     def update_loss_scale(self, overflow):
         """
-        Update loss scale value.
+        Update loss scale value. The interface at `FixedLossScaleManager` will do nothing.
 
         Args:
             overflow (bool): Whether it overflows.
         """
 
     def get_update_cell(self):
-        "Returns the cell for `TrainOneStepWithLossScaleCell`"
+        """
+        Returns the update cell for `TrainOneStepWithLossScaleCell`.
+
+        Returns:
+            None or Cell. Cell object, used to update `loss_scale`, when `drop_overflow_update` is True. None when
+            `drop_overflow_update` is False.
+        """
         if not self._drop_overflow_update:
             return None
         return nn.FixedLossScaleUpdateCell(self._loss_scale)
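For orientation, a hedged end-to-end sketch of plugging `FixedLossScaleManager` into `Model`; the network, loss and optimizer are stand-ins, and the import paths follow the mindspore.train package layout:

>>> import mindspore.nn as nn
>>> from mindspore.train.model import Model
>>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
>>> net = nn.Dense(3, 4)
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> loss_scale_manager = FixedLossScaleManager(loss_scale=1024.0, drop_overflow_update=False)
>>> model = Model(net, loss_fn=nn.SoftmaxCrossEntropyWithLogits(), optimizer=optim,
...               loss_scale_manager=loss_scale_manager)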
@@ -127,7 +143,12 @@ class DynamicLossScaleManager(LossScaleManager):
         self.bad_step = 0
 
     def get_loss_scale(self):
-        """Get loss scale value."""
+        """
+        Get loss scale value.
+
+        Returns:
+            float, `loss_scale` value.
+        """
         return self.loss_scale
 
     def update_loss_scale(self, overflow):
@@ -152,9 +173,19 @@ class DynamicLossScaleManager(LossScaleManager):
         self.cur_iter += 1
 
     def get_drop_overflow_update(self):
-        """Get the flag whether to drop optimizer update when there is an overflow."""
+        """
+        Get the flag whether to drop optimizer update when there is an overflow.
+
+        Returns:
+            bool, always return True at `DynamicLossScaleManager`.
+        """
         return True
 
     def get_update_cell(self):
-        "Returns the cell for `TrainOneStepWithLossScaleCell`"
+        """
+        Returns the update cell for `TrainOneStepWithLossScaleCell`.
+
+        Returns:
+            Cell, cell object used to update `loss_scale`.
+        """
         return nn.DynamicLossScaleUpdateCell(self.loss_scale, self.scale_factor, self.scale_window)
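The dynamic manager follows the same pattern; a short sketch of creating it and retrieving its update cell (the arguments shown are the documented defaults):

>>> from mindspore.train.loss_scale_manager import DynamicLossScaleManager
>>> manager = DynamicLossScaleManager(init_loss_scale=2**24, scale_factor=2, scale_window=2000)
>>> update_cell = manager.get_update_cell()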