Optimized code docs for a list of 7 issue items

tacyi139 2021-12-29 17:16:58 +08:00
parent 5e96dad4c9
commit f5339512f2
22 changed files with 136 additions and 106 deletions

View File

@ -3,7 +3,7 @@ mindspore.nn.ELU
.. py:class:: mindspore.nn.ELU(alpha=1.0)
Exponential linear unit activation function (Exponential Linear Uint activation function).
Exponential linear unit activation function (Exponential Linear Unit activation function).
Computes ELU for each element of the input. The activation function is defined as follows:

View File

@ -3,7 +3,7 @@ mindspore.ops.Elu
.. py:class:: mindspore.ops.Elu(*args, **kwargs)
Exponential linear unit activation function (Exponential Linear Uint activation function).
Exponential linear unit activation function (Exponential Linear Unit activation function).
Computes Elu for each element of the input. The activation function is defined as follows:

View File

@ -199,7 +199,7 @@ class LogSoftmax(Cell):
class ELU(Cell):
r"""
Exponential Linear Uint activation function.
Exponential Linear Unit activation function.
Applies the exponential linear unit function element-wise.
The activation function is defined as:
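The hunk's context ends just before the formula body. For reference, the standard ELU definition (stated here as background, not quoted from the file) is:

.. math::
    \text{ELU}(x) =
    \begin{cases}
    x, & x > 0 \\
    \alpha \, (e^{x} - 1), & x \le 0
    \end{cases}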

View File

@ -514,7 +514,7 @@ class Conv3d(_Conv):
kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
Specifies the depth, height and width of the 3D convolution window.
Single int means the value is for the depth, height and the width of the kernel.
A tuple of 3 ints means the first value is for the depth, second value is for height and the
A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
other is for the width of the kernel.
stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
the depth, height and width of movement are both strides, or a tuple of three int numbers that

View File

@ -314,7 +314,7 @@ class MSSSIM(Cell):
Args:
max_val (Union[int, float]): The dynamic range of the pixel values (255 for 8-bit grayscale images).
Default: 1.0.
power_factors (Union[tuple, list]): Iterable of weights for each scal e.
power_factors (Union[tuple, list]): Iterable of weights for each scale.
Default: (0.0448, 0.2856, 0.3001, 0.2363, 0.1333). Default values obtained by Wang et al.
filter_size (int): The size of the Gaussian filter. Default: 11.
filter_sigma (float): The standard deviation of Gaussian kernel. Default: 1.5.

View File

@ -771,7 +771,7 @@ class LayerNorm(Cell):
y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
Args:
normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axis
normalized_shape (Union(tuple[int], list[int])): The normalization is performed over axis
`begin_norm_axis ... R - 1`.
begin_norm_axis (int): The first normalization dimension: normalization will be performed along dimensions
`begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1.
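A minimal usage sketch of the corrected signature (assuming the mindspore.nn.LayerNorm constructor as documented in this hunk; shapes are illustrative only):

import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

# Normalize over the last two axes of a (batch, seq, feature) input;
# normalized_shape must match the axes selected by begin_norm_axis/begin_params_axis.
x = Tensor(np.ones([2, 4, 8]), ms.float32)
layer_norm = nn.LayerNorm(normalized_shape=(4, 8), begin_norm_axis=1, begin_params_axis=1)
y = layer_norm(x)
print(y.shape)  # (2, 4, 8)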

View File

@ -178,7 +178,7 @@ class MaxPool1d(_PoolNd):
- **x** (Tensor) - Tensor of shape :math:`(N, C, L_{in})`.
Outputs:
Tensor of shape :math:`(N, C, L_{out}))`.
Tensor of shape :math:`(N, C, L_{out})`.
Raises:
TypeError: If `kernel_size` or `strides` is not an int.

View File

@ -57,6 +57,7 @@ def _check_is_tensor(param_name, input_data, cls_name):
raise TypeError(f"For '{cls_name}', the '{param_name}' should be '{mstype.tensor_type}', "
f"but got '{P.typeof(input_data)}'")
@constexpr
def _check_is_tuple(param_name, input_data, cls_name):
"""Internal function, used to check whether the input data is Tensor."""
@ -64,6 +65,7 @@ def _check_is_tuple(param_name, input_data, cls_name):
raise TypeError(f"For '{cls_name}', the '{param_name}' should be '{mstype.Tuple}', "
f"but got '{P.typeof(input_data)}'")
@constexpr
def _check_tuple_length(param_name, input_data, length, cls_name):
"""Internal function, used to check whether the input data is Tensor."""
@ -71,16 +73,19 @@ def _check_tuple_length(param_name, input_data, length, cls_name):
raise TypeError(f"For '{cls_name}', the length of '{param_name}' should be '{length}', "
f"but got '{len(input_data)}'")
def sequence_mask(lengths, maxlen):
"""generate mask matrix by seq_length"""
range_vector = arange(0, maxlen, 1, lengths.dtype)
result = range_vector < lengths.view(lengths.shape + (1,))
return result.astype(mstype.int32)
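As a plain-NumPy illustration of what sequence_mask computes (a sketch of the semantics above, not MindSpore code):

import numpy as np

def sequence_mask_np(lengths, maxlen):
    # Positions before each sequence length become 1, the rest 0.
    range_vector = np.arange(maxlen)
    return (range_vector < lengths[:, None]).astype(np.int32)

print(sequence_mask_np(np.array([2, 3]), 4))
# [[1 1 0 0]
#  [1 1 1 0]]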
def select_by_mask(inputs, mask):
"""mask hiddens by mask matrix"""
return mask.view(mask.shape + (1,)).swapaxes(0, 1) \
.expand_as(inputs).astype(mstype.bool_) * inputs
.expand_as(inputs).astype(mstype.bool_) * inputs
def get_hidden(output, seq_length):
"""get hidden state by seq_length"""
@ -88,8 +93,10 @@ def get_hidden(output, seq_length):
indices = P.Concat(1)((seq_length.view(-1, 1) - 1, batch_index.view(-1, 1)))
return P.GatherNd()(output, indices)
class _DynamicRNNBase(Cell):
'''Dynamic RNN module to compute RNN cell by timesteps'''
def __init__(self, mode):
super().__init__()
if mode == "RNN_RELU":
@ -112,7 +119,7 @@ class _DynamicRNNBase(Cell):
t = 0
h = h_0
while t < time_step:
x_t = x[t:t+1:1]
x_t = x[t:t + 1:1]
x_t = P.Squeeze(0)(x_t)
h = self.cell(x_t, h, w_ih, w_hh, b_ih, b_hh)
if self.is_lstm:
@ -142,7 +149,7 @@ class _DynamicRNNBase(Cell):
state_t = h_t
t = 0
while t < time_step:
x_t = x[t:t+1:1]
x_t = x[t:t + 1:1]
x_t = P.Squeeze(0)(x_t)
h_t = self.cell(x_t, state_t, w_ih, w_hh, b_ih, b_hh)
seq_cond = seq_length > t
@ -164,26 +171,34 @@ class _DynamicRNNBase(Cell):
return self.recurrent(x, h, w_ih, w_hh, b_ih, b_hh)
return self.variable_recurrent(x, h, seq_length, w_ih, w_hh, b_ih, b_hh)
class _DynamicRNNRelu(_DynamicRNNBase):
'''Dynamic RNN module with Relu activation'''
def __init__(self):
mode = 'RNN_RELU'
super().__init__(mode)
class _DynamicRNNTanh(_DynamicRNNBase):
'''Dynamic RNN module with Tanh activation'''
def __init__(self):
mode = 'RNN_TANH'
super().__init__(mode)
class _DynamicGRUCPUGPU(_DynamicRNNBase):
'''Dynamic GRU module on CPU and GPU'''
def __init__(self):
mode = 'GRU'
super().__init__(mode)
class _DynamicGRUAscend(Cell):
'''Dynamic GRU module on Ascend'''
def __init__(self):
super().__init__()
self.gru = P.DynamicGRUV2(gate_order='rzh')
@ -195,11 +210,11 @@ class _DynamicGRUAscend(Cell):
b_ih = P.Zeros()(w_ih.shape[0], w_ih.dtype)
b_hh = P.Zeros()(w_ih.shape[0], w_ih.dtype)
outputs, _, _, _, _, _ = self.gru(self.cast(x, self.dtype), \
self.cast(self.transpose(w_ih, (1, 0)), self.dtype), \
self.cast(self.transpose(w_hh, (1, 0)), self.dtype), \
self.cast(b_ih, self.dtype), \
self.cast(b_hh, self.dtype), \
None, self.cast(h_0, self.dtype))
self.cast(self.transpose(w_ih, (1, 0)), self.dtype), \
self.cast(self.transpose(w_hh, (1, 0)), self.dtype), \
self.cast(b_ih, self.dtype), \
self.cast(b_hh, self.dtype), \
None, self.cast(h_0, self.dtype))
if seq_length is not None:
h = get_hidden(outputs, seq_length)
mask = sequence_mask(seq_length, x.shape[0])
@ -208,8 +223,10 @@ class _DynamicGRUAscend(Cell):
h = outputs[-1]
return outputs, h
class _DynamicLSTMCPUGPU(Cell):
'''Dynamic LSTM module on CPU and GPU'''
def __init__(self):
super().__init__()
self.concat = P.Concat()
@ -251,8 +268,10 @@ class _DynamicLSTMCPUGPU(Cell):
)
return output, (h_n, c_n)
class _DynamicLSTMAscend(Cell):
'''Dynamic LSTM module on Ascend'''
def __init__(self):
super().__init__()
self.lstm = P.DynamicRNN()
@ -294,8 +313,10 @@ class _DynamicLSTMAscend(Cell):
c = c[-1]
return outputs, (h, c)
class _RNNBase(Cell):
'''Basic class for RNN operators'''
def __init__(self, mode, input_size, hidden_size, num_layers=1, has_bias=True,
batch_first=False, dropout=0., bidirectional=False):
super().__init__()
@ -482,7 +503,7 @@ class _RNNBase(Cell):
num_directions = 2 if self.bidirectional else 1
if hx is None:
hx = _init_state((self.num_layers * num_directions, max_batch_size, self.hidden_size), \
x.dtype, self.is_lstm)
x.dtype, self.is_lstm)
if self.batch_first:
x = P.Transpose()(x, (1, 0, 2))
if self.bidirectional:
@ -495,6 +516,7 @@ class _RNNBase(Cell):
return x.astype(mstype.float32), h.astype(mstype.float32)
return x.astype(mstype.float32), (h[0].astype(mstype.float32), h[1].astype(mstype.float32))
class RNN(_RNNBase):
r"""
Stacked Elman RNN layers.
@ -527,11 +549,12 @@ class RNN(_RNNBase):
- **x** (Tensor) - Tensor of data type mindspore.float32 and
shape (seq_len, batch_size, `input_size`) or (batch_size, seq_len, `input_size`).
- **hx** (Tensor) - Tensor of data type mindspore.float32 and
shape (num_directions * `num_layers`, batch_size, `hidden_size`). Data type of `hx` must be the same as `x`.
- **seq_length** (Tensor) - The length of each sequence in a input batch.
shape (num_directions * `num_layers`, batch_size, `hidden_size`). The data type of `hx` must be the same as
`x`.
- **seq_length** (Tensor) - The length of each sequence in an input batch.
Tensor of shape :math:`(\text{batch_size})`. Default: None.
This input indicates the real sequence length before padding to avoid padded elements
have been used to compute hidden state and affect the final output. It is recommend to
have been used to compute hidden state and affect the final output. It is recommended to
use this input when **x** has padding elements.
Outputs:
@ -559,6 +582,7 @@ class RNN(_RNNBase):
>>> print(output.shape)
(3, 5, 16)
"""
def __init__(self, *args, **kwargs):
if 'nonlinearity' in kwargs:
if kwargs['nonlinearity'] == 'tanh':
@ -574,6 +598,7 @@ class RNN(_RNNBase):
super(RNN, self).__init__(mode, *args, **kwargs)
class GRU(_RNNBase):
r"""
Stacked GRU (Gated Recurrent Unit) layers.
@ -622,10 +647,10 @@ class GRU(_RNNBase):
- **hx** (Tensor) - Tensor of data type mindspore.float32 and
shape (num_directions * `num_layers`, batch_size, `hidden_size`). The data type of `hx` must be the same as
`x`.
- **seq_length** (Tensor) - The length of each sequence in a input batch.
- **seq_length** (Tensor) - The length of each sequence in an input batch.
Tensor of shape :math:`(\text{batch_size})`. Default: None.
This input indicates the real sequence length before padding to avoid padded elements
have been used to compute hidden state and affect the final output. It is recommend to
have been used to compute hidden state and affect the final output. It is recommended to
use this input when **x** has padding elements.
Outputs:
@ -652,10 +677,12 @@ class GRU(_RNNBase):
>>> print(output.shape)
(3, 5, 16)
"""
def __init__(self, *args, **kwargs):
mode = 'GRU'
super(GRU, self).__init__(mode, *args, **kwargs)
class LSTM(_RNNBase):
r"""
Stacked LSTM (Long Short-Term Memory) layers.
@ -708,11 +735,11 @@ class LSTM(_RNNBase):
shape (seq_len, batch_size, `input_size`) or (batch_size, seq_len, `input_size`).
- **hx** (tuple) - A tuple of two Tensors (h_0, c_0) both of data type mindspore.float32
and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
Data type of `hx` must be the same as `x`.
- **seq_length** (Tensor) - The length of each sequence in a input batch.
The data type of `hx` must be the same as `x`.
- **seq_length** (Tensor) - The length of each sequence in an input batch.
Tensor of shape :math:`(\text{batch_size})`. Default: None.
This input indicates the real sequence length before padding to avoid padded elements
have been used to compute hidden state and affect the final output. It is recommend to
have been used to compute hidden state and affect the final output. It is recommended to
use this input when **x** has padding elements.
Outputs:
@ -740,6 +767,7 @@ class LSTM(_RNNBase):
>>> print(output.shape)
(3, 5, 16)
"""
def __init__(self, *args, **kwargs):
mode = 'LSTM'
super(LSTM, self).__init__(mode, *args, **kwargs)

View File

@ -111,7 +111,7 @@ class LossBase(Cell):
additional dimensions.
weights (Union[float, Tensor]): Optional `Tensor` whose rank is either 0, or the same rank as inputs,
and must be broadcastable to inputs (i.e., all dimensions must be either `1`,
or the same as the corresponding inputs dimension).
or the same as the corresponding inputs dimension). Default: 1.0.
Examples:
>>> class Net(nn.LossBase):
@ -1222,8 +1222,8 @@ class BCEWithLogitsLoss(LossBase):
If not None, it can be broadcast to a tensor with shape of `logits`,
data type must be float16 or float32. Default: None.
pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
number of classes. If not None, it must can be broadcast to a tensor with shape of `logits`,
data type must be float16 or float32. Default: None.
number of classes. If not None, it must be broadcast to a tensor with shape of `logits`, data type
must be float16 or float32. Default: None.
Inputs:
- **logits** (Tensor) - Input logits with shape :math:`(N, *)` where :math:`*` means, any number

View File

@ -862,16 +862,17 @@ def cummin(x, axis):
It returns the cumulative minimum of elements and the index.
..math::
y{i} = min(x{1}, x{2}, ... , x{i})
.. math::
\begin{array}{ll} \\
y{i} = min(x{1}, x{2}, ... , x{i})
\end{array}
Args:
x (Tensor): The input tensor, rank of `input_x` > 0.
axis (int): The dimension to do the operation. The axis is in the range from -len(`input_x`.shape)
to len(`input_x`.shape) - 1. When it's in the range from 0 to len(`input_x`.shape) - 1, it means starting
from the first dimension and counting forwards. When it's less than 0, it means we're counting backwards
from the last dimension. for example, -1 means the last dimension.
from the last dimension. For example, -1 means the last dimension.
Outputs:
- **output** (Tensor) - The output tensor of the cumulative minimum of elements.
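A 1-D NumPy sketch of the behaviour described above (illustrative only; tie-breaking here keeps the first occurrence of the running minimum, which is an assumption rather than a guarantee of the operator):

import numpy as np

def cummin_1d(x):
    # values[i] = min(x[0], ..., x[i]); indices[i] = position of that minimum
    values = np.minimum.accumulate(x)
    indices = np.array([int(np.argmin(x[: i + 1])) for i in range(len(x))])
    return values, indices

vals, idx = cummin_1d(np.array([3.0, 1.0, 4.0, 1.0, 5.0]))
print(vals)  # [3. 1. 1. 1. 1.]
print(idx)   # [0 1 1 1 1]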

View File

@ -335,12 +335,12 @@ def multinomial(inputs, num_sample, replacement=True, seed=None):
but must be non-negative, finite and have a non-zero sum.
Args:
x (Tensor): The input tensor containing probabilities, must be 1 or 2 dimensions, with
inputs (Tensor): The input tensor containing probabilities, must be 1 or 2 dimensions, with
float32 data type.
num_sample (int): Number of samples to draw.
replacement (bool, optional): Whether to draw with replacement or not, default True.
seed (int, optional): Seed is used as entropy source for the random number engines to generate
pseudo-random numbers, must be non-negative. Default: 0.
pseudo-random numbers, must be non-negative. Default: None.
Outputs:
Tensor, has the same rows with input. The number of sampled indices of each row is `num_samples`.
@ -349,7 +349,7 @@ def multinomial(inputs, num_sample, replacement=True, seed=None):
Raises:
TypeError: If `x` is not a Tensor whose dtype is not float32.
TypeError: If `num_sample` is not an int.
TypeError: If `seed` is neither an int nor a optional.
TypeError: If `seed` is neither an int nor an optional.
Supported Platforms:
``GPU``
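A minimal usage sketch of the documented signature (assuming the functional is exposed as mindspore.ops.multinomial and, per this docstring, runs on GPU):

import numpy as np
from mindspore import ops, Tensor

# 1-D probabilities only need to be non-negative, finite and have a non-zero sum.
probs = Tensor(np.array([0.1, 0.3, 0.6], dtype=np.float32))
samples = ops.multinomial(probs, num_sample=5, replacement=True)
print(samples.shape)  # (5,)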

View File

@ -602,7 +602,7 @@ class TBERegOp(RegOp):
The behavior type of operator, such as broadcast, reduce and so on.
Args:
pattern (str): Value of op pattern.
pattern (str): Value of op pattern. Default: None.
"""
if pattern is not None and self._is_string(pattern):
self.op_pattern_ = pattern

View File

@ -1438,15 +1438,16 @@ class Cummin(Primitive):
It returns the cumulative minimum of elements and the index.
..math::
y{i} = min(x{1}, x{2}, ... , x{i})
.. math::
\begin{array}{ll} \\
y{i} = min(x{1}, x{2}, ... , x{i})
\end{array}
Args:
- **axis** (int) - The dimension to do the operation. The axis is in the range from -len(`input_x`.shape)
to len(`input_x`.shape) - 1. When it's in the range from 0 to len(`input_x`.shape) - 1, it means starting
from the first dimension and counting forwards. When it's less than 0, it means we're counting backwards
from the last dimension. for example, -1 means the last dimension.
from the last dimension. For example, -1 means the last dimension.
Inputs:
- **input_x** (Tensor) - The input tensor, rank of `input_x` > 0.

View File

@ -1615,7 +1615,7 @@ class InvertPermutation(PrimitiveWithInfer):
values can not be negative.
Inputs:
- **input_x** (Union(tuple[int], list[int]) - The input is constructed by multiple
- **input_x** (Union(tuple[int], list[int])) - The input is constructed by multiple
integers, i.e., :math:`(y_1, y_2, ..., y_S)` representing the indices.
The values must include 0. There can be no duplicate values or negative values.
Only constant value is allowed. The maximum value must be equal to length of input_x.
@ -4081,7 +4081,7 @@ class ScatterUpdate(_ScatterOpDynamic):
\text{input_x}[\text{indices}[i, ..., j], :] = \text{updates}[i, ..., j, :]
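A NumPy illustration of the update rule above (a sketch of the semantics only, not the mindspore.ops.ScatterUpdate call):

import numpy as np

input_x = np.array([[1.0, 1.0, 1.0],
                    [2.0, 2.0, 2.0]])
indices = np.array([0])
updates = np.array([[9.0, 8.0, 7.0]])

# input_x[indices[i], :] = updates[i, :]
input_x[indices] = updates
print(input_x)
# [[9. 8. 7.]
#  [2. 2. 2.]]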
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4144,7 +4144,7 @@ class ScatterNdUpdate(Primitive):
:math:`(i_0, i_1, ..., i_{Q-2}, x\_shape_N, ..., x\_shape_{P-1})`.
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4209,7 +4209,7 @@ class ScatterMax(_ScatterOp):
= max(\text{input_x}[\text{indices}[i, ..., j], :], \text{updates}[i, ..., j, :])
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4263,7 +4263,7 @@ class ScatterMin(_ScatterOp):
= min(\text{input_x}[\text{indices}[i, ..., j], :], \text{updates}[i, ..., j, :])
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4316,7 +4316,7 @@ class ScatterAdd(_ScatterOpDynamic):
\text{input_x}[\text{indices}[i, ..., j], :] \mathrel{+}= \text{updates}[i, ..., j, :]
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Note:
@ -4429,7 +4429,7 @@ class ScatterSub(_ScatterOpDynamic):
\text{input_x}[\text{indices}[i, ..., j], :] \mathrel{-}= \text{updates}[i, ..., j, :]
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4539,7 +4539,7 @@ class ScatterMul(_ScatterOp):
\text{input_x}[\text{indices}[i, ..., j], :] \mathrel{*}= \text{updates}[i, ..., j, :]
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4642,7 +4642,7 @@ class ScatterDiv(_ScatterOp):
\text{input_x}[\text{indices}[i, ..., j], :] \mathrel{/}= \text{updates}[i, ..., j, :]
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4751,7 +4751,7 @@ class ScatterNdAdd(Primitive):
:math:`(i_0, i_1, ..., i_{Q-2}, x\_shape_N, ..., x\_shape_{P-1})`.
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4842,7 +4842,7 @@ class ScatterNdSub(_ScatterNdOp):
:math:`(i_0, i_1, ..., i_{Q-2}, x\_shape_N, ..., x\_shape_{P-1})`.
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
relatively highest priority data type.
Args:
@ -4911,7 +4911,7 @@ class ScatterNonAliasingAdd(Primitive):
This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value.
Inputs of `input_x` and `updates` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:

View File

@ -336,10 +336,10 @@ class HookBackward(PrimitiveWithInfer):
Args:
hook_fn (Function): Python function. hook function.
cell_id (str): Used to identify whether the function registered by the hook is actually registered on
the specified Cell. where the Cell is an object. For example, 'nn.Add' is a Cell object.
the specified Cell. Where the Cell is an object. For example, 'nn.Add' is a Cell object.
The default value of cell_id is "", in this case, the system will automatically register
when registering. Add a value of cell_id, the value of cell_id currently does not support
custom values
custom values.
Inputs:
- **inputs** (Tensor) - The variable to hook.

View File

@ -32,7 +32,7 @@ class CropAndResize(PrimitiveWithInfer):
method (str): An optional string that specifies the sampling method for resizing.
It can be "bilinear", "nearest" or "bilinear_v2". The option "bilinear" stands for standard bilinear
interpolation algorithm, while "bilinear_v2" may produce better results in some cases. Default: "bilinear".
extrapolation_value (float): An optional float value used extrapolation, if applicable. Default: 0.
extrapolation_value (float): An optional float value used for extrapolation, if applicable. Default: 0.0.
Inputs:
- **x** (Tensor) - The input image must be a 4-D tensor of shape [batch, image_height, image_width, depth].

View File

@ -397,7 +397,7 @@ class FusedWeightScaleApplyMomentum(PrimitiveWithInfer):
Inputs of `variable`, `accumulation` and `gradient` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
relatively highest priority data type.
Data type conversion of Parameter is not supported. RuntimeError exception will be thrown.

View File

@ -361,7 +361,7 @@ class AssignAdd(Primitive):
Updates a `Parameter` by adding a value to it.
Inputs of `variable` and `value` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
If `value` is a number, the number is automatically converted to Tensor,
and the data type is consistent with the Tensor data type involved in the operation.
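To make the recurring implicit type-conversion wording concrete, a small sketch (an illustration of the general rule, assuming it applies to AssignAdd exactly as this docstring states): when an int32 tensor is added into a float32 Parameter, the int32 side is converted to float32 before the update.

import numpy as np
from mindspore import ops, Parameter, Tensor

variable = Parameter(Tensor(np.array([1.0, 2.0], np.float32)), name="variable")
value = Tensor(np.array([3, 4], np.int32))  # lower priority dtype than float32
ops.AssignAdd()(variable, value)            # value is implicitly cast to float32
print(variable.asnumpy())                   # [4. 6.]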
@ -424,7 +424,7 @@ class AssignSub(Primitive):
Updates a `Parameter` by subtracting a value from it.
Inputs of `variable` and `value` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
If `value` is a number, the number is automatically converted to Tensor,
and the data type is consistent with the Tensor data type involved in the operation.
@ -586,7 +586,7 @@ class _Reduce(PrimitiveWithInfer):
class ReduceMean(_Reduce):
"""
Reduces a dimension of a tensor by averaging all elements in the dimension, by Default. And also can reduces
Reduces a dimension of a tensor by averaging all elements in the dimension, by default. And also can reduce
a dimension of `x` along the axis. Determine whether the dimensions of the output and input are the same by
controlling `keep_dims`.
@ -663,8 +663,8 @@ class ReduceMean(_Reduce):
class ReduceSum(_Reduce):
"""
Reduces a dimension of a tensor by summing all elements in the dimension, by Default. And also can reduces
a dimension of `x` along the axis. Determine whether the dimensions of the output and input are the same by
Reduces a dimension of a tensor by summing all elements in the dimension, by default. And also can reduce a
dimension of `x` along the axis. Determine whether the dimensions of the output and input are the same by
controlling `keep_dims`.
Args:
@ -1191,7 +1191,7 @@ class Cdist(Primitive):
Computes the batched p-norm distance between each pair of the two collections of row vectors.
Args:
p (float): P value for the p norm distance to calculate between each vector pair [0,].
p (float): P value for the p norm distance to calculate between each vector pair [0,]. Default: 2.0.
Inputs:
- **input_x** (Tensor) - Input tensor of shape :math:`(B, P, M)`.
@ -1241,15 +1241,15 @@ class LpNorm(Primitive):
Args:
axis(int,list,tuple): Specifies which dimension or dimensions of input to calculate the norm across.
p(int): The order of norm.
keep_dims(bool): Whether the output tensors have dim retained or not.
p(int): The order of norm. Default: 2.
keep_dims(bool): Whether the output tensors have dim retained or not. Default: False.
epsilon(float): A value added to the denominator for numerical stability. Default: 1e-12.
Inputs:
- **input** (Tensor) - Input tensor.
Outputs:
Tensor, has the same dtype as `input`, which shape is depend on the args axis.For example, if the size of input
Tensor, has the same dtype as `input`, whose shape depends on the arg axis. For example, if the size of input
is (2, 3, 4), axis is [0, 1], Outputs' shape will be (4,).
Raises:
@ -2302,7 +2302,7 @@ class HistogramFixedWidth(PrimitiveWithInfer):
Inputs:
- **x** (Tensor) - Numeric Tensor. Must be one of the following types: int32, float32, float16.
- **range** (Tensor) - Must has the same data type as `x`, and the shape is [2].
- **range** (Tensor) - Must have the same data type as `x`, and the shape is [2].
x <= range[0] will be mapped to hist[0], x >= range[1] will be mapped to hist[-1].
Outputs:
@ -3482,7 +3482,7 @@ class ApproximateEqual(_LogicBinaryOp):
where :math:`\text{tolerance}` indicates Acceptable maximum tolerance.
Inputs of `x` and `y` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4761,7 +4761,7 @@ class Atan2(_MathBinaryOp):
such that :math:`x = r*\sin(\theta), y = r*\cos(\theta)`, where :math:`r = \sqrt{x^2 + y^2}`.
Inputs of `x` and `y` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -4856,7 +4856,7 @@ class BitwiseAnd(_BitwiseBinaryOp):
Inputs of `x` and `y` comply with the implicit type conversion rules to
make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -4895,7 +4895,7 @@ class BitwiseOr(_BitwiseBinaryOp):
Inputs of `x` and `y` comply with the implicit type conversion rules to
make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -4933,7 +4933,7 @@ class BitwiseXor(_BitwiseBinaryOp):
Inputs of `x` and `y` comply with the implicit type conversion rules to
make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:

View File

@ -296,7 +296,7 @@ class Softmax(Primitive):
Softmax operation.
Applies the Softmax operation to the input tensor on the specified axis.
Supposes a slice in the given aixs :math:`x`, then for each element :math:`x_i`,
Supposes a slice in the given axis :math:`x`, then for each element :math:`x_i`,
the Softmax function is shown as follows:
.. math::
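The formula body falls outside this hunk's context lines. The standard Softmax along the chosen axis (stated as background, not quoted from the file) is:

.. math::
    \text{Softmax}(x_i) = \frac{\exp(x_i)}{\sum_{j} \exp(x_j)}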
@ -347,7 +347,7 @@ class LogSoftmax(Primitive):
Log Softmax activation function.
Applies the Log Softmax function to the input tensor on the specified axis.
Supposes a slice in the given aixs, :math:`x` for each element :math:`x_i`,
Supposes a slice in the given axis, :math:`x` for each element :math:`x_i`,
the Log Softmax function is shown as follows:
.. math::
@ -2540,7 +2540,7 @@ class ApplyMomentum(Primitive):
Inputs of `variable`, `accumulation` and `gradient` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Refer to :class:`mindspore.nn.Momentum` for more details about the formula and usage.
@ -3642,7 +3642,7 @@ class PReLU(PrimitiveWithInfer):
Raises:
TypeError: If dtype of `x` or `weight` is neither float16 nor float32.
TypeError: If the `x` or the `weight` is not a Tensor.
ValueError: If the `x` is a 0-D or 1-D Tensor on Ascned.
ValueError: If the `x` is a 0-D or 1-D Tensor on Ascend.
ValueError: If the `weight` is not a 1-D Tensor.
Supported Platforms:
@ -4646,7 +4646,7 @@ class FusedSparseAdam(PrimitiveWithInfer):
`epsilon`.
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4796,7 +4796,7 @@ class FusedSparseLazyAdam(PrimitiveWithInfer):
`epsilon`.
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -4928,7 +4928,7 @@ class FusedSparseFtrl(PrimitiveWithInfer):
Merges the duplicate value of the gradient and then updates relevant entries according to the FTRL-proximal scheme.
All inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5045,7 +5045,7 @@ class FusedSparseProximalAdagrad(PrimitiveWithInfer):
\end{array}
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5333,7 +5333,7 @@ class ApplyAdaMax(Primitive):
Inputs of `var`, `m`, `v` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -5440,7 +5440,7 @@ class ApplyAdadelta(Primitive):
Inputs of `var`, `accum`, `accum_update` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -5534,7 +5534,7 @@ class ApplyAdagrad(Primitive):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5613,7 +5613,7 @@ class ApplyAdagradV2(Primitive):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Note:
@ -5698,7 +5698,7 @@ class SparseApplyAdagrad(PrimitiveWithInfer):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5799,7 +5799,7 @@ class SparseApplyAdagradV2(PrimitiveWithInfer):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5901,7 +5901,7 @@ class ApplyProximalAdagrad(Primitive):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -5911,7 +5911,7 @@ class ApplyProximalAdagrad(Primitive):
Inputs:
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
- **accum** (Parameter) - Accumulation to be updated. Must has the same shape and dtype as `var`.
- **accum** (Parameter) - Accumulation to be updated. Must have the same shape and dtype as `var`.
- **lr** (Union[Number, Tensor]) - The learning rate value, must be scalar. The data type must be
float16 or float32.
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar. The data type must be
@ -5994,7 +5994,7 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -6010,7 +6010,7 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck):
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
a scalar tensor with float16 or float32 data type.
- **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or
a scalar tensor with float16 or float32 data type..
a scalar tensor with float16 or float32 data type.
- **grad** (Tensor) - A tensor of the same type as `var` and
grad.shape[1:] = var.shape[1:] if var.shape > 1.
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
@ -6109,7 +6109,7 @@ class ApplyAddSign(PrimitiveWithInfer):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -6200,7 +6200,7 @@ class ApplyPowerSign(PrimitiveWithInfer):
All of inputs comply with the implicit type conversion rules to make the data types consistent.
If `lr`, `logbase`, `sign_decay` or `beta` is a number, the number is automatically converted to Tensor,
and the data type is consistent with the Tensor data type involved in the operation.
If inputs are tensors and have different data types, lower priority data type will be converted to
If inputs are tensors and have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -6320,7 +6320,7 @@ class ApplyGradientDescent(Primitive):
where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`.
Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -6385,7 +6385,7 @@ class ApplyProximalGradientDescent(PrimitiveWithInfer):
where :math:`\alpha` represents `alpha`, :math:`\delta` represents `delta`.
Inputs of `var` and `delta` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -6616,7 +6616,7 @@ class SparseApplyFtrl(PrimitiveWithCheck):
For more details, please refer to :class:`mindspore.nn.FTRL`.
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -6721,7 +6721,7 @@ class SparseApplyFtrlV2(PrimitiveWithInfer):
l2_shrinkage, than class SparseApplyFtrl.
All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
@ -8202,7 +8202,7 @@ class SparseApplyAdadelta(Primitive):
Inputs of 'var', 'accum', 'accum_update' and 'grad' comply with the implicit type conversion rules
to make the data types consistent. Besides, inputs of 'lr' and 'rho' also support implicit type conversion.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
relatively highest priority data type.
RuntimeError exception will be thrown when the data type conversion of Parameter is required.
@ -8416,7 +8416,7 @@ class Conv3DTranspose(PrimitiveWithInfer):
kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers.
Specifies the depth, height and width of the 3D convolution window.
Single int means the value is for the depth, height and the width of the kernel.
A tuple of 3 ints means the first value is for the depth, second value is for height and the
A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
other is for the width of the kernel.
mode (int): Modes for different convolutions. Default is 1. It is currently not used.
pad_mode (str): Specifies padding mode. The optional values are
@ -8734,7 +8734,7 @@ class ApplyAdagradDA(Primitive):
Inputs of `var`, `gradient_accumulator`, `gradient_squared_accumulator` and `grad`
comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -8845,7 +8845,7 @@ class SparseApplyRMSProp(Primitive):
Inputs of `var`, `ms`, `mom` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Args:
@ -8964,7 +8964,7 @@ class ApplyKerasMomentum(Primitive):
Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
relatively highest priority data type.
RuntimeError exception will be thrown when the data type conversion of Parameter is required.

View File

@ -29,7 +29,7 @@ class Assign(Primitive):
Assigns `Parameter` with a value.
Inputs of `variable` and `value` comply with the implicit type conversion rules to make the data types consistent.
If they have different data types, lower priority data type will be converted to
If they have different data types, the lower priority data type will be converted to
the relatively highest priority data type.
Inputs:
@ -486,7 +486,7 @@ class CheckBprop(PrimitiveWithInfer):
- **input_y** (tuple[Tensor]) - The `input_y` contains the inputs of bprop to check against.
Outputs:
(tuple[Tensor]), the `input_x`,
Tuple[Tensor], the `input_x`,
if data type and shape of corresponding elements from `input_x` and `input_y` are the same.
Raises:

View File

@ -161,9 +161,9 @@ class Primitive(Primitive_):
In other parallel modes, strategies set here will be ignored.
Args:
in_strategy (tuple): Describe the split strategy of operator input.
out_strategy (tuple): Describe the split strategy of operator output,
it is only for certain operators, such as MatMul.
in_strategy (tuple): Describe the split strategy of operator input. Default: None.
out_strategy (tuple): Describe the split strategy of operator output, it is only for certain operators,
such as MatMul. Default: None.
Examples:
>>> from mindspore import ops
>>> add = ops.Add()
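The example is truncated by the hunk; a typical call to the method being documented might look like the following (assuming the method is Primitive.shard and that the strategy tuples below are purely illustrative):

from mindspore import ops

add = ops.Add()
# Split both inputs along their first axis across 2 devices; each strategy
# tuple must match the rank of its corresponding input.
add.shard(in_strategy=((2, 1), (2, 1)), out_strategy=None)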

View File

@ -120,8 +120,8 @@ class DenseLayer(nn.Cell):
Dense Layer for Deep Layer of DeepFM Model;
Containing: activation, matmul, bias_add;
Args:
input_dim (int): the shape of weight at 0-aixs;
output_dim (int): the shape of weight at 1-aixs, and shape of bias
input_dim (int): the shape of weight at 0-axis;
output_dim (int): the shape of weight at 1-axis, and shape of bias
weight_bias_init (list): weight and bias init method, "random", "uniform", "one", "zero", "normal";
act_str (str): activation function method, "relu", "sigmoid", "tanh";
keep_prob (float): Dropout Layer keep_prob_rate;