forked from mindspore-Ecosystem/mindspore
!9973 update the doc string of some operations.
From: @wangshuide2020 Reviewed-by: @liangchenghui,@wuxuejian Signed-off-by: @liangchenghui
commit 9eb6fb01bc
@@ -34,7 +34,7 @@ abs_ = P.Abs()
 def mean(x, axis=(), keep_dims=False):
     """
-    Reduce a dimension of a tensor by averaging all elements in the dimension.
+    Reduces a dimension of a tensor by averaging all elements in the dimension.
 
     Args:
         axis (Union[None, int, tuple(int)]): Dimensions of reduction,
@@ -338,7 +338,7 @@ class Tensor(Tensor_):
     def mean(self, axis=(), keep_dims=False):
         """
-        Reduce a dimension of a tensor by averaging all elements in the dimension.
+        Reduces a dimension of a tensor by averaging all elements in the dimension.
 
         Args:
             axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction,
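For context, a minimal usage sketch of the `Tensor.mean` interface documented above (values are illustrative, not part of this commit):

import numpy as np
from mindspore import Tensor

x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
# Reduce along axis 0; keep_dims=False drops the reduced dimension.
print(x.mean(axis=0, keep_dims=False))  # [2. 3.]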
@@ -131,21 +131,21 @@ def matmul(inputs_x: Tensor, inputs_y: Tensor) -> Tensor:
 
 
 def maximum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the maximum value in this dimension."""
+    """Reduces a dimension of a tensor by the maximum value in this dimension."""
     max_op = op.ReduceMax(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs
 
 
 def minimum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by the minimum value in the dimension."""
+    """Reduces a dimension of a tensor by the minimum value in the dimension."""
     max_op = op.ReduceMin(keep_dims)
     outputs = max_op(inputs, axis)
     return outputs
 
 
 def mean(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by averaging all elements in the dimension."""
+    """Reduces a dimension of a tensor by averaging all elements in the dimension."""
     mean_op = op.ReduceMean(keep_dims)
     outputs = mean_op(inputs, axis)
     return outputs
@@ -243,7 +243,7 @@ def softmax(axis: int = -1) -> Callable:
 
 
 def summation(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
-    """Reduce a dimension of a tensor by summing all elements in the dimension."""
+    """Reduces a dimension of a tensor by summing all elements in the dimension."""
     sum_op = op.ReduceSum(keep_dims)
     outputs = sum_op(inputs, axis)
     return outputs
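The four helpers above are thin wrappers around the reduce primitives. A minimal sketch of the same pattern with the public operations (illustrative, assuming the standard `mindspore.ops.operations` import path):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
# Construct the primitive with keep_dims, then call it with (input, axis),
# exactly as the wrappers above do.
print(P.ReduceMax(False)(x, 1))  # [2. 4.]
print(P.ReduceSum(False)(x, 1))  # [3. 7.]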
@@ -15,7 +15,7 @@
 """
 Neural Networks Cells.
 
-Pre-defined building blocks or computing units to construct Neural Networks.
+Pre-defined building blocks or computing units to construct neural networks.
 """
 from . import layer, loss, optim, metrics, wrap, probability, sparse, dynamic_lr
 from .learning_rate_schedule import *
@@ -913,7 +913,7 @@ class MatrixDiagPart(Cell):
 
 class MatrixSetDiag(Cell):
     r"""
-    Modify the batched diagonal part of a batched tensor.
+    Modifies the batched diagonal part of a batched tensor.
 
     Inputs:
         - **x** (Tensor) - The batched tensor. Rank k+1, where k >= 1. It can be one of the following data types:
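A hedged usage sketch of `MatrixSetDiag`, assuming the two-input call (x, diagonal) described in its Inputs section; shapes and values are illustrative:

import numpy as np
from mindspore import Tensor, nn

x = Tensor(np.ones((2, 3, 3), dtype=np.float32))  # rank k+1 with k >= 1
diagonal = Tensor(np.array([[1., 2., 3.], [4., 5., 6.]], dtype=np.float32))
matrix_set_diag = nn.MatrixSetDiag()
output = matrix_set_diag(x, diagonal)  # x with its batched diagonals replaced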
@@ -30,9 +30,9 @@ __all__ = [
 
 class Conv2dBnAct(Cell):
     r"""
-    A combination of convolution, Batchnorm, activation layer.
+    A combination of convolution, Batchnorm, and activation layer.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
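A minimal sketch of the fused block described above (channel sizes are illustrative):

import numpy as np
from mindspore import Tensor, nn

# Conv2d -> BatchNorm -> ReLU in one cell.
net = nn.Conv2dBnAct(3, 12, 4, has_bn=True, activation='relu')
x = Tensor(np.ones([1, 3, 16, 16]).astype(np.float32))
print(net(x).shape)  # (1, 12, 16, 16) with the default 'same' pad mode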
@@ -186,7 +186,7 @@ def _compute_multi_channel_loss(c1, c2, img1, img2, conv, concat, mean):
 
 class SSIM(Cell):
     r"""
-    Returns SSIM index between img1 and img2.
+    Returns SSIM index between two images.
 
     Its implementation is based on Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). `Image quality
     assessment: from error visibility to structural similarity <https://ieeexplore.ieee.org/document/1284395>`_.
@@ -266,7 +266,7 @@ def _downsample(img1, img2, op):
 
 class MSSSIM(Cell):
     r"""
-    Returns MS-SSIM index between img1 and img2.
+    Returns MS-SSIM index between two images.
 
     Its implementation is based on Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. `Multiscale structural similarity
     for image quality assessment <https://ieeexplore.ieee.org/document/1292216>`_.
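A usage sketch for these image-quality cells (random NCHW inputs for illustration; MSSSIM needs images large enough to survive its multi-scale downsampling, so only SSIM is shown):

import numpy as np
from mindspore import Tensor, nn

net = nn.SSIM()
img1 = Tensor(np.random.rand(1, 3, 32, 32).astype(np.float32))
img2 = Tensor(np.random.rand(1, 3, 32, 32).astype(np.float32))
ssim = net(img1, img2)  # one SSIM index per image in the batch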
@@ -43,7 +43,7 @@ def _check_input_dtype(input_dtype, param_name, allow_dtypes, cls_name):
 
 class LSTM(Cell):
     r"""
-    LSTM (Long Short-Term Memory) layer.
+    Stacked LSTM (Long Short-Term Memory) layers.
 
     Apply LSTM layer to the input.
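A minimal sketch of the stacked-LSTM call convention (sizes are illustrative):

import numpy as np
from mindspore import Tensor, nn

net = nn.LSTM(input_size=10, hidden_size=16, num_layers=2, batch_first=True)
x = Tensor(np.ones([3, 5, 10]).astype(np.float32))    # (batch, seq_len, input_size)
h0 = Tensor(np.zeros([2, 3, 16]).astype(np.float32))  # (num_layers * num_directions, batch, hidden_size)
c0 = Tensor(np.zeros([2, 3, 16]).astype(np.float32))
output, (hn, cn) = net(x, (h0, c0))
print(output.shape)  # (3, 5, 16)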
@@ -45,7 +45,7 @@ def _check_input_dtype(param_name, input_dtype, allow_dtypes, cls_name):
 
 class ReduceLogSumExp(Cell):
     r"""
-    Reduce a dimension of a tensor by calculating exponential for all elements in the dimension,
+    Reduces a dimension of a tensor by calculating exponential for all elements in the dimension,
     then calculate logarithm of the sum.
 
     The dtype of the tensor to be reduced is number.
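A sketch of the reduction described above, assuming an (axis, keep_dims) constructor:

import numpy as np
from mindspore import Tensor, nn

x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
# Computes log(sum(exp(x), axis)) in a numerically stable way.
reduce_lse = nn.ReduceLogSumExp(axis=1, keep_dims=False)
print(reduce_lse(x))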
@@ -158,7 +158,7 @@ class Range(Cell):
 
 class LGamma(Cell):
     r"""
-    Calculate LGamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
+    Calculates LGamma using Lanczos' approximation refering to "A Precision Approximationof the Gamma Function".
     The algorithm is:
 
     .. math::
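A quick check of the cell at integer points, where log(Gamma(n)) = log((n-1)!):

import numpy as np
from mindspore import Tensor, nn

x = Tensor(np.array([2.0, 3.0, 4.0], dtype=np.float32))
lgamma = nn.LGamma()
print(lgamma(x))  # ~[0.0, 0.6931, 1.7918] = log([1, 2, 6])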
@@ -886,7 +886,7 @@ class MatMul(Cell):
 
 class Moments(Cell):
     """
-    Calculate the mean and variance of `x`.
+    Calculates the mean and variance of `x`.
 
     Args:
         axis (Union[int, tuple(int)]): Calculates the mean and variance along the specified axis. Default: ().
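A minimal sketch of `Moments` (illustrative input):

import numpy as np
from mindspore import Tensor, nn

net = nn.Moments(axis=0, keep_dims=True)
x = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
mean, variance = net(x)  # mean [[2. 3.]], population variance [[1. 1.]]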
@@ -62,7 +62,7 @@ def _shape_check(in_shape):
 
 class MaxPool2d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    2D max pooling operation for temporal data.
 
     Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes.
@@ -139,7 +139,7 @@ class MaxPool2d(_PoolNd):
 
 class MaxPool1d(_PoolNd):
     r"""
-    Max pooling operation for temporal data.
+    1D max pooling operation for temporal data.
 
     Applies a 1D max pooling over an input Tensor which can be regarded as a composition of 1D planes.
@@ -220,7 +220,7 @@ class MaxPool1d(_PoolNd):
 
 class AvgPool2d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    2D average pooling for temporal data.
 
     Applies a 2D average pooling over an input Tensor which can be regarded as a composition of 2D input planes.
@@ -294,7 +294,7 @@ class AvgPool2d(_PoolNd):
 
 class AvgPool1d(_PoolNd):
     r"""
-    Average pooling for temporal data.
+    1D average pooling for temporal data.
 
     Applies a 1D average pooling over an input Tensor which can be regarded as a composition of 1D input planes.
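The four pooling cells renamed above share one call convention; a sketch with the 2d variants (the 1d variants take (N, C, L) inputs instead):

import numpy as np
from mindspore import Tensor, nn

x = Tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
print(max_pool(x).shape)  # (1, 1, 2, 2)
print(avg_pool(x).shape)  # (1, 1, 2, 2)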
@@ -210,7 +210,7 @@ class UniformQuantObserver(_Observer):
 
 class FakeQuantWithMinMaxObserver(UniformQuantObserver):
     r"""
-    Quantization aware op. This OP provides the fake quantization observer function on data with min and max.
+    Quantization aware operation which provides the fake quantization observer function on data with min and max.
 
     Args:
         min_init (int, float): The initialized min value. Default: -6.
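A hedged sketch of the observer on its own (normally it is wired into the quantized layers below rather than used directly):

import numpy as np
from mindspore import Tensor, nn

# Simulates quantization: values are clamped into [min_init, max_init]
# and snapped to the quantization grid.
fake_quant = nn.FakeQuantWithMinMaxObserver(min_init=-1.0, max_init=1.0)
x = Tensor(np.array([[-2.0, -0.5], [0.5, 2.0]], dtype=np.float32))
output = fake_quant(x)  # entries outside [-1, 1] are saturated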
@@ -273,7 +273,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
         self.narrow_range = narrow_range
         self.is_ascend = context.get_context('device_target') == "Ascend"
 
-        # init tensor min and max for fake quant op
+        # init tensor min and max for fake quantized operation
         if self.per_channel:
             min_array = np.array([self.min_init] * self.num_channels).astype(np.float32)
             max_array = np.array([self.max_init] * self.num_channels).astype(np.float32)
@@ -335,9 +335,9 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
 
 class Conv2dBnFoldQuantOneConv(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -546,9 +546,9 @@ class Conv2dBnFoldQuantOneConv(Cell):
 
 class Conv2dBnFoldQuant(Cell):
     r"""
-    2D convolution with BatchNormal op folded construct.
+    2D convolution with BatchNormal operation folded construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -730,9 +730,9 @@ class Conv2dBnFoldQuant(Cell):
 
 class Conv2dBnWithoutFoldQuant(Cell):
     r"""
-    2D convolution + batchnorm without fold with fake quant construct.
+    2D convolution and batchnorm without fold with fake quantized construct.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -844,9 +844,9 @@ class Conv2dBnWithoutFoldQuant(Cell):
 
 class Conv2dQuant(Cell):
     r"""
-    2D convolution with fake quant op layer.
+    2D convolution with fake quantized operation layer.
 
-    This part is a more detailed overview of Conv2d op.
+    This part is a more detailed overview of Conv2d operation.
 
     Args:
         in_channels (int): The number of input channel :math:`C_{in}`.
@@ -953,9 +953,9 @@ class Conv2dQuant(Cell):
 
 class DenseQuant(Cell):
     r"""
-    The fully connected layer with fake quant op.
+    The fully connected layer with fake quantized operation.
 
-    This part is a more detailed overview of Dense op.
+    This part is a more detailed overview of Dense operation.
 
     Args:
         in_channels (int): The dimension of the input space.
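The quantized layers above follow the plain-layer constructors plus a `quant_config`; a hedged sketch with `Conv2dQuant` under the default config (sizes illustrative):

import numpy as np
from mindspore import Tensor, nn

conv_quant = nn.Conv2dQuant(1, 6, kernel_size=3, stride=1, pad_mode='valid')
x = Tensor(np.random.rand(1, 1, 8, 8).astype(np.float32))
print(conv_quant(x).shape)  # (1, 6, 6, 6) with 'valid' padding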
@@ -1057,7 +1057,8 @@ class DenseQuant(Cell):
 
 class _QuantActivation(Cell):
     r"""
-    Base class for quantization aware training activation function. Add Fake Quant OP after activation OP.
+    Base class for quantization aware training activation function. Add fake quantized operation
+    after activation operation.
     """
 
     def get_origin(self):
@@ -1068,14 +1069,14 @@ class ActQuant(_QuantActivation):
     r"""
     Quantization aware training activation function.
 
-    Add the fake quant op to the end of activation op, by which the output of activation op will be truncated.
-    Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
+    Add the fake quantized operation to the end of activation operation, by which the output of activation operation
+    will be truncated. Please check `FakeQuantWithMinMaxObserver` or other observer for more details.
 
     Args:
         activation (Cell): Activation cell.
         ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
-        fake_before (bool): Whether add fake quant operation before activation. Default: False.
+        fake_before (bool): Whether add fake quantized operation before activation. Default: False.
         quant_config (QuantConfig): Configs the oberser types and quant configs of weight and activation. Default:
             both set to default FakeQuantWithMinMaxObserver.
         quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
@@ -1134,9 +1135,9 @@ class ActQuant(_QuantActivation):
 
 class TensorAddQuant(Cell):
     r"""
-    Add Fake Quant OP after TensorAdd OP.
+    Add fake quantized operation after TensorAdd operation.
 
-    This part is a more detailed overview of TensorAdd op.
+    This part is a more detailed overview of TensorAdd operation.
 
     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
@@ -1185,9 +1186,9 @@ class TensorAddQuant(Cell):
 
 class MulQuant(Cell):
     r"""
-    Add Fake Quant OP after Mul OP.
+    Add fake quantized operation after `Mul` operation.
 
-    This part is a more detailed overview of Mul op.
+    This part is a more detailed overview of `Mul` operation.
 
     Args:
         ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
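A sketch of wrapping an activation with a fake-quant observer, assuming `activation` is the first constructor argument as documented above:

import numpy as np
from mindspore import Tensor, nn

act_quant = nn.ActQuant(activation=nn.ReLU())
x = Tensor(np.array([[-1.0, 2.0]], dtype=np.float32))
output = act_quant(x)  # ReLU output passed through the fake-quant observer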
@@ -66,7 +66,7 @@ class _Loss(Cell):
 
 class L1Loss(_Loss):
     r"""
-    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` by element,
+    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` element-wise,
     where :math:`x` is the input Tensor and :math:`y` is the target Tensor.
 
     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
@@ -114,7 +114,7 @@ class L1Loss(_Loss):
 class MSELoss(_Loss):
     r"""
     MSELoss creates a criterion to measure the mean squared error (squared L2-norm) between :math:`x` and :math:`y`
-    by element, where :math:`x` is the input and :math:`y` is the target.
+    element-wise, where :math:`x` is the input and :math:`y` is the target.
 
     For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
     the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
@@ -490,7 +490,7 @@ class SampledSoftmaxLoss(_Loss):
 
 class BCELoss(_Loss):
     r"""
-    BCELoss creates a criterion to measure the Binary Cross Entropy between the true labels and predicted labels.
+    BCELoss creates a criterion to measure the binary cross entropy between the true labels and predicted labels.
 
     Note:
         Set the predicted labels as :math:`x`, true labels as :math:`y`, the output loss as :math:`\ell(x, y)`.
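A minimal sketch of the loss cells' shared call convention (values illustrative):

import numpy as np
from mindspore import Tensor, nn

loss_fn = nn.L1Loss()  # 'mean' reduction by default
logits = Tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
labels = Tensor(np.array([1.0, 2.0, 2.0], dtype=np.float32))
print(loss_fn(logits, labels))  # mean(|x - y|) = 0.33333334

bce = nn.BCELoss(reduction='mean')  # inputs are probabilities in [0, 1]
probs = Tensor(np.array([0.2, 0.7], dtype=np.float32))
target = Tensor(np.array([0.0, 1.0], dtype=np.float32))
print(bce(probs, target))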
@@ -465,9 +465,9 @@ class AdamWeightDecay(Optimizer):
 
 class AdamOffload(Optimizer):
     r"""
-    Updates gradients by the Adaptive Moment Estimation (Adam) algorithm. This optimizer will offload Adam optimizer to
-    host CPU and keep parameters being updated on the device, to minimize the memory cost. Although that would bring
-    about an increase of performance overhead, the optimizer could be used to run a larger model.
+    This optimizer will offload Adam optimizer to host CPU and keep parameters being updated on the device,
+    to minimize the memory cost. Although that would bring about an increase of performance overhead,
+    the optimizer could be used to run a larger model.
 
     The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
@@ -51,7 +51,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
 
 class ProximalAdagrad(Optimizer):
     """
-    Implement the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
+    Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
 
     ProximalAdagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting
@@ -33,7 +33,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, s
 
 class SGD(Optimizer):
     r"""
-    Implements stochastic gradient descent (optionally with momentum).
+    Implements stochastic gradient descent. Momentum is optional.
 
     Introduction to SGD can be found at https://en.wikipedia.org/wiki/Stochastic_gradient_descent.
     Nesterov momentum is based on the formula from paper `On the importance of initialization and
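The optimizers touched here plug into training the same way; a sketch with `SGD` (network and rates are illustrative):

from mindspore import nn

net = nn.Dense(3, 4)
opt = nn.SGD(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
loss_fn = nn.MSELoss()
# Standard wiring: loss wrapper + single-step training cell.
train_step = nn.TrainOneStepCell(nn.WithLossCell(net, loss_fn), opt)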
@@ -4306,7 +4306,7 @@ class KLDivLoss(PrimitiveWithInfer):
 
 class BinaryCrossEntropy(PrimitiveWithInfer):
     r"""
-    Computes the Binary Cross Entropy between the target and the output.
+    Computes the binary cross entropy between the target and the output.
 
     Note:
         Sets input as :math:`x`, input label as :math:`y`, output as :math:`\ell(x, y)`.
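A sketch of the primitive's three-input call (probabilities, targets, per-element weights; values illustrative):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

bce = P.BinaryCrossEntropy(reduction='mean')
logits = Tensor(np.array([0.2, 0.7, 0.4], dtype=np.float32))
labels = Tensor(np.array([0.0, 1.0, 0.0], dtype=np.float32))
weight = Tensor(np.ones(3, dtype=np.float32))
output = bce(logits, labels, weight)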