diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 995c981c10d..44f58b7facc 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -466,7 +466,13 @@ class Parameter(Tensor_): @property def requires_grad(self): - """Return whether the parameter requires gradient.""" + """ + Return whether the parameter requires gradient. + + The main function of requires_grad is to tell auto grad to start recording operations on a Tensor. + If a Tensor has requires_grad=False, setting requires_grad to True will make auto grad start recording + operations on the tensor. + """ return self.param_info.requires_grad @requires_grad.setter diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index eb6baf65d82..9b44b18186e 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -679,9 +679,9 @@ class Pad(Cell): x = [[1,2,3], [4,5,6], [7,8,9]]. # The above can be seen: 1st dimension of `x` is 3, 2nd dimension of `x` is 3. # Substitute into the formula to get: - # 1st dimension of output is paddings[0][0] + 3 + paddings[0][1] = 1 + 3 + 1 = 4. + # 1st dimension of output is paddings[0][0] + 3 + paddings[0][1] = 1 + 3 + 1 = 5. # 2nd dimension of output is paddings[1][0] + 3 + paddings[1][1] = 2 + 3 + 2 = 7. - # So the shape of output is (4, 7). + # So the shape of output is (5, 7). mode (str): Specifies padding mode. The optional values are "CONSTANT", "REFLECT", "SYMMETRIC". Default: "CONSTANT". @@ -1007,6 +1007,13 @@ class Tril(Cell): """ Returns a tensor with elements above the kth diagonal zeroed. + The lower triangular part of the matrix is defined as the elements on and below the diagonal. + + The parameter `k` controls the diagonal to be considered. + If `k` = 0, all elements on and below the main diagonal are retained. + Positive values include as many diagonals above the main diagonal, and similarly, + negative values exclude as many diagonals below the main diagonal.
+ Inputs: - **x** (Tensor) - The input tensor. The data type is Number. :math:`(N,*)` where :math:`*` means, any number of additional dimensions. @@ -1094,6 +1101,12 @@ class Triu(Cell): """ Returns a tensor with elements below the kth diagonal zeroed. + The upper triangular part of the matrix is defined as the elements on and above the diagonal. + + The parameter `k` controls the diagonal to be considered. If `k` = 0, all elements on and above the main diagonal + are retained. Positive values exclude as many diagonals above the main diagonal, and similarly, + negative values include as many diagonals below the main diagonal. + Inputs: - **x** (Tensor) - The input tensor. The data type is Number. :math:`(N,*)` where :math:`*` means, any number of additional dimensions. diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 8bcf26e5260..f27e6f8104f 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -74,8 +74,8 @@ class LSTM(Cell): f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\ \tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\ o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\ - c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\ - h_t = o_t * \tanh(c_t) \\ + c_t = f_t \odot c_{(t-1)} + i_t \odot \tilde{c}_t \\ + h_t = o_t \odot \tanh(c_t) \\ \end{array} Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b` diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 4bbeba85006..92d9469f5fc 100755 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -451,6 +451,9 @@ class Reshape(PrimitiveWithInfer): """ Reshapes the input tensor with the same values based on a given shape tuple. + The `input_shape` can contain at most one -1, in which case it is inferred from the remaining dimensions and + the number of elements in the input.
+ Inputs: - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - **input_shape** (tuple[int]) - The input tuple is constructed by multiple @@ -701,6 +704,13 @@ class Transpose(Primitive): """ Permutes the dimensions of the input tensor according to input permutation. + For a 1-D array this has no effect, as a transposed vector is simply the same vector. + To convert a 1-D array into a 2-D column vector, please refer to the class: mindspore.ops.ExpandDims. + For a 2-D array, this is a standard matrix transpose. For an n-D array, if axes are given, + their order indicates how the axes are permuted (see Examples). + If axes are not provided and a.shape = (i[0], i[1], ... i[n-2], i[n-1]), + then a.transpose().shape = (i[n-1], i[n-2], ... i[1], i[0]). + Inputs: - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - **input_perm** (tuple[int]) - The permutation to be converted. The elements in `input_perm` are composed of diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index c4ccc4431a3..b2fc5d784ae 100755 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -3435,9 +3435,11 @@ class L2Normalize(PrimitiveWithInfer): This operator will normalize the input using the given axis. The function is shown as follows: .. math:: - \text{output} = \frac{x}{\sqrt{\text{max}(\text{sum} (\text{x}^2), \epsilon)}}, + \displaylines{{\text{output} = \frac{x}{\sqrt{\text{max}(\parallel x \parallel_2^2 , \epsilon)} } } \\ + {\parallel x \parallel_2^2 = \sum_{i}^{}\left | x_i \right | ^2} } - where :math:`\epsilon` is epsilon. + where :math:`\epsilon` is epsilon and :math:`\sum_{i}^{}\left | x_i \right | ^2` is calculated + along the dimension `axis`. Args: axis (Union[list(int), tuple(int), int]): The starting axis for the input to apply the L2 Normalization.