forked from mindspore-Ecosystem/mindspore
update the documentation of MaxPoolWithArgmax, BroadcastTo, GlobalBatchNorm and DistributedGradReducer operators.
This commit is contained in:
parent 882a582eb4
commit 0f75370377
@@ -575,7 +575,7 @@ class GlobalBatchNorm(_BatchNorm):
 >>>
 >>> device_id = int(os.environ["DEVICE_ID"])
 >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True,
->>>                     device_id=int(device_id))
+...                     device_id=int(device_id))
 >>> init()
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
@@ -268,47 +268,45 @@ class DistributedGradReducer(Cell):
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
 >>>
->>>
 >>> class TrainingWrapper(nn.Cell):
->>>     def __init__(self, network, optimizer, sens=1.0):
->>>         super(TrainingWrapper, self).__init__(auto_prefix=False)
->>>         self.network = network
->>>         self.network.add_flags(defer_inline=True)
->>>         self.weights = optimizer.parameters
->>>         self.optimizer = optimizer
->>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
->>>         self.sens = sens
->>>         self.reducer_flag = False
->>>         self.grad_reducer = None
->>>         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
->>>         if self.parallel_mode in [ParallelMode.DATA_PARALLEL,
->>>                                   ParallelMode.HYBRID_PARALLEL]:
->>>             self.reducer_flag = True
->>>         if self.reducer_flag:
->>>             mean = _get_gradients_mean()
->>>             degree = _get_device_num()
->>>             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
->>>
->>>     def construct(self, *args):
->>>         weights = self.weights
->>>         loss = self.network(*args)
->>>         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
->>>         grads = self.grad(self.network, weights)(*args, sens)
->>>         if self.reducer_flag:
->>>             # apply grad reducer on grads
->>>             grads = self.grad_reducer(grads)
->>>         return F.depend(loss, self.optimizer(grads))
+...     def __init__(self, network, optimizer, sens=1.0):
+...         super(TrainingWrapper, self).__init__(auto_prefix=False)
+...         self.network = network
+...         self.network.add_flags(defer_inline=True)
+...         self.weights = optimizer.parameters
+...         self.optimizer = optimizer
+...         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
+...         self.sens = sens
+...         self.reducer_flag = False
+...         self.grad_reducer = None
+...         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
+...         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
+...             self.reducer_flag = True
+...         if self.reducer_flag:
+...             mean = _get_gradients_mean()
+...             degree = _get_device_num()
+...             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
+...
+...     def construct(self, *args):
+...         weights = self.weights
+...         loss = self.network(*args)
+...         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
+...         grads = self.grad(self.network, weights)(*args, sens)
+...         if self.reducer_flag:
+...             # apply grad reducer on grads
+...             grads = self.grad_reducer(grads)
+...         return F.depend(loss, self.optimizer(grads))
 >>>
 >>> class Net(nn.Cell):
->>>     def __init__(self, in_features, out_features):
->>>         super(Net, self).__init__()
->>>         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
->>>                                 name='weight')
->>>         self.matmul = P.MatMul()
->>>
->>>     def construct(self, x):
->>>         output = self.matmul(x, self.weight)
->>>         return output
+...     def __init__(self, in_features, out_features):
+...         super(Net, self).__init__()
+...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
+...                                 name='weight')
+...         self.matmul = P.MatMul()
+...
+...     def construct(self, x):
+...         output = self.matmul(x, self.weight)
+...         return output
 >>>
 >>> size, in_features, out_features = 16, 16, 10
 >>> network = Net(in_features, out_features)
@@ -4105,12 +4105,12 @@ class BroadcastTo(PrimitiveWithInfer):
     When input shape is broadcast to target shape, it starts with the trailing dimensions.
 
     Raises:
-        ValueError: Given a shape tuple, if it has several -1s; or if the -1 is in an invalid position
-            such as one that does not have a opposing dimension in an input tensor; of if the target and
+        ValueError: Given a shape tuple, if it has several -1; or if the -1 is in an invalid position
+            such as one that does not have a opposing dimension in an input tensor; or if the target and
             input shapes are incompatiable.
 
     Args:
-        shape (tuple): The target shape to broadcast. Can be fully specified, or have '-1's in one position
+        shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
            where it will be substituted by the input tensor's shape in that position, see example.
 
     Inputs:
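
For context, a minimal doctest-style sketch of the -1 substitution documented above; it assumes the same `P`, `Tensor`, and `np` aliases used in the other docstring examples in this file and is not part of the commit:

>>> input_x = Tensor(np.ones((1, 3)).astype(np.float32))
>>> out_full = P.BroadcastTo((2, 3))(input_x)   # fully specified target -> output shape (2, 3)
>>> out_keep = P.BroadcastTo((-1, 3))(input_x)  # -1 is substituted by the input's size there -> output shape (1, 3)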
@@ -1566,7 +1566,8 @@ class MaxPoolWithArgmax(_Pool):
     Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
 
     - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
-    - **mask** (Tensor) - Max values' index represented by the mask.
+      It has the same data type as `input`.
+    - **mask** (Tensor) - Max values' index represented by the mask. Data type is int32.
 
     Supported Platforms:
         ``Ascend``
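
For reference, a hedged doctest-style sketch of the two outputs documented above; the `ksize`/`strides` argument names and the `P`, `Tensor`, and `np` aliases are assumed from the surrounding examples and this era of the API, and are not part of the commit:

>>> x = Tensor(np.arange(1 * 3 * 4 * 4).reshape((1, 3, 4, 4)).astype(np.float32))
>>> maxpool_argmax = P.MaxPoolWithArgmax(ksize=2, strides=2)
>>> output, mask = maxpool_argmax(x)
>>> # output: pooled values, shape (1, 3, 2, 2), same data type as the input
>>> # mask:   int32 tensor locating each maximum, as stated in the updated docstring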