forked from mindspore-Ecosystem/mindspore
!25530 Fix Boost docs bug.
Merge pull request !25530 from linqingke/bug_fix
commit 910a83a0fd
@@ -150,9 +150,10 @@ class AdaSum(Cell):
     parallel training of Deep Learning models.
 
     Args:
-        network (Cell): The training network. The network only supports single output.
-        optimizer (Union[Cell]): Optimizer for updating the weights.
-        sens (numbers.Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.
+        rank (int): Rank number.
+        device_number (int): Device number.
+        group_number (int): Group number.
+        parameter_tuple (Tuple(Parameter)): Tuple of parameters.
 
     Inputs:
         - **delta_weights** (Tuple(Tensor)) - Tuple of gradients.
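For context on what the AdaSum cell computes: adaptive summation combines two gradients so that their shared direction is not counted twice. A minimal NumPy sketch of the two-gradient combine rule from the AdaSum paper (an illustration of the idea only, not MindSpore's distributed implementation):

import numpy as np

def adasum_combine(g1, g2, eps=1e-12):
    """Adaptively combine two gradient vectors (illustrative sketch)."""
    dot = np.dot(g1, g2)
    # Scale each gradient by how much of it is already contained in the other,
    # so two identical gradients combine to roughly g instead of 2 * g.
    return ((1 - dot / (2 * np.dot(g1, g1) + eps)) * g1
            + (1 - dot / (2 * np.dot(g2, g2) + eps)) * g2)

g = np.array([1.0, 2.0, 3.0])
print(adasum_combine(g, g))                                          # ~[1. 2. 3.]
print(adasum_combine(np.array([1.0, 0.0]), np.array([0.0, 1.0])))    # [1. 1.] (orthogonal: plain sum)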
@@ -25,7 +25,7 @@ __all__ = ["OptimizerProcess", "ParameterProcess"]
 
 
 class OptimizerProcess:
-    """
+    r"""
     Process optimizer for Boost. Currently, this class supports adding GC(grad centralization) tags
     and creating new optimizers.
 
@@ -68,7 +68,12 @@ class OptimizerProcess:
         self.origin_params = opt.init_params["params"]
 
     def build_params_dict(self, network):
-        """Build the params dict of the network"""
+        r"""
+        Build the parameter's dict of the network.
+
+        Inputs:
+            - **network** (Cell) - The training network.
+        """
         cells = network.cells_and_names()
         params_dict = {}
         for _, cell in cells:
@@ -77,7 +82,13 @@ class OptimizerProcess:
         return params_dict
 
     def build_gc_params_group(self, params_dict, parameters):
-        """Build the params group that needs gc"""
+        r"""
+        Build the parameter's group with grad centralization.
+
+        Inputs:
+            - **params_dict** (dict) - The network's parameter dict.
+            - **parameters** (list) - The network's parameter list.
+        """
         group_params = []
         for group_param in parameters:
             if 'order_params' in group_param.keys():
@@ -107,7 +118,12 @@ class OptimizerProcess:
         return group_params
 
     def add_grad_centralization(self, network):
-        """Add gradient centralization."""
+        r"""
+        Add gradient centralization.
+
+        Inputs:
+            - **network** (Cell) - The training network.
+        """
         params_dict = self.build_params_dict(network)
 
         parameters = self.origin_params
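The GC tag added by `add_grad_centralization` refers to gradient centralization: each multi-dimensional weight gradient has its mean over all axes except the output-channel axis subtracted. A minimal NumPy sketch of that operation (the generic GC formula, not the exact operator MindSpore inserts):

import numpy as np

def centralize_gradient(grad):
    """Gradient centralization: remove the per-output-channel mean (sketch)."""
    if grad.ndim <= 1:
        # Bias/BatchNorm parameters are typically left untouched.
        return grad
    axes = tuple(range(1, grad.ndim))            # all axes except the output channel
    return grad - grad.mean(axis=axes, keepdims=True)

g = np.random.randn(8, 3, 3, 3)                  # e.g. a conv weight gradient
gc = centralize_gradient(g)
print(np.allclose(gc.mean(axis=(1, 2, 3)), 0))   # True: each filter's gradient mean is now 0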
@@ -137,7 +153,7 @@ class OptimizerProcess:
 
 
 class ParameterProcess:
-    """
+    r"""
     Process parameter for Boost. Currently, this class supports creating group parameters
     and automatically setting gradient segmentation point.
 
@@ -171,7 +187,13 @@ class ParameterProcess:
         self._parameter_indices = 1
 
     def assign_parameter_group(self, parameters, split_point=None):
-        """Assign parameter group."""
+        r"""
+        Assign parameter group.
+
+        Inputs:
+            - **parameters** (list) - The network's parameter list.
+            - **split_point** (list) - The gradient split point of this network. default: None.
+        """
         if not isinstance(parameters, (list, tuple)) or not parameters:
             return parameters
 
@@ -187,7 +209,13 @@ class ParameterProcess:
         return parameters
 
     def generate_group_params(self, parameters, origin_params):
-        """Generate group parameters."""
+        r"""
+        Generate group parameters.
+
+        Inputs:
+            - **parameters** (list) - The network's parameter list.
+            - **origin_params** (list) - The network's origin parameter list.
+        """
         origin_params_copy = origin_params
         if origin_params_copy is not None:
             if not isinstance(origin_params_copy, list):
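`assign_parameter_group` above takes a flat parameter list plus optional split points. As a rough illustration of what cutting a parameter list into groups at given indices means, here is a generic sketch; the name and exact behaviour are assumptions for illustration, not the MindSpore source:

def split_at_points(parameters, split_point=None):
    """Cut a flat parameter list into consecutive groups at the given indices (sketch)."""
    if not split_point:
        return [list(parameters)]
    groups, start = [], 0
    for point in split_point:
        groups.append(list(parameters[start:point]))
        start = point
    groups.append(list(parameters[start:]))
    return groups

params = ["w0", "w1", "w2", "w3", "w4"]
print(split_at_points(params, [2, 4]))   # [['w0', 'w1'], ['w2', 'w3'], ['w4']]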
@@ -37,7 +37,7 @@ _boost_config_level = {
 
 
 class AutoBoost:
-    """
+    r"""
     Provide auto accelerating for network.
 
     Args:
@@ -68,7 +68,13 @@ class AutoBoost:
         self._boost_config_func_map[key](self, val)
 
     def network_auto_process_train(self, network, optimizer):
-        """Network train."""
+        r"""
+        Boost network train.
+
+        Inputs:
+            - **network** (Cell) - The training network.
+            - **optimizer** (Cell) - Optimizer for updating the weights.
+        """
         if self._boost_config["less_bn"]:
             network = LessBN(network, fn_flag=self._fn_flag)
             optimizer_process = OptimizerProcess(optimizer)
@@ -90,7 +96,12 @@ class AutoBoost:
         return network, optimizer
 
     def network_auto_process_eval(self, network):
-        """Network eval."""
+        r"""
+        Boost network eval.
+
+        Args:
+            - **network** (Cell) - The inference network.
+        """
         if self._boost_config["less_bn"]:
             network = LessBN(network)
 
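Taken together, `network_auto_process_train` and `network_auto_process_eval` are the two boost entry points for the training and inference graphs. A hedged sketch of the call flow (the `AutoBoost` constructor and `LessBN` are outside this diff, so the boost object is simply passed in here):

def apply_boost(auto_boost, train_network, eval_network, optimizer):
    """Route the train and eval graphs through the boost hooks documented above (sketch)."""
    # Training graph: the network may be wrapped (e.g. LessBN) and the optimizer rebuilt.
    train_network, optimizer = auto_boost.network_auto_process_train(train_network, optimizer)
    # Inference graph: only the network is rewritten.
    eval_network = auto_boost.network_auto_process_eval(eval_network)
    return train_network, eval_network, optimizer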
@@ -209,7 +209,12 @@ class BoostTrainOneStepCell(TrainOneStepCell):
         return loss
 
     def gradient_freeze_process(self, *inputs):
-        """gradient freeze algorithm process."""
+        r"""
+        Gradient freeze algorithm process.
+
+        Inputs:
+            - **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
+        """
         if self.train_strategy is None:
             step = self.step
             max_index = len(self.freeze_nets)
@@ -224,7 +229,13 @@ class BoostTrainOneStepCell(TrainOneStepCell):
         return loss
 
     def gradient_accumulation_process(self, loss, grads):
-        """gradient accumulation algorithm process."""
+        r"""
+        Gradient accumulation algorithm process.
+
+        Inputs:
+            - **loss** (Tensor) - Tensor with shape :math:`()`.
+            - **grads** (Tuple(Tensor)) - Tuple of gradient tensors.
+        """
         loss = F.depend(loss, self.hyper_map(F.partial(gradient_accumulation_op, self.max_accumulation_step),
                                              self.grad_accumulation, grads))
         self.accumulation_step += 1
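`gradient_accumulation_process` folds each step's gradients into an accumulation buffer and only releases an optimizer update every `max_accumulation_step` steps. A framework-free sketch of that control flow (names and the averaging choice are illustrative, not the MindSpore operators):

def accumulate_and_maybe_step(grads, grad_buffer, step, max_accumulation_step, apply_update):
    """Accumulate gradients; apply them only every `max_accumulation_step` calls (sketch)."""
    for i, g in enumerate(grads):
        grad_buffer[i] += g
    step += 1
    if step % max_accumulation_step == 0:
        # Average (or simply use) the accumulated gradients, then reset the buffer.
        apply_update([b / max_accumulation_step for b in grad_buffer])
        for i in range(len(grad_buffer)):
            grad_buffer[i] = 0.0
    return step

# With max_accumulation_step=4 the optimizer sees one (averaged) update per four micro-batches.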
@@ -242,7 +253,13 @@ class BoostTrainOneStepCell(TrainOneStepCell):
         return loss
 
     def adasum_process(self, loss, grads):
-        """adasum algorithm process."""
+        r"""
+        Adasum algorithm process.
+
+        Inputs:
+            - **loss** (Tensor) - Tensor with shape :math:`()`.
+            - **grads** (Tuple(Tensor)) - Tuple of gradient tensors.
+        """
         loss = F.depend(loss, self.optimizer(grads))
         rank_weights = self.weights[self.start[self.server_rank]: self.end[self.server_rank]]
         grad_clone = F.depend(self.grad_clone, loss)
@@ -261,7 +278,13 @@ class BoostTrainOneStepCell(TrainOneStepCell):
         return loss
 
     def check_adasum_enable(self, optimizer, reducer_flag):
-        """check adasum enable."""
+        r"""
+        Check adasum enable.
+
+        Inputs:
+            - **optimizer** (Union[Cell]) - Optimizer for updating the weights.
+            - **reducer_flag** (bool) - Reducer flag.
+        """
         if not getattr(optimizer, "adasum", None) or not reducer_flag:
             return False
         _rank_size = get_group_size()
@@ -280,7 +303,7 @@ class BoostTrainOneStepWithLossScaleCell(BoostTrainOneStepCell):
     BoostTrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
     The Tensor type of `scale_sense` is acting as loss scaling value. If you want to update it on host side,
     the value must be provided. If the Tensor type of `scale_sense` is not given, the loss scale update logic
-    must be provied by Cell type of `scale_sense`.
+    must be provide by Cell type of `scale_sense`.
 
     Args:
         network (Cell): The training network. The network only supports single output.
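Regarding the `scale_sense` note above: when a Cell supplies the loss-scale update logic, it usually implements the standard dynamic loss-scaling rule. A framework-free sketch of one common scheme (not necessarily the exact behaviour of MindSpore's update cell):

def update_loss_scale(scale, overflow, good_steps, scale_factor=2.0, scale_window=1000):
    """Dynamic loss scaling: shrink on overflow, grow after a window of clean steps (sketch)."""
    if overflow:
        return max(scale / scale_factor, 1.0), 0   # back off and restart the clean-step window
    good_steps += 1
    if good_steps >= scale_window:
        return scale * scale_factor, 0             # enough clean steps: grow the scale again
    return scale, good_steps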
@@ -114,7 +114,7 @@ class FreezeOpt(Cell):
 
 
 class _TrainFreezeCell(Cell):
-    """
+    r"""
     Gradient freezing training network.
 
     Args:
@@ -157,7 +157,7 @@ class _TrainFreezeCell(Cell):
 
 
 class GradientFreeze:
-    """
+    r"""
     Freezing the gradients of some layers randomly. The number and
     probability of frozen layers can be configured by users
 
@@ -180,7 +180,13 @@ class GradientFreeze:
         self._param_processer = ParameterProcess()
 
     def split_parameters_groups(self, net, freeze_para_groups_number):
-        """Split parameter groups for gradients freezing training."""
+        r"""
+        Split parameter groups for gradients freezing training.
+
+        Inputs:
+            - **net** (Cell) - The training network.
+            - **freeze_para_groups_number** (int) - The number of gradient freeze groups.
+        """
         grouped_params = []
         tmp = []
         for para in net.trainable_params():
@@ -201,7 +207,15 @@ class GradientFreeze:
         return freeze_grouped_params
 
     def generate_freeze_index_sequence(self, parameter_groups_number, freeze_strategy, freeze_p, total_steps):
-        """Generate index sequence for gradient freezing training."""
+        r"""
+        Generate index sequence for gradient freezing training.
+
+        Inputs:
+            - **parameter_groups_number** (int) - The number of parameter groups.
+            - **freeze_strategy** (int) - Gradient freeze grouping strategy, select from [0, 1].
+            - **freeze_p** (float) - Gradient freezing probability.
+            - **total_steps** (int) - Total training steps.
+        """
         total_step = int(total_steps * 1.01)
         if parameter_groups_number <= 1:
             return [0 for _ in range(total_step)]
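`generate_freeze_index_sequence` returns, for every training step, the index of the parameter group to freeze (all zeros when there is only one group, as the context lines show). The concrete strategies 0 and 1 are outside this hunk, so the following is only a plausible probabilistic variant, labeled as an assumption:

import random

def freeze_index_sequence(parameter_groups_number, freeze_p, total_steps, seed=0):
    """Pick, per step, a group index to freeze with probability freeze_p (illustrative sketch)."""
    rng = random.Random(seed)
    total_step = int(total_steps * 1.01)          # same small over-allocation as above
    if parameter_groups_number <= 1:
        return [0 for _ in range(total_step)]
    indexes = []
    for _ in range(total_step):
        if rng.random() < freeze_p:
            # Freeze one of the non-trivial groups at random this step.
            indexes.append(rng.randint(1, parameter_groups_number - 1))
        else:
            indexes.append(0)                     # 0: nothing frozen this step
    return indexes

print(freeze_index_sequence(4, 0.5, 10)[:5])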
@@ -235,7 +249,13 @@ class GradientFreeze:
             f"Unsupported freezing training strategy '{freeze_strategy}'")
 
     def freeze_generate(self, network, optimizer):
-        """Generate freeze network and optimizer."""
+        r"""
+        Generate freeze network and optimizer.
+
+        Inputs:
+            - **network** (Cell) - The training network.
+            - **optimizer** (Cell) - Optimizer for updating the weights.
+        """
         train_para_groups = self.split_parameters_groups(
             network, self._param_groups)
         for i in range(self._param_groups):
@@ -250,7 +270,43 @@ class GradientFreeze:
 
 def freeze_cell(reducer_flag, network, optimizer, sens, grad, use_grad_accumulation, mean=None, degree=None,
                 max_accumulation_step=1):
-    """Provide freeze network cell."""
+    r"""
+    Generate freeze network and optimizer.
+
+    Inputs:
+        - **reducer_flag** (bool) - Reducer flag.
+        - **network** (Cell) - The training network.
+        - **optimizer** (Cell) - Optimizer for updating the weights.
+        - **sens** (Tensor) - Tensor with shape :math:`()`
+        - **grad** (Tuple(Tensor)) - Tuple of gradient tensors.
+        - **use_grad_accumulation** (bool) - Use gradient accumulation flag.
+        - **mean** (bool) - Gradients mean flag. default: None.
+        - **degree** (int) - Device number. default: None.
+        - **max_accumulation_step** (int) - Max accumulation steps. default: 1.
+
+    Examples:
+        >>> import numpy as np
+        >>> from mindspore import Tensor, Parameter, nn
+        >>> import mindspore.ops as ops
+        >>> from mindspore.boost.grad_freeze import freeze_cell
+        >>>
+        >>> class Net(nn.Cell):
+        ...     def __init__(self, in_features, out_features):
+        ...         super(Net, self).__init__()
+        ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
+        ...                                 name='weight')
+        ...         self.matmul = ops.MatMul()
+        ...
+        ...     def construct(self, x):
+        ...         output = self.matmul(x, self.weight)
+        ...         return output
+        ...
+        >>> in_features, out_features = 16, 10
+        >>> network = Net(in_features, out_features)
+        >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
+        >>> grad = ops.GradOperation(get_by_list=True, sens_param=True)
+        >>> freeze_nets = freeze_cell(False, network, optimizer, 1.0, grad, False, None, None, 1)
+    """
     if reducer_flag:
         param_processer = ParameterProcess()
         grad_reducers = (DistributedGradReducer(param_processer.assign_parameter_group(opt.parameters),
@@ -232,7 +232,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
     TrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
     The Tensor type of `scale_sense` is acting as loss scaling value. If you want to update it on host side,
     the value must be provided. If the Tensor type of `scale_sense` is not given, the loss scale update logic
-    must be provied by Cell type of `scale_sense`.
+    must be provide by Cell type of `scale_sense`.
 
     Args:
         network (Cell): The training network. The network only supports single output.