!18963 Front-end annotation correction

Merge pull request !18963 from wangnan39/fix_docs
This commit is contained in:
i-robot 2021-06-28 10:33:37 +00:00 committed by Gitee
commit 3550dc2918
10 changed files with 209 additions and 75 deletions

View File

@ -13,13 +13,14 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include "ops/getnext.h"
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <algorithm> #include <algorithm>
#include "ops/getnext.h"
#include "ops/op_utils.h" #include "ops/op_utils.h"
#include "utils/check_convert_utils.h" #include "utils/check_convert_utils.h"
#include "utils/tensor_construct_utils.h" #include "utils/tensor_construct_utils.h"

View File

@ -62,10 +62,12 @@ class Cell(Cell_):
``Ascend`` ``GPU`` ``CPU`` ``Ascend`` ``GPU`` ``CPU``
Examples: Examples:
>>> import mindspore.nn as nn
>>> import mindspore.ops as ops
>>> class MyCell(nn.Cell): >>> class MyCell(nn.Cell):
... def __init__(self): ... def __init__(self):
... super(MyCell, self).__init__() ... super(MyCell, self).__init__()
... self.relu = P.ReLU() ... self.relu = ops.ReLU()
... ...
... def construct(self, x): ... def construct(self, x):
... return self.relu(x) ... return self.relu(x)
@ -607,7 +609,7 @@ class Cell(Cell_):
Compiles cell. Compiles cell.
Args: Args:
inputs (tuple): Input parameters. inputs (tuple): Inputs of the Cell object.
""" """
_executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode) _executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode)
@ -616,7 +618,7 @@ class Cell(Cell_):
Compiles and runs cell. Compiles and runs cell.
Args: Args:
inputs (tuple): Input parameters. inputs (tuple): Inputs of the Cell object.
Returns: Returns:
Object, the result of executing. Object, the result of executing.
@ -682,8 +684,13 @@ class Cell(Cell_):
""" """
Cast parameter according to auto mix precision level in pynative mode. Cast parameter according to auto mix precision level in pynative mode.
This interface is currently used in the case of auto mix precision and usually does not need to be used explicitly.
Args: Args:
param (Parameter): The parameter to cast. param (Parameter): Parameters, the type of which should be cast.
Returns:
Parameter, the input parameter with type automatically cast.
""" """
if hasattr(self, "_mindspore_flags"): if hasattr(self, "_mindspore_flags"):
if self._mindspore_flags.get('fp32'): if self._mindspore_flags.get('fp32'):
@ -725,7 +732,11 @@ class Cell(Cell_):
return None return None
def remove_redundant_parameters(self): def remove_redundant_parameters(self):
"""Remove the redundant parameters""" """
Remove the redundant parameters.
This interface usually does not need to be used explicitly.
"""
cells = self.cells_and_names() cells = self.cells_and_names()
for _, cell in cells: for _, cell in cells:
params = cell._params.items() params = cell._params.items()
@ -836,7 +847,7 @@ class Cell(Cell_):
Adds the given prefix to the names of parameters. Adds the given prefix to the names of parameters.
Args: Args:
prefix (str): The prefix string. prefix (str): The prefix string. Default: ''.
recurse (bool): Whether contains the parameters of subcells. Default: True. recurse (bool): Whether contains the parameters of subcells. Default: True.
""" """
@ -884,6 +895,9 @@ class Cell(Cell_):
expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
that are direct members of this cell. Default: True. that are direct members of this cell. Default: True.
Returns:
Iteration, all parameters at the Cell.
Examples: Examples:
>>> net = Net() >>> net = Net()
>>> parameters = [] >>> parameters = []
@ -912,6 +926,9 @@ class Cell(Cell_):
expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
that are direct members of this cell. Default: True. that are direct members of this cell. Default: True.
Returns:
Iteration, all the names and corresponding parameters in the cell.
Examples: Examples:
>>> n = Net() >>> n = Net()
>>> names = [] >>> names = []
@ -949,6 +966,9 @@ class Cell(Cell_):
cells (str): Cells to iterate over. Default: None. cells (str): Cells to iterate over. Default: None.
name_prefix (str): Namespace. Default: ''. name_prefix (str): Namespace. Default: ''.
Returns:
Iteration, all the child cells and corresponding names in the cell.
Examples: Examples:
>>> n = Net() >>> n = Net()
>>> names = [] >>> names = []
@ -972,7 +992,12 @@ class Cell(Cell_):
yield ele yield ele
def cells(self): def cells(self):
"""Returns an iterator over immediate cells.""" """
Returns an iterator over immediate cells.
Returns:
Iteration, all the child cells in the cell.
"""
return self.name_cells().values() return self.name_cells().values()
def _set_scope(self, name): def _set_scope(self, name):
@ -997,7 +1022,12 @@ class Cell(Cell_):
yield key, value yield key, value
def get_scope(self): def get_scope(self):
"""Returns the scope of a cell object in one network.""" """
Returns the scope of a cell object in one network.
Returns:
String, scope of the cell.
"""
return self._scope return self._scope
def generate_scope(self): def generate_scope(self):
@ -1010,6 +1040,9 @@ class Cell(Cell_):
Returns an iterator over all cells in the network. Returns an iterator over all cells in the network.
Include name of the cell and cell itself. Include name of the cell and cell itself.
Returns:
Dict[String, Cell], all the child cells and corresponding names in the cell.
""" """
value_set = set() value_set = set()
cells = OrderedDict() cells = OrderedDict()
@ -1056,6 +1089,9 @@ class Cell(Cell_):
dst_type (:class:`mindspore.dtype`): Transfer Cell to Run with dst_type. dst_type (:class:`mindspore.dtype`): Transfer Cell to Run with dst_type.
dst_type can be `mindspore.dtype.float16` or `mindspore.dtype.float32`. dst_type can be `mindspore.dtype.float16` or `mindspore.dtype.float32`.
Returns:
Cell, the cell itself.
Raises: Raises:
ValueError: If dst_type is not float32 nor float16. ValueError: If dst_type is not float32 nor float16.
""" """
@ -1080,6 +1116,9 @@ class Cell(Cell_):
Args: Args:
acc_type (str): accelerate algorithm. acc_type (str): accelerate algorithm.
Returns:
Cell, the cell itself.
Raises: Raises:
ValueError: If acc_type is not in the algorithm library. ValueError: If acc_type is not in the algorithm library.
""" """
@ -1098,6 +1137,9 @@ class Cell(Cell_):
Args: Args:
requires_grad (bool): Specifies if the net need to grad, if it is requires_grad (bool): Specifies if the net need to grad, if it is
True, cell will construct backward network in pynative mode. Default: True. True, cell will construct backward network in pynative mode. Default: True.
Returns:
Cell, the cell itself.
""" """
self.requires_grad = requires_grad self.requires_grad = requires_grad
return self return self
@ -1112,6 +1154,9 @@ class Cell(Cell_):
Args: Args:
mode (bool): Specifies whether the model is training. Default: True. mode (bool): Specifies whether the model is training. Default: True.
Returns:
Cell, the cell itself.
""" """
if mode is False: if mode is False:
self._phase = 'predict' self._phase = 'predict'

View File

@ -37,7 +37,10 @@ class LearningRateSchedule(Cell):
The output must be a Tensor of scalar. The output must be a Tensor of scalar.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs:
Tensor. Learning rate at current step with shape :math:`()`.
""" """
raise NotImplementedError raise NotImplementedError
@ -77,10 +80,10 @@ class ExponentialDecayLR(LearningRateSchedule):
is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `learning_rate` or `decay_rate` is not a float. TypeError: If `learning_rate` or `decay_rate` is not a float.
@ -144,10 +147,10 @@ class NaturalExpDecayLR(LearningRateSchedule):
is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `learning_rate` or `decay_rate` is not a float. TypeError: If `learning_rate` or `decay_rate` is not a float.
@ -212,10 +215,10 @@ class InverseDecayLR(LearningRateSchedule):
is_stair (bool): If true, learning rate decay once every `decay_steps` times. Default: False. is_stair (bool): If true, learning rate decay once every `decay_steps` times. Default: False.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `learning_rate` or `decay_rate` is not a float. TypeError: If `learning_rate` or `decay_rate` is not a float.
@ -269,10 +272,10 @@ class CosineDecayLR(LearningRateSchedule):
decay_steps (int): A value used to calculate decayed learning rate. decay_steps (int): A value used to calculate decayed learning rate.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `min_lr` or `max_lr` is not a float. TypeError: If `min_lr` or `max_lr` is not a float.
@ -345,10 +348,10 @@ class PolynomialDecayLR(LearningRateSchedule):
update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float. TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float.
@ -424,10 +427,10 @@ class WarmUpLR(LearningRateSchedule):
warmup_steps (int): The warm up steps of learning rate. warmup_steps (int): The warm up steps of learning rate.
Inputs: Inputs:
Tensor. The current step number. - **global_step** (Tensor) - The current step number.
Outputs: Outputs:
Tensor. The learning rate value for the current step. Tensor. The learning rate value for the current step with shape :math:`()`.
Raises: Raises:
TypeError: If `learning_rate` is not a float. TypeError: If `learning_rate` is not a float.

View File

@ -195,17 +195,16 @@ class Adam(Optimizer):
.. math:: .. math::
\begin{array}{ll} \\ \begin{array}{ll} \\
m = \beta_1 * m + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v = \beta_2 * v + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w = w - l * \frac{m}{\sqrt{v} + \epsilon} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`, `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
:math:`\epsilon` represents `eps`.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the
@ -371,9 +370,29 @@ class Adam(Optimizer):
class AdamWeightDecay(Optimizer): class AdamWeightDecay(Optimizer):
""" r"""
Implements the Adam algorithm to fix the weight decay. Implements the Adam algorithm to fix the weight decay.
.. math::
\begin{array}{ll} \\
m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\
update =
\begin{cases}
update + weight\_decay * w_{t}
& \text{ if } weight\_decay > 0 \\
update
& \text{ otherwise }
\end{cases} \\
w_{t+1} = w_{t} - lr * update
\end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`lr` represents `learning_rate`,
:math:`\beta_1, \beta_2` represent `beta1` and `beta2`, :math:`t` represents updating step while
:math:`w` represents `params`.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the
weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied
@ -493,17 +512,16 @@ class AdamOffload(Optimizer):
.. math:: .. math::
\begin{array}{ll} \\ \begin{array}{ll} \\
m = \beta_1 * m + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v = \beta_2 * v + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w = w - l * \frac{m}{\sqrt{v} + \epsilon} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`, `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
:math:`\epsilon` represents `eps`.
Note: Note:
This optimizer only supports `GRAPH_MODE` currently. This optimizer only supports `GRAPH_MODE` currently.

View File

@ -114,17 +114,16 @@ class LazyAdam(Optimizer):
.. math:: .. math::
\begin{array}{ll} \\ \begin{array}{ll} \\
m = \beta_1 * m + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v = \beta_2 * v + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w = w - l * \frac{m}{\sqrt{v} + \epsilon} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`, `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
:math:`\epsilon` represents `eps`.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the

View File

@ -52,13 +52,25 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
class ProximalAdagrad(Optimizer): class ProximalAdagrad(Optimizer):
""" r"""
Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator. Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
ProximalAdagrad is an online Learning and Stochastic Optimization. ProximalAdagrad is an online Learning and Stochastic Optimization.
Refer to paper `Efficient Learning using Forward-Backward Splitting Refer to paper `Efficient Learning using Forward-Backward Splitting
<http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_. <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.
.. math::
accum_{t+1} = accum_{t} + grad * grad
.. math::
\text{prox_v} = var_{t} - lr * grad * \frac{1}{\sqrt{accum_{t+1}}}
.. math::
var_{t+1} = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
Here, grad, lr, var, accum and t denote the gradients, learning_rate, params, accumulation and current
step respectively.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the
weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied

View File

@ -66,6 +66,7 @@ def _tensors_cast_datatype(datatype, param):
return F.cast(param, datatype) return F.cast(param, datatype)
class WithLossCell(Cell): class WithLossCell(Cell):
r""" r"""
Cell with loss function. Cell with loss function.
@ -82,7 +83,7 @@ class WithLossCell(Cell):
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`. - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
Outputs: Outputs:
Tensor, a scalar tensor with shape :math:`()`. Tensor, a tensor means the loss value, the shape of which is usually :math:`()`.
Raises: Raises:
TypeError: If dtype of `data` or `label` is neither float16 nor float32. TypeError: If dtype of `data` or `label` is neither float16 nor float32.
@ -114,7 +115,7 @@ class WithLossCell(Cell):
@property @property
def backbone_network(self): def backbone_network(self):
""" """
Returns the backbone network. Get the backbone network.
Returns: Returns:
Cell, the backbone network. Cell, the backbone network.
@ -298,7 +299,7 @@ class TrainOneStepCell(Cell):
- **(\*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`. - **(\*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
Outputs: Outputs:
Tensor, a scalar Tensor with shape :math:`()`. Tensor, a tensor means the loss value, the shape of which is usually :math:`()`.
Raises: Raises:
TypeError: If `sens` is not a number. TypeError: If `sens` is not a number.
@ -408,6 +409,12 @@ class GetNextSingleOp(Cell):
For detailed information, refer to `ops.operations.GetNext`. For detailed information, refer to `ops.operations.GetNext`.
Inputs:
No inputs.
Outputs:
tuple[Tensor], the data obtained from the Dataset.
Supported Platforms: Supported Platforms:
``Ascend`` ``GPU`` ``Ascend`` ``GPU``
@ -635,13 +642,19 @@ class WithEvalCell(Cell):
class ParameterUpdate(Cell): class ParameterUpdate(Cell):
""" """
Cell that updates parameters. Cell that updates parameter.
With this Cell, one can manually update `param` with the input `Tensor`. With this Cell, one can manually update `param` with the input `Tensor`.
Args: Args:
param (Parameter): The parameter to be updated manually. param (Parameter): The parameter to be updated manually.
Inputs:
- **x** (Tensor) - A tensor whose shape and type are the same with `param`.
Outputs:
Tensor, the input `x`.
Raises: Raises:
KeyError: If parameter with the specified name does not exist. KeyError: If parameter with the specified name does not exist.

View File

@ -72,11 +72,11 @@ class DynamicLossScaleUpdateCell(Cell):
scale_window (int): Maximum continuous training steps that do not have overflow. scale_window (int): Maximum continuous training steps that do not have overflow.
Inputs: Inputs:
- **inputs** (Tensor) - Tensor of shape :math:`(N, \ldots)`. - **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`. - **overflow** (bool) - Whether the overflow occurs or not.
Outputs: Outputs:
Tensor, a scalar Tensor with shape :math:`()`. bool, the input `overflow`.
Raises: Raises:
TypeError: If dtype of `inputs` or `label` is neither float16 nor float32. TypeError: If dtype of `inputs` or `label` is neither float16 nor float32.
@ -165,6 +165,13 @@ class FixedLossScaleUpdateCell(Cell):
Args: Args:
loss_scale_value (float): Initializes loss scale. loss_scale_value (float): Initializes loss scale.
Inputs:
- **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`, that will be ignored.
- **overflow** (bool) - Whether the overflow occurs or not.
Outputs:
bool, the input `overflow`.
Supported Platforms: Supported Platforms:
``Ascend`` ``GPU`` ``Ascend`` ``GPU``
@ -332,7 +339,11 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
def set_sense_scale(self, sens): def set_sense_scale(self, sens):
""" """
If the user has set the sens in the training process and wants to reassign the value, he can call If the user has set the sens in the training process and wants to reassign the value, he can call
this function again to make modification, and sens needs to be of type Tensor.""" this function again to make modification, and sens needs to be of type Tensor.
Inputs:
- **sens** (Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
"""
if self.scale_sense and isinstance(sens, Tensor): if self.scale_sense and isinstance(sens, Tensor):
self.scale_sense.set_data(sens) self.scale_sense.set_data(sens)
else: else:
@ -347,15 +358,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
overflow in the process of gradient calculation. In this case, pre_cond should be the output of the loss overflow in the process of gradient calculation. In this case, pre_cond should be the output of the loss
function, and compute_input should be the input of gradients-computing function. function, and compute_input should be the input of gradients-computing function.
Args: Inputs:
pre_cond(object): A precondition for starting overflow detection. It determines the executing order of - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order
overflow state clearing and prior processions. It makes sure that the function 'start_overflow' clears of overflow state clearing and prior processions. It makes sure that the function 'start_overflow'
status after finishing the process of precondition. clears status after finishing the process of precondition.
compute_input(object): The input of subsequent process. Overflow detection should be performed on a certain - **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a
computation. Set `compute_input` as the input of the computation, to ensure overflow status is cleared certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
before executing the computation. cleared before executing the computation.
Returns: Outputs:
Tuple[object, object], the first value is False for GPU backend, while it is an instance of Tuple[object, object], the first value is False for GPU backend, while it is an instance of
NPUAllocFloatStatus for other backend. The status is used to detect overflow during overflow detection. NPUAllocFloatStatus for other backend. The status is used to detect overflow during overflow detection.
The second value is the same as the input of `compute_input`, but contains some information about the The second value is the same as the input of `compute_input`, but contains some information about the
@ -377,12 +388,13 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
Get overflow results after executing the target process for overflow detection. Get overflow results after executing the target process for overflow detection.
Args: Inputs:
status(object): A status instance used to detect the overflow. - **status** (object) - A status instance used to detect the overflow.
compute_output: Overflow detection should be performed on a certain computation. Set `compute_output` as - **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output`
the output of the computation, to ensure overflow status is acquired before executing the computation. as the output of the computation, to ensure overflow status is acquired before executing the
computation.
Returns: Outputs:
bool, whether the overflow occurs or not. bool, whether the overflow occurs or not.
""" """
if not self.gpu_target: if not self.gpu_target:
@ -409,10 +421,10 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
""" """
Calculate loss scale according to the overflow. Calculate loss scale according to the overflow.
Args: Inputs:
overflow(bool): Whether the overflow occurs or not. - **overflow** (bool) - Whether the overflow occurs or not.
Returns: Outputs:
bool, overflow value. bool, overflow value.
""" """
if self.loss_scaling_manager is not None: if self.loss_scaling_manager is not None:

View File

@ -266,7 +266,7 @@ class DatasetHelper:
self.iter.release() self.iter.release()
def continue_send(self): def continue_send(self):
"""continue send data to device at the beginning of epoch.""" """Continue send data to device at the beginning of epoch."""
self.iter.continue_send() self.iter.continue_send()
def get_data_info(self): def get_data_info(self):

View File

@ -67,23 +67,39 @@ class FixedLossScaleManager(LossScaleManager):
self._drop_overflow_update = drop_overflow_update self._drop_overflow_update = drop_overflow_update
def get_loss_scale(self): def get_loss_scale(self):
"""Get loss scale value.""" """
Get loss scale value.
Returns:
float, `loss_scale` value.
"""
return self._loss_scale return self._loss_scale
def get_drop_overflow_update(self): def get_drop_overflow_update(self):
"""Get the flag whether to drop optimizer update when there is an overflow.""" """
Get the flag whether to drop optimizer update when there is an overflow.
Returns:
bool, `drop_overflow_update` value.
"""
return self._drop_overflow_update return self._drop_overflow_update
def update_loss_scale(self, overflow): def update_loss_scale(self, overflow):
""" """
Update loss scale value. Update loss scale value. The interface at `FixedLossScaleManager` will do nothing.
Args: Args:
overflow (bool): Whether it overflows. overflow (bool): Whether it overflows.
""" """
def get_update_cell(self): def get_update_cell(self):
"Returns the cell for `TrainOneStepWithLossScaleCell`" """
Returns the update cell for `TrainOneStepWithLossScaleCell`.
Returns:
None or Cell. Cell object, used to update `loss_scale`, when `drop_overflow_update` is True. None when
`drop_overflow_update` is False.
"""
if not self._drop_overflow_update: if not self._drop_overflow_update:
return None return None
return nn.FixedLossScaleUpdateCell(self._loss_scale) return nn.FixedLossScaleUpdateCell(self._loss_scale)
@ -127,7 +143,12 @@ class DynamicLossScaleManager(LossScaleManager):
self.bad_step = 0 self.bad_step = 0
def get_loss_scale(self): def get_loss_scale(self):
"""Get loss scale value.""" """
Get loss scale value.
Returns:
float, `loss_scale` value.
"""
return self.loss_scale return self.loss_scale
def update_loss_scale(self, overflow): def update_loss_scale(self, overflow):
@ -152,9 +173,19 @@ class DynamicLossScaleManager(LossScaleManager):
self.cur_iter += 1 self.cur_iter += 1
def get_drop_overflow_update(self): def get_drop_overflow_update(self):
"""Get the flag whether to drop optimizer update when there is an overflow.""" """
Get the flag whether to drop optimizer update when there is an overflow.
Returns:
bool, always return True at `DynamicLossScaleManager`.
"""
return True return True
def get_update_cell(self): def get_update_cell(self):
"Returns the cell for `TrainOneStepWithLossScaleCell`" """
Returns the update cell for `TrainOneStepWithLossScaleCell`.
Returns:
Cell, cell object used to update `loss_scale`.
"""
return nn.DynamicLossScaleUpdateCell(self.loss_scale, self.scale_factor, self.scale_window) return nn.DynamicLossScaleUpdateCell(self.loss_scale, self.scale_factor, self.scale_window)