forked from mindspore-Ecosystem/mindspore
!18963 Front-end annotation correction
Merge pull request !18963 from wangnan39/fix_docs
commit 3550dc2918
@@ -13,13 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#include "ops/getnext.h"
+
 #include <set>
 #include <string>
 #include <vector>
 #include <memory>
 #include <algorithm>
-#include "ops/getnext.h"
+
 #include "ops/op_utils.h"
 #include "utils/check_convert_utils.h"
 #include "utils/tensor_construct_utils.h"
@@ -62,10 +62,12 @@ class Cell(Cell_):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
+        >>> import mindspore.nn as nn
+        >>> import mindspore.ops as ops
        >>> class MyCell(nn.Cell):
        ...     def __init__(self):
        ...         super(MyCell, self).__init__()
-        ...         self.relu = P.ReLU()
+        ...         self.relu = ops.ReLU()
        ...
        ...     def construct(self, x):
        ...         return self.relu(x)
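For context, a minimal sketch of how a cell defined as in the docstring example above could be exercised; the imports and tensor values are illustrative only and assume a standard MindSpore install:

>>> import numpy as np
>>> import mindspore.nn as nn
>>> import mindspore.ops as ops
>>> from mindspore import Tensor
>>> class MyCell(nn.Cell):
...     def __init__(self):
...         super(MyCell, self).__init__()
...         self.relu = ops.ReLU()
...     def construct(self, x):
...         return self.relu(x)
>>> net = MyCell()
>>> # ReLU clamps negative entries to zero
>>> print(net(Tensor(np.array([-1.0, 0.0, 2.0], np.float32))))
[0. 0. 2.]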
@@ -607,7 +609,7 @@ class Cell(Cell_):
         Compiles cell.
 
         Args:
-            inputs (tuple): Input parameters.
+            inputs (tuple): Inputs of the Cell object.
         """
         _executor.compile(self, *inputs, phase=self.phase, auto_parallel_mode=self._auto_parallel_mode)
@@ -616,7 +618,7 @@ class Cell(Cell_):
         Compiles and runs cell.
 
         Args:
-            inputs (tuple): Input parameters.
+            inputs (tuple): Inputs of the Cell object.
 
         Returns:
             Object, the result of executing.
@@ -682,8 +684,13 @@ class Cell(Cell_):
         """
         Cast parameter according to auto mix precision level in pynative mode.
 
+        This interface is currently used in the case of auto mix precision and usually need not to be used explicitly.
+
         Args:
-            param (Parameter): The parameter to cast.
+            param (Parameter): Parameters, the type of which should be cast.
+
+        Returns:
+            Parameter, the input parameter with type automatically casted.
         """
         if hasattr(self, "_mindspore_flags"):
             if self._mindspore_flags.get('fp32'):
@@ -725,7 +732,11 @@ class Cell(Cell_):
         return None
 
     def remove_redundant_parameters(self):
-        """Remove the redundant parameters"""
+        """
+        Remove the redundant parameters.
+
+        This interface usually need not to be used explicitly.
+        """
         cells = self.cells_and_names()
         for _, cell in cells:
             params = cell._params.items()
@@ -836,7 +847,7 @@ class Cell(Cell_):
         Adds the given prefix to the names of parameters.
 
         Args:
-            prefix (str): The prefix string.
+            prefix (str): The prefix string. Default: ''.
             recurse (bool): Whether contains the parameters of subcells. Default: True.
         """
@@ -884,6 +895,9 @@ class Cell(Cell_):
             expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
                            that are direct members of this cell. Default: True.
 
+        Returns:
+            Iteration, all parameters at the Cell.
+
         Examples:
             >>> net = Net()
             >>> parameters = []
@@ -912,6 +926,9 @@ class Cell(Cell_):
             expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters
                            that are direct members of this cell. Default: True.
 
+        Returns:
+            Iteration, all the names and corresponding parameters in the cell.
+
         Examples:
             >>> n = Net()
             >>> names = []
@@ -949,6 +966,9 @@ class Cell(Cell_):
             cells (str): Cells to iterate over. Default: None.
             name_prefix (str): Namespace. Default: ''.
 
+        Returns:
+            Iteration, all the child cells and corresponding names in the cell.
+
         Examples:
             >>> n = Net()
             >>> names = []
@@ -972,7 +992,12 @@ class Cell(Cell_):
             yield ele
 
     def cells(self):
-        """Returns an iterator over immediate cells."""
+        """
+        Returns an iterator over immediate cells.
+
+        Returns:
+            Iteration, all the child cells in the cell.
+        """
         return self.name_cells().values()
 
     def _set_scope(self, name):
@@ -997,7 +1022,12 @@ class Cell(Cell_):
             yield key, value
 
     def get_scope(self):
-        """Returns the scope of a cell object in one network."""
+        """
+        Returns the scope of a cell object in one network.
+
+        Returns:
+            String, scope of the cell.
+        """
         return self._scope
 
     def generate_scope(self):
@@ -1010,6 +1040,9 @@ class Cell(Cell_):
         Returns an iterator over all cells in the network.
 
         Include name of the cell and cell itself.
+
+        Returns:
+            Dict[String, Cell], all the child cells and corresponding names in the cell.
         """
         value_set = set()
         cells = OrderedDict()
@@ -1056,6 +1089,9 @@ class Cell(Cell_):
             dst_type (:class:`mindspore.dtype`): Transfer Cell to Run with dst_type.
                 dst_type can be `mindspore.dtype.float16` or `mindspore.dtype.float32`.
 
+        Returns:
+            Cell, the cell itself.
+
         Raises:
             ValueError: If dst_type is not float32 nor float16.
         """
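As a usage sketch for the `to_float` interface documented in the hunk above (a small `nn.Dense` layer stands in for a real network; float16 support on the target device is assumed):

>>> import mindspore.nn as nn
>>> from mindspore import dtype as mstype
>>> net = nn.Dense(3, 4)
>>> # to_float returns the cell itself, so the call can be chained or reassigned
>>> net = net.to_float(mstype.float16)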
@@ -1080,6 +1116,9 @@ class Cell(Cell_):
         Args:
             acc_type (str): accelerate algorithm.
 
+        Returns:
+            Cell, the cell itself.
+
         Raises:
             ValueError: If acc_type is not in the algorithm library.
         """
@@ -1098,6 +1137,9 @@ class Cell(Cell_):
         Args:
             requires_grad (bool): Specifies if the net need to grad, if it is
                 True, cell will construct backward network in pynative mode. Default: True.
+
+        Returns:
+            Cell, the cell itself.
         """
         self.requires_grad = requires_grad
         return self
@@ -1112,6 +1154,9 @@ class Cell(Cell_):
 
         Args:
             mode (bool): Specifies whether the model is training. Default: True.
+
+        Returns:
+            Cell, the cell itself.
         """
         if mode is False:
             self._phase = 'predict'
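A brief sketch of the `set_train` interface described above; `nn.Dropout` is chosen only because its behaviour differs between training and inference, and the values are illustrative:

>>> import mindspore.nn as nn
>>> net = nn.Dropout(keep_prob=0.7)
>>> # set_train(False) switches the cell to inference behaviour and returns the cell itself
>>> net = net.set_train(False)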
@@ -37,7 +37,10 @@ class LearningRateSchedule(Cell):
             The output must be a Tensor of scalar.
 
         Inputs:
-            Tensor. The current step number.
+            - **global_step** (Tensor) - The current step number.
+
+        Returns:
+            Tensor. Learning rate at current step with shape :math:`()`.
         """
         raise NotImplementedError
@@ -77,10 +80,10 @@ class ExponentialDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
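To make the Inputs/Outputs description above concrete, a small sketch of evaluating the schedule at one step; the values are arbitrary and the printed number follows from learning_rate * decay_rate^(global_step / decay_steps):

>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> learning_rate = 0.1
>>> decay_rate = 0.9
>>> decay_steps = 4
>>> global_step = Tensor(2, mstype.int32)
>>> exponential_decay_lr = nn.ExponentialDecayLR(learning_rate, decay_rate, decay_steps)
>>> print(exponential_decay_lr(global_step))
0.09486833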
@@ -144,10 +147,10 @@ class NaturalExpDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
@@ -212,10 +215,10 @@ class InverseDecayLR(LearningRateSchedule):
         is_stair (bool): If true, learning rate decay once every `decay_steps` times. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` or `decay_rate` is not a float.
@@ -269,10 +272,10 @@ class CosineDecayLR(LearningRateSchedule):
         decay_steps (int): A value used to calculate decayed learning rate.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `min_lr` or `max_lr` is not a float.
@@ -345,10 +348,10 @@ class PolynomialDecayLR(LearningRateSchedule):
         update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate`, `end_learning_rate` or `power` is not a float.
@@ -424,10 +427,10 @@ class WarmUpLR(LearningRateSchedule):
         warmup_steps (int): The warm up steps of learning rate.
 
     Inputs:
-        Tensor. The current step number.
+        - **global_step** (Tensor) - The current step number.
 
     Outputs:
-        Tensor. The learning rate value for the current step.
+        Tensor. The learning rate value for the current step with shape :math:`()`.
 
     Raises:
         TypeError: If `learning_rate` is not a float.
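The same calling pattern applies to the warm-up schedule; a short sketch with arbitrary values, where the result is learning_rate * min(global_step, warmup_steps) / warmup_steps:

>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> warmup_lr = nn.WarmUpLR(learning_rate=0.1, warmup_steps=2)
>>> print(warmup_lr(Tensor(2, mstype.int32)))
0.1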
@@ -195,17 +195,16 @@ class Adam(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
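For reference, a minimal sketch of constructing this optimizer; an `nn.Dense` layer stands in for a real network and the hyper-parameters are illustrative:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.Adam(params=net.trainable_params(), learning_rate=0.1)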
@@ -371,9 +370,29 @@ class Adam(Optimizer):
 
 
 class AdamWeightDecay(Optimizer):
-    """
+    r"""
     Implements the Adam algorithm to fix the weight decay.
 
+    .. math::
+        \begin{array}{ll} \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
+            update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\
+            update =
+            \begin{cases}
+                update + weight\_decay * w_{t}
+                    & \text{ if } weight\_decay > 0 \\
+                update
+                    & \text{ otherwise }
+            \end{cases} \\
+            w_{t+1} = w_{t} - lr * update
+        \end{array}
+
+    :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
+    :math:`g` represents `gradients`, :math:`lr` represents `learning_rate`,
+    :math:`\beta_1, \beta_2` represent `beta1` and `beta2`, :math:`t` represents updating step while
+    :math:`w` represents `params`.
+
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
         weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied
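A corresponding construction sketch for `AdamWeightDecay`, again with a stand-in network and illustrative hyper-parameters:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.AdamWeightDecay(params=net.trainable_params(), learning_rate=0.01, weight_decay=0.01)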
@@ -493,17 +512,16 @@ class AdamOffload(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         This optimizer only supports `GRAPH_MODE` currently.
@@ -114,17 +114,16 @@ class LazyAdam(Optimizer):
 
     .. math::
         \begin{array}{ll} \\
-            m = \beta_1 * m + (1 - \beta_1) * g \\
-            v = \beta_2 * v + (1 - \beta_2) * g * g \\
+            m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
+            v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w = w - l * \frac{m}{\sqrt{v} + \epsilon}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
-    :math:`g` represents `gradients`, :math:`l` represents scaling factor `lr`, :math:`\beta_1, \beta_2` represent
+    :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
-    :math:`\epsilon` represents `eps`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
@@ -52,13 +52,25 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):
 
 
 class ProximalAdagrad(Optimizer):
-    """
+    r"""
     Implements the ProximalAdagrad algorithm with ApplyProximalAdagrad Operator.
 
     ProximalAdagrad is an online Learning and Stochastic Optimization.
     Refer to paper `Efficient Learning using Forward-Backward Splitting
     <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.
 
+    .. math::
+        accum_{t+1} = accum_{t} + grad * grad
+
+    .. math::
+        \text{prox_v} = var_{t} - lr * grad * \frac{1}{\sqrt{accum_{t+1}}}
+
+    .. math::
+        var_{t+1} = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
+
+    Here, grad, lr, var, accum and t denote the gradients, learning_rate, params, accumulation and the current
+    step respectively.
+
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
         weight decay is positive. When not separating parameter groups, the `weight_decay` in the API will be applied
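A construction sketch for `ProximalAdagrad` under the same assumptions as the optimizer examples above (stand-in network, illustrative values):

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> optim = nn.ProximalAdagrad(params=net.trainable_params(), learning_rate=0.01)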
@@ -66,6 +66,7 @@ def _tensors_cast_datatype(datatype, param):
     return F.cast(param, datatype)
 
 
+
 class WithLossCell(Cell):
     r"""
     Cell with loss function.
@@ -82,7 +83,7 @@ class WithLossCell(Cell):
         - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
 
     Outputs:
-        Tensor, a scalar tensor with shape :math:`()`.
+        Tensor, a tensor representing the loss value, the shape of which is usually :math:`()`.
 
     Raises:
         TypeError: If dtype of `data` or `label` is neither float16 nor float32.
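To illustrate the Inputs/Outputs described above, a minimal sketch of wrapping a backbone and a loss function (both are stand-ins chosen only for brevity):

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> loss_fn = nn.SoftmaxCrossEntropyWithLogits()
>>> loss_net = nn.WithLossCell(net, loss_fn)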
@@ -114,7 +115,7 @@ class WithLossCell(Cell):
     @property
     def backbone_network(self):
         """
-        Returns the backbone network.
+        Get the backbone network.
 
         Returns:
             Cell, the backbone network.
@@ -298,7 +299,7 @@ class TrainOneStepCell(Cell):
         - **(\*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
 
     Outputs:
-        Tensor, a scalar Tensor with shape :math:`()`.
+        Tensor, a tensor representing the loss value, the shape of which is usually :math:`()`.
 
     Raises:
         TypeError: If `sens` is not a number.
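A minimal sketch of assembling the one-step training wrapper whose loss output is described above; the network, loss function and optimizer are stand-ins:

>>> import mindspore.nn as nn
>>> net = nn.Dense(3, 4)
>>> loss_net = nn.WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> train_net = nn.TrainOneStepCell(loss_net, optim)
>>> train_net = train_net.set_train()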
@@ -408,6 +409,12 @@ class GetNextSingleOp(Cell):
 
     For detailed information, refer to `ops.operations.GetNext`.
 
+    Inputs:
+        No inputs.
+
+    Outputs:
+        tuple[Tensor], the data get from Dataset.
+
     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -635,13 +642,19 @@ class WithEvalCell(Cell):
 
 class ParameterUpdate(Cell):
     """
-    Cell that updates parameters.
+    Cell that updates a parameter.
 
     With this Cell, one can manually update `param` with the input `Tensor`.
 
     Args:
         param (Parameter): The parameter to be updated manually.
 
+    Inputs:
+        - **x** (Tensor) - A tensor whose shape and type are the same with `param`.
+
+    Outputs:
+        Tensor, the input `x`.
+
     Raises:
         KeyError: If parameter with the specified name does not exist.
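A sketch of the Inputs/Outputs contract added above for `ParameterUpdate`; the `nn.Dense` weight is only an example parameter, and the shape of the new value must match it:

>>> import numpy as np
>>> import mindspore.nn as nn
>>> from mindspore import Tensor
>>> from mindspore import dtype as mstype
>>> network = nn.Dense(3, 4)
>>> param = network.parameters_dict()['weight']
>>> update = nn.ParameterUpdate(param)
>>> weight = Tensor(np.arange(12).reshape((4, 3)), mstype.float32)
>>> output = update(weight)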
@@ -72,11 +72,11 @@ class DynamicLossScaleUpdateCell(Cell):
         scale_window (int): Maximum continuous training steps that do not have overflow.
 
     Inputs:
-        - **inputs** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
-        - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
+        - **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`.
+        - **overflow** (bool) - Whether the overflow occurs or not.
 
     Outputs:
-        Tensor, a scalar Tensor with shape :math:`()`.
+        bool, the input `overflow`.
 
     Raises:
         TypeError: If dtype of `inputs` or `label` is neither float16 nor float32.
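A construction sketch for the dynamic update cell described above; in practice the instance is passed as the `scale_sense` of `nn.TrainOneStepWithLossScaleCell`, and the values shown are illustrative:

>>> import mindspore.nn as nn
>>> scale_update = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)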
@@ -165,6 +165,13 @@ class FixedLossScaleUpdateCell(Cell):
     Args:
         loss_scale_value (float): Initializes loss scale.
 
+    Inputs:
+        - **loss_scale** (Tensor) - The loss scale value during training with shape :math:`()`, that will be ignored.
+        - **overflow** (bool) - Whether the overflow occurs or not.
+
+    Outputs:
+        bool, the input `overflow`.
+
     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -332,7 +339,11 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
     def set_sense_scale(self, sens):
         """
         If the user has set the sens in the training process and wants to reassign the value, he can call
-        this function again to make modification, and sens needs to be of type Tensor."""
+        this function again to make modification, and sens needs to be of type Tensor.
+
+        Inputs:
+            - **sens** (Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
+        """
         if self.scale_sense and isinstance(sens, Tensor):
             self.scale_sense.set_data(sens)
         else:
@@ -347,15 +358,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         overflow in the process of gradient calculation. In this case, pre_cond should be the output of the loss
         function, and compute_input should be the input of gradients-computing function.
 
-        Args:
-            pre_cond(object): A precondition for starting overflow detection. It determines the executing order of
-                overflow state clearing and prior processions. It makes sure that the function 'start_overflow' clears
-                status after finishing the process of precondition.
-            compute_input(object): The input of subsequent process. Overflow detection should be performed on a certain
-                computation. Set `compute_input` as the input of the computation, to ensure overflow status is cleared
-                before executing the computation.
+        Inputs:
+            - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order
+              of overflow state clearing and prior processions. It makes sure that the function 'start_overflow'
+              clears status after finishing the process of precondition.
+            - **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a
+              certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
+              cleared before executing the computation.
 
-        Returns:
+        Outputs:
             Tuple[object, object], the first value is False for GPU backend, while it is a instance of
             NPUAllocFloatStatus for other backend. The status is used to detect overflow during overflow detection.
             The second value is the same as the input of `compute_input`, but contains some information about the
|
||||||
|
|
||||||
Get overflow results after executing the target process for overflow detection.
|
Get overflow results after executing the target process for overflow detection.
|
||||||
|
|
||||||
Args:
|
Inputs:
|
||||||
status(object): A status instance used to detect the overflow.
|
- **status** (object) - A status instance used to detect the overflow.
|
||||||
compute_output: Overflow detection should be performed on a certain computation. Set `compute_output` as
|
- **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output`
|
||||||
the output of the computation, to ensure overflow status is acquired before executing the computation.
|
as the output of the computation, to ensure overflow status is acquired before executing the
|
||||||
|
computation.
|
||||||
|
|
||||||
Returns:
|
Outputs:
|
||||||
bool, whether the overflow occurs or not.
|
bool, whether the overflow occurs or not.
|
||||||
"""
|
"""
|
||||||
if not self.gpu_target:
|
if not self.gpu_target:
|
||||||
|
@@ -409,10 +421,10 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         """
         Calculate loss scale according to the overflow.
 
-        Args:
-            overflow(bool): Whether the overflow occurs or not.
+        Inputs:
+            - **overflow** (bool) - Whether the overflow occurs or not.
 
-        Returns:
+        Outputs:
             bool, overflow value.
         """
         if self.loss_scaling_manager is not None:
@@ -266,7 +266,7 @@ class DatasetHelper:
             self.iter.release()
 
     def continue_send(self):
-        """continue send data to device at the beginning of epoch."""
+        """Continue to send data to device at the beginning of epoch."""
        self.iter.continue_send()
 
     def get_data_info(self):
@@ -67,23 +67,39 @@ class FixedLossScaleManager(LossScaleManager):
         self._drop_overflow_update = drop_overflow_update
 
     def get_loss_scale(self):
-        """Get loss scale value."""
+        """
+        Get loss scale value.
+
+        Returns:
+            float, `loss_scale` value.
+        """
         return self._loss_scale
 
     def get_drop_overflow_update(self):
-        """Get the flag whether to drop optimizer update when there is an overflow."""
+        """
+        Get the flag whether to drop optimizer update when there is an overflow.
+
+        Returns:
+            bool, `drop_overflow_update` value.
+        """
         return self._drop_overflow_update
 
     def update_loss_scale(self, overflow):
         """
-        Update loss scale value.
+        Update loss scale value. The interface at `FixedLossScaleManager` will do nothing.
 
         Args:
             overflow (bool): Whether it overflows.
         """
 
     def get_update_cell(self):
-        "Returns the cell for `TrainOneStepWithLossScaleCell`"
+        """
+        Returns the update cell for `TrainOneStepWithLossScaleCell`.
+
+        Returns:
+            None or Cell. Cell object, used to update `loss_scale`, when `drop_overflow_update` is True. None when
+            `drop_overflow_update` is False.
+        """
         if not self._drop_overflow_update:
             return None
         return nn.FixedLossScaleUpdateCell(self._loss_scale)
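For orientation, a hedged end-to-end sketch of plugging `FixedLossScaleManager` into `Model`; the network, loss and optimizer are stand-ins, and the import paths follow the mindspore.train package layout:

>>> import mindspore.nn as nn
>>> from mindspore.train.model import Model
>>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
>>> net = nn.Dense(3, 4)
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> loss_scale_manager = FixedLossScaleManager(loss_scale=1024.0, drop_overflow_update=False)
>>> model = Model(net, loss_fn=nn.SoftmaxCrossEntropyWithLogits(), optimizer=optim,
...               loss_scale_manager=loss_scale_manager)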
@@ -127,7 +143,12 @@ class DynamicLossScaleManager(LossScaleManager):
         self.bad_step = 0
 
     def get_loss_scale(self):
-        """Get loss scale value."""
+        """
+        Get loss scale value.
+
+        Returns:
+            float, `loss_scale` value.
+        """
         return self.loss_scale
 
     def update_loss_scale(self, overflow):
@@ -152,9 +173,19 @@ class DynamicLossScaleManager(LossScaleManager):
         self.cur_iter += 1
 
     def get_drop_overflow_update(self):
-        """Get the flag whether to drop optimizer update when there is an overflow."""
+        """
+        Get the flag whether to drop optimizer update when there is an overflow.
+
+        Returns:
+            bool, always return True at `DynamicLossScaleManager`.
+        """
         return True
 
     def get_update_cell(self):
-        "Returns the cell for `TrainOneStepWithLossScaleCell`"
+        """
+        Returns the update cell for `TrainOneStepWithLossScaleCell`.
+
+        Returns:
+            Cell, cell object used to update `loss_scale`.
+        """
         return nn.DynamicLossScaleUpdateCell(self.loss_scale, self.scale_factor, self.scale_window)
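The dynamic manager follows the same pattern; a short sketch of creating it and retrieving its update cell (the arguments shown are the documented defaults):

>>> from mindspore.train.loss_scale_manager import DynamicLossScaleManager
>>> manager = DynamicLossScaleManager(init_loss_scale=2**24, scale_factor=2, scale_window=2000)
>>> update_cell = manager.get_update_cell()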