!27052 fix new initializer and optimizer

Merge pull request !27052 from wanyiming/fix_init_opt
i-robot 2021-12-03 09:25:50 +00:00 committed by Gitee
commit 5bfb306f77
3 changed files with 20 additions and 18 deletions


@@ -434,6 +434,7 @@ class Sparse(Initializer):
 _assignment(arr, data)
+@_register()
 class Dirac(Initializer):
"""Initialize input tensor with the Dirac delta function. It tries to preserves the identity of """Initialize input tensor with the Dirac delta function. It tries to preserves the identity of
input for convolution layers. For group convolution, each group of channels will be preserved respectively. input for convolution layers. For group convolution, each group of channels will be preserved respectively.
@@ -442,8 +443,8 @@ class Dirac(Initializer):
 groups (int): The number of groups in the convolution layer. Default: 1.
 Raises:
-ValueError: If the value of group is not in [3, 4, 5] or the first dimension of the initialized
-    tensor cannot be divisible by group.
+ValueError: If the value of group is not in [3, 4, 5].
+ValueError: If the first dimension of the initialized tensor is not divisible by group.
 Examples:
 >>> import mindspore
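
For context, a minimal usage sketch of the Dirac initializer documented in this hunk, following the initializer(...) call pattern used in the docstring examples; the shape [6, 4, 3, 3] and groups=2 are illustrative choices, not values from the patch:

    import mindspore
    from mindspore.common.initializer import initializer, Dirac

    # 4-D weight of a grouped convolution: the first dimension (6) is divisible by
    # groups (2), so each group gets an identity-preserving (Dirac delta) kernel.
    weight = initializer(Dirac(groups=2), [6, 4, 3, 3], mindspore.float32)
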
@@ -545,7 +546,7 @@ class VarianceScaling(Initializer):
 distribution (str): The type of distribution chosen to sample values. Default: 'truncated_normal'.
 Raises:
-ValueError: If scale is not greater than 0..
+ValueError: If scale is not greater than 0.
 ValueError: If mode is not fan_in, fan_out or fan_avg.
 ValueError: If distribution is not uniform, truncated_normal or untruncated_normal.
@@ -553,7 +554,7 @@ class VarianceScaling(Initializer):
 >>> import mindspore
 >>> from mindspore.common.initializer import initializer, VarianceScaling
 >>> tensor1 = initializer(VarianceScaling(scale=1.0, mode='fan_out',
->>> distribution='untruncated_normal'), [2, 3], mindspore.float32)
+... distribution='untruncated_normal'), [2, 3], mindspore.float32)
 >>> tensor2 = initializer('varianceScaling', [2, 3], mindspore.float32)
 """
 def __init__(self, scale=1.0, mode='fan_in', distribution='truncated_normal'):
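
The scale/mode/distribution parameters above follow the conventional variance-scaling rule, where the sampling spread is derived from scale / n and n is chosen by mode. A rough NumPy sketch of that rule, as an illustration of the general technique rather than MindSpore's internal implementation (the helper name is made up here):

    import numpy as np

    def variance_scaling_std(scale, fan_in, fan_out, mode='fan_in'):
        # Conventional rule: variance = scale / n, where n depends on the chosen mode.
        n = {'fan_in': fan_in, 'fan_out': fan_out, 'fan_avg': (fan_in + fan_out) / 2}[mode]
        return np.sqrt(scale / n)

    # For the [2, 3] weight in the example above (fan_in=3, fan_out=2):
    print(variance_scaling_std(1.0, fan_in=3, fan_out=2, mode='fan_out'))  # ~0.707
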


@@ -52,8 +52,8 @@ class ASGD(Optimizer):
 Args:
 params (Union[list[Parameter], list[dict]]): Must be list of `Parameter` or list of `dict`. When the
 `parameters` is a list of `dict`, the "params", "lr", "weight_decay", "grad_centralization" and
 "order_params" are the keys that can be parsed.
 - params: Required. Parameters in current group. The value must be a list of `Parameter`.
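
As a reading aid, a sketch of the parameter-group dict format described in this hunk, applied to nn.ASGD; Net() and the name-based filters are hypothetical placeholders, not part of the patch:

    from mindspore import nn

    net = Net()  # placeholder: any user-defined mindspore.nn.Cell
    conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
    other_params = [p for p in net.trainable_params() if 'conv' not in p.name]

    group_params = [{'params': conv_params, 'weight_decay': 0.01, 'grad_centralization': True},
                    {'params': other_params, 'lr': 0.01},
                    {'order_params': net.trainable_params()}]
    optim = nn.ASGD(group_params, learning_rate=0.1)
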
@@ -90,7 +90,7 @@ class ASGD(Optimizer):
 lambd (float): The decay term. Default: 1e-4.
 alpha (float): The power for eta update. Default: 0.75.
 t0 (float): The point of starting averaging. Default: 1e6.
-weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
+weight_decay (int, float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
 Inputs:
 - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
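
For intuition about lambd, alpha and t0, a rough NumPy sketch of the classic averaged-SGD schedule they come from (the common ASGD formulation; an illustration, not a transcription of this optimizer's construct method):

    import numpy as np

    def asgd_step(w, ax, grad, t, lr=0.1, lambd=1e-4, alpha=0.75, t0=1e6):
        # The step size decays polynomially with alpha; iterate averaging kicks in after t0 steps.
        eta = lr / (1 + lambd * lr * t) ** alpha
        mu = 1.0 / max(1.0, t - t0)
        w = w * (1 - lambd * eta) - eta * grad        # decay term plus gradient step
        ax = w if mu == 1.0 else ax + mu * (w - ax)   # running average of the weights
        return w, ax
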
@@ -168,6 +168,7 @@ class ASGD(Optimizer):
 for index, (grad, param, mu, eta, ax) in enumerate(zip(gradients, self.parameters, self.mu, self.eta, self.ax)):
 lr = lrs[index] if self.is_group_lr else lrs
+lr = self.squeeze(lr)
 if self.step == 1.:
 self.assign(eta, lr)


@@ -34,13 +34,13 @@ class Rprop(Optimizer):
 .. math::
 \begin{gather*}
-&\hspace{0mm} \textbf{if} \: g_{t-1} g_t > 0 \\
-&\hspace{5mm} \Delta_t \leftarrow \mathrm{min}(\Delta_{t-1} \eta_{+}, \Delta_{max}) \\
+&\hspace{-10mm} \textbf{if} \: g_{t-1} g_t > 0 \\
+&\hspace{25mm} \Delta_t \leftarrow \mathrm{min}(\Delta_{t-1} \eta_{+}, \Delta_{max}) \\
 &\hspace{0mm} \textbf{else if} \: g_{t-1} g_t < 0 \\
-&\hspace{5mm} \Delta_t \leftarrow \mathrm{max}(\Delta_{t-1} \eta_{-}, \Delta_{min}) \\
-&\hspace{mm} \textbf{else} \: \\
-&\hspace{5mm} \Delta_t \leftarrow \Delta_{t-1} \\
-&\hspace{0mm} w_{t} \leftarrow w_{t-1}- \Delta_{t} \mathrm{sign}(g_t) \\
+&\hspace{25mm} \Delta_t \leftarrow \mathrm{max}(\Delta_{t-1} \eta_{-}, \Delta_{min}) \\
+&\hspace{-25mm} \textbf{else} \: \\
+&\hspace{-5mm} \Delta_t \leftarrow \Delta_{t-1} \\
+&\hspace{15mm} w_{t} \leftarrow w_{t-1}- \Delta_{t} \mathrm{sign}(g_t) \\
 \end{gather*}
 :math:`\Delta_{min/max}` represents the min/max step size, :math:`\eta_{+/-}` represents the factors of
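
Spelled out in code, the update rule above might look like the following NumPy sketch, element-wise per parameter; names are illustrative and this is not the optimizer's actual kernel:

    import numpy as np

    def rprop_step(w, g, g_prev, delta, etas=(0.5, 1.2), step_sizes=(1e-6, 50.)):
        # Grow the step size where the gradient sign is stable, shrink it where the sign
        # flipped, leave it unchanged otherwise, then move against the sign of the gradient.
        eta_minus, eta_plus = etas
        delta_min, delta_max = step_sizes
        same_sign = g_prev * g
        delta = np.where(same_sign > 0, np.minimum(delta * eta_plus, delta_max), delta)
        delta = np.where(same_sign < 0, np.maximum(delta * eta_minus, delta_min), delta)
        w = w - delta * np.sign(g)
        return w, delta
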
@@ -53,8 +53,8 @@ class Rprop(Optimizer):
 Args:
 params (Union[list[Parameter], list[dict]]): Must be list of `Parameter` or list of `dict`. When the
 `parameters` is a list of `dict`, the "params", "lr", "weight_decay", "grad_centralization" and
 "order_params" are the keys that can be parsed.
 - params: Required. Parameters in current group. The value must be a list of `Parameter`.
@@ -91,7 +91,7 @@ class Rprop(Optimizer):
 etas (tuple[float, float]): The factors of multiplicative increasing or
     decreasing (etaminus, etaplus).
 step_sizes (tuple[float, float]): The allowed minimal and maximal step size (min_step_sizes, max_step_size).
-weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
+weight_decay (int, float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
 Inputs:
 - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
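
A minimal construction sketch using the hyper-parameters listed above, assuming the usual nn optimizer interface; Net() is again a hypothetical placeholder for any user-defined Cell:

    from mindspore import nn

    net = Net()  # placeholder: any user-defined mindspore.nn.Cell
    optim = nn.Rprop(net.trainable_params(), learning_rate=0.1,
                     etas=(0.5, 1.2), step_sizes=(1e-6, 50.), weight_decay=0.)
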
@@ -104,7 +104,7 @@ class Rprop(Optimizer):
 TypeError: If element of `parameters` is neither Parameter nor dict.
 TypeError: If `step_sizes` or `etas` is not a tuple.
 ValueError: If maximal step size is less than minimal step size.
-ValueError: If the length of `step_sizes` or `ets` is not equal to 2.
+ValueError: If the length of `step_sizes` or `etas` is not equal to 2.
 TypeError: If the element in `etas` or `step_sizes` is not a float.
 ValueError: If `etaminus` is not in the range of (0, 1) or `etaplus` is not greater than 1.
 TypeError: If `weight_decay` is neither float nor int.
@@ -136,7 +136,7 @@ class Rprop(Optimizer):
 """
 @opt_init_args_register
-def __init__(self, params, learning_rate=0.1, etas=(0.5, 1.2), step_sizes=(1e-6, 50.), weight_decay=0.1):
+def __init__(self, params, learning_rate=0.1, etas=(0.5, 1.2), step_sizes=(1e-6, 50.), weight_decay=0.):
 super(Rprop, self).__init__(learning_rate, params, weight_decay)
 if not isinstance(etas, tuple):