forked from mindspore-Ecosystem/mindspore
!3217 rename operator of sparse optimizer
Merge pull request !3217 from wangnan39/rename_the_operator_of_sparse_optimizer
commit 5d42d00161

@@ -40,7 +40,7 @@ class SparseApplyAdamCPUKernel : public CPUKernel {
   bool use_nesterov_{false};
 };

-MS_REG_CPU_KERNEL(SparseApplyAdam,
+MS_REG_CPU_KERNEL(FusedSparseAdam,
                   KernelAttr()
                     .AddInputAttr(kNumberTypeFloat32)
                     .AddInputAttr(kNumberTypeFloat32)

@@ -42,19 +42,7 @@ class SparseApplyFtrlCPUKernel : public CPUKernel {
   float lr_power_{0};
 };

-MS_REG_CPU_KERNEL(SparseApplyFtrl,
-                  KernelAttr()
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeInt32)
-                    .AddOutputAttr(kNumberTypeFloat32)
-                    .AddOutputAttr(kNumberTypeFloat32)
-                    .AddOutputAttr(kNumberTypeFloat32),
-                  SparseApplyFtrlCPUKernel);
-
-MS_REG_CPU_KERNEL(SparseApplyFtrlNoReturn,
+MS_REG_CPU_KERNEL(FusedSparseFtrl,
                   KernelAttr()
                     .AddInputAttr(kNumberTypeFloat32)
                     .AddInputAttr(kNumberTypeFloat32)

@@ -40,7 +40,7 @@ class SparseApplyLazyAdamCPUKernel : public CPUKernel {
   bool use_nesterov_{false};
 };

-MS_REG_CPU_KERNEL(SparseApplyLazyAdam,
+MS_REG_CPU_KERNEL(FusedSparseLazyAdam,
                   KernelAttr()
                     .AddInputAttr(kNumberTypeFloat32)
                     .AddInputAttr(kNumberTypeFloat32)

@@ -39,20 +39,7 @@ class SparseApplyProximalAdagradCPUKernel : public CPUKernel {
   size_t var_outer_dim_size_{1};
 };

-MS_REG_CPU_KERNEL(SparseApplyProximalAdagrad,
-                  KernelAttr()
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeFloat32)
-                    .AddInputAttr(kNumberTypeInt32)
-                    .AddOutputAttr(kNumberTypeFloat32)
-                    .AddOutputAttr(kNumberTypeFloat32),
-                  SparseApplyProximalAdagradCPUKernel);
-
-MS_REG_CPU_KERNEL(SparseApplyProximalAdagradNoReturn,
+MS_REG_CPU_KERNEL(FusedSparseProximalAdagrad,
                   KernelAttr()
                     .AddInputAttr(kNumberTypeFloat32)
                     .AddInputAttr(kNumberTypeFloat32)

@@ -299,7 +299,7 @@ class Adam(Optimizer):

         self.hyper_map = C.HyperMap()
         self.opt = P.Adam(use_locking, use_nesterov)
-        self.sparse_opt = P.SparseApplyAdam(use_locking, use_nesterov)
+        self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)

     def construct(self, gradients):
         params = self.parameters

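For context, a minimal sketch (not part of the diff) of driving the renamed primitive directly; it condenses the FusedSparseAdam docstring example added further down in this commit, and the hyper-parameter values and tensor shapes here are illustrative only. FusedSparseLazyAdam, used by LazyAdam below, takes the same inputs.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class AdamNet(nn.Cell):
    def __init__(self):
        super(AdamNet, self).__init__()
        # Renamed in this commit from P.SparseApplyAdam.
        self.fused_sparse_adam = P.FusedSparseAdam()
        # Illustrative scalar hyper-parameters (not taken from the diff).
        self.beta1_power = 0.9
        self.beta2_power = 0.999
        self.lr = 0.001
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.eps = 1e-8
        self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
        self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
        self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")

    def construct(self, grad, indices):
        # Input order follows init_prim_io_names(['var', 'm', 'v', 'beta1_power', ...]) shown below.
        return self.fused_sparse_adam(self.var, self.m, self.v, self.beta1_power, self.beta2_power,
                                      self.lr, self.beta1, self.beta2, self.eps, grad, indices)

net = AdamNet()
grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
output = net(grad, indices)  # var, m and v are updated in place
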
@@ -16,7 +16,6 @@
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common import Tensor
 import mindspore.common.dtype as mstype
-from mindspore.ops.operations import _inner_ops as inner
 from mindspore._checkparam import Validator as validator
 from mindspore._checkparam import Rel
 from .optimizer import Optimizer, _apply_decay, _grad_scale

@@ -159,7 +158,7 @@ class FTRL(Optimizer):
         self.decay_tf = tuple((lambda: True)() for x in self.parameters)
         self.hyper_map = C.HyperMap()
         self.opt = P.ApplyFtrl(use_locking=use_locking)
-        self.sparse_opt = inner.SparseApplyFtrlNoReturn(learning_rate, l1, l2, lr_power, use_locking=use_locking)
+        self.sparse_opt = P.FusedSparseFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)

     def construct(self, grads):
         params = self.parameters

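Likewise, a condensed, runnable sketch (not part of the diff) of the FusedSparseFtrl example that this commit adds to nn_ops.py; the lr/l1/l2/lr_power values and shapes are the illustrative ones from that docstring.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class FtrlNet(nn.Cell):
    def __init__(self):
        super(FtrlNet, self).__init__()
        # Renamed in this commit from inner.SparseApplyFtrlNoReturn.
        self.fused_sparse_ftrl = P.FusedSparseFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
        self.linear = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="linear")

    def construct(self, grad, indices):
        # var, accum and linear are updated in place; the returned tuple is a placeholder.
        return self.fused_sparse_ftrl(self.var, self.accum, self.linear, grad, indices)

net = FtrlNet()
grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
output = net(grad, indices)
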
@@ -182,7 +182,7 @@ class LazyAdam(Optimizer):

         self.hyper_map = C.HyperMap()
         self.opt = P.Adam(use_locking, use_nesterov)
-        self.sparse_opt = P.SparseApplyLazyAdam(use_locking, use_nesterov)
+        self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)

     def construct(self, gradients):
         gradients = self.decay_weight(gradients)

@@ -16,7 +16,6 @@
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common import Tensor
 import mindspore.common.dtype as mstype
-from mindspore.ops.operations import _inner_ops as inner
 from mindspore._checkparam import Validator as validator
 from mindspore._checkparam import Rel
 from .optimizer import Optimizer

@@ -101,7 +100,7 @@ class ProximalAdagrad(Optimizer):
         self.weight_decay = weight_decay
         self.hyper_map = C.HyperMap()
         self.opt = P.ApplyProximalAdagrad(use_locking=use_locking)
-        self.sparse_opt = inner.SparseApplyProximalAdagradNoReturn(use_locking=use_locking)
+        self.sparse_opt = P.FusedSparseProximalAdagrad(use_locking=use_locking)

     def construct(self, grads):
         params = self.parameters

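And a condensed sketch (not part of the diff) of the FusedSparseProximalAdagrad example added below; note that lr, l1 and l2 are passed to the primitive as float32 tensors rather than as constructor arguments.

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class ProximalAdagradNet(nn.Cell):
    def __init__(self):
        super(ProximalAdagradNet, self).__init__()
        # Renamed in this commit from inner.SparseApplyProximalAdagradNoReturn.
        self.fused_sparse_proximal_adagrad = P.FusedSparseProximalAdagrad()
        self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
        self.lr = Tensor(0.01, mstype.float32)
        self.l1 = Tensor(0.0, mstype.float32)
        self.l2 = Tensor(0.0, mstype.float32)

    def construct(self, grad, indices):
        # var and accum are updated in place according to the proximal adagrad rule.
        return self.fused_sparse_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
                                                  self.l2, grad, indices)

net = ProximalAdagradNet()
grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
output = net(grad, indices)
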
@@ -56,7 +56,7 @@ from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AccumulateNV2, AssignAdd, A

 from .random_ops import (RandomChoiceWithMask, Normal, Gamma, Poisson, UniformInt, UniformReal,
                          RandomCategorical, Laplace)
-from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, ApplyMomentum, BatchNorm,
+from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, ApplyMomentum, BatchNorm,
                      BiasAdd, Conv2D,
                      DepthwiseConv2dNative,
                      DropoutDoMask, DropoutGrad, Dropout,

@@ -74,6 +74,7 @@ from .nn_ops import (LSTM, SGD, Adam, SparseApplyAdam, SparseApplyLazyAdam, Appl
                      SparseSoftmaxCrossEntropyWithLogits, Tanh,
                      TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
                      ApplyProximalAdagrad, SparseApplyProximalAdagrad, SparseApplyAdagradV2, SparseApplyFtrlV2,
+                     FusedSparseFtrl, FusedSparseProximalAdagrad,
                      ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2,
                      ApplyAddSign, ApplyPowerSign, ApplyGradientDescent, ApplyProximalGradientDescent,
                      ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell, InTopK)

@@ -114,8 +115,8 @@ __all__ = [
     'MaxPool',
     'TopK',
     'Adam',
-    'SparseApplyAdam',
-    'SparseApplyLazyAdam',
+    'FusedSparseAdam',
+    'FusedSparseLazyAdam',
     'Softplus',
     'Softmax',
     'Softsign',

@@ -311,8 +312,10 @@ __all__ = [
     "SpaceToBatch",
     "SparseApplyFtrl",
     "SparseApplyFtrlV2",
+    "FusedSparseFtrl",
     "ApplyProximalAdagrad",
     "SparseApplyProximalAdagrad",
+    "FusedSparseProximalAdagrad",
     "ApplyAdaMax",
     "ApplyAdadelta",
     "ApplyAdagrad",

@@ -18,9 +18,6 @@
 from ..._checkparam import Rel
 from ..._checkparam import Validator as validator
 from ...common import dtype as mstype
-from ..._c_expression import signature_rw as sig_rw
-from ..._c_expression import signature_kind as sig_kind
-from ..._c_expression import signature_dtype as sig_dtype
 from ..primitive import PrimitiveWithInfer, prim_attr_register


@@ -394,183 +391,6 @@ class Dequant(PrimitiveWithInfer):
         return mstype.float16


-class SparseApplyFtrlNoReturn(PrimitiveWithInfer):
-    """
-    Update relevant entries according to the FTRL-proximal scheme.
-
-    Args:
-        lr (float): The learning rate value, must be positive.
-        l1 (float): l1 regularization strength, must be greater than or equal to zero.
-        l2 (float): l2 regularization strength, must be greater than or equal to zero.
-        lr_power (float): Learning rate power controls how the learning rate decreases during training,
-            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
-        use_locking (bool): Use locks for update operation if True . Default: False.
-
-    Inputs:
-        - **var** (Parameter): The variable to be updated. The data type must be float32.
-        - **accum** (Parameter): The accum to be updated, must be same type and shape as `var`.
-        - **linear** (Parameter): The linear to be updated, must be same type and shape as `var`.
-        - **grad** (Tensor): A tensor of the same type as `var`, for the gradient.
-        - **indices** (Tensor): A vector of indices into the first dimension of `var` and `accum`. The shape
-          of `indices` must be the same as `grad` in first dimension. The type must be int32.
-
-    Outputs:
-        Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
-
-        - **var** (Tensor) - A Tensor with shape (1,).
-        - **accum** (Tensor) - A Tensor with shape (1,).
-        - **linear** (Tensor) - A Tensor with shape (1,).
-
-    Examples:
-        >>> import mindspore
-        >>> import mindspore.nn as nn
-        >>> import numpy as np
-        >>> from mindspore import Parameter
-        >>> from mindspore import Tensor
-        >>> from mindspore.ops import operations as P
-        >>> class SparseApplyFtrlNet(nn.Cell):
-        >>>     def __init__(self):
-        >>>         super(SparseApplyFtrlNet, self).__init__()
-        >>>         self.sparse_apply_ftrl = P.SparseApplyFtrlV2(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
-        >>>         self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
-        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
-        >>>         self.linear = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="linear")
-        >>>
-        >>>     def construct(self, grad, indices):
-        >>>         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
-        >>>         return out
-        >>>
-        >>> net = SparseApplyFtrlNet()
-        >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
-        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
-        >>> output = net(grad, indices)
-    """
-    __mindspore_signature__ = (
-        ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('linear', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1)
-    )
-
-    @prim_attr_register
-    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
-        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
-                                outputs=['output'])
-        validator.check_value_type("lr", lr, [float], self.name)
-        validator.check_value_type("l1", l1, [float], self.name)
-        validator.check_value_type("l2", l2, [float], self.name)
-        validator.check_value_type("lr_power", lr_power, [float], self.name)
-        self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name)
-        self.l1 = validator.check_number_range("l1", l1, 0.0, float("inf"), Rel.INC_LEFT, self.name)
-        self.l2 = validator.check_number_range("l2", l2, 0.0, float("inf"), Rel.INC_LEFT, self.name)
-        self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
-        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
-        self.add_prim_attr('primitive_target', 'CPU')
-
-    def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
-        validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
-        validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
-        if len(var_shape) > 1:
-            validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
-        validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name)
-        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
-        return [1], [1], [1]
-
-    def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
-        args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
-                "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
-        validator.check_tensor_type_same(args, [mstype.float32], self.name)
-        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
-        return var_dtype, accum_dtype, linear_dtype
-
-
-class SparseApplyProximalAdagradNoReturn(PrimitiveWithInfer):
-    r"""
-    Updates relevant entries according to the proximal adagrad algorithm.
-
-    .. math::
-            accum += grad * grad
-    .. math::
-            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}}
-    .. math::
-            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
-
-    Args:
-        use_locking (bool): If True, updating of the var and accum tensors will be protected. Default: False.
-
-    Inputs:
-        - **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
-        - **accum** (Parameter) - Variable tensor to be updated. Has the same dtype as `var`.
-        - **lr** (Tensor): The learning rate value. The data type must be float32.
-        - **l1** (Tensor): l1 regularization strength. The data type must be float32.
-        - **l2** (Tensor): l2 regularization strength. The data type must be float32.
-        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. The data type must be float32.
-        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. The data type
-          must be int32.
-
-    Outputs:
-        Tuple of 2 Tensor, this operator will update the input parameters directly, the outputs are useless.
-
-        - **var** (Tensor) - A Tensor with shape (1,).
-        - **accum** (Tensor) - A Tensor with shape (1,).
-
-    Examples:
-        >>> import numpy as np
-        >>> import mindspore.nn as nn
-        >>> from mindspore import Tensor, Parameter
-        >>> from mindspore.ops import operations as P
-        >>> class Net(nn.Cell):
-        >>>     def __init__(self):
-        >>>         super(Net, self).__init__()
-        >>>         self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagradV2()
-        >>>         self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
-        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
-        >>>         self.lr = Tensor(0.01, mstype.float32)
-        >>>         self.l1 = Tensor(0.0, mstype.float32)
-        >>>         self.l2 = Tensor(0.0, mstype.float32)
-        >>>     def construct(self, grad, indices):
-        >>>         out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
-        >>>                                                  self.l2, grad, indices)
-        >>>         return out
-        >>> net = Net()
-        >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
-        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
-        >>> output = net(grad, indices)
-    """
-    __mindspore_signature__ = (
-        ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('l1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('l2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
-        ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1)
-    )
-
-    @prim_attr_register
-    def __init__(self, use_locking=False):
-        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
-                                outputs=['output'])
-        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
-        self.add_prim_attr('primitive_target', 'CPU')
-
-    def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape):
-        validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name)
-        return [1], [1]
-
-    def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype, indices_dtype):
-        args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
-        validator.check_tensor_type_same(args, [mstype.float32], self.name)
-        validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, [mstype.float32], self.name)
-        validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, [mstype.float32], self.name)
-        validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, [mstype.float32], self.name)
-        valid_types = [mstype.int16, mstype.int32, mstype.int64,
-                       mstype.uint16, mstype.uint32, mstype.uint64]
-        validator.check_tensor_type_same({'indices': indices_dtype}, valid_types, self.name)
-        return var_dtype, accum_dtype
-
-
 class LinSpace(PrimitiveWithInfer):
     r"""
     Generates values in an interval. And return the corresponding interpolation accroding to assist.

@@ -2917,7 +2917,7 @@ class Adam(PrimitiveWithInfer):
         return var_dtype, m_dtype, v_dtype


-class SparseApplyAdam(PrimitiveWithInfer):
+class FusedSparseAdam(PrimitiveWithInfer):
     r"""
     Merge the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam)
     algorithm. This operator is used when the gradient is sparse.

@@ -2979,7 +2979,7 @@ class SparseApplyAdam(PrimitiveWithInfer):
         >>> class Net(nn.Cell):
         >>>     def __init__(self):
         >>>         super(Net, self).__init__()
-        >>>         self.sparse_apply_adam = P.SparseApplyAdam()
+        >>>         self.sparse_apply_adam = P.FusedSparseAdam()
         >>>         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
         >>>         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
         >>>         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")

@@ -3025,7 +3025,6 @@ class SparseApplyAdam(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
                                         'epsilon', 'grad', 'indices'],
                                 outputs=['var', 'm', 'v'])
-        self.add_prim_attr('primitive_target', 'CPU')

     def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
                     beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape):

@@ -3051,7 +3050,7 @@ class SparseApplyAdam(PrimitiveWithInfer):
         return var_dtype, m_dtype, v_dtype


-class SparseApplyLazyAdam(PrimitiveWithInfer):
+class FusedSparseLazyAdam(PrimitiveWithInfer):
     r"""
     Merge the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam)
     algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the

@@ -3114,7 +3113,7 @@ class SparseApplyLazyAdam(PrimitiveWithInfer):
         >>> class Net(nn.Cell):
         >>>     def __init__(self):
         >>>         super(Net, self).__init__()
-        >>>         self.sparse_apply_lazyadam = P.SparseApplyLazyAdam()
+        >>>         self.sparse_apply_lazyadam = P.FusedSparseLazyAdam()
         >>>         self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var")
         >>>         self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m")
         >>>         self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")

@@ -3160,7 +3159,6 @@ class SparseApplyLazyAdam(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['var', 'm', 'v', 'beta1_power', 'beta2_power', 'lr', 'beta1', 'beta2',
                                         'epsilon', 'grad', 'indices'],
                                 outputs=['var', 'm', 'v'])
-        self.add_prim_attr('primitive_target', 'CPU')

     def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, beta2_power_shape, lr_shape,
                     beta1_shape, beta2_shape, epsilon_shape, grad_shape, indices_shape):

@@ -3187,6 +3185,182 @@ class SparseApplyLazyAdam(PrimitiveWithInfer):
         return var_dtype, m_dtype, v_dtype


+class FusedSparseFtrl(PrimitiveWithInfer):
+    """
+    Merge the duplicate value of the gradient and then update relevant entries according to the FTRL-proximal scheme.
+
+    Args:
+        lr (float): The learning rate value, must be positive.
+        l1 (float): l1 regularization strength, must be greater than or equal to zero.
+        l2 (float): l2 regularization strength, must be greater than or equal to zero.
+        lr_power (float): Learning rate power controls how the learning rate decreases during training,
+            must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero.
+        use_locking (bool): Use locks for update operation if True . Default: False.
+
+    Inputs:
+        - **var** (Parameter): The variable to be updated. The data type must be float32.
+        - **accum** (Parameter): The accum to be updated, must be same type and shape as `var`.
+        - **linear** (Parameter): The linear to be updated, must be same type and shape as `var`.
+        - **grad** (Tensor): A tensor of the same type as `var`, for the gradient.
+        - **indices** (Tensor): A vector of indices into the first dimension of `var` and `accum`. The shape
+          of `indices` must be the same as `grad` in first dimension. The type must be int32.
+
+    Outputs:
+        Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
+
+        - **var** (Tensor) - A Tensor with shape (1,).
+        - **accum** (Tensor) - A Tensor with shape (1,).
+        - **linear** (Tensor) - A Tensor with shape (1,).
+
+    Examples:
+        >>> import mindspore
+        >>> import mindspore.nn as nn
+        >>> import numpy as np
+        >>> from mindspore import Parameter
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> class SparseApplyFtrlNet(nn.Cell):
+        >>>     def __init__(self):
+        >>>         super(SparseApplyFtrlNet, self).__init__()
+        >>>         self.sparse_apply_ftrl = P.FusedSparseFtrl(lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
+        >>>         self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
+        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
+        >>>         self.linear = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="linear")
+        >>>
+        >>>     def construct(self, grad, indices):
+        >>>         out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
+        >>>         return out
+        >>>
+        >>> net = SparseApplyFtrlNet()
+        >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
+        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
+        >>> output = net(grad, indices)
+    """
+    __mindspore_signature__ = (
+        ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('linear', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1)
+    )
+
+    @prim_attr_register
+    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
+        self.init_prim_io_names(inputs=['var', 'accum', 'linear', 'grad', 'indices'],
+                                outputs=['output'])
+        validator.check_value_type("lr", lr, [float], self.name)
+        validator.check_value_type("l1", l1, [float], self.name)
+        validator.check_value_type("l2", l2, [float], self.name)
+        validator.check_value_type("lr_power", lr_power, [float], self.name)
+        self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name)
+        self.l1 = validator.check_number_range("l1", l1, 0.0, float("inf"), Rel.INC_LEFT, self.name)
+        self.l2 = validator.check_number_range("l2", l2, 0.0, float("inf"), Rel.INC_LEFT, self.name)
+        self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
+        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
+
+    def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
+        validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
+        validator.check('var shape', var_shape, 'linear shape', linear_shape, Rel.EQ, self.name)
+        if len(var_shape) > 1:
+            validator.check('var_shape[1:]', var_shape[1:], 'grad_shape[1:]', grad_shape[1:], Rel.EQ, self.name)
+        validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name)
+        validator.check('grad_shape[0]', grad_shape[0], 'indices_shape[0]', indices_shape[0], Rel.EQ, self.name)
+        return [1], [1], [1]
+
+    def infer_dtype(self, var_dtype, accum_dtype, linear_dtype, grad_dtype, indices_dtype):
+        args = {"var_dtype": var_dtype, "accum_dtype": accum_dtype,
+                "linear_dtype": linear_dtype, "grad_dtype": grad_dtype}
+        validator.check_tensor_type_same(args, [mstype.float32], self.name)
+        validator.check_tensor_type_same({"indices_dtype": indices_dtype}, [mstype.int32], self.name)
+        return var_dtype, accum_dtype, linear_dtype
+
+
+class FusedSparseProximalAdagrad(PrimitiveWithInfer):
+    r"""
+    Merge the duplicate value of the gradient and then Updates relevant entries according to the proximal adagrad
+    algorithm.
+
+    .. math::
+            accum += grad * grad
+    .. math::
+            \text{prox_v} = var - lr * grad * \frac{1}{\sqrt{accum}}
+    .. math::
+            var = \frac{sign(\text{prox_v})}{1 + lr * l2} * \max(\left| \text{prox_v} \right| - lr * l1, 0)
+
+    Args:
+        use_locking (bool): If True, updating of the var and accum tensors will be protected. Default: False.
+
+    Inputs:
+        - **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
+        - **accum** (Parameter) - Variable tensor to be updated. Has the same dtype as `var`.
+        - **lr** (Tensor): The learning rate value. The data type must be float32.
+        - **l1** (Tensor): l1 regularization strength. The data type must be float32.
+        - **l2** (Tensor): l2 regularization strength. The data type must be float32.
+        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. The data type must be float32.
+        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. The data type
+          must be int32.
+
+    Outputs:
+        Tuple of 2 Tensor, this operator will update the input parameters directly, the outputs are useless.
+
+        - **var** (Tensor) - A Tensor with shape (1,).
+        - **accum** (Tensor) - A Tensor with shape (1,).
+
+    Examples:
+        >>> import numpy as np
+        >>> import mindspore.nn as nn
+        >>> from mindspore import Tensor, Parameter
+        >>> from mindspore.ops import operations as P
+        >>> class Net(nn.Cell):
+        >>>     def __init__(self):
+        >>>         super(Net, self).__init__()
+        >>>         self.sparse_apply_proximal_adagrad = P.FusedSparseProximalAdagrad()
+        >>>         self.var = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="var")
+        >>>         self.accum = Parameter(Tensor(np.random.rand(3, 1, 2).astype(np.float32)), name="accum")
+        >>>         self.lr = Tensor(0.01, mstype.float32)
+        >>>         self.l1 = Tensor(0.0, mstype.float32)
+        >>>         self.l2 = Tensor(0.0, mstype.float32)
+        >>>     def construct(self, grad, indices):
+        >>>         out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1,
+        >>>                                                  self.l2, grad, indices)
+        >>>         return out
+        >>> net = Net()
+        >>> grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
+        >>> indices = Tensor(np.array([0, 1]).astype(np.int32))
+        >>> output = net(grad, indices)
+    """
+    __mindspore_signature__ = (
+        ('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('l1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('l2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
+        ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T1)
+    )
+
+    @prim_attr_register
+    def __init__(self, use_locking=False):
+        self.init_prim_io_names(inputs=['var', 'accum', 'lr', 'l1', 'l2', 'grad', 'indices'],
+                                outputs=['output'])
+        self.use_locking = validator.check_value_type("use_locking", use_locking, [bool], self.name)
+
+    def infer_shape(self, var_shape, accum_shape, lr_shape, l1_shape, l2_shape, grad_shape, indices_shape):
+        validator.check_integer("indices rank", len(indices_shape), 1, Rel.EQ, self.name)
+        return [1], [1]
+
+    def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, l1_dtype, l2_dtype, grad_dtype, indices_dtype):
+        args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
+        validator.check_tensor_type_same(args, [mstype.float32], self.name)
+        validator.check_scalar_or_tensor_type_same({"lr": lr_dtype}, [mstype.float32], self.name)
+        validator.check_scalar_or_tensor_type_same({"l1": l1_dtype}, [mstype.float32], self.name)
+        validator.check_scalar_or_tensor_type_same({"l2": l2_dtype}, [mstype.float32], self.name)
+        valid_types = [mstype.int16, mstype.int32, mstype.int64,
+                       mstype.uint16, mstype.uint32, mstype.uint64]
+        validator.check_tensor_type_same({'indices': indices_dtype}, valid_types, self.name)
+        return var_dtype, accum_dtype
+
+
 class BinaryCrossEntropy(PrimitiveWithInfer):
     r"""
     Computes the Binary Cross Entropy between the target and the output.

@@ -33,7 +33,7 @@ epsilon = 1e-8
 class Net(nn.Cell):
     def __init__(self):
         super(Net, self).__init__()
-        self.sparse_apply_adam = P.SparseApplyAdam()
+        self.sparse_apply_adam = P.FusedSparseAdam()
         self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var")
         self.m = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="m")
         self.v = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="v")

@@ -26,7 +26,7 @@ import mindspore.common.dtype as mstype
 class Net(nn.Cell):
     def __init__(self):
         super(Net, self).__init__()
-        self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5)
+        self.sparse_apply_ftrl = P.FusedSparseFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5)
        self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var")
         self.accum = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="accum")
         self.linear = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="linear")

@@ -26,7 +26,7 @@ import mindspore.common.dtype as mstype
 class Net(nn.Cell):
     def __init__(self):
         super(Net, self).__init__()
-        self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
+        self.sparse_apply_proximal_adagrad = P.FusedSparseProximalAdagrad()
         self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="var")
         self.accum = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float32)), name="accum")
         self.lr = 0.01