forked from mindspore-Ecosystem/mindspore
!43675 shield SparseApplyAdagrad Primitive interface
Merge pull request !43675 from 李林杰/1011_fix_some_issues_master
commit abbceb3f16
@@ -3,36 +3,5 @@ mindspore.ops.SparseApplyAdagrad

 .. py:class:: mindspore.ops.SparseApplyAdagrad(lr, update_slots=True, use_locking=False)

-    Updates the relevant parameters according to the Adagrad algorithm.
+    Deprecated
-
-    .. math::
-        \begin{array}{ll} \\
-            accum += grad * grad \\
-            var -= lr * grad * \frac{1}{\sqrt{accum}}
-        \end{array}
-
-    The inputs `var`, `accum` and `grad` follow the implicit type conversion rules to keep the data types
-    consistent. If they have different data types, the lower-precision data type is converted to the
-    relatively highest-precision data type.
-
-    Args:
-        - **lr** (float) - Learning rate.
-        - **update_slots** (bool) - If True, `accum` will be updated. Default: True.
-        - **use_locking** (bool) - Whether to protect the parameter update with a lock. Default: False.
-
-    Inputs:
-        - **var** (Parameter) - Variable to be updated. It can be of any dimension, and its data type is
-          float16 or float32.
-        - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
-        - **grad** (Tensor) - Gradient, a Tensor. The shape and data type must be the same as `var`, and
-          :math:`grad.shape[1:] = var.shape[1:] if var.shape > 1` must hold.
-        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. The data
-          type is int32, and :math:`indices.shape[0] = grad.shape[0]` must hold.
-
-    Outputs:
-        A tuple of 2 Tensors, the updated parameters.
-
-        - **var** (Tensor) - The same shape and data type as `var`.
-        - **accum** (Tensor) - The same shape and data type as `accum`.
-
-    Raises:
-        - **TypeError** - If `lr` is not a float.
-        - **TypeError** - If `update_slots` or `use_locking` is not a bool.
-        - **TypeError** - If the data type of `var`, `accum`, `lr` or `grad` is neither float16 nor float32.
-        - **TypeError** - If the data type of `indices` is not int32.
-        - **RuntimeError** - If data type conversion between `var`, `accum` and `grad` is not supported.
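The formula removed from the documentation above is the standard sparse Adagrad update: for each row selected by `indices`, the squared gradient is accumulated and the variable is scaled by the inverse square root of that accumulator. Below is a minimal NumPy sketch of that rule, assuming dense `var`/`accum` arrays and int32 row indices. It is an illustration only, not the MindSpore kernel; the tiny inputs mirror the docstring example, but with `indices=[0]` so the single row is actually touched.

```python
import numpy as np

def sparse_apply_adagrad(var, accum, grad, indices, lr, update_slots=True):
    """Illustrative sketch of the documented update rule (not the MindSpore kernel)."""
    for k, idx in enumerate(indices):
        if update_slots:
            accum[idx] += grad[k] * grad[k]                  # accum += grad * grad
        var[idx] -= lr * grad[k] / np.sqrt(accum[idx])       # var -= lr * grad / sqrt(accum)
    return var, accum

var = np.array([[[0.2]]], dtype=np.float32)
accum = np.array([[[0.1]]], dtype=np.float32)
grad = np.array([[[0.7]]], dtype=np.float32)
indices = np.array([0], dtype=np.int32)
print(sparse_apply_adagrad(var, accum, grad, indices, lr=1e-8))
```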
@@ -76,8 +76,8 @@ def get_broadcast_shape(x_shape, y_shape, prim_name, shape_type="", arg_name1="x
             raise ValueError(f"For '{prim_name}', {arg_name1}.shape and {arg_name2}.shape need to "
                              f"broadcast. The value of {arg_name1}.shape[{i}] or {arg_name2}.shape[{i}]"
                              f" must be 1 or -1 when they are not the same, "
-                             f"but got {arg_name1}.shape[{i}] = {x_shape} "
-                             f"and {arg_name2}.shape[{i}] = {y_shape}.")
+                             f"but got {arg_name1}.shape = {x_shape} "
+                             f"and {arg_name2}.shape = {y_shape}.")

     broadcast_shape_front = y_shape[0: y_len - length] if length == x_len else x_shape[0: x_len - length]
     broadcast_shape = list(broadcast_shape_front) + broadcast_shape_back
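This hunk only rewrites an error message in the shape-broadcast helper: `x_shape` and `y_shape` hold the complete shape lists, not the single dimension at position `i`, so printing them after `.shape[{i}] =` was misleading. The following is a simplified sketch of the broadcasting check that raises this error, with the dynamic `-1` dimensions and the `shape_type`/argument-name plumbing omitted; it is an approximation for illustration, not the exact MindSpore helper.

```python
def get_broadcast_shape(x_shape, y_shape, prim_name, arg_name1="x", arg_name2="y"):
    """Simplified sketch: right-align the two shapes and broadcast dimension by dimension."""
    length = min(len(x_shape), len(y_shape))
    broadcast_shape_back = []
    for i in range(-length, 0):
        if x_shape[i] == 1:
            broadcast_shape_back.append(y_shape[i])
        elif y_shape[i] == 1 or x_shape[i] == y_shape[i]:
            broadcast_shape_back.append(x_shape[i])
        else:
            # The corrected message reports the full shapes, not a single dimension.
            raise ValueError(f"For '{prim_name}', {arg_name1}.shape and {arg_name2}.shape need to "
                             f"broadcast, but got {arg_name1}.shape = {x_shape} "
                             f"and {arg_name2}.shape = {y_shape}.")
    front = y_shape[:len(y_shape) - length] if length == len(x_shape) else x_shape[:len(x_shape) - length]
    return list(front) + broadcast_shape_back

print(get_broadcast_shape([2, 1, 3], [4, 3], "Add"))  # [2, 4, 3]
```

With incompatible shapes such as `[2, 3]` and `[4, 3]`, the corrected message now shows the whole shapes instead of mislabelling them as the values at index `i`.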
@@ -5982,71 +5982,8 @@ class ApplyAdagradV2(Primitive):


 class SparseApplyAdagrad(Primitive):
-    r"""
-    Updates relevant entries according to the adagrad scheme.
+    """
+    Deprecated
-
-    .. math::
-        \begin{array}{ll} \\
-            accum += grad * grad \\
-            var -= lr * grad * (1 / sqrt(accum))
-        \end{array}
-
-    Inputs of `var`, `accum` and `grad` comply with the implicit type conversion rules
-    to make the data types consistent.
-    If they have different data types, the lower priority data type will be converted to
-    the relatively highest priority data type.
-
-    Args:
-        lr (float): Learning rate.
-        update_slots (bool): If `True`, `accum` will be updated. Default: True.
-        use_locking (bool): If true, the `var` and `accum` tensors will be protected from being updated.
-            Default: False.
-
-    Inputs:
-        - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
-          The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
-        - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`.
-        - **grad** (Tensor) - Gradients has the same data type as `var` and
-          grad.shape[1:] = var.shape[1:] if var.shape > 1.
-        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
-          The type must be int32 and indices.shape[0] = grad.shape[0].
-
-    Outputs:
-        Tuple of 2 tensors, the updated parameters.
-
-        - **var** (Tensor) - The same shape and data type as `var`.
-        - **accum** (Tensor) - The same shape and data type as `accum`.
-
-    Raises:
-        TypeError: If `lr` is not a float.
-        TypeError: If neither `update_slots` nor `use_locking` is a bool.
-        TypeError: If dtype of `var`, `accum` or `grad` is neither float16 nor float32.
-        TypeError: If dtype of `indices` is not int32.
-        RuntimeError: If the data type of `var`, `accum` and `grad` conversion of Parameter is not supported.
-
-    Supported Platforms:
-        ``Ascend`` ``CPU`` ``GPU``
-
-    Examples:
-        >>> class Net(nn.Cell):
-        ...     def __init__(self):
-        ...         super(Net, self).__init__()
-        ...         self.sparse_apply_adagrad = ops.SparseApplyAdagrad(lr=1e-8)
-        ...         self.var = Parameter(Tensor(np.array([[[0.2]]]).astype(np.float32)), name="var")
-        ...         self.accum = Parameter(Tensor(np.array([[[0.1]]]).astype(np.float32)), name="accum")
-        ...     def construct(self, grad, indices):
-        ...         out = self.sparse_apply_adagrad(self.var, self.accum, grad, indices)
-        ...         return out
-        ...
-        >>> net = Net()
-        >>> grad = Tensor(np.array([[[0.7]]]).astype(np.float32))
-        >>> indices = Tensor([1], mindspore.int32)
-        >>> output = net(grad, indices)
-        >>> print(output)
-        (Tensor(shape=[1, 1, 1], dtype=Float32, value=
-        [[[ 2.00000003e-01]]]), Tensor(shape=[1, 1, 1], dtype=Float32, value=
-        [[[ 1.00000001e-01]]]))
     """

     __mindspore_signature__ = (
@@ -6056,6 +5993,7 @@ class SparseApplyAdagrad(Primitive):
         sig.make_sig('indices', dtype=sig.sig_dtype.T1)
     )

+    @deprecated("1.9", "SparseApplyAdagrad", False)
     @prim_attr_register
     def __init__(self, lr, update_slots=True, use_locking=False):
         """Initialize SparseApplyAdagrad."""
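The only behavioural change in the primitive itself is the `@deprecated("1.9", "SparseApplyAdagrad", False)` decorator added above `__init__`, which marks the operator as deprecated since version 1.9 and presumably emits a notice when it is instantiated. As a rough illustration of the pattern, here is a hand-rolled stand-in; the argument meanings are assumptions for this sketch, and it is not MindSpore's own `deprecated` helper.

```python
import functools
import warnings

def deprecated(version, operator_name, substitute_available=True):
    """Illustrative decorator: warn when a deprecated primitive's __init__ runs."""
    def wrap(init):
        @functools.wraps(init)
        def wrapper(self, *args, **kwargs):
            hint = ("use the substitute operator instead" if substitute_available
                    else "there is no direct substitute")
            warnings.warn(f"'{operator_name}' is deprecated since version {version}; {hint}.",
                          DeprecationWarning)
            return init(self, *args, **kwargs)
        return wrapper
    return wrap

class SparseApplyAdagrad:
    @deprecated("1.9", "SparseApplyAdagrad", False)
    def __init__(self, lr, update_slots=True, use_locking=False):
        self.lr, self.update_slots, self.use_locking = lr, update_slots, use_locking

SparseApplyAdagrad(lr=1e-8)  # triggers the DeprecationWarning
```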