!47439 grad and getgrad documentation changes and warning cleanup
Merge pull request !47439 from 李良灿/graddocs
commit 11656d153c
@@ -97,6 +97,7 @@ mindspore
 
     mindspore.grad
     mindspore.value_and_grad
+    mindspore.get_grad
    mindspore.jacfwd
    mindspore.jacrev
    mindspore.jvp
@@ -0,0 +1,22 @@
+mindspore.get_grad
+==================
+
+.. py:function:: mindspore.get_grad(gradients, identifier)
+
+    A function that, given the input `identifier`, finds the corresponding gradient in the `gradients` returned by mindspore.grad when its `return_ids` parameter is True.
+
+    Looking up a gradient by `identifier` covers the following two cases:
+
+    1. `identifier` is the index of a differentiated input position;
+    2. `identifier` is a Parameter of the network.
+
+    Parameters:
+        - **gradients** (Union[tuple[int, Tensor], tuple[tuple, tuple]]) - The return value of mindspore.grad when `return_ids` is set to True.
+        - **identifier** (Union[int, Parameter]) - The index of a differentiated input position, or a Parameter of the network.
+
+    Returns:
+        The gradient for the input position index specified by `identifier`, or the gradient for the given network Parameter.
+
+    Raises:
+        - **ValueError** - No gradient corresponding to `identifier` can be found.
+        - **TypeError** - The type of an input argument does not meet the requirements.
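To make the new API concrete, here is a minimal usage sketch (not part of the patch; `fn`, `x` and `y` are illustrative) that tags gradients with their input position indices via `return_ids=True` and then retrieves one of them with `mindspore.get_grad`:

```python
# Minimal sketch: look up a gradient by input position index.
# Only mindspore.grad / mindspore.get_grad come from the patch under review.
import numpy as np
import mindspore as ms

def fn(x, y):
    return x * y

x = ms.Tensor(np.array([1.0, 2.0], np.float32))
y = ms.Tensor(np.array([3.0, 4.0], np.float32))

# With return_ids=True each gradient is paired with its input position index.
gradients = ms.grad(fn, grad_position=(0, 1), return_ids=True)(x, y)

# Fetch the gradient of the input at position 1, i.e. d(fn)/dy.
grad_y = ms.get_grad(gradients, 1)
print(grad_y)
```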
@@ -1,7 +1,7 @@
 mindspore.grad
 ==================
 
-.. py:function:: mindspore.grad(fn, grad_position=0, weights=None, has_aux=False)
+.. py:function:: mindspore.grad(fn, grad_position=0, weights=None, has_aux=False, return_ids=False)
 
 Generates the derivative function, which is used to compute the gradient of the given function.

@@ -16,9 +16,11 @@ mindspore.grad
     - **grad_position** (Union[NoneType, int, tuple[int]]) - Index of the input position(s) to differentiate. If int, a single input is differentiated; if tuple, the input positions given by the indices inside the tuple are differentiated, with indices starting from 0; if None, no input is differentiated, in which case `weights` must not be None. Default: 0.
     - **weights** (Union[ParameterTuple, Parameter, list[Parameter]]) - The Parameters of the training network whose gradients should be returned. They can usually be obtained via `weights = net.trainable_params()`. Default: None.
     - **has_aux** (bool) - Whether to return auxiliary outputs. If True, `fn` must return more than one output, only the first output of `fn` participates in the differentiation, and the other outputs are returned directly. Default: False.
+    - **return_ids** (bool) - Whether to return tuples made of the gradients and the indices of the differentiated input positions or the network Parameters. If True, every gradient in the output is replaced by a tuple made of that gradient and the input position index, or the network Parameter, used to compute it. Default: False.
 
 Returns:
     Function, the derivative function used to compute the gradient of the given function. For example, for `out1, out2 = fn(*args)`, if `has_aux` is True the gradient function returns a result of the form `(gradient, out2)`, where `out2` does not participate in the differentiation; if False, it returns `gradient` directly.
+    If `return_ids` is True, every gradient returned by the gradient function is replaced by a tuple made of that gradient and the index of the differentiated input position or the network Parameter.
 
 Raises:
     - **ValueError** - `grad_position` and `weights` are both None.
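As a quick illustration of the `return_ids` layout described above (a sketch, not part of the patch), the simplest case of a single differentiated input yields a flat `(index, gradient)` pair:

```python
# Sketch of the return_ids=True layout for a single differentiated input.
import numpy as np
import mindspore as ms

def fn(x, y):
    return x * y

x = ms.Tensor(np.array([1.0, 2.0], np.float32))
y = ms.Tensor(np.array([3.0, 4.0], np.float32))

out = ms.grad(fn, grad_position=0, return_ids=True)(x, y)
# Expected shape: (0, gradient_of_x) instead of the bare gradient tensor.
index, gradient = out
print(index, gradient)
```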
@@ -212,6 +212,7 @@ Automatic Differentiation
 
     mindspore.grad
    mindspore.value_and_grad
+    mindspore.get_grad
    mindspore.jacfwd
    mindspore.jacrev
    mindspore.jvp
@@ -386,7 +386,8 @@ void GetGradAbstract(const AbstractBasePtr &grads_abs, const std::string &para_n
   }
   auto abs0 = grad_abs_tuple->elements()[0];
   if (abs0->isa<AbstractScalar>()) {
-    auto build_value = abs0->cast_ptr<AbstractScalar>()->BuildValue();
+    auto buildptr = abs0->cast_ptr<AbstractScalar>();
+    auto build_value = buildptr->BuildValue();
     size_t expect_size = 2;
     if (grad_abs_tuple->elements().size() >= expect_size) {
       if (build_value->isa<Int64Imm>()) {
@@ -419,7 +420,11 @@ AbstractBasePtr InferImplGetGrad(const AnalysisEnginePtr &, const PrimitivePtr &
   int64_t position = -1;
   std::string para_name;
   if (hash_id_abs->isa<AbstractScalar>()) {
-    auto build_value = hash_id_abs->cast_ptr<AbstractScalar>()->BuildValue();
+    auto buildptr = hash_id_abs->cast_ptr<AbstractScalar>();
+    if (buildptr == nullptr) {
+      MS_EXCEPTION(TypeError) << "For " << op_name << ", the `x` should be an integer or a Parameter, but got nullptr";
+    }
+    auto build_value = buildptr->BuildValue();
     if (!build_value->isa<Int64Imm>()) {
       MS_EXCEPTION(TypeError) << "For " << op_name << ", the `x` should be an int64 number, but got "
                               << build_value->ToString();
@@ -431,6 +431,45 @@ class _TaylorOperation(TaylorOperation_):
         return self.grad_fn
 
 
+def _combine_with_ids(grad_position, weights, out):
+    """ Making resulting tuple, when return_ids is set to True. """
+    out_with_ids = []
+    position = 0
+    position_tuple = []
+    weight_tuple = []
+    if grad_position == (0,) and weights is not None:
+        position_tuple.append(0)
+        position_tuple.append(out[0])
+    elif grad_position == (0,):
+        position_tuple.append(0)
+        position_tuple.append(out)
+    elif weights is not None:
+        for index in grad_position:
+            position_tuple.append((index, out[0][position]))
+            position += 1
+        position = 0
+    else:
+        for index in grad_position:
+            position_tuple.append((index, out[position]))
+            position += 1
+        position = 0
+    out_with_ids.append(tuple(position_tuple))
+    if weights and isinstance(weights, (list, ParameterTuple)):
+        for weight in weights:
+            weight_tuple.append((weight.name, out[1][position]))
+            position += 1
+        out_with_ids.append(tuple(weight_tuple))
+    elif weights:
+        weight_tuple.append(weights.name)
+        weight_tuple.append(out[1])
+        out_with_ids.append(tuple(weight_tuple))
+    else:
+        out_with_ids = position_tuple
+    if not out_with_ids:
+        raise ValueError(f"output tuple should not be a empty tuple.")
+    return tuple(out_with_ids)
+
+
 class _Grad(GradOperation_):
     """
     A higher-order function which is used to generate the gradient function by position for the input function.
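To make the tuple layout produced by the new helper easier to follow, here is a small self-contained sketch (plain Python, no MindSpore imports; names are illustrative) of the position-only pairing it performs when `weights` is None:

```python
# Illustrative stand-in for the position-only branch of _combine_with_ids:
# pair each requested input position with the corresponding gradient.
def pair_with_positions(grad_position, out):
    if grad_position == (0,):
        # A single differentiated input yields a flat (index, gradient) pair.
        return (0, out)
    # Several positions yield one (index, gradient) pair per position.
    return tuple((index, out[i]) for i, index in enumerate(grad_position))

g0, g1 = [0.1, 0.2], [0.3, 0.4]                 # stand-ins for gradient tensors
print(pair_with_positions((0,), g0))            # (0, [0.1, 0.2])
print(pair_with_positions((0, 1), (g0, g1)))    # ((0, [0.1, 0.2]), (1, [0.3, 0.4]))
```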
@@ -520,7 +559,7 @@ class _Grad(GradOperation_):
             out = _pynative_executor()
             out = _grads_divided_by_device_num_if_recomputation(out)
             if self.return_ids and out:
-                out = self._combine_with_ids(grad_position, weights, out)
+                out = _combine_with_ids(grad_position, weights, out)
             if self.get_value:
                 return res, out
             if self.has_aux:
@@ -550,42 +589,6 @@ class _Grad(GradOperation_):
         self.grad_hash_id = (grad_position, weights_id)
         return self.grad_fn
 
-    def _combine_with_ids(self, grad_position, weights, out):
-        """ Making resulting tuple, when return_ids is set to True. """
-        out_with_ids = []
-        j = 0
-        position_tuple = []
-        weight_tuple = []
-        if grad_position == (0,) and weights is not None:
-            position_tuple.append(0)
-            position_tuple.append(out[0])
-        elif grad_position == (0,):
-            position_tuple.append(0)
-            position_tuple.append(out)
-        elif weights is not None:
-            for i in grad_position:
-                position_tuple.append((i, out[0][j]))
-                j += 1
-            j = 0
-        else:
-            for i in grad_position:
-                position_tuple.append((i, out[j]))
-                j += 1
-            j = 0
-        out_with_ids.append(tuple(position_tuple))
-        if weights and isinstance(weights, (list, ParameterTuple)):
-            for weight in weights:
-                weight_tuple.append((weight.name, out[1][j]))
-                j += 1
-            out_with_ids.append(tuple(weight_tuple))
-        elif weights:
-            weight_tuple.append(weights.name)
-            weight_tuple.append(out[1])
-            out_with_ids.append(tuple(weight_tuple))
-        else:
-            out_with_ids = position_tuple
-        return tuple(out_with_ids)
-
     def _pynative_forward_run(self, fn, grad, args, kwargs):
         """ Pynative forward runs to build grad graph. """
         new_kwargs = kwargs
@@ -115,10 +115,11 @@ def grad(fn, grad_position=0, weights=None, has_aux=False, return_ids=False):
         has_aux (bool): If True, only the first output of `fn` contributes the gradient of `fn`, while the other outputs
             will be returned straightly. It means the `fn` must return more than one outputs in this case.
             Default: False.
-        return_ids(bool): If True, every output gradient will be combined with its position id or parameter name as a
-            tuple. The format of the output will be the same with the output of grad when return_ids is set to false,
-            but every gradient in the output will be replaced by a tuple of position id or parameter name and its
-            gradient.
+        return_ids(bool): Whether return the tuple made by gradients and the index to specify which inputs
+            to be differentiated or the name of parameters of the training network that need to calculate the gradient.
+            If True, the output gradients will be replaced by the tuples made by gradients and the index to specify
+            which inputs to be differentiated or the name of parameters of the training network.
+            Default: False.
 
     Returns:
         Function, the gradient function to calculate gradient for the input function or cell.
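A hedged sketch of the layout this docstring describes when both an input position and network weights are requested (the network, parameter name and values below are illustrative, not from the patch):

```python
# Sketch: return_ids=True with both an input position and network weights.
import numpy as np
import mindspore as ms
from mindspore import nn

class Net(nn.Cell):
    def __init__(self):
        super().__init__()
        self.w = ms.Parameter(ms.Tensor(np.array([2.0], np.float32)), name='w')

    def construct(self, x):
        return x * self.w

net = Net()
x = ms.Tensor(np.array([3.0], np.float32))

grad_fn = ms.grad(net, grad_position=0, weights=net.trainable_params(), return_ids=True)
input_grads, weight_grads = grad_fn(x)
# input_grads  is expected to be (0, grad_of_x).
# weight_grads is expected to be (('w', grad_of_w),), pairing each gradient
# with its parameter name (assumed to be 'w' here).
print(input_grads)
print(weight_grads)
```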
@@ -357,7 +358,7 @@ def value_and_grad(fn, grad_position=0, weights=None, has_aux=False):
     return _get_grad_op(True, True, has_aux, True)(fn, weights, grad_position)
 
 
-def get_grad(gradients, x):
+def get_grad(gradients, identifier):
     """
     A function to get get expected gradient from the return value of ops.grad, when it has return_ids parameter set
     to True, by using the position id of a tensor or the parameter.
@@ -370,8 +371,10 @@ def get_grad(gradients, x):
     the parameter as the second input.
 
     Args:
-        The return value of ops.grad.
-        position number of a tensor, or a parameter that is used in ops.grad.
+        gradients (Union[tuple[int, Tensor], tuple[tuple, tuple]]): The return value of mindspore.grad when return_ids
+            is set to True.
+        identifier (Union[int, Parameter]): The position number of a tensor, or a parameter that is used in
+            mindspore.grad.
 
     Returns:
         The gradient of the tensor on the position of the position number used as the second input, or the gradient
@@ -404,7 +407,7 @@ def get_grad(gradients, x):
         >>> print(output)
         Tensor(shape=[2], dtype=Float32, value=[0.00000000e+00, 6.00000000e+00])
     """
-    return inner.GetGrad()(gradients, x)
+    return inner.GetGrad()(gradients, identifier)
 
 
 def _trans_jet_inputs(primals_item, series_item):
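Finally, a usage sketch (illustrative, not from the patch) of the second lookup mode: passing the Parameter itself as the `identifier` to retrieve its gradient from the `return_ids=True` result:

```python
# Sketch: look up a weight's gradient by passing the Parameter as identifier.
import numpy as np
import mindspore as ms
from mindspore import nn

class Net(nn.Cell):
    def __init__(self):
        super().__init__()
        self.w = ms.Parameter(ms.Tensor(np.array([2.0], np.float32)), name='w')

    def construct(self, x):
        return x * self.w

net = Net()
x = ms.Tensor(np.array([3.0], np.float32))

gradients = ms.grad(net, grad_position=0, weights=net.trainable_params(),
                    return_ids=True)(x)

# Retrieve the gradient of the weight via the Parameter object.
grad_w = ms.get_grad(gradients, net.w)
print(grad_w)
```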