!19196 fix print format error in nn.layer

Merge pull request !19196 from wangnan39/master
This commit is contained in:
i-robot 2021-07-01 11:20:01 +00:00 committed by Gitee
commit 9b531e7877
6 changed files with 39 additions and 36 deletions

View File

@ -264,8 +264,8 @@ class Conv2d(_Conv):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={}, ' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}, format={}'.format( 'weight_init={}, bias_init={}, format={}'.format(
self.in_channels, self.in_channels,
@ -456,9 +456,9 @@ class Conv1d(_Conv):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={},' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}'.format( 'weight_init={}, bias_init={}'.format(
self.in_channels, self.in_channels,
self.out_channels, self.out_channels,
@ -639,9 +639,9 @@ class Conv3d(_Conv):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={}' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}, format={}'.format( 'weight_init={}, bias_init={}, format={}'.format(
self.in_channels, self.in_channels,
self.out_channels, self.out_channels,
@ -816,9 +816,9 @@ class Conv3dTranspose(_Conv):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={},' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}'.format(self.in_channels, 'weight_init={}, bias_init={}'.format(self.in_channels,
self.out_channels, self.out_channels,
self.kernel_size, self.kernel_size,
@ -1018,9 +1018,9 @@ class Conv2dTranspose(_Conv):
return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)) return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={},' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}'.format(self.in_channels, 'weight_init={}, bias_init={}'.format(self.in_channels,
self.out_channels, self.out_channels,
self.kernel_size, self.kernel_size,
@ -1207,9 +1207,9 @@ class Conv1dTranspose(_Conv):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' \ s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
'stride={}, pad_mode={}, padding={}, dilation={}, ' \ 'stride={}, pad_mode={}, padding={}, dilation={}, ' \
'group={}, has_bias={},' \ 'group={}, has_bias={}, ' \
'weight_init={}, bias_init={}'.format(self.in_channels, 'weight_init={}, bias_init={}'.format(self.in_channels,
self.out_channels, self.out_channels,
self.kernel_size, self.kernel_size,

View File

@ -512,8 +512,8 @@ class Conv2dThor(_ConvThor):
return output return output
def extend_repr(self): def extend_repr(self):
s = 'input_channels={}, output_channels={}, kernel_size={},' 'stride={}, ' \ s = 'input_channels={}, output_channels={}, kernel_size={}, stride={}, ' \
'pad_mode={}, padding={}, dilation={}, ' 'group={}, has_bias={},' \ 'pad_mode={}, padding={}, dilation={}, group={}, has_bias={}, ' \
'weight_init={}, bias_init={}'.format(self.in_channels, self.out_channels, self.kernel_size, 'weight_init={}, bias_init={}'.format(self.in_channels, self.out_channels, self.kernel_size,
self.stride, self.pad_mode, self.padding, self.dilation, self.stride, self.pad_mode, self.padding, self.dilation,
self.group, self.has_bias, self.weight_init, self.bias_init) self.group, self.has_bias, self.weight_init, self.bias_init)

View File

@ -198,13 +198,14 @@ class Adam(Optimizer):
m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`. `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
:math:`\epsilon` represents `eps`.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the
@ -380,9 +381,9 @@ class AdamWeightDecay(Optimizer):
update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\ update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\
update = update =
\begin{cases} \begin{cases}
update + \weight\_decay * w_{t} update + weight\_decay * w_{t}
& \text{ if } \weight\_decay > 0 \\ & \text{ if } weight\_decay > 0 \\
\update update
& \text{ otherwise } & \text{ otherwise }
\end{cases} \\ \end{cases} \\
w_{t+1} = w_{t} - lr * update w_{t+1} = w_{t} - lr * update
@ -515,13 +516,14 @@ class AdamOffload(Optimizer):
m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`. `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
:math:`\epsilon` represents `eps`.
Note: Note:
This optimizer only supports `GRAPH_MODE` currently. This optimizer only supports `GRAPH_MODE` currently.

View File

@ -117,13 +117,14 @@ class LazyAdam(Optimizer):
m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\ m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\ v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\ l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps} w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
\end{array} \end{array}
:math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`, :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
:math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
`beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent `beta1` and `beta2`, :math:`t` represents updating step while :math:`\beta_1^t` and :math:`\beta_2^t` represent
`beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`. `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
:math:`\epsilon` represents `eps`.
Note: Note:
When separating parameter groups, the weight decay in each group will be applied on the parameters if the When separating parameter groups, the weight decay in each group will be applied on the parameters if the

View File

@ -108,7 +108,7 @@ class _ConvVariational(_Conv):
return outputs return outputs
def extend_repr(self): def extend_repr(self):
s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, pad_mode={}, ' \ s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, pad_mode={}, ' \
'padding={}, dilation={}, group={}, weight_mean={}, weight_std={}, has_bias={}' \ 'padding={}, dilation={}, group={}, weight_mean={}, weight_std={}, has_bias={}' \
.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, self.padding, .format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, self.padding,
self.dilation, self.group, self.weight_posterior.mean, self.weight_posterior.untransformed_std, self.dilation, self.group, self.weight_posterior.mean, self.weight_posterior.untransformed_std,

View File

@ -342,7 +342,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
this function again to make modification, and sens needs to be of type Tensor. this function again to make modification, and sens needs to be of type Tensor.
Inputs: Inputs:
- **sens**(Tensor) - The new sense whose shape and type are the same with original `scale_sense`. - **sens** (Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
""" """
if self.scale_sense and isinstance(sens, Tensor): if self.scale_sense and isinstance(sens, Tensor):
self.scale_sense.set_data(sens) self.scale_sense.set_data(sens)
@ -360,11 +360,11 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
Inputs: Inputs:
- **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order
of overflow state clearing and prior processes. It makes sure that the function 'start_overflow' of overflow state clearing and prior processes. It makes sure that the function 'start_overflow'
clears status after finishing the process of precondition. clears status after finishing the process of precondition.
- **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a - **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a
certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
cleared before executing the computation. cleared before executing the computation.
Outputs: Outputs:
Tuple[object, object], the first value is False for GPU backend, while it is an instance of Tuple[object, object], the first value is False for GPU backend, while it is an instance of
@ -391,8 +391,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
Inputs: Inputs:
- **status** (object) - A status instance used to detect the overflow. - **status** (object) - A status instance used to detect the overflow.
- **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output` - **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output`
as the output of the computation, to ensure overflow status is acquired before executing the as the output of the computation, to ensure overflow status is acquired before executing the
computation. computation.
Outputs: Outputs:
bool, whether the overflow occurs or not. bool, whether the overflow occurs or not.