!19196 fix print format error in nn.layer
Merge pull request !19196 from wangnan39/master
Commit 9b531e7877
@@ -264,8 +264,8 @@ class Conv2d(_Conv):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
             'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}, format={}'.format(
                 self.in_channels,
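Every conv hunk in this PR follows the same pattern: `extend_repr` builds the string shown by `print(net)` from implicitly concatenated literals, so a literal ending in `={},'` with no trailing space makes adjacent fields run together. A quick illustrative check of the corrected Conv2d format string (all values below are made up):

# Illustrative check of the corrected format string (hypothetical values).
s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
    'stride={}, pad_mode={}, padding={}, dilation={}, ' \
    'group={}, has_bias={}, ' \
    'weight_init={}, bias_init={}, format={}'.format(
        3, 64, (3, 3), (1, 1), 'same', 0, (1, 1), 1, False,
        'normal', 'zeros', 'NCHW')
print(s)
# -> input_channels=3, output_channels=64, kernel_size=(3, 3), stride=(1, 1), ...
# With the old literal 'kernel_size={},' the output read "kernel_size=(3, 3),stride=(1, 1)".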
@@ -456,9 +456,9 @@ class Conv1d(_Conv):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
-            'group={}, has_bias={},' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}'.format(
                 self.in_channels,
                 self.out_channels,
@@ -639,9 +639,9 @@ class Conv3d(_Conv):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
-            'group={}, has_bias={}' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}, format={}'.format(
                 self.in_channels,
                 self.out_channels,
@@ -816,9 +816,9 @@ class Conv3dTranspose(_Conv):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
-            'group={}, has_bias={},' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}'.format(self.in_channels,
                                                   self.out_channels,
                                                   self.kernel_size,
@@ -1018,9 +1018,9 @@ class Conv2dTranspose(_Conv):
         return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out))
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
-            'group={}, has_bias={},' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}'.format(self.in_channels,
                                                   self.out_channels,
                                                   self.kernel_size,
@@ -1207,9 +1207,9 @@ class Conv1dTranspose(_Conv):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' \
-            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
-            'group={}, has_bias={},' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}'.format(self.in_channels,
                                                   self.out_channels,
                                                   self.kernel_size,
@@ -512,8 +512,8 @@ class Conv2dThor(_ConvThor):
         return output
 
     def extend_repr(self):
-        s = 'input_channels={}, output_channels={}, kernel_size={},' 'stride={}, ' \
-            'pad_mode={}, padding={}, dilation={}, ' 'group={}, has_bias={},' \
+        s = 'input_channels={}, output_channels={}, kernel_size={}, stride={}, ' \
+            'pad_mode={}, padding={}, dilation={}, group={}, has_bias={}, ' \
             'weight_init={}, bias_init={}'.format(self.in_channels, self.out_channels, self.kernel_size,
                                                   self.stride, self.pad_mode, self.padding, self.dilation,
                                                   self.group, self.has_bias, self.weight_init, self.bias_init)
@@ -198,13 +198,14 @@ class Adam(Optimizer):
             m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
             v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
     :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
+    :math:`\epsilon` represents `eps`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
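All three Adam variants touched by this PR (Adam, AdamOffload, LazyAdam) document the same update, so the corrected formula is worth checking once in plain code. A minimal NumPy sketch of the documented step (illustrative only; the operator itself is a fused kernel):

import numpy as np

# Minimal sketch of the Adam update documented above (illustrative only;
# MindSpore uses a fused kernel). `t` is the 1-indexed step count.
def adam_step(w, m, v, g, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, t=1):
    m = beta1 * m + (1 - beta1) * g                      # m_{t+1}
    v = beta2 * v + (1 - beta2) * g * g                  # v_{t+1}
    l = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)  # scaling factor l
    w = w - l * m / (np.sqrt(v) + eps)                   # eps is the \epsilon above
    return w, m, v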
@@ -380,9 +381,9 @@ class AdamWeightDecay(Optimizer):
             update = \frac{m_{t+1}}{\sqrt{v_{t+1}} + eps} \\
             update =
             \begin{cases}
-                update + \weight\_decay * w_{t}
-                    & \text{ if } \weight\_decay > 0 \\
-                \update
+                update + weight\_decay * w_{t}
+                    & \text{ if } weight\_decay > 0 \\
+                update
                     & \text{ otherwise }
             \end{cases} \\
             w_{t+1} = w_{t} - lr * update
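The corrected cases block (the old version had stray backslashes that broke Sphinx rendering) is the usual decoupled weight-decay branch; written out in plain Python (illustrative sketch, not the MindSpore implementation):

# Illustrative sketch of the corrected \begin{cases} branch.
def decayed_update(update, w, weight_decay):
    if weight_decay > 0:
        return update + weight_decay * w  # update + weight_decay * w_t
    return update                         # otherwise unchanged
# The docstring's final step is then w_{t+1} = w_t - lr * decayed_update(...).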
@@ -515,13 +516,14 @@ class AdamOffload(Optimizer):
             m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
             v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
     :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
+    :math:`\epsilon` represents `eps`.
 
     Note:
         This optimizer only supports `GRAPH_MODE` currently.
@@ -117,13 +117,14 @@ class LazyAdam(Optimizer):
             m_{t+1} = \beta_1 * m_{t} + (1 - \beta_1) * g \\
             v_{t+1} = \beta_2 * v_{t} + (1 - \beta_2) * g * g \\
             l = \alpha * \frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
-            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \eps}
+            w_{t+1} = w_{t} - l * \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
         \end{array}
 
     :math:`m` represents the 1st moment vector `moment1`, :math:`v` represents the 2nd moment vector `moment2`,
     :math:`g` represents `gradients`, :math:`l` represents scaling factor, :math:`\beta_1, \beta_2` represent
     `beta1` and `beta2`, :math:`t` represents updating step while :math:`beta_1^t` and :math:`beta_2^t` represent
-    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`.
+    `beta1_power` and `beta2_power`, :math:`\alpha` represents `learning_rate`, :math:`w` represents `params`,
+    :math:`\epsilon` represents `eps`.
 
     Note:
         When separating parameter groups, the weight decay in each group will be applied on the parameters if the
@@ -108,7 +108,7 @@ class _ConvVariational(_Conv):
         return outputs
 
     def extend_repr(self):
-        s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, pad_mode={}, ' \
+        s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, pad_mode={}, ' \
             'padding={}, dilation={}, group={}, weight_mean={}, weight_std={}, has_bias={}' \
             .format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, self.padding,
                     self.dilation, self.group, self.weight_posterior.mean, self.weight_posterior.untransformed_std,
@@ -342,7 +342,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
            this function again to make modification, and sens needs to be of type Tensor.
 
        Inputs:
-            - **sens**(Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
+            - **sens** (Tensor) - The new sense whose shape and type are the same with original `scale_sense`.
        """
        if self.scale_sense and isinstance(sens, Tensor):
            self.scale_sense.set_data(sens)
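This hunk edits the docstring of the method that swaps in a new loss-scale value at run time; in MindSpore 1.x that method is named `set_sense_scale` (verify against your version). A usage sketch:

import mindspore as ms
from mindspore import Tensor

# Hedged usage sketch: update the loss scale on an already-built
# TrainOneStepWithLossScaleCell (`train_net` is assumed to exist).
train_net.set_sense_scale(Tensor(1024.0, ms.float32))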
@@ -360,11 +360,11 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
 
        Inputs:
            - **pre_cond** (Tensor) - A precondition for starting overflow detection. It determines the executing order
-              of overflow state clearing and prior processions. It makes sure that the function 'start_overflow'
-              clears status after finishing the process of precondition.
+              of overflow state clearing and prior processions. It makes sure that the function 'start_overflow'
+              clears status after finishing the process of precondition.
            - **compute_input** (object) - The input of subsequent process. Overflow detection should be performed on a
-              certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
-              cleared before executing the computation.
+              certain computation. Set `compute_input` as the input of the computation, to ensure overflow status is
+              cleared before executing the computation.
 
        Outputs:
            Tuple[object, object], the first value is False for GPU backend, while it is a instance of
@@ -391,8 +391,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
        Inputs:
            - **status** (object) - A status instance used to detect the overflow.
            - **compute_output** - Overflow detection should be performed on a certain computation. Set `compute_output`
-              as the output of the computation, to ensure overflow status is acquired before executing the
-              computation.
+              as the output of the computation, to ensure overflow status is acquired before executing the
+              computation.
 
        Outputs:
            bool, whether the overflow occurs or not.
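The two docstrings above describe a three-step protocol: clear overflow state before the guarded computation, run that computation, then read the state back. A hedged sketch of the flow inside a custom construct() (method names follow MindSpore 1.x, `start_overflow_check` and `get_overflow_status`; details such as filling the sens tensor are simplified away):

# Hedged sketch of the documented overflow-detection flow, simplified from
# TrainOneStepWithLossScaleCell.construct; verify names against your version.
def construct(self, *inputs):
    loss = self.network(*inputs)
    # 1) Clear overflow state: `loss` is the pre_cond, the loss scale is the
    #    compute_input threaded through so clearing happens first.
    status, scaling_sens = self.start_overflow_check(loss, self.scale_sense)
    grads = self.grad(self.network, self.weights)(*inputs, scaling_sens)
    # 2)+3) Read overflow state only after the gradient computation it guards.
    overflow = self.get_overflow_status(status, grads)
    if not overflow:
        self.optimizer(grads)
    return loss, overflow, scaling_sens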