!19042 modified the quant API document description
Merge pull request !19042 from Erpim/master
This commit is contained in:
commit
eaacc66601
|
@ -47,8 +47,9 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
|
|||
Config the observer type of weights and data flow with quant params.
|
||||
|
||||
Args:
|
||||
quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element
|
||||
represents weights and second element represents data flow.
|
||||
quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element
|
||||
applies to weights and second applies to data flow. Currently, only
|
||||
:class:`FakeQuantWithMinMaxObserver` supported.
|
||||
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
|
||||
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
|
||||
during train and eval. The first element represents weights and second element represents data flow.
|
||||
|
@ -66,7 +67,7 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
|
|||
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
|
||||
The first element represents weights and the second element represents data flow.
|
||||
Default: (False, False).
|
||||
mode (String): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
|
||||
mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
|
||||
Default: ("DEFAULT").
|
||||
|
||||
Returns:
|
||||
|
@ -545,7 +546,7 @@ class QuantizationAwareTraining(Quantizer):
|
|||
min_init = [-x for x in max_init]
|
||||
return min_init, max_init
|
||||
|
||||
def set_mixed_bits(self, network, strategy):
|
||||
def _set_mixed_bits(self, network, strategy):
|
||||
r"""
|
||||
Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
|
||||
optimize_option.
|
||||
|
@ -563,7 +564,7 @@ class QuantizationAwareTraining(Quantizer):
|
|||
ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
|
||||
"""
|
||||
if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
|
||||
raise ValueError("The `set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
|
||||
raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
|
||||
"optimize_option.")
|
||||
|
||||
self.quantizable_idx = []
|
||||
|
|
|
@ -337,11 +337,8 @@ def query_quant_layers(network):
|
|||
quantization layers are queried before graph compile optimization in the graph mode, thus there may appear some
|
||||
redundant quantized layers, which do not exist in practical execution.
|
||||
|
||||
Input:
|
||||
Args:
|
||||
network (Cell): input network
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
network = Validator.check_isinstance("network", network, nn.Cell)
|
||||
tplt = "{0:60}\t{1:10}"
|
||||
|
@ -359,7 +356,8 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
|
|||
Args:
|
||||
quant_model(Cell): Quantization model.
|
||||
params_dict(dict): Parameter dict that stores fp32 parameters.
|
||||
quant_new_params(list): Parameters that exist in quantization network but not in non-quantization network.
|
||||
quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
|
||||
network. Default: None.
|
||||
|
||||
Raises:
|
||||
TypeError: If `quant_new_params` is not None and is not list.
|
||||
|
|
|
@ -61,7 +61,7 @@ class Conv2dBnAct(Cell):
|
|||
Initializer and string are the same as 'weight_init'. Refer to the values of
|
||||
Initializer for more details. Default: 'zeros'.
|
||||
has_bn (bool): Specifies whether to use batchnorm or not. Default: False.
|
||||
momentum (float): Momentum for moving average for batchnorm, must be [0, 1]. Default:0.9
|
||||
momentum (float): Momentum for moving average for batchnorm, must be [0, 1]. Default: 0.997.
|
||||
eps (float): Term added to the denominator to improve numerical stability for batchnorm, should be greater
|
||||
than 0. Default: 1e-5.
|
||||
activation (Union[str, Cell, Primitive]): Specifies activation type. The optional values are as following:
|
||||
|
@ -69,6 +69,7 @@ class Conv2dBnAct(Cell):
|
|||
'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
|
||||
alpha (float): Slope of the activation function at x < 0 for LeakyReLU. Default: 0.2.
|
||||
after_fake(bool): Determine whether there must be a fake quantization operation after Conv2dBnAct.
|
||||
Default: True.
|
||||
|
||||
Inputs:
|
||||
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. The data type is float32.
|
||||
|
@ -170,6 +171,7 @@ class DenseBnAct(Cell):
|
|||
'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
|
||||
alpha (float): Slope of the activation function at x < 0 for LeakyReLU. Default: 0.2.
|
||||
after_fake(bool): Determine whether there must be a fake quantization operation after DenseBnAct.
|
||||
Default: True.
|
||||
|
||||
Inputs:
|
||||
- **x** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. The data type is float32.
|
||||
|
|
|
@ -294,14 +294,14 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
|
|||
|
||||
.. math::
|
||||
|
||||
\frac{\partial \ output}{\partial \ maxq} & = \left\{\begin{matrix}
|
||||
\frac{\partial \ output}{\partial \ maxq} = \left\{\begin{matrix}
|
||||
-\frac{X}{maxq}+\left \lfloor \frac{X}{maxq} \right \rceil \qquad if\quad bound_{lower}< \frac{X}{maxq}< 1\\
|
||||
-1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\le bound_{lower}\\
|
||||
1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\ge 1 \qquad \quad
|
||||
\end{matrix}\right. \\
|
||||
|
||||
bound_{lower}=
|
||||
\end{align}\left\{\begin{matrix}
|
||||
\left\{\begin{matrix}
|
||||
0\qquad \quad if\quad neg\_trunc\\
|
||||
-1\qquad if\quad otherwise
|
||||
\end{matrix}\right.
|
||||
|
@ -336,7 +336,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
|
|||
narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
|
||||
quant_delay (int): Quantization delay parameters according to the global step. Default: 0.
|
||||
neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False.
|
||||
mode (string): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
|
||||
mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
|
||||
Default: ("DEFAULT")
|
||||
Inputs:
|
||||
- **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. The input dimension is preferably 2D or 4D.
|
||||
|
@ -565,7 +565,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
|
|||
|
||||
This part is a more detailed overview of Conv2d operation. For more details about Quantization,
|
||||
please refer to the implementation of subclass of class:`_Observer`, for example,
|
||||
class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
||||
:class:`FakeQuantWithMinMaxObserver`.
|
||||
|
||||
.. math::
|
||||
w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma )
|
||||
|
@ -585,11 +585,11 @@ class Conv2dBnFoldQuantOneConv(Cell):
|
|||
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
|
||||
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
|
||||
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0.
|
||||
eps (float): Parameters for Batch Normalization. Default: 1e-5.
|
||||
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
|
||||
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
|
||||
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
|
||||
divisible by the number of groups. Default: 1.
|
||||
eps (float): Parameters for Batch Normalization. Default: 1e-5.
|
||||
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
|
||||
has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False.
|
||||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
|
||||
convolution kernel. Default: 'normal'.
|
||||
|
@ -604,9 +604,10 @@ class Conv2dBnFoldQuantOneConv(Cell):
|
|||
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
|
||||
variance vector. Default: 'ones'.
|
||||
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -802,7 +803,7 @@ class Conv2dBnFoldQuant(Cell):
|
|||
|
||||
This part is a more detailed overview of Conv2d operation. For more details about Quantization,
|
||||
please refer to the implementation of subclass of class:`_Observer`, for example,
|
||||
class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
|
||||
:class:`FakeQuantWithMinMaxObserver`.
|
||||
|
||||
.. math::
|
||||
y = x\times w+ b
|
||||
|
@ -823,11 +824,11 @@ class Conv2dBnFoldQuant(Cell):
|
|||
stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1.
|
||||
pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
|
||||
padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0.
|
||||
eps (float): Parameters for Batch Normalization. Default: 1e-5.
|
||||
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
|
||||
dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1.
|
||||
group (int): Splits filter into groups, `in_channels` and `out_channels` must be
|
||||
divisible by the number of groups. Default: 1.
|
||||
eps (float): Parameters for Batch Normalization. Default: 1e-5.
|
||||
momentum (float): Parameters for Batch Normalization op. Default: 0.997.
|
||||
has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
|
||||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
|
||||
convolution kernel. Default: 'normal'.
|
||||
|
@ -842,9 +843,10 @@ class Conv2dBnFoldQuant(Cell):
|
|||
var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the
|
||||
variance vector. Default: 'ones'.
|
||||
fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
|
||||
Default: 100000.
|
||||
|
@ -1059,9 +1061,10 @@ class Conv2dBnWithoutFoldQuant(Cell):
|
|||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
|
||||
Default: 'normal'.
|
||||
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -1202,9 +1205,10 @@ class Conv2dQuant(Cell):
|
|||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
|
||||
Default: 'normal'.
|
||||
bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -1339,9 +1343,10 @@ class DenseQuant(Cell):
|
|||
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
|
||||
activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer,
|
||||
e.g. 'relu'. Default: None.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -1464,9 +1469,10 @@ class ActQuant(_QuantActivation):
|
|||
ema (bool): The exponential Moving Average algorithm updates min and max. Default: False.
|
||||
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
|
||||
fake_before (bool): Whether add fake quantized operation before activation. Default: False.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -1550,9 +1556,10 @@ class TensorAddQuant(Cell):
|
|||
|
||||
Args:
|
||||
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by compression.quant.create_quant_config method.
|
||||
Default: both set to default FakeQuantWithMinMaxObserver.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
@ -1612,9 +1619,10 @@ class MulQuant(Cell):
|
|||
|
||||
Args:
|
||||
ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999.
|
||||
quant_config (QuantConfig): Configures the oberser types and quant settings of weight and activation. Can be
|
||||
generated by `compression.quant.create_quant_config` method.
|
||||
Default: both set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and
|
||||
activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization
|
||||
and can be generated by :func:`mindspore.compression.quant.create_quant_config` method.
|
||||
Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`.
|
||||
quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
|
||||
|
||||
Inputs:
|
||||
|
|
Loading…
Reference in New Issue