forked from mindspore-Ecosystem/mindspore
All descriptions of batch normalization under the mindspore folder have been uniformly updated to read 'Batch Normalization'.
This commit is contained in:
parent 4631facee9
commit 865cf68243
@@ -51,12 +51,12 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
 auto prop_kind = dnnl::prop_kind::forward_training;
 auto normalization_flags = dnnl::normalization_flags::use_scale_shift;

-// fused batch normalization forward description
+// fused Batch Normalization forward description
 dnnl::batch_normalization_forward::desc desc =
 dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
 auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());

-// fused batch normalization backward description
+// fused Batch Normalization backward description
 dnnl::batch_normalization_backward::desc backward_desc =
 dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
 auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
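The descriptors above pair a fused forward-training pass with its backward pass under use_scale_shift. For reference, a minimal NumPy sketch of the per-channel transform such a forward primitive computes (the NCHW layout and the names gamma, beta are assumptions, not taken from this kernel):

import numpy as np

def bn_forward_sketch(x, gamma, beta, eps=1e-5):
    # x is assumed NCHW; statistics are taken per channel
    mean = x.mean(axis=(0, 2, 3), keepdims=True)
    var = x.var(axis=(0, 2, 3), keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)  # normalize
    # scale and shift, which is what use_scale_shift enables
    return gamma.reshape(1, -1, 1, 1) * x_hat + beta.reshape(1, -1, 1, 1)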
@@ -33,7 +33,7 @@ enum OpMergeMode {
 OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list
 OP_MERGE_CONV = 2, // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
 OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
-OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
 OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
 };

@@ -339,7 +339,7 @@ class QuantizationAwareTraining(Quantizer):
 quant_config=self.quant_config,
 quant_dtype=self.weight_dtype,
 fake=True)
-# change original network BatchNormal OP parameters to quant network
+# change original network Batch Normalization OP parameters to quant network
 conv_inner.gamma = subcell.batchnorm.gamma
 conv_inner.beta = subcell.batchnorm.beta
 conv_inner.moving_mean = subcell.batchnorm.moving_mean
@@ -363,7 +363,7 @@ class QuantizationAwareTraining(Quantizer):
 bias_init=conv_inner.bias_init,
 quant_config=self.quant_config,
 quant_dtype=self.weight_dtype)
-# change original network BatchNormal OP parameters to quant network
+# change original network Batch Normalization OP parameters to quant network
 conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
 conv_inner.batchnorm.beta = subcell.batchnorm.beta
 conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""less batch normalization"""
+"""less Batch Normalization"""
 import numpy as np
 from mindspore import nn
 from mindspore.ops import operations as P
@@ -233,7 +233,7 @@ def _shape_check_bn(in_shape, in_dims):

 @constexpr
 def _shape_infer(x_shape, num_feature):
-"""global batch normalization shape and axes infer"""
+"""global Batch Normalization shape and axes infer"""
 if len(x_shape) == 4:
 axes = (0, 2, 3)
 re_shape = (1, num_feature, 1, 1)
@@ -245,7 +245,7 @@ def _shape_infer(x_shape, num_feature):

 class BatchNorm1d(_BatchNorm):
 r"""
-Batch normalization layer over a 2D input.
+Batch Normalization layer over a 2D input.

 Batch Normalization is widely used in convolutional networks. This layer
 applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to
@@ -334,7 +334,7 @@ class BatchNorm1d(_BatchNorm):

 class BatchNorm2d(_BatchNorm):
 r"""
-Batch normalization layer over a 4D input.
+Batch Normalization layer over a 4D input.

 Batch Normalization is widely used in convolutional networks. This layer
 applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with
@@ -441,7 +441,7 @@ def _check_3d_shape(input_shape):

 class BatchNorm3d(Cell):
 r"""
-Batch normalization layer over a 5D input.
+Batch Normalization layer over a 5D input.

 Batch Normalization is widely used in convolutional networks. This layer
 applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with
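A brief, hedged usage sketch of the input ranks these layers document (2D for BatchNorm1d, 4D for BatchNorm2d, 5D for BatchNorm3d); only num_features is passed here, all other constructor arguments are left at their defaults:

import numpy as np
import mindspore
from mindspore import Tensor, nn

bn2d = nn.BatchNorm2d(num_features=3)                 # expects input of shape (N, C, H, W)
x = Tensor(np.ones([2, 3, 4, 4]), mindspore.float32)
y = bn2d(x)                                           # output keeps the input shape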
@@ -540,7 +540,7 @@ class GlobalBatchNorm(_BatchNorm):
 r"""
 Global normalization layer over a N-dimension input.

-Global Normalization is cross device synchronized batch normalization. The implementation of Batch Normalization
+Global Normalization is cross device synchronized Batch Normalization. The implementation of Batch Normalization
 only normalizes the data within each device. Global normalization will normalize the input within the group.
 It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
 Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
@@ -653,10 +653,10 @@ class GlobalBatchNorm(_BatchNorm):

 class SyncBatchNorm(_BatchNorm):
 r"""
-Sync Batch normalization layer over a N-dimension input.
+Sync Batch Normalization layer over a N-dimension input.

-Sync Batch Normalization is cross device synchronized batch normalization. The implementation of Batch
-Normalization only normalizes the data within each device. Sync Batch normalization will normalize the input
+Sync Batch Normalization is cross device synchronized Batch Normalization. The implementation of Batch
+Normalization only normalizes the data within each device. Sync Batch Normalization will normalize the input
 within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
 Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
 feature using a mini-batch of data and the learned parameters which can be described in the following formula.
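What "cross device synchronized" adds, conceptually, is that the mini-batch mean and variance are reduced across the devices in the group before normalizing. A schematic sketch of that aggregation (the all_reduce_sum helper is a placeholder for a group-wide sum, not MindSpore's implementation, and a 2D (N, C) input is assumed):

import numpy as np

def sync_bn_stats_sketch(local_x, all_reduce_sum):
    # local_x: this device's (N, C) activations; statistics are pooled over the group
    n_total = all_reduce_sum(np.array(float(local_x.shape[0])))
    mean = all_reduce_sum(local_x.sum(axis=0)) / n_total          # group mean per feature
    sq_mean = all_reduce_sum((local_x ** 2).sum(axis=0)) / n_total
    var = sq_mean - mean ** 2                                     # group variance per feature
    return mean, var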
@@ -47,7 +47,7 @@ __all__ = [

 class BatchNormFoldCell(Cell):
 """
-Batch normalization folded.
+Batch Normalization folded.

 Args:
 momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -402,7 +402,8 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio
 
 class Conv2dBnFoldQuantOneConv(Cell):
 r"""
-2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct.
+2D convolution which use the convolution layer statistics once to calculate Batch Normalization
+operation folded construct.

 This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
 please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
@@ -414,8 +415,8 @@ class Conv2dBnFoldQuantOneConv(Cell):
 stride (int): Specifies stride for all spatial dimensions with the same value.
 pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
 padding (int): Implicit paddings on both sides of the input. Default: 0.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
 divisible by the number of groups. Default: 1.
@@ -600,7 +601,7 @@ class Conv2dBnFoldQuantOneConv(Cell):

 class Conv2dBnFoldQuant(Cell):
 r"""
-2D convolution with BatchNormal operation folded construct.
+2D convolution with Batch Normalization operation folded construct.

 This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
 please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.
@@ -612,8 +613,8 @@ class Conv2dBnFoldQuant(Cell):
 stride (int): Specifies stride for all spatial dimensions with the same value.
 pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
 padding (int): Implicit paddings on both sides of the input. Default: 0.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
 divisible by the number of groups. Default: 1.
@@ -635,7 +636,8 @@ class Conv2dBnFoldQuant(Cell):
 generated by compression.quant.create_quant_config method.
 Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
-freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.
+freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
+Default: 100000.

 Inputs:
 - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
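For context on the "Batch Normalization operation folded" wording used by these Conv2dBnFoldQuant layers: once the statistics are frozen (see freeze_bn), the BN scale and shift can be folded into the convolution's weight and bias. A hedged sketch of the standard folding algebra, with illustrative names rather than the layer's actual attributes:

import numpy as np

def fold_bn_into_conv(weight, bias, gamma, beta, moving_mean, moving_var, eps=1e-5):
    # weight: (C_out, C_in, kH, kW); gamma/beta/moving_* are per output channel
    scale = gamma / np.sqrt(moving_var + eps)
    w_fold = weight * scale.reshape(-1, 1, 1, 1)   # scale each output channel's kernel
    b_fold = beta + (bias - moving_mean) * scale   # fold the shift into the bias
    return w_fold, b_fold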
@@ -811,8 +813,8 @@ class Conv2dBnWithoutFoldQuant(Cell):
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
 divisible by the number of groups. Default: 1.
 has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
 Default: 'normal'.
 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.
@@ -65,7 +65,7 @@ class TimeDistributed(Cell):
 There are two cases in the implementation.
 When reshape_with_axis provided, the reshape method will be chosen, which is more efficient;
 otherwise, the method of dividing the inputs along time axis will be used, which is more general.
-For example, reshape_with_axis could not be provided when deal with batch normal.
+For example, reshape_with_axis could not be provided when deal with Batch Normalization.

 Args:
 layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped.
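The reshape path described above amounts to folding the time axis into the batch axis, applying the wrapped layer once, and splitting the axes back out; a schematic sketch with assumed shapes and names:

def time_distributed_reshape_sketch(layer, x):
    # x: array of shape (N, T, ...); fold time into batch, apply the layer once,
    # then restore the (N, T, ...) layout.
    n, t = x.shape[0], x.shape[1]
    merged = x.reshape((n * t,) + x.shape[2:])
    out = layer(merged)
    return out.reshape((n, t) + out.shape[1:])

With Batch Normalization, folding the axes changes which samples the statistics are computed over, which is presumably why the docstring rules out reshape_with_axis in that case.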
@@ -739,7 +739,7 @@ class SyncBatchNorm(PrimitiveWithInfer):
 r"""
 Sync Batch Normalization for input data and updated parameters.

-Sync Batch Normalization is cross device synchronized batch normalization. Batch Normalization is
+Sync Batch Normalization is cross device synchronized Batch Normalization. Batch Normalization is
 widely used in convolutional neural networks. This operation applies Batch Normalization over input
 to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating
 Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_.
@@ -411,8 +411,8 @@ class FakeQuantPerLayer(PrimitiveWithInfer):
 ema (bool): Uses EMA algorithm update value min and max. Default: False.
 ema_decay (int) : EMA algorithm decay parameter. Default: 0.999.
 quant_delay (int): Quantilization delay parameter. Before delay step in training time not update
-simulate quantization aware funcion. After delay step in training time begin simulate the aware
-quantize funcion. Default: 0.
+simulate quantization aware function. After delay step in training time begin simulate the aware
+quantize function. Default: 0.
 symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
 narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
 training (bool): Training the network or not. Default: True.
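As background on the simulated ("fake") quantization these parameters control: after quant_delay steps, values pass through a quantize-then-dequantize round trip so training sees the rounding error while staying in floating point. A minimal asymmetric per-layer sketch (the names and the 8-bit default are illustrative, and symmetric/narrow_range handling is omitted):

import numpy as np

def fake_quant_sketch(x, x_min, x_max, num_bits=8):
    # Quantize to an integer grid and immediately dequantize.
    levels = 2 ** num_bits - 1
    scale = (x_max - x_min) / levels
    zero_point = np.round(-x_min / scale)
    q = np.clip(np.round(x / scale) + zero_point, 0, levels)
    return (q - zero_point) * scale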
@@ -687,7 +687,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer):

 class BatchNormFold(PrimitiveWithInfer):
 """
-Batch normalization folded.
+Batch Normalization folded.

 Args:
 momentum (float): Momentum value must be [0, 1]. Default: 0.9.
@@ -129,7 +129,7 @@ cfg = {

 def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
 """
-Get Vgg16 neural network with batch normalization.
+Get Vgg16 neural network with Batch Normalization.

 Args:
 num_classes (int): Class numbers. Default: 1000.
@@ -137,7 +137,7 @@ def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
 phase(str): train or test mode.

 Returns:
-Cell, cell instance of Vgg16 neural network with batch normalization.
+Cell, cell instance of Vgg16 neural network with Batch Normalization.

 Examples:
 >>> vgg16(num_classes=1000, args=args, **kwargs)
@@ -57,7 +57,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
 stride (int): The stride of the sliding window.

 Returns:
-numpy.ndarray, grad of avgerage pooling.
+numpy.ndarray, grad of average pooling.
 """
 # pylint: disable=unused-argument
 _, _, height, width = dout.shape
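For intuition, the gradient of average pooling spreads each output gradient evenly over its pooling window; a minimal sketch for the non-overlapping case (stride equal to the window size is an assumption made here for brevity):

import numpy as np

def avg_pool_grad_sketch(dout, pool_h, pool_w):
    # dout: (N, C, H_out, W_out); each value is shared equally by its window
    return np.repeat(np.repeat(dout, pool_h, axis=2), pool_w, axis=3) / (pool_h * pool_w)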
@@ -70,7 +70,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):

 def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
 eps=1e-05, momentum=0.1, is_training=True):
-"""Batch normalization over an array."""
+"""Batch Normalization over an array."""
 _, c_h_w = x.shape
 # Handle running_mean and running_var are not None
 # if running_mean is None:
@@ -106,7 +106,7 @@ def _batch_norm(x, scale, shift, running_mean=None, running_var=None,

 def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
 eps=1e-05, momentum=0.1, is_training=True):
-"""Batch normalization over an array."""
+"""Batch Normalization over an array."""
 input_shape = x.shape
 if x.ndim != 2:
 batch_num = x.shape[0]
@@ -120,7 +120,7 @@ def batch_norm(x, scale=1, shift=0, mean=None, variance=None,

 def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
 eps=1e-05, momentum=0.1, is_training=True):
-"""Batch normalization over an array."""
+"""Batch Normalization over an array."""
 if x.ndim != 2:
 batch_num = x.shape[0]
 x = x.reshape(batch_num, -1)
@@ -141,7 +141,7 @@ def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \


 def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance):
-"""Batch normalization over an array."""
+"""Batch Normalization over an array."""
 if dy.ndim != 2:
 batch_size = dy.shape[0]
 dy = dy.reshape(batch_size, -1)
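For reference, the *_grad helpers above follow the standard Batch Normalization backward formulas; a compact NumPy sketch over a 2D (N, C) input, using illustrative names rather than the file's exact variables:

import numpy as np

def batch_norm_grad_sketch(dy, x, gamma, mean, inv_std):
    # dy, x: (N, C); mean, inv_std: per-feature statistics saved from the forward pass
    n = x.shape[0]
    x_hat = (x - mean) * inv_std
    dgamma = (dy * x_hat).sum(axis=0)
    dbeta = dy.sum(axis=0)
    dx = (gamma * inv_std / n) * (n * dy - dbeta - x_hat * dgamma)
    return dx, dgamma, dbeta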
@@ -275,7 +275,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
 col = im2col(x, filter_h, filter_w, stride, pad, dilation)
 col_w = np.reshape(weight, (filter_num, -1)).T
 out = np.dot(col, col_w)
-out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2)
+out = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
 if bias is not None:
 out += bias
 return out
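The change in this hunk (and the next) is purely stylistic: numpy's reshape accepts the target shape either as separate integers or as one tuple, so both spellings produce the same result.

import numpy as np

a = np.arange(24)
assert np.array_equal(a.reshape(2, 3, 4), a.reshape((2, 3, 4)))  # identical results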
@@ -287,7 +287,7 @@ def conv2d_backprop_filter(dout, x, w_size, stride=1, pad=0):
 dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num)
 col = im2col(x, filter_height, filter_width, stride, pad)
 dw = np.dot(col.T, dout)
-dw = dw.transpose(1, 0).reshape(filter_num, channel, filter_height, filter_width)
+dw = dw.transpose(1, 0).reshape((filter_num, channel, filter_height, filter_width))
 return dw

