!15365 Update all "batch normal" descriptions under the mindspore folder uniformly to "Batch Normalization"
From: @dinglinhe123 Reviewed-by: @liangchenghui, @wuxuejian Signed-off-by: @liangchenghui
This commit is contained in: commit eeb32f410e
@@ -51,12 +51,12 @@ void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   auto prop_kind = dnnl::prop_kind::forward_training;
   auto normalization_flags = dnnl::normalization_flags::use_scale_shift;

-  // fused batch normalization forward description
+  // fused Batch Normalization forward description
   dnnl::batch_normalization_forward::desc desc =
     dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
   auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());

-  // fused batch normalization backward description
+  // fused Batch Normalization backward description
   dnnl::batch_normalization_backward::desc backward_desc =
     dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
   auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(

@@ -33,7 +33,7 @@ enum OpMergeMode {
  OP_MERGE_IGNORE = 1,  // indicate an input op merged into other op in compute node list
  OP_MERGE_CONV = 2,  // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
  OP_MERGE_GEMM = 3,  // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
- OP_MERGE_BATCH_NORM = 4,  // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+ OP_MERGE_BATCH_NORM = 4,  // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
  OP_MERGE_MAXPOOL_WITH_ARGMAX = 5,  // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
 };

@@ -339,7 +339,7 @@ class QuantizationAwareTraining(Quantizer):
 quant_config=self.quant_config,
 quant_dtype=self.weight_dtype,
 fake=True)
-# change original network BatchNormal OP parameters to quant network
+# change original network Batch Normalization OP parameters to quant network
 conv_inner.gamma = subcell.batchnorm.gamma
 conv_inner.beta = subcell.batchnorm.beta
 conv_inner.moving_mean = subcell.batchnorm.moving_mean

@@ -363,7 +363,7 @@ class QuantizationAwareTraining(Quantizer):
 bias_init=conv_inner.bias_init,
 quant_config=self.quant_config,
 quant_dtype=self.weight_dtype)
-# change original network BatchNormal OP parameters to quant network
+# change original network Batch Normalization OP parameters to quant network
 conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
 conv_inner.batchnorm.beta = subcell.batchnorm.beta
 conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-"""less batch normalization"""
+"""less Batch Normalization"""
 import numpy as np
 from mindspore import nn
 from mindspore.ops import operations as P

@@ -233,7 +233,7 @@ def _shape_check_bn(in_shape, in_dims):

 @constexpr
 def _shape_infer(x_shape, num_feature):
-    """global batch normalization shape and axes infer"""
+    """global Batch Normalization shape and axes infer"""
     if len(x_shape) == 4:
         axes = (0, 2, 3)
         re_shape = (1, num_feature, 1, 1)

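To make the axes/re_shape pair above concrete: for a 4D (N, C, H, W) input, per-channel statistics are reduced over axes (0, 2, 3) and broadcast back with shape (1, C, 1, 1). A small NumPy illustration (not code from this change):

import numpy as np

x = np.random.randn(8, 16, 32, 32).astype(np.float32)  # (N, C, H, W)
mean = x.mean(axis=(0, 2, 3)).reshape((1, 16, 1, 1))    # one statistic per channel
var = x.var(axis=(0, 2, 3)).reshape((1, 16, 1, 1))
x_hat = (x - mean) / np.sqrt(var + 1e-5)                # broadcasts back over N, H, W
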
@@ -245,7 +245,7 @@ def _shape_infer(x_shape, num_feature):

 class BatchNorm1d(_BatchNorm):
     r"""
-    Batch normalization layer over a 2D input.
+    Batch Normalization layer over a 2D input.

     Batch Normalization is widely used in convolutional networks. This layer
     applies Batch Normalization over a 2D input (a mini-batch of 1D inputs) to

@@ -334,7 +334,7 @@ class BatchNorm1d(_BatchNorm):

 class BatchNorm2d(_BatchNorm):
     r"""
-    Batch normalization layer over a 4D input.
+    Batch Normalization layer over a 4D input.

     Batch Normalization is widely used in convolutional networks. This layer
     applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with

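As a reminder of how these layers are used, a minimal sketch based on the public mindspore.nn API (illustrative only, not part of this change):

import numpy as np
from mindspore import Tensor, nn

# BatchNorm2d normalizes each of the C channels of an (N, C, H, W) input.
bn = nn.BatchNorm2d(num_features=3)
x = Tensor(np.random.randn(2, 3, 4, 4).astype(np.float32))
y = bn(x)  # same shape as x: (2, 3, 4, 4)
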
@@ -441,7 +441,7 @@ def _check_3d_shape(input_shape):

 class BatchNorm3d(Cell):
     r"""
-    Batch normalization layer over a 5D input.
+    Batch Normalization layer over a 5D input.

     Batch Normalization is widely used in convolutional networks. This layer
     applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with

@@ -540,7 +540,7 @@ class GlobalBatchNorm(_BatchNorm):
     r"""
     Global normalization layer over a N-dimension input.

-    Global Normalization is cross device synchronized batch normalization. The implementation of Batch Normalization
+    Global Normalization is cross device synchronized Batch Normalization. The implementation of Batch Normalization
     only normalizes the data within each device. Global normalization will normalize the input within the group.
     It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
     Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the

@@ -653,10 +653,10 @@ class GlobalBatchNorm(_BatchNorm):

 class SyncBatchNorm(_BatchNorm):
     r"""
-    Sync Batch normalization layer over a N-dimension input.
+    Sync Batch Normalization layer over a N-dimension input.

-    Sync Batch Normalization is cross device synchronized batch normalization. The implementation of Batch
-    Normalization only normalizes the data within each device. Sync Batch normalization will normalize the input
+    Sync Batch Normalization is cross device synchronized Batch Normalization. The implementation of Batch
+    Normalization only normalizes the data within each device. Sync Batch Normalization will normalize the input
     within the group. It has been described in the paper `Batch Normalization: Accelerating Deep Network Training by
     Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales and recenters the
     feature using a mini-batch of data and the learned parameters which can be described in the following formula.

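For readers following the docstring's reference to "the following formula", the transform being described is the standard Batch Normalization formula from the cited paper (stated here for convenience, not quoted from this diff):

    y = \gamma \cdot \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} + \beta

where the mean and variance are computed over the current mini-batch (and, for SyncBatchNorm, across all devices in the group), and \gamma and \beta are the learned scale and shift parameters.
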
@@ -47,7 +47,7 @@ __all__ = [

 class BatchNormFoldCell(Cell):
     """
-    Batch normalization folded.
+    Batch Normalization folded.

     Args:
         momentum (float): Momentum value must be [0, 1]. Default: 0.9.

@@ -402,7 +402,8 @@ quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver, activatio

 class Conv2dBnFoldQuantOneConv(Cell):
     r"""
-    2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct.
+    2D convolution which use the convolution layer statistics once to calculate Batch Normalization
+    operation folded construct.

     This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
     please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

@@ -414,8 +415,8 @@ class Conv2dBnFoldQuantOneConv(Cell):
 stride (int): Specifies stride for all spatial dimensions with the same value.
 pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
 padding (int): Implicit paddings on both sides of the input. Default: 0.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
     divisible by the number of groups. Default: 1.

@@ -600,7 +601,7 @@ class Conv2dBnFoldQuantOneConv(Cell):

 class Conv2dBnFoldQuant(Cell):
     r"""
-    2D convolution with BatchNormal operation folded construct.
+    2D convolution with Batch Normalization operation folded construct.

     This part is a more detailed overview of Conv2d operation. For more detials about Quantilization,
     please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`.

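For readers unfamiliar with the folding these classes describe, the sketch below shows the usual way frozen Batch Normalization statistics are folded into a preceding convolution. It is a generic NumPy illustration with made-up names, not code from this change or the MindSpore implementation:

import numpy as np

def fold_bn_into_conv(weight, bias, gamma, beta, moving_mean, moving_var, eps=1e-5):
    """Fold frozen Batch Normalization statistics into conv weight/bias.

    weight: (C_out, C_in, kH, kW) convolution kernel
    bias:   (C_out,) convolution bias (zeros if the conv has none)
    gamma, beta, moving_mean, moving_var: (C_out,) BN parameters/statistics
    """
    scale = gamma / np.sqrt(moving_var + eps)             # per-output-channel scale
    folded_weight = weight * scale.reshape(-1, 1, 1, 1)   # scale each output channel
    folded_bias = beta + (bias - moving_mean) * scale     # shift absorbs mean and beta
    return folded_weight, folded_bias
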
@@ -612,8 +613,8 @@ class Conv2dBnFoldQuant(Cell):
 stride (int): Specifies stride for all spatial dimensions with the same value.
 pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same".
 padding (int): Implicit paddings on both sides of the input. Default: 0.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1.
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
     divisible by the number of groups. Default: 1.

@@ -635,7 +636,8 @@ class Conv2dBnFoldQuant(Cell):
     generated by compression.quant.create_quant_config method.
     Default: both set to default FakeQuantWithMinMaxObserver.
 quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8.
-freeze_bn (int): The quantization freeze BatchNormal op is according to the global step. Default: 100000.
+freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step.
+    Default: 100000.

 Inputs:
     - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

@@ -811,8 +813,8 @@ class Conv2dBnWithoutFoldQuant(Cell):
 group (int): Splits filter into groups, `in_ channels` and `out_channels` must be
     divisible by the number of groups. Default: 1.
 has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
-eps (float): Parameters for BatchNormal. Default: 1e-5.
-momentum (float): Parameters for BatchNormal op. Default: 0.997.
+eps (float): Parameters for Batch Normalization. Default: 1e-5.
+momentum (float): Parameters for Batch Normalization op. Default: 0.997.
 weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
     Default: 'normal'.
 bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'.

@@ -65,7 +65,7 @@ class TimeDistributed(Cell):
     There are two cases in the implementation.
     When reshape_with_axis provided, the reshape method will be chosen, which is more efficient;
     otherwise, the method of dividing the inputs along time axis will be used, which is more general.
-    For example, reshape_with_axis could not be provided when deal with batch normal.
+    For example, reshape_with_axis could not be provided when deal with Batch Normalization.

     Args:
         layer(Union[Cell, Primitive]): The Cell or Primitive which will be wrapped.

@@ -739,7 +739,7 @@ class SyncBatchNorm(PrimitiveWithInfer):
     r"""
     Sync Batch Normalization for input data and updated parameters.

-    Sync Batch Normalization is cross device synchronized batch normalization. Batch Normalization is
+    Sync Batch Normalization is cross device synchronized Batch Normalization. Batch Normalization is
     widely used in convolutional neural networks. This operation applies Batch Normalization over input
     to avoid internal covariate shift as described in the paper `Batch Normalization: Accelerating
     Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_.

@@ -411,8 +411,8 @@ class FakeQuantPerLayer(PrimitiveWithInfer):
 ema (bool): Uses EMA algorithm update value min and max. Default: False.
 ema_decay (int) : EMA algorithm decay parameter. Default: 0.999.
 quant_delay (int): Quantilization delay parameter. Before delay step in training time not update
-    simulate quantization aware funcion. After delay step in training time begin simulate the aware
-    quantize funcion. Default: 0.
+    simulate quantization aware function. After delay step in training time begin simulate the aware
+    quantize function. Default: 0.
 symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
 narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False.
 training (bool): Training the network or not. Default: True.

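As background for the fake-quantization operators touched here, the snippet below is a generic NumPy illustration of per-layer simulated quantization (quantize, then immediately dequantize). It is not the MindSpore kernel; the helper name is invented and the symmetric/narrow-range options are omitted:

import numpy as np

def fake_quant_per_layer(x, x_min, x_max, num_bits=8):
    """Simulate integer quantization: snap x to the [x_min, x_max] grid, return floats."""
    quant_max = 2 ** num_bits - 1
    scale = (x_max - x_min) / quant_max      # step size of the integer grid
    zero_point = np.round(-x_min / scale)    # integer code that represents 0.0
    q = np.round(np.clip(x, x_min, x_max) / scale + zero_point)
    return (q - zero_point) * scale          # dequantize back to float
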
@@ -687,7 +687,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer):

 class BatchNormFold(PrimitiveWithInfer):
     """
-    Batch normalization folded.
+    Batch Normalization folded.

     Args:
         momentum (float): Momentum value must be [0, 1]. Default: 0.9.

@@ -129,7 +129,7 @@ cfg = {

 def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
     """
-    Get Vgg16 neural network with batch normalization.
+    Get Vgg16 neural network with Batch Normalization.

     Args:
         num_classes (int): Class numbers. Default: 1000.

@@ -137,7 +137,7 @@ def vgg16(num_classes=1000, args=None, phase="train", **kwargs):
         phase(str): train or test mode.

     Returns:
-        Cell, cell instance of Vgg16 neural network with batch normalization.
+        Cell, cell instance of Vgg16 neural network with Batch Normalization.

     Examples:
         >>> vgg16(num_classes=1000, args=args, **kwargs)

@@ -57,7 +57,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):
        stride (int): The stride of the sliding window.

    Returns:
-       numpy.ndarray, grad of avgerage pooling.
+       numpy.ndarray, grad of average pooling.
    """
    # pylint: disable=unused-argument
    _, _, height, width = dout.shape

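For orientation, the idea behind an average-pooling gradient is that each output gradient is spread evenly over its pooling window. A generic NumPy sketch, assuming non-overlapping windows (stride equal to the window size) and NCHW layout, and not the helper diffed above:

import numpy as np

def avg_pool_grad_simple(dout, pool_h, pool_w):
    """Spread each output gradient evenly over its pooling window."""
    dx = np.repeat(np.repeat(dout, pool_h, axis=2), pool_w, axis=3)
    return dx / (pool_h * pool_w)  # every input cell gets dout / window_size
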
@@ -70,7 +70,7 @@ def avg_pool_grad(dout, origin_shape, pool_h, pool_w, stride):

 def _batch_norm(x, scale, shift, running_mean=None, running_var=None,
                 eps=1e-05, momentum=0.1, is_training=True):
-    """Batch normalization over an array."""
+    """Batch Normalization over an array."""
     _, c_h_w = x.shape
     # Handle running_mean and running_var are not None
     # if running_mean is None:

@@ -106,7 +106,7 @@ def _batch_norm(x, scale, shift, running_mean=None, running_var=None,

 def batch_norm(x, scale=1, shift=0, mean=None, variance=None,
                eps=1e-05, momentum=0.1, is_training=True):
-    """Batch normalization over an array."""
+    """Batch Normalization over an array."""
     input_shape = x.shape
     if x.ndim != 2:
         batch_num = x.shape[0]

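For context, a minimal NumPy version of the forward computation these test helpers implement might look like the following. It is an illustrative sketch of the standard formula, not the helper above:

import numpy as np

def batch_norm_forward(x, gamma=1.0, beta=0.0, eps=1e-5):
    """Normalize a 2D (batch, features) array with batch statistics."""
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mean) / np.sqrt(var + eps)  # zero mean, unit variance per feature
    return gamma * x_hat + beta              # learned rescale and shift
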
@@ -120,7 +120,7 @@ def batch_norm(x, scale=1, shift=0, mean=None, variance=None,

 def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
                      eps=1e-05, momentum=0.1, is_training=True):
-    """Batch normalization over an array."""
+    """Batch Normalization over an array."""
     if x.ndim != 2:
         batch_num = x.shape[0]
         x = x.reshape(batch_num, -1)

|
@ -141,7 +141,7 @@ def _batch_norm_grad(dout, x, scale, save_mean, save_inv_variance, \
|
||||||
|
|
||||||
|
|
||||||
def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance):
|
def batch_norm_grad(dy, x, scale, save_mean, save_inv_variance):
|
||||||
"""Batch normalization over an array."""
|
"""Batch Normalization over an array."""
|
||||||
if dy.ndim != 2:
|
if dy.ndim != 2:
|
||||||
batch_size = dy.shape[0]
|
batch_size = dy.shape[0]
|
||||||
dy = dy.reshape(batch_size, -1)
|
dy = dy.reshape(batch_size, -1)
|
||||||
|
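The backward helpers above follow the standard gradient of the normalization step. A generic NumPy sketch of that textbook formula (made-up names, not the exact helpers in this file, which take saved mean and inverse variance instead of recomputing them):

import numpy as np

def batch_norm_backward(dy, x, gamma, eps=1e-5):
    """Textbook gradients of y = gamma * x_hat + beta w.r.t. x, gamma, beta."""
    n = x.shape[0]
    mean = x.mean(axis=0)
    inv_std = 1.0 / np.sqrt(x.var(axis=0) + eps)
    x_hat = (x - mean) * inv_std
    dbeta = dy.sum(axis=0)
    dgamma = (dy * x_hat).sum(axis=0)
    dx = (gamma * inv_std / n) * (n * dy - dbeta - x_hat * dgamma)
    return dx, dgamma, dbeta
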
@@ -275,7 +275,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
     col = im2col(x, filter_h, filter_w, stride, pad, dilation)
     col_w = np.reshape(weight, (filter_num, -1)).T
     out = np.dot(col, col_w)
-    out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2)
+    out = out.reshape((batch_num, out_h, out_w, -1)).transpose(0, 3, 1, 2)
     if bias is not None:
         out += bias
     return out

@@ -287,7 +287,7 @@ def conv2d_backprop_filter(dout, x, w_size, stride=1, pad=0):
     dout = dout.transpose(0, 2, 3, 1).reshape(-1, filter_num)
     col = im2col(x, filter_height, filter_width, stride, pad)
     dw = np.dot(col.T, dout)
-    dw = dw.transpose(1, 0).reshape(filter_num, channel, filter_height, filter_width)
+    dw = dw.transpose(1, 0).reshape((filter_num, channel, filter_height, filter_width))
     return dw