forked from mindspore-Ecosystem/mindspore
modify weight init
This commit is contained in:
parent
e5fafc5e00
commit
73bd2e9afb
|
@ -34,7 +34,6 @@ from mindspore.communication.management import init
|
|||
import mindspore.nn as nn
|
||||
import mindspore.common.initializer as weight_init
|
||||
from crossentropy import CrossEntropy
|
||||
from var_init import default_recurisive_init, KaimingNormal
|
||||
|
||||
random.seed(1)
|
||||
np.random.seed(1)
|
||||
|
@ -68,8 +67,11 @@ if __name__ == '__main__':
|
|||
default_recurisive_init(net)
|
||||
for _, cell in net.cells_and_names():
|
||||
if isinstance(cell, nn.Conv2d):
|
||||
cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
|
||||
mode='fan_out', nonlinearity='relu'),
|
||||
cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
|
||||
cell.weight.default_input.shape(),
|
||||
cell.weight.default_input.dtype())
|
||||
if isinstance(cell, nn.Dense):
|
||||
cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
|
||||
cell.weight.default_input.shape(),
|
||||
cell.weight.default_input.dtype())
|
||||
if not config.label_smooth:
|
||||
|
|
|
@ -1,192 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""weight initial"""
|
||||
import math
|
||||
import numpy as np
|
||||
from mindspore.common import initializer as init
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
|
||||
def calculate_gain(nonlinearity, param=None):
|
||||
r"""Return the recommended gain value for the given nonlinearity function.
|
||||
The values are as follows:
|
||||
================= ====================================================
|
||||
nonlinearity gain
|
||||
================= ====================================================
|
||||
Linear / Identity :math:`1`
|
||||
Conv{1,2,3}D :math:`1`
|
||||
Sigmoid :math:`1`
|
||||
Tanh :math:`\frac{5}{3}`
|
||||
ReLU :math:`\sqrt{2}`
|
||||
Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
|
||||
================= ====================================================
|
||||
Args:
|
||||
nonlinearity: the non-linear function (`nn.functional` name)
|
||||
param: optional parameter for the non-linear function
|
||||
"""
|
||||
linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
|
||||
gain = 0
|
||||
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
|
||||
gain = 1
|
||||
elif nonlinearity == 'tanh':
|
||||
gain = 5.0 / 3
|
||||
elif nonlinearity == 'relu':
|
||||
gain = math.sqrt(2.0)
|
||||
elif nonlinearity == 'leaky_relu':
|
||||
if param is None:
|
||||
negative_slope = 0.01
|
||||
elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
|
||||
# True/False are instances of int, hence check above
|
||||
negative_slope = param
|
||||
else:
|
||||
raise ValueError("negative_slope {} not a valid number".format(param))
|
||||
gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
|
||||
else:
|
||||
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
|
||||
return gain
|
||||
|
||||
def _calculate_correct_fan(array, mode):
|
||||
mode = mode.lower()
|
||||
valid_modes = ['fan_in', 'fan_out']
|
||||
if mode not in valid_modes:
|
||||
raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
|
||||
fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
|
||||
return fan_in if mode == 'fan_in' else fan_out
|
||||
|
||||
def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
|
||||
r"""Fills the input `Tensor` with values according to the method
|
||||
described in `Delving deep into rectifiers: Surpassing human-level
|
||||
performance on ImageNet classification` - He, K. et al. (2015), using a
|
||||
uniform distribution. The resulting tensor will have values sampled from
|
||||
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where
|
||||
.. math::
|
||||
\text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
|
||||
Also known as He initialization.
|
||||
|
||||
Args:
|
||||
array: an n-dimensional `tensor`
|
||||
a: the negative slope of the rectifier used after this layer (only
|
||||
used with ``'leaky_relu'``)
|
||||
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
|
||||
preserves the magnitude of the variance of the weights in the
|
||||
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
|
||||
backwards pass.
|
||||
nonlinearity: the non-linear function (`nn.functional` name),
|
||||
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
|
||||
"""
|
||||
fan = _calculate_correct_fan(array, mode)
|
||||
gain = calculate_gain(nonlinearity, a)
|
||||
std = gain / math.sqrt(fan)
|
||||
bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
|
||||
return np.random.uniform(-bound, bound, array.shape)
|
||||
|
||||
def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
|
||||
r"""Fills the input `Tensor` with values according to the method
|
||||
described in `Delving deep into rectifiers: Surpassing human-level
|
||||
performance on ImageNet classification` - He, K. et al. (2015), using a
|
||||
normal distribution. The resulting tensor will have values sampled from
|
||||
:math:`\mathcal{N}(0, \text{std}^2)` where
|
||||
.. math::
|
||||
\text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
|
||||
Also known as He initialization.
|
||||
|
||||
Args:
|
||||
array: an n-dimensional `tensor`
|
||||
a: the negative slope of the rectifier used after this layer (only
|
||||
used with ``'leaky_relu'``)
|
||||
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
|
||||
preserves the magnitude of the variance of the weights in the
|
||||
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
|
||||
backwards pass.
|
||||
nonlinearity: the non-linear function (`nn.functional` name),
|
||||
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
|
||||
"""
|
||||
fan = _calculate_correct_fan(array, mode)
|
||||
gain = calculate_gain(nonlinearity, a)
|
||||
std = gain / math.sqrt(fan)
|
||||
return np.random.normal(0, std, array.shape)
|
||||
|
||||
def _calculate_fan_in_and_fan_out(array):
|
||||
"""calculate the fan_in and fan_out for input array"""
|
||||
dimensions = len(array.shape)
|
||||
if dimensions < 2:
|
||||
raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
|
||||
num_input_fmaps = array.shape[1]
|
||||
num_output_fmaps = array.shape[0]
|
||||
receptive_field_size = 1
|
||||
if dimensions > 2:
|
||||
receptive_field_size = array[0][0].size
|
||||
fan_in = num_input_fmaps * receptive_field_size
|
||||
fan_out = num_output_fmaps * receptive_field_size
|
||||
return fan_in, fan_out
|
||||
|
||||
def assignment(arr, num):
|
||||
"""Assign the value of num to arr"""
|
||||
if arr.shape == ():
|
||||
arr = arr.reshape((1))
|
||||
arr[:] = num
|
||||
arr = arr.reshape(())
|
||||
else:
|
||||
if isinstance(num, np.ndarray):
|
||||
arr[:] = num[:]
|
||||
else:
|
||||
arr[:] = num
|
||||
return arr
|
||||
|
||||
class KaimingUniform(init.Initializer):
|
||||
def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
|
||||
super(KaimingUniform, self).__init__()
|
||||
self.a = a
|
||||
self.mode = mode
|
||||
self.nonlinearity = nonlinearity
|
||||
def _initialize(self, arr):
|
||||
tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
|
||||
assignment(arr, tmp)
|
||||
|
||||
class KaimingNormal(init.Initializer):
|
||||
def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
|
||||
super(KaimingNormal, self).__init__()
|
||||
self.a = a
|
||||
self.mode = mode
|
||||
self.nonlinearity = nonlinearity
|
||||
def _initialize(self, arr):
|
||||
tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
|
||||
assignment(arr, tmp)
|
||||
|
||||
def default_recurisive_init(custom_cell):
|
||||
"""weight init for conv2d and dense"""
|
||||
for _, cell in custom_cell.cells_and_names():
|
||||
if isinstance(cell, nn.Conv2d):
|
||||
cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
|
||||
cell.weight.default_input.shape(),
|
||||
cell.weight.default_input.dtype())
|
||||
if cell.bias is not None:
|
||||
fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
|
||||
bound = 1 / math.sqrt(fan_in)
|
||||
cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
|
||||
cell.bias.default_input.shape()),
|
||||
cell.bias.default_input.dtype())
|
||||
elif isinstance(cell, nn.Dense):
|
||||
cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
|
||||
cell.weight.default_input.shape(),
|
||||
cell.weight.default_input.dtype())
|
||||
if cell.bias is not None:
|
||||
fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
|
||||
bound = 1 / math.sqrt(fan_in)
|
||||
cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
|
||||
cell.bias.default_input.shape()),
|
||||
cell.bias.default_input.dtype())
|
||||
elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
|
||||
pass
|
Loading…
Reference in New Issue