forked from mindspore-Ecosystem/mindspore
!9257 make parameter name optional
From: @caozhou_huawei Reviewed-by: @kingxian,@zh_qh Signed-off-by: @kingxian
This commit is contained in:
commit
825ba197f2
|
@ -30,6 +30,7 @@ __all__ = ['Parameter', 'ParameterTuple']
|
|||
PARAMETER_NAME_DEFAULT = "Parameter"
|
||||
PARAMETER_NAME_PREFIX_MAX_LEN = 1024
|
||||
|
||||
|
||||
def _is_in_parallel_mode():
    """Report whether the current parallel mode is one of the auto-parallel modes.

    Returns:
        bool, True when the mode reported by the auto-parallel context is
        "semi_auto_parallel" or "auto_parallel", False otherwise.
    """
    current_mode = auto_parallel_context().get_parallel_mode()
    return current_mode in ["semi_auto_parallel", "auto_parallel"]
|
||||
|
@ -51,10 +52,12 @@ class Parameter(MetaTensor_):
|
|||
A Parameter has to belong to a Cell.
|
||||
If there is an operator in the network that requires part of the inputs to be Parameter,
|
||||
then the Parameters as this part of the inputs are not allowed to be cast.
|
||||
It is recommended to use the default value of `name` when initializing a parameter as an attribute of a cell,
|
||||
otherwise, the parameter name may be different than expected.
|
||||
|
||||
Args:
|
||||
default_input (Union[Tensor, MetaTensor, Number]): Parameter data, used to initialize the parameter.
|
||||
name (str): Name of the child parameter.
|
||||
name (str): Name of the child parameter. Default: None.
|
||||
requires_grad (bool): True if the parameter requires gradient. Default: True.
|
||||
layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
|
||||
broadcast and gradients communication would not be applied to parameters. Default: False.
|
||||
|
@ -72,7 +75,7 @@ class Parameter(MetaTensor_):
|
|||
>>> def __init__(self):
|
||||
>>> super(Net, self).__init__()
|
||||
>>> self.matmul = P.MatMul()
|
||||
>>> self.weight = Parameter(Tensor(np.ones((1,2))), name="w", requires_grad=True)
|
||||
>>> self.weight = Parameter(Tensor(np.ones((1,2))), requires_grad=True)
|
||||
>>>
|
||||
>>> def construct(self, x):
|
||||
>>> out = self.matmul(self.weight, x)
|
||||
|
@ -88,7 +91,7 @@ class Parameter(MetaTensor_):
|
|||
"""
|
||||
__base_type__ = {}
|
||||
|
||||
def __new__(cls, default_input, name, *args, **kwargs):
|
||||
def __new__(cls, default_input, *args, **kwargs):
|
||||
input_class, *class_init_args = Parameter._get_parameter_new_args(default_input)
|
||||
new_type = Parameter._get_base_class(input_class)
|
||||
obj = input_class.__new__(new_type)
|
||||
|
@ -112,7 +115,7 @@ class Parameter(MetaTensor_):
|
|||
return (
|
||||
Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel))
|
||||
|
||||
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False):
|
||||
def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False):
|
||||
self._param_info = ParamInfo()
|
||||
self.name = name
|
||||
self.requires_grad = requires_grad
|
||||
|
@ -276,24 +279,20 @@ class Parameter(MetaTensor_):
|
|||
"""
|
||||
self._is_init = is_init_
|
||||
|
||||
def clone(self, prefix, init='same'):
|
||||
def clone(self, init='same'):
|
||||
"""
|
||||
Clone the parameter.
|
||||
|
||||
Args:
|
||||
prefix (str): Namespace of parameter. The cloned Parameter name is
|
||||
combined of prefix and current name: `f"{prefix}.{self.name}"`.
|
||||
init (Union[Tensor, str, MetaTensor, numbers.Number]): Initialize the shape of the parameter.
|
||||
Default: 'same'.
|
||||
|
||||
Returns:
|
||||
Parameter, a new parameter.
|
||||
"""
|
||||
Validator.check_str_by_regular(prefix)
|
||||
x = copy(self)
|
||||
# pylint: disable=protected-access
|
||||
x._param_info = self._param_info.clone()
|
||||
x._param_info.name = prefix + '.' + self._param_info.name
|
||||
x.is_init = False
|
||||
x.is_param_ps = self.is_param_ps
|
||||
x.init_in_server = self.init_in_server
|
||||
|
@ -464,10 +463,25 @@ class ParameterTuple(tuple):
|
|||
def __new__(cls, iterable):
    """Create instance object of ParameterTuple.

    Every element must be a `Parameter`. Distinct Parameter objects that share
    the same name are renamed in place so the names in the tuple are unique:
    the first two holders of a name become `<name>_0` and `<name>_1`, and each
    later holder gets the next integer suffix. The same object appearing more
    than once is only processed on its first occurrence.

    Args:
        iterable (Iterable[Parameter]): Parameters to store in the tuple.

    Returns:
        ParameterTuple, a tuple holding the given parameters in order.

    Raises:
        TypeError: If any element of `iterable` is not a `Parameter`.
    """
    data = tuple(iterable)
    seen_ids = set()
    # Per-name bookkeeping:
    #   name -> [0, first_param]  while the name has been seen exactly once
    #   name -> int               last suffix handed out once it has repeated
    orders = {}
    for x in data:
        if not isinstance(x, Parameter):
            # The space after the full stop keeps the two sentences apart in
            # the final message (the fragments are concatenated verbatim).
            raise TypeError(f"ParameterTuple input should be `Parameter` collection. "
                            f"But got a {type(iterable)}, {iterable}")
        if id(x) in seen_ids:
            # Same object listed again: it was already named on first sight.
            continue
        seen_ids.add(id(x))

        name = x.name
        if name not in orders:
            # First holder keeps its name unless a second holder shows up.
            orders[name] = [0, x]
        elif isinstance(orders[name], list):
            # Second holder: retroactively suffix the first one, then this one.
            orders[name][1].name = name + "_" + str(0)
            x.name = name + "_" + str(1)
            orders[name] = 1
        else:
            # Third and later holders take the next free suffix.
            orders[name] += 1
            x.name = name + "_" + str(orders[name])
    return tuple.__new__(ParameterTuple, data)
|
||||
|
||||
def clone(self, prefix, init='same'):
|
||||
|
@ -484,7 +498,8 @@ class ParameterTuple(tuple):
|
|||
Validator.check_str_by_regular(prefix)
|
||||
new = []
|
||||
for x in self:
|
||||
x1 = x.clone(prefix, init)
|
||||
x1 = x.clone(init)
|
||||
x1.name = prefix + "." + x1.name
|
||||
new.append(x1)
|
||||
return ParameterTuple(new)
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ import os
|
|||
from collections import OrderedDict
|
||||
import numpy
|
||||
from mindspore import log as logger
|
||||
from mindspore.common.parameter import PARAMETER_NAME_DEFAULT
|
||||
from .. import context
|
||||
from ..common import dtype as mstype
|
||||
from ..common.api import _executor, _pynative_exec
|
||||
|
@ -619,6 +620,8 @@ class Cell(Cell_):
|
|||
raise KeyError("Duplicated parameter name '{}'.".format(param_name))
|
||||
if not isinstance(param, Parameter) and param is not None:
|
||||
raise TypeError("The type of parameter should be 'Parameter' if not None.")
|
||||
if isinstance(param, Parameter) and param.name == PARAMETER_NAME_DEFAULT:
|
||||
param.name = param_name
|
||||
self._params[param_name] = param
|
||||
|
||||
def cast_param(self, param):
|
||||
|
|
|
@ -55,11 +55,11 @@ class DepthWiseConv(nn.Cell):
|
|||
self.bias_add = P.BiasAdd()
|
||||
|
||||
weight_shape = [channel_multiplier, in_planes, kernel_size, kernel_size]
|
||||
self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape), name='weight')
|
||||
self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape))
|
||||
|
||||
if has_bias:
|
||||
bias_shape = [channel_multiplier * in_planes]
|
||||
self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
|
||||
self.bias = Parameter(initializer('zeros', bias_shape))
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
|
|
|
@ -469,12 +469,12 @@ class DepthWiseConv(nn.Cell):
|
|||
self.depthwise_conv = P.Conv2D(out_channel=in_planes * 1, kernel_size=kernel_size,
|
||||
stride=stride, pad_mode="same", group=in_planes)
|
||||
self.weight = Parameter(initializer(
|
||||
weight_init, [in_planes * 1, 1, kernel_size, kernel_size]), name='depthwise_weight')
|
||||
weight_init, [in_planes * 1, 1, kernel_size, kernel_size]))
|
||||
else:
|
||||
self.depthwise_conv = P.DepthwiseConv2dNative(
|
||||
channel_multiplier=1, kernel_size=kernel_size, stride=stride, pad_mode='same',)
|
||||
self.weight = Parameter(initializer(
|
||||
weight_init, [1, in_planes, kernel_size, kernel_size]), name='depthwise_weight')
|
||||
weight_init, [1, in_planes, kernel_size, kernel_size]))
|
||||
|
||||
def construct(self, x):
|
||||
x = self.depthwise_conv(x, self.weight)
|
||||
|
|
|
@ -28,9 +28,8 @@ class DenseNoTranpose(nn.Cell):
|
|||
def __init__(self, input_channels, output_channels, weight_init):
|
||||
super(DenseNoTranpose, self).__init__()
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
|
||||
name="weight")
|
||||
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
|
||||
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
|
||||
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=False)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
|
|
@ -26,9 +26,8 @@ class DenseNoTranpose(nn.Cell):
|
|||
"""Dense method"""
|
||||
def __init__(self, input_channels, output_channels, weight_init):
|
||||
super(DenseNoTranpose, self).__init__()
|
||||
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
|
||||
name="weight")
|
||||
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
|
||||
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
|
||||
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
|
||||
self.matmul = P.MatMul(transpose_b=False)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ class THOR_GPU(Optimizer):
|
|||
Validator.check_value_type("momentum", momentum, [float], self.cls_name)
|
||||
if isinstance(momentum, float) and momentum < 0.0:
|
||||
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32))
|
||||
self.params = self.parameters
|
||||
self.use_nesterov = Validator.check_bool(use_nesterov)
|
||||
self.moments = self.params.clone(prefix="moments", init='zeros')
|
||||
|
@ -160,7 +160,7 @@ class THOR(Optimizer):
|
|||
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
|
||||
if isinstance(momentum, float) and momentum < 0.0:
|
||||
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32))
|
||||
self.params = self.parameters
|
||||
self.moments = self.params.clone(prefix="moments", init='zeros')
|
||||
self.hyper_map = C.HyperMap()
|
||||
|
|
|
@ -109,11 +109,10 @@ class _Conv(Cell):
|
|||
'attr \'group\' of \'Conv2D\' Op.')
|
||||
|
||||
self.weight = Parameter(initializer(
|
||||
weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
|
||||
weight_init, [out_channels, in_channels // group, *kernel_size]))
|
||||
|
||||
if Validator.check_bool(has_bias):
|
||||
self.bias = Parameter(_initializer(
|
||||
bias_init, [out_channels]), name='bias')
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
else:
|
||||
if bias_init != 'zeros':
|
||||
logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
|
||||
|
@ -174,12 +173,10 @@ class Conv2d_Thor_GPU(_Conv):
|
|||
|
||||
split_dim = 128
|
||||
matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.matrix_A_dim, self.matrix_G_dim, split_dim)
|
||||
self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32), requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32), requires_grad=False)
|
||||
self.broadcast_to = P.BroadcastTo(matrix_A_shape)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.shape = P.Shape()
|
||||
|
@ -195,7 +192,7 @@ class Conv2d_Thor_GPU(_Conv):
|
|||
self.axis = 0
|
||||
self.sqrt = P.Sqrt()
|
||||
self.reduce_mean = P.ReduceMean(keep_dims=False)
|
||||
self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
|
||||
self.damping = Parameter(Tensor(damping), requires_grad=False)
|
||||
self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
|
||||
self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
|
||||
self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
|
||||
|
@ -301,14 +298,14 @@ class Dense_Thor_GPU(Cell):
|
|||
weight_init.shape[1] != in_channels:
|
||||
raise ValueError("weight_init shape error")
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
|
||||
|
||||
if self.has_bias:
|
||||
if isinstance(bias_init, Tensor):
|
||||
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
|
||||
raise ValueError("bias_init shape error")
|
||||
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
@ -317,12 +314,10 @@ class Dense_Thor_GPU(Cell):
|
|||
self.activation_flag = self.activation is not None
|
||||
split_dim = 128
|
||||
matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)),
|
||||
name="matrix_G_inv", requires_grad=False)
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
|
||||
self.broadcast_to = P.BroadcastTo(matrix_A_shape)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.shape = P.Shape()
|
||||
self.reshape = P.Reshape()
|
||||
self.transpose = P.Transpose()
|
||||
|
@ -331,7 +326,7 @@ class Dense_Thor_GPU(Cell):
|
|||
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
|
||||
self.batch_size = Tensor(batch_size, mstype.float16)
|
||||
self.getG = P.InsertGradientOf(self.save_gradient)
|
||||
self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
|
||||
self.damping = Parameter(Tensor(damping), requires_grad=False)
|
||||
self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
|
||||
self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
|
||||
self.cast = P.Cast()
|
||||
|
@ -467,20 +462,20 @@ class Conv2d_Thor(_Conv):
|
|||
self.matrix_G_device_shape[3])
|
||||
self.matrix_A_inv = Parameter(
|
||||
Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
|
||||
requires_grad=False)
|
||||
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(
|
||||
Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
|
||||
name="matrix_G_inv", requires_grad=False)
|
||||
requires_grad=False)
|
||||
|
||||
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
|
||||
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
|
||||
self.fake_G = Tensor(
|
||||
np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
|
||||
|
||||
self.shape = P.Shape()
|
||||
self.reshape = P.Reshape()
|
||||
self.transpose = P.Transpose()
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.mul = P.Mul()
|
||||
self.cast = P.Cast()
|
||||
self.damping = Tensor(damping)
|
||||
|
@ -648,14 +643,14 @@ class Dense_Thor(Cell):
|
|||
weight_init.shape[1] != in_channels:
|
||||
raise ValueError("weight_init shape error")
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
|
||||
|
||||
if self.has_bias:
|
||||
if isinstance(bias_init, Tensor):
|
||||
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
|
||||
raise ValueError("bias_init shape error")
|
||||
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
@ -663,10 +658,8 @@ class Dense_Thor(Cell):
|
|||
self.activation = get_activation(activation)
|
||||
self.activation_flag = self.activation is not None
|
||||
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
|
||||
requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
|
||||
requires_grad=False)
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), requires_grad=False)
|
||||
self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
|
@ -676,7 +669,7 @@ class Dense_Thor(Cell):
|
|||
self.shape = P.Shape()
|
||||
self.reshape = P.Reshape()
|
||||
self.transpose = P.Transpose()
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.mul = P.Mul()
|
||||
self.cast = P.Cast()
|
||||
self.damping = Tensor(damping)
|
||||
|
@ -689,8 +682,8 @@ class Dense_Thor(Cell):
|
|||
self.assignadd = P.AssignAdd()
|
||||
self.freq = Tensor(frequency, mstype.int32)
|
||||
self.axis = 0
|
||||
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
|
||||
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
|
||||
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
|
||||
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
|
||||
self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
|
||||
self.fused_abs_max2 = P.CusFusedAbsMax1()
|
||||
self.log = P.Log()
|
||||
|
|
|
@ -33,13 +33,12 @@ class CTCLoss(_Loss):
|
|||
|
||||
def __init__(self, max_sequence_length, max_label_length, batch_size):
|
||||
super(CTCLoss, self).__init__()
|
||||
self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32),
|
||||
name="sequence_length")
|
||||
self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32))
|
||||
labels_indices = []
|
||||
for i in range(batch_size):
|
||||
for j in range(max_label_length):
|
||||
labels_indices.append([i, j])
|
||||
self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64), name="labels_indices")
|
||||
self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64))
|
||||
self.reshape = P.Reshape()
|
||||
self.ctc_loss = P.CTCLoss(ctc_merge_repeated=True)
|
||||
|
||||
|
|
|
@ -45,12 +45,10 @@ class StackedRNN(nn.Cell):
|
|||
self.rnn1 = P.DynamicRNN(forget_bias=0.0)
|
||||
self.rnn2 = P.DynamicRNN(forget_bias=0.0)
|
||||
|
||||
self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16),
|
||||
name="w1")
|
||||
self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16),
|
||||
name="w2")
|
||||
self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b1")
|
||||
self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b2")
|
||||
self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16))
|
||||
self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16))
|
||||
self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
|
||||
self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
|
||||
|
||||
self.h1 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
|
||||
self.h2 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
|
||||
|
@ -98,7 +96,7 @@ class StackedRNNForGPU(nn.Cell):
|
|||
self.cast = P.Cast()
|
||||
k = (1 / hidden_size) ** 0.5
|
||||
weight_shape = 4 * hidden_size * (input_size + 3 * hidden_size + 4)
|
||||
self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32), name='weight')
|
||||
self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32))
|
||||
self.h = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))
|
||||
self.c = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))
|
||||
|
||||
|
|
|
@ -39,7 +39,6 @@ class MeanConv(nn.Cell):
|
|||
"""
|
||||
|
||||
def __init__(self,
|
||||
name,
|
||||
feature_in_dim,
|
||||
feature_out_dim,
|
||||
activation,
|
||||
|
@ -47,8 +46,7 @@ class MeanConv(nn.Cell):
|
|||
super(MeanConv, self).__init__()
|
||||
|
||||
self.out_weight = Parameter(
|
||||
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
|
||||
name=name + 'out_weight')
|
||||
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
|
||||
|
||||
if activation == "tanh":
|
||||
self.act = P.Tanh()
|
||||
|
@ -90,15 +88,13 @@ class AttenConv(nn.Cell):
|
|||
"""
|
||||
|
||||
def __init__(self,
|
||||
name,
|
||||
feature_in_dim,
|
||||
feature_out_dim,
|
||||
dropout=0.2):
|
||||
super(AttenConv, self).__init__()
|
||||
|
||||
self.out_weight = Parameter(
|
||||
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
|
||||
name=name + 'out_weight')
|
||||
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
|
||||
self.cast = P.Cast()
|
||||
self.squeeze = P.Squeeze(1)
|
||||
self.concat = P.Concat(axis=1)
|
||||
|
@ -147,10 +143,8 @@ class BGCF(nn.Cell):
|
|||
input_dim):
|
||||
super(BGCF, self).__init__()
|
||||
|
||||
self.user_embeddings = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32),
|
||||
name='user_embed')
|
||||
self.item_embeddings = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32),
|
||||
name='item_embed')
|
||||
self.user_embed = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32))
|
||||
self.item_embed = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32))
|
||||
self.cast = P.Cast()
|
||||
self.tanh = P.Tanh()
|
||||
self.shape = P.Shape()
|
||||
|
@ -163,30 +157,27 @@ class BGCF(nn.Cell):
|
|||
(self.input_dim, self.num_user, self.num_item) = dataset_argv
|
||||
self.layer_dim = architect_argv
|
||||
|
||||
self.gnew_agg_mean = MeanConv('gnew_agg_mean', self.input_dim, self.layer_dim,
|
||||
self.gnew_agg_mean = MeanConv(self.input_dim, self.layer_dim,
|
||||
activation=activation, dropout=neigh_drop_rate[1])
|
||||
self.gnew_agg_mean.to_float(mstype.float16)
|
||||
|
||||
self.gnew_agg_user = AttenConv('gnew_agg_att_user', self.input_dim,
|
||||
self.layer_dim, dropout=neigh_drop_rate[2])
|
||||
self.gnew_agg_user = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
|
||||
self.gnew_agg_user.to_float(mstype.float16)
|
||||
|
||||
self.gnew_agg_item = AttenConv('gnew_agg_att_item', self.input_dim,
|
||||
self.layer_dim, dropout=neigh_drop_rate[2])
|
||||
self.gnew_agg_item = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
|
||||
self.gnew_agg_item.to_float(mstype.float16)
|
||||
|
||||
self.user_feature_dim = self.input_dim
|
||||
self.item_feature_dim = self.input_dim
|
||||
|
||||
self.final_weight = Parameter(
|
||||
initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32),
|
||||
name='final_weight')
|
||||
initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32))
|
||||
|
||||
self.raw_agg_funcs_user = MeanConv('raw_agg_user', self.input_dim, self.layer_dim,
|
||||
self.raw_agg_funcs_user = MeanConv(self.input_dim, self.layer_dim,
|
||||
activation=activation, dropout=neigh_drop_rate[0])
|
||||
self.raw_agg_funcs_user.to_float(mstype.float16)
|
||||
|
||||
self.raw_agg_funcs_item = MeanConv('raw_agg_item', self.input_dim, self.layer_dim,
|
||||
self.raw_agg_funcs_item = MeanConv(self.input_dim, self.layer_dim,
|
||||
activation=activation, dropout=neigh_drop_rate[0])
|
||||
self.raw_agg_funcs_item.to_float(mstype.float16)
|
||||
|
||||
|
@ -207,14 +198,14 @@ class BGCF(nn.Cell):
|
|||
neg_gnew_neighs,
|
||||
neg_item_num):
|
||||
"""Aggregate user and item embeddings"""
|
||||
all_user_embed = self.gather(self.user_embeddings, self.concat_0((u_id, pos_users)), 0)
|
||||
all_user_embed = self.gather(self.user_embed, self.concat_0((u_id, pos_users)), 0)
|
||||
|
||||
u_self_matrix_at_layers = self.gather(self.user_embeddings, u_group_nodes, 0)
|
||||
u_neigh_matrix_at_layers = self.gather(self.item_embeddings, u_neighs, 0)
|
||||
u_self_matrix_at_layers = self.gather(self.user_embed, u_group_nodes, 0)
|
||||
u_neigh_matrix_at_layers = self.gather(self.item_embed, u_neighs, 0)
|
||||
|
||||
u_output_mean = self.raw_agg_funcs_user(u_self_matrix_at_layers, u_neigh_matrix_at_layers)
|
||||
|
||||
u_gnew_neighs_matrix = self.gather(self.item_embeddings, u_gnew_neighs, 0)
|
||||
u_gnew_neighs_matrix = self.gather(self.item_embed, u_gnew_neighs, 0)
|
||||
u_output_from_gnew_mean = self.gnew_agg_mean(u_self_matrix_at_layers, u_gnew_neighs_matrix)
|
||||
|
||||
u_output_from_gnew_att = self.gnew_agg_user(u_self_matrix_at_layers,
|
||||
|
@ -223,14 +214,14 @@ class BGCF(nn.Cell):
|
|||
u_output = self.concat_1((u_output_mean, u_output_from_gnew_mean, u_output_from_gnew_att))
|
||||
all_user_rep = self.tanh(u_output)
|
||||
|
||||
all_pos_item_embed = self.gather(self.item_embeddings, self.concat_0((pos_item_id, pos_items)), 0)
|
||||
all_pos_item_embed = self.gather(self.item_embed, self.concat_0((pos_item_id, pos_items)), 0)
|
||||
|
||||
i_self_matrix_at_layers = self.gather(self.item_embeddings, i_group_nodes, 0)
|
||||
i_neigh_matrix_at_layers = self.gather(self.user_embeddings, i_neighs, 0)
|
||||
i_self_matrix_at_layers = self.gather(self.item_embed, i_group_nodes, 0)
|
||||
i_neigh_matrix_at_layers = self.gather(self.user_embed, i_neighs, 0)
|
||||
|
||||
i_output_mean = self.raw_agg_funcs_item(i_self_matrix_at_layers, i_neigh_matrix_at_layers)
|
||||
|
||||
i_gnew_neighs_matrix = self.gather(self.user_embeddings, i_gnew_neighs, 0)
|
||||
i_gnew_neighs_matrix = self.gather(self.user_embed, i_gnew_neighs, 0)
|
||||
i_output_from_gnew_mean = self.gnew_agg_mean(i_self_matrix_at_layers, i_gnew_neighs_matrix)
|
||||
|
||||
i_output_from_gnew_att = self.gnew_agg_item(i_self_matrix_at_layers,
|
||||
|
@ -239,14 +230,14 @@ class BGCF(nn.Cell):
|
|||
i_output = self.concat_1((i_output_mean, i_output_from_gnew_mean, i_output_from_gnew_att))
|
||||
all_pos_item_rep = self.tanh(i_output)
|
||||
|
||||
neg_item_embed = self.gather(self.item_embeddings, neg_item_id, 0)
|
||||
neg_item_embed = self.gather(self.item_embed, neg_item_id, 0)
|
||||
|
||||
neg_self_matrix_at_layers = self.gather(self.item_embeddings, neg_group_nodes, 0)
|
||||
neg_neigh_matrix_at_layers = self.gather(self.user_embeddings, neg_neighs, 0)
|
||||
neg_self_matrix_at_layers = self.gather(self.item_embed, neg_group_nodes, 0)
|
||||
neg_neigh_matrix_at_layers = self.gather(self.user_embed, neg_neighs, 0)
|
||||
|
||||
neg_output_mean = self.raw_agg_funcs_item(neg_self_matrix_at_layers, neg_neigh_matrix_at_layers)
|
||||
|
||||
neg_gnew_neighs_matrix = self.gather(self.user_embeddings, neg_gnew_neighs, 0)
|
||||
neg_gnew_neighs_matrix = self.gather(self.user_embed, neg_gnew_neighs, 0)
|
||||
neg_output_from_gnew_mean = self.gnew_agg_mean(neg_self_matrix_at_layers, neg_gnew_neighs_matrix)
|
||||
|
||||
neg_output_from_gnew_att = self.gnew_agg_item(neg_self_matrix_at_layers,
|
||||
|
|
|
@ -80,14 +80,14 @@ class GNNFeatureTransform(nn.Cell):
|
|||
weight_init.shape[1] != in_channels:
|
||||
raise ValueError("weight_init shape error")
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
|
||||
|
||||
if self.has_bias:
|
||||
if isinstance(bias_init, Tensor):
|
||||
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
|
||||
raise ValueError("bias_init shape error")
|
||||
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
@ -280,7 +280,7 @@ class AttentionHead(nn.Cell):
|
|||
self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
|
||||
self.matmul = P.MatMul()
|
||||
self.bias_add = P.BiasAdd()
|
||||
self.bias = Parameter(initializer('zeros', self.out_channel), name='bias')
|
||||
self.bias = Parameter(initializer('zeros', self.out_channel))
|
||||
self.residual = residual
|
||||
if self.residual:
|
||||
if in_channel != out_channel:
|
||||
|
|
|
@ -80,8 +80,8 @@ class BertPretrainEva(nn.Cell):
|
|||
self.equal = P.Equal()
|
||||
self.mean = P.ReduceMean()
|
||||
self.sum = P.ReduceSum()
|
||||
self.total = Parameter(Tensor([0], mstype.float32), name='total')
|
||||
self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
|
||||
self.total = Parameter(Tensor([0], mstype.float32))
|
||||
self.acc = Parameter(Tensor([0], mstype.float32))
|
||||
self.reshape = P.Reshape()
|
||||
self.shape = P.Shape()
|
||||
self.cast = P.Cast()
|
||||
|
|
|
@ -52,7 +52,7 @@ class CRF(nn.Cell):
|
|||
transitions = np.random.normal(size=(self.target_size, self.target_size)).astype(np.float32)
|
||||
transitions[tag_to_index[self.START_TAG], :] = -10000
|
||||
transitions[:, tag_to_index[self.STOP_TAG]] = -10000
|
||||
self.transitions = Parameter(Tensor(transitions), name="transition_matrix")
|
||||
self.transitions = Parameter(Tensor(transitions))
|
||||
self.cat = P.Concat(axis=-1)
|
||||
self.argmax = P.ArgMaxWithValue(axis=-1)
|
||||
self.log = P.Log()
|
||||
|
|
|
@ -90,8 +90,7 @@ class BertFinetuneCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
def construct(self,
|
||||
input_ids,
|
||||
|
@ -185,8 +184,8 @@ class BertSquadCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
def construct(self,
|
||||
input_ids,
|
||||
input_mask,
|
||||
|
@ -306,9 +305,9 @@ class BertSquad(nn.Cell):
|
|||
self.num_labels = num_labels
|
||||
self.seq_length = config.seq_length
|
||||
self.is_training = is_training
|
||||
self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num')
|
||||
self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num')
|
||||
self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num')
|
||||
self.total_num = Parameter(Tensor([0], mstype.float32))
|
||||
self.start_num = Parameter(Tensor([0], mstype.float32))
|
||||
self.end_num = Parameter(Tensor([0], mstype.float32))
|
||||
self.sum = P.ReduceSum()
|
||||
self.equal = P.Equal()
|
||||
self.argmax = P.ArgMaxWithValue(axis=1)
|
||||
|
|
|
@ -84,8 +84,7 @@ class GetMaskedLMOutput(nn.Cell):
|
|||
self.output_bias = Parameter(
|
||||
initializer(
|
||||
'zero',
|
||||
config.vocab_size),
|
||||
name='output_bias')
|
||||
config.vocab_size))
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.log_softmax = nn.LogSoftmax(axis=-1)
|
||||
self.shape_flat_offsets = (-1, 1)
|
||||
|
@ -359,8 +358,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
@ -465,10 +463,10 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
|
|||
self.enable_global_norm = enable_global_norm
|
||||
self.one = Tensor(np.array([1]).astype(np.int32))
|
||||
self.zero = Tensor(np.array([0]).astype(np.int32))
|
||||
self.local_step = Parameter(initializer(0, [1], mstype.int32), name="local_step")
|
||||
self.local_step = Parameter(initializer(0, [1], mstype.int32))
|
||||
self.accu_grads = self.weights.clone(prefix="accu_grads", init='zeros')
|
||||
self.accu_overflow = Parameter(initializer(0, [1], mstype.int32), name="accu_overflow")
|
||||
self.loss = Parameter(initializer(0, [1], mstype.float32), name="accu_loss")
|
||||
self.accu_overflow = Parameter(initializer(0, [1], mstype.int32))
|
||||
self.accu_loss = Parameter(initializer(0, [1], mstype.float32))
|
||||
|
||||
self.grad = C.GradOperation(get_by_list=True, sens_param=True)
|
||||
self.reducer_flag = False
|
||||
|
@ -499,8 +497,7 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
@ -529,8 +526,8 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
|
|||
# update accumulation parameters
|
||||
is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)
|
||||
self.local_step = self.select(is_accu_step, self.local_step + self.one, self.one)
|
||||
self.loss = self.select(is_accu_step, self.loss + loss, loss)
|
||||
mean_loss = self.loss / self.local_step
|
||||
self.accu_loss = self.select(is_accu_step, self.accu_loss + loss, loss)
|
||||
mean_loss = self.accu_loss / self.local_step
|
||||
is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)
|
||||
|
||||
# alloc status and clear should be right before gradoperation
|
||||
|
|
|
@ -110,8 +110,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[vocab_size, embedding_size]),
|
||||
name='embedding_table')
|
||||
[vocab_size, embedding_size]))
|
||||
self.expand = P.ExpandDims()
|
||||
self.shape_flat = (-1,)
|
||||
self.gather = P.GatherV2()
|
||||
|
@ -170,8 +169,7 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[token_type_vocab_size,
|
||||
embedding_size]),
|
||||
name='embedding_table')
|
||||
embedding_size]))
|
||||
|
||||
self.shape_flat = (-1,)
|
||||
self.one_hot = P.OneHot()
|
||||
|
@ -188,8 +186,7 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
self.full_position_embeddings = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[max_position_embeddings,
|
||||
embedding_size]),
|
||||
name='full_position_embeddings')
|
||||
embedding_size]))
|
||||
|
||||
def construct(self, token_type_ids, word_embeddings):
|
||||
"""Postprocessors apply positional and token type embeddings to word embeddings."""
|
||||
|
@ -314,8 +311,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
|
|||
|
||||
self.embeddings_table = Parameter(
|
||||
initializer(TruncatedNormal(initializer_range),
|
||||
[self.vocab_size, self.depth]),
|
||||
name='embeddings_for_position')
|
||||
[self.vocab_size, self.depth]))
|
||||
|
||||
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
|
||||
max_relative_position=max_relative_position)
|
||||
|
|
|
@ -86,8 +86,8 @@ class BertPretrainEva(nn.Cell):
|
|||
self.equal = P.Equal()
|
||||
self.mean = P.ReduceMean()
|
||||
self.sum = P.ReduceSum()
|
||||
self.total = Parameter(Tensor([0], mstype.float32), name='total')
|
||||
self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
|
||||
self.total = Parameter(Tensor([0], mstype.float32))
|
||||
self.acc = Parameter(Tensor([0], mstype.float32))
|
||||
self.reshape = P.Reshape()
|
||||
self.shape = P.Shape()
|
||||
self.cast = P.Cast()
|
||||
|
|
|
@ -98,8 +98,7 @@ class GetMaskedLMOutput(nn.Cell):
|
|||
self.output_bias = Parameter(
|
||||
initializer(
|
||||
'zero',
|
||||
config.vocab_size),
|
||||
name='output_bias')
|
||||
config.vocab_size))
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.log_softmax = nn.LogSoftmax(axis=-1)
|
||||
self.shape_flat_offsets = (-1, 1)
|
||||
|
@ -379,8 +378,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
|
|
@ -136,8 +136,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[vocab_size, embedding_size]),
|
||||
name='embedding_table')
|
||||
[vocab_size, embedding_size]))
|
||||
self.expand = P.ExpandDims()
|
||||
self.shape_flat = (-1,)
|
||||
self.gather = P.GatherV2()
|
||||
|
@ -200,7 +199,6 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
embedding_shape=embedding_shape,
|
||||
use_one_hot_embeddings=use_one_hot_embeddings,
|
||||
initializer_range=initializer_range,
|
||||
name='embedding_table',
|
||||
batch_size=batch_size,
|
||||
damping=damping,
|
||||
loss_scale=loss_scale,
|
||||
|
@ -224,7 +222,6 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
embedding_shape=position_embedding_shape,
|
||||
use_one_hot_embeddings=use_one_hot_embeddings,
|
||||
initializer_range=initializer_range,
|
||||
name='full_position_embeddings',
|
||||
batch_size=batch_size,
|
||||
damping=damping,
|
||||
loss_scale=loss_scale,
|
||||
|
@ -363,8 +360,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
|
|||
|
||||
self.embeddings_table = Parameter(
|
||||
initializer(TruncatedNormal(initializer_range),
|
||||
[self.vocab_size, self.depth]),
|
||||
name='embeddings_for_position')
|
||||
[self.vocab_size, self.depth]))
|
||||
|
||||
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
|
||||
max_relative_position=max_relative_position)
|
||||
|
@ -944,7 +940,6 @@ class BertModel(nn.Cell):
|
|||
embedding_shape=output_embedding_shape,
|
||||
use_one_hot_embeddings=use_one_hot_embeddings,
|
||||
initializer_range=config.initializer_range,
|
||||
name='embedding_table',
|
||||
batch_size=batch_size,
|
||||
damping=damping,
|
||||
loss_scale=loss_scale,
|
||||
|
|
|
@ -94,9 +94,9 @@ class FusedLayerNorm(Cell):
|
|||
self.begin_norm_axis = begin_norm_axis
|
||||
self.begin_params_axis = begin_params_axis
|
||||
self.gamma = Parameter(initializer(
|
||||
gamma_init, normalized_shape), name="gamma")
|
||||
gamma_init, normalized_shape))
|
||||
self.beta = Parameter(initializer(
|
||||
beta_init, normalized_shape), name="beta")
|
||||
beta_init, normalized_shape))
|
||||
self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis)
|
||||
|
||||
self.batch_norm = P.BatchNorm(is_training=True, epsilon=1e-5)
|
||||
|
|
|
@ -52,7 +52,7 @@ class THOR(Optimizer):
|
|||
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
|
||||
if isinstance(momentum, float) and momentum < 0.0:
|
||||
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32))
|
||||
self.params = self.parameters
|
||||
self.moments = self.params.clone(prefix="moments", init='zeros')
|
||||
self.hyper_map = C.HyperMap()
|
||||
|
@ -80,7 +80,7 @@ class THOR(Optimizer):
|
|||
self.batch_size = batch_size
|
||||
self.damping = damping
|
||||
self.one = Tensor(1, mstype.int32)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
|
||||
def construct(self, gradients):
|
||||
"""construct of THOR"""
|
||||
|
|
|
@ -54,7 +54,7 @@ class THOR(Optimizer):
|
|||
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
|
||||
if isinstance(momentum, float) and momentum < 0.0:
|
||||
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
|
||||
self.momentum = Parameter(Tensor(momentum, mstype.float32))
|
||||
self.params = self.parameters
|
||||
self.moments = self.params.clone(prefix="moments", init='zeros')
|
||||
self.hyper_map = C.HyperMap()
|
||||
|
@ -82,7 +82,7 @@ class THOR(Optimizer):
|
|||
self.batch_size = batch_size
|
||||
self.damping = damping
|
||||
self.one = Tensor(1, mstype.int32)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
mean = _get_gradients_mean()
|
||||
degree = _get_device_num()
|
||||
self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)
|
||||
|
|
|
@ -41,7 +41,6 @@ class Embedding_Thor(Cell):
|
|||
embedding_shape,
|
||||
use_one_hot_embeddings=False,
|
||||
initializer_range=0.02,
|
||||
name='embedding_table',
|
||||
batch_size=12,
|
||||
damping=0.03,
|
||||
loss_scale=1,
|
||||
|
@ -52,8 +51,7 @@ class Embedding_Thor(Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[vocab_size, embedding_size]),
|
||||
name=name)
|
||||
[vocab_size, embedding_size]))
|
||||
self.thor = True
|
||||
self.expand = P.ExpandDims()
|
||||
self.shape_flat = (-1,)
|
||||
|
@ -67,14 +65,13 @@ class Embedding_Thor(Cell):
|
|||
self.shape = P.Shape()
|
||||
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
|
||||
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)), requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
|
||||
name="matrix_G_inv", requires_grad=False)
|
||||
requires_grad=False)
|
||||
self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
|
||||
self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
|
||||
self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.freq = Tensor(frequency, mstype.int32)
|
||||
self.axis = 0
|
||||
self.damping = damping
|
||||
|
@ -169,14 +166,14 @@ class Dense_Thor(Cell):
|
|||
weight_init.shape()[1] != in_channels:
|
||||
raise ValueError("weight_init shape error")
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
|
||||
|
||||
if self.has_bias:
|
||||
if isinstance(bias_init, Tensor):
|
||||
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
|
||||
raise ValueError("bias_init shape error")
|
||||
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
@ -184,9 +181,9 @@ class Dense_Thor(Cell):
|
|||
self.activation = get_activation(activation)
|
||||
self.activation_flag = self.activation is not None
|
||||
self.matrix_A_inv = Parameter(Tensor(np.zeros([in_channels, in_channels]).astype(np.float16)),
|
||||
name='matrix_A_inv', requires_grad=False)
|
||||
requires_grad=False)
|
||||
self.matrix_G_inv = Parameter(Tensor(np.zeros([out_channels, out_channels]).astype(np.float16)),
|
||||
name="matrix_G_inv", requires_grad=False)
|
||||
requires_grad=False)
|
||||
self.fake_G = Tensor(np.zeros([out_channels, out_channels]).astype(np.float16))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
|
@ -196,7 +193,7 @@ class Dense_Thor(Cell):
|
|||
self.shape = P.Shape()
|
||||
self.reshape = P.Reshape()
|
||||
self.transpose = P.Transpose()
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
|
||||
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
|
||||
self.mul = P.Mul()
|
||||
self.cast = P.Cast()
|
||||
self.damping = damping
|
||||
|
|
|
@ -57,11 +57,10 @@ class BahdanauAttention(nn.Cell):
|
|||
self.normalize = normalize
|
||||
self.num_units = num_units
|
||||
self.linear_att = Parameter(Tensor(np.random.uniform(-initializer_range, initializer_range, size=[num_units]),
|
||||
dtype=mstype.float32), name='linear_att')
|
||||
dtype=mstype.float32))
|
||||
if self.normalize:
|
||||
self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32),
|
||||
name='normalize_scalar')
|
||||
self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32), name='normalize_bias')
|
||||
self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32))
|
||||
self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32))
|
||||
self.transpose = P.Transpose()
|
||||
self.transpose_orders = (1, 0, 2)
|
||||
self.shape_op = P.Shape()
|
||||
|
|
|
@ -49,10 +49,10 @@ class DynamicRNNCell(nn.Cell):
|
|||
# w
|
||||
dynamicRNN_w = np.random.uniform(-initializer_range, initializer_range,
|
||||
size=[self.input_size + self.hidden_size, 4 * self.hidden_size])
|
||||
self.dynamicRNN_w = Parameter(Tensor(dynamicRNN_w, mstype.float32), name='weight')
|
||||
self.dynamicRNN_w = Parameter(Tensor(dynamicRNN_w, mstype.float32))
|
||||
# b
|
||||
dynamicRNN_b = np.random.uniform(-initializer_range, initializer_range, size=[4 * self.hidden_size])
|
||||
self.dynamicRNN_b = Parameter(Tensor(dynamicRNN_b, mstype.float32), name='bias')
|
||||
self.dynamicRNN_b = Parameter(Tensor(dynamicRNN_b, mstype.float32))
|
||||
|
||||
self.dynamicRNN_h = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32)
|
||||
self.dynamicRNN_c = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32)
|
||||
|
|
|
@ -48,8 +48,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
|
||||
init_weight = np.random.normal(-initializer_range, initializer_range, size=[vocab_size, embed_dim])
|
||||
self.embedding_table = Parameter(Tensor(init_weight, mstype.float32),
|
||||
name='embedding_table')
|
||||
self.embedding_table = Parameter(Tensor(init_weight, mstype.float32))
|
||||
self.expand = P.ExpandDims()
|
||||
self.gather = P.GatherV2()
|
||||
self.one_hot = P.OneHot()
|
||||
|
|
|
@ -253,8 +253,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
self.add_flags(has_effect=True)
|
||||
|
||||
self.loss_scalar = P.ScalarSummary()
|
||||
|
|
|
@ -217,8 +217,8 @@ class Adam(Optimizer):
|
|||
|
||||
self.beta1 = Tensor(beta1, mstype.float32)
|
||||
self.beta2 = Tensor(beta2, mstype.float32)
|
||||
self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
|
||||
self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power")
|
||||
self.beta1_power = Parameter(initializer(1, [1], mstype.float32))
|
||||
self.beta2_power = Parameter(initializer(1, [1], mstype.float32))
|
||||
self.eps = eps
|
||||
|
||||
self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
|
||||
|
@ -377,7 +377,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
|
|||
_check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
|
||||
_check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name)
|
||||
# turn them to scalar when me support scalar/tensor mix operations
|
||||
self.global_step = Parameter(initializer(0, [1]), name="global_step")
|
||||
self.global_step = Parameter(initializer(0, [1]))
|
||||
self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32))
|
||||
self.warmup_flag = False
|
||||
if warmup_steps > 0:
|
||||
|
|
|
@ -41,8 +41,8 @@ class LayerNorm(nn.Cell):
|
|||
"""
|
||||
def __init__(self, normalized_shape, eps=1e-5):
|
||||
super(LayerNorm, self).__init__()
|
||||
self.gamma = Parameter(initializer('ones', normalized_shape), name="gamma")
|
||||
self.beta = Parameter(initializer('zeros', normalized_shape), name="beta")
|
||||
self.gamma = Parameter(initializer('ones', normalized_shape))
|
||||
self.beta = Parameter(initializer('zeros', normalized_shape))
|
||||
self.mean = P.ReduceMean(keep_dims=True)
|
||||
self.eps = eps
|
||||
|
||||
|
@ -100,8 +100,8 @@ class Mapping(nn.Cell):
|
|||
super(Mapping, self).__init__()
|
||||
self.output_size = output_size
|
||||
self.input_size = input_size
|
||||
self.weight = Parameter(initializer(Normal(sigma=0.02*scale), [input_size, output_size]), name="mapping_weight")
|
||||
self.bias = Parameter(initializer("zeros", [output_size,]), name="mapping_bias")
|
||||
self.weight = Parameter(initializer(Normal(sigma=0.02*scale), [input_size, output_size]))
|
||||
self.bias = Parameter(initializer("zeros", [output_size,]))
|
||||
self.dtype = dtype
|
||||
self.cast = P.Cast()
|
||||
|
||||
|
@ -194,8 +194,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
super(EmbeddingLookup, self).__init__()
|
||||
self.vocab_size = config.vocab_size
|
||||
self.embedding_size = config.embedding_size
|
||||
self.embedding_table = Parameter(initializer(TruncatedNormal(0.02), [self.vocab_size, self.embedding_size]),
|
||||
name="embedding_table")
|
||||
self.embedding_table = Parameter(initializer(TruncatedNormal(0.02), [self.vocab_size, self.embedding_size]))
|
||||
self.gather = P.GatherV2()
|
||||
self.shape = (-1, config.seq_length, config.embedding_size)
|
||||
def construct(self, input_ids):
|
||||
|
|
|
@ -106,8 +106,7 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
|
|
@ -44,8 +44,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32)
|
||||
# 0 is Padding index, thus init it as 0.
|
||||
init_weight[0, :] = 0
|
||||
self.embedding_table = Parameter(Tensor(init_weight),
|
||||
name='embedding_table')
|
||||
self.embedding_table = Parameter(Tensor(init_weight))
|
||||
self.expand = P.ExpandDims()
|
||||
self.gather = P.GatherV2()
|
||||
self.one_hot = P.OneHot()
|
||||
|
|
|
@ -277,8 +277,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
self.add_flags(has_effect=True)
|
||||
|
||||
def construct(self,
|
||||
|
|
|
@ -44,8 +44,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32)
|
||||
# 0 is Padding index, thus init it as 0.
|
||||
init_weight[0, :] = 0
|
||||
self.embedding_table = Parameter(Tensor(init_weight),
|
||||
name='embedding_table')
|
||||
self.embedding_table = Parameter(Tensor(init_weight))
|
||||
self.expand = P.ExpandDims()
|
||||
self.gather = P.GatherV2()
|
||||
self.one_hot = P.OneHot()
|
||||
|
|
|
@ -243,8 +243,7 @@ class BertTrainWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
@ -497,8 +496,7 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
|
|
@ -110,8 +110,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[vocab_size, embedding_size]),
|
||||
name='embedding_table')
|
||||
[vocab_size, embedding_size]))
|
||||
self.expand = P.ExpandDims()
|
||||
self.shape_flat = (-1,)
|
||||
self.gather = P.GatherV2()
|
||||
|
@ -170,8 +169,7 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
self.embedding_table = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[token_type_vocab_size,
|
||||
embedding_size]),
|
||||
name='embedding_table')
|
||||
embedding_size]))
|
||||
self.shape_flat = (-1,)
|
||||
self.one_hot = P.OneHot()
|
||||
self.on_value = Tensor(1.0, mstype.float32)
|
||||
|
@ -187,8 +185,7 @@ class EmbeddingPostprocessor(nn.Cell):
|
|||
self.full_position_embeddings = Parameter(initializer
|
||||
(TruncatedNormal(initializer_range),
|
||||
[max_position_embeddings,
|
||||
embedding_size]),
|
||||
name='full_position_embeddings')
|
||||
embedding_size]))
|
||||
|
||||
def construct(self, token_type_ids, word_embeddings):
|
||||
"""embedding postprocessor"""
|
||||
|
@ -317,8 +314,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
|
|||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embeddings_table = Parameter(
|
||||
initializer(TruncatedNormal(initializer_range),
|
||||
[self.vocab_size, self.depth]),
|
||||
name='embeddings_for_position')
|
||||
[self.vocab_size, self.depth]))
|
||||
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
|
||||
max_relative_position=max_relative_position)
|
||||
self.reshape = P.Reshape()
|
||||
|
|
|
@ -291,8 +291,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
|
|||
self.loss_scale = None
|
||||
self.loss_scaling_manager = scale_update_cell
|
||||
if scale_update_cell:
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
|
||||
name="loss_scale")
|
||||
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
|
||||
|
||||
@C.add_flags(has_effect=True)
|
||||
def construct(self,
|
||||
|
|
|
@ -115,8 +115,7 @@ class EmbeddingLookup(nn.Cell):
|
|||
self.vocab_size = vocab_size
|
||||
self.embedding_size = embedding_size
|
||||
self.use_one_hot_embeddings = use_one_hot_embeddings
|
||||
self.embedding_table = Parameter(normal_weight([vocab_size, embedding_size], embedding_size),
|
||||
name='embedding_table')
|
||||
self.embedding_table = Parameter(normal_weight([vocab_size, embedding_size], embedding_size))
|
||||
self.expand = P.ExpandDims()
|
||||
self.shape_flat = (-1,)
|
||||
self.gather = P.GatherV2()
|
||||
|
|
|
@ -47,14 +47,14 @@ class DenseLayer(nn.Cell):
|
|||
weight_init.shape()[1] != in_channels:
|
||||
raise ValueError("weight_init shape error")
|
||||
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
|
||||
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
|
||||
|
||||
if self.has_bias:
|
||||
if isinstance(bias_init, Tensor):
|
||||
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
|
||||
raise ValueError("bias_init shape error")
|
||||
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
|
||||
self.bias = Parameter(initializer(bias_init, [out_channels]))
|
||||
|
||||
self.matmul = P.MatMul(transpose_b=True)
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
|
|
@ -35,10 +35,10 @@ class QuanConv(nn.Conv2d):
|
|||
self.x_upper_bound = Tensor(2 ** 8 - 1, ms.float32)
|
||||
self.w_lower_bound = Tensor(-2 ** 7 - 1, ms.float32)
|
||||
self.w_upper_bound = Tensor(2 ** 7, ms.float32)
|
||||
self.scale_a = Parameter(initializer('ones', [1]), name='scale_a')
|
||||
self.scale_a = Parameter(initializer('ones', [1]))
|
||||
self.scale_w = Parameter(initializer(
|
||||
'ones', [out_channels]), name='scale_w')
|
||||
self.zp_a = Parameter(initializer('ones', [1]), name='zp_a')
|
||||
'ones', [out_channels]))
|
||||
self.zp_a = Parameter(initializer('ones', [1]))
|
||||
|
||||
def construct(self, in_data):
|
||||
r"""construct of QuantConv"""
|
||||
|
|
|
@ -119,12 +119,12 @@ class DepthwiseConv(nn.Cell):
|
|||
self.bias_add = P.BiasAdd()
|
||||
weight_shape = [channel_multiplier, in_planes, *self.kernel_size]
|
||||
self.weight = Parameter(initializer(
|
||||
'ones', weight_shape), name='weight')
|
||||
'ones', weight_shape))
|
||||
|
||||
if has_bias:
|
||||
bias_shape = [channel_multiplier * in_planes]
|
||||
self.bias = Parameter(initializer(
|
||||
'zeros', bias_shape), name='bias')
|
||||
'zeros', bias_shape))
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
|
|
|
@ -499,7 +499,7 @@ class DepthWiseConv(nn.Cell):
|
|||
group=in_planes)
|
||||
|
||||
self.weight = Parameter(initializer(weight_init,
|
||||
[in_planes*1, 1, kernel_size, kernel_size]), name='depthwise_weight')
|
||||
[in_planes*1, 1, kernel_size, kernel_size]))
|
||||
|
||||
else:
|
||||
self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=1,
|
||||
|
@ -508,7 +508,7 @@ class DepthWiseConv(nn.Cell):
|
|||
pad=int(kernel_size/2))
|
||||
|
||||
self.weight = Parameter(initializer(weight_init,
|
||||
[1, in_planes, kernel_size, kernel_size]), name='depthwise_weight')
|
||||
[1, in_planes, kernel_size, kernel_size]))
|
||||
|
||||
def construct(self, x):
|
||||
x = self.depthwise_conv(x, self.weight)
|
||||
|
|
|
@ -31,11 +31,11 @@ class DepthWiseConv(nn.Cell):
|
|||
self.bias_add = P.BiasAdd()
|
||||
|
||||
weight_shape = [channel_multiplier, in_planes, kernel_size[0], kernel_size[1]]
|
||||
self.weight = Parameter(initializer('ones', weight_shape), name='weight')
|
||||
self.weight = Parameter(initializer('ones', weight_shape))
|
||||
|
||||
if has_bias:
|
||||
bias_shape = [channel_multiplier * in_planes]
|
||||
self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
|
||||
self.bias = Parameter(initializer('zeros', bias_shape))
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
|
|
Loading…
Reference in New Issue