!9257 make parameter name optional

From: @caozhou_huawei
Reviewed-by: @kingxian,@zh_qh
Signed-off-by: @kingxian
This commit is contained in:
mindspore-ci-bot 2020-12-02 14:06:18 +08:00 committed by Gitee
commit 825ba197f2
43 changed files with 174 additions and 211 deletions

View File

@ -30,6 +30,7 @@ __all__ = ['Parameter', 'ParameterTuple']
PARAMETER_NAME_DEFAULT = "Parameter"
PARAMETER_NAME_PREFIX_MAX_LEN = 1024
def _is_in_parallel_mode():
"""Get parallel mode."""
return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"]
@ -51,10 +52,12 @@ class Parameter(MetaTensor_):
A Parameter has to belong to a Cell.
If there is an operator in the network that requires part of the inputs to be Parameter,
then the Parameters as this part of the inputs are not allowed to be cast.
It is recommended to use the default value of `name` when initializing a parameter as an attribute of a cell;
otherwise, the parameter name may be different from what is expected.
Args:
default_input (Union[Tensor, MetaTensor, Number]): Parameter data, used to initialize the parameter.
name (str): Name of the child parameter.
name (str): Name of the child parameter. Default: None.
requires_grad (bool): True if the parameter requires gradient. Default: True.
layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
broadcast and gradients communication would not be applied to parameters. Default: False.
@ -72,7 +75,7 @@ class Parameter(MetaTensor_):
>>> def __init__(self):
>>> super(Net, self).__init__()
>>> self.matmul = P.MatMul()
>>> self.weight = Parameter(Tensor(np.ones((1,2))), name="w", requires_grad=True)
>>> self.weight = Parameter(Tensor(np.ones((1,2))), requires_grad=True)
>>>
>>> def construct(self, x):
>>> out = self.matmul(self.weight, x)
@ -88,7 +91,7 @@ class Parameter(MetaTensor_):
"""
__base_type__ = {}
def __new__(cls, default_input, name, *args, **kwargs):
def __new__(cls, default_input, *args, **kwargs):
input_class, *class_init_args = Parameter._get_parameter_new_args(default_input)
new_type = Parameter._get_base_class(input_class)
obj = input_class.__new__(new_type)
@ -112,7 +115,7 @@ class Parameter(MetaTensor_):
return (
Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel))
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False):
def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False):
self._param_info = ParamInfo()
self.name = name
self.requires_grad = requires_grad
@ -276,24 +279,20 @@ class Parameter(MetaTensor_):
"""
self._is_init = is_init_
def clone(self, prefix, init='same'):
def clone(self, init='same'):
"""
Clone the parameter.
Args:
prefix (str): Namespace of parameter. The cloned Parameter name is
the combination of prefix and current name: `f"{prefix}.{self.name}"`.
init (Union[Tensor, str, MetaTensor, numbers.Number]): Initialize the shape of the parameter.
Default: 'same'.
Returns:
Parameter, a new parameter.
"""
Validator.check_str_by_regular(prefix)
x = copy(self)
# pylint: disable=protected-access
x._param_info = self._param_info.clone()
x._param_info.name = prefix + '.' + self._param_info.name
x.is_init = False
x.is_param_ps = self.is_param_ps
x.init_in_server = self.init_in_server
@ -464,10 +463,25 @@ class ParameterTuple(tuple):
def __new__(cls, iterable):
"""Create instance object of ParameterTuple."""
data = tuple(iterable)
ids = set()
orders = {}
for x in data:
if not isinstance(x, Parameter):
raise TypeError(f"ParameterTuple input should be `Parameter` collection."
f"But got a {type(iterable)}, {iterable}")
if id(x) not in ids:
ids.add(id(x))
if x.name not in orders.keys():
orders[x.name] = [0, x]
else:
if isinstance(orders[x.name], list):
name = x.name
orders[name][1].name = name + "_" + str(0)
x.name = x.name + "_" + str(1)
orders[name] = 1
else:
orders[x.name] += 1
x.name = x.name + "_" + str(orders[x.name])
return tuple.__new__(ParameterTuple, tuple(data))
def clone(self, prefix, init='same'):
@ -484,7 +498,8 @@ class ParameterTuple(tuple):
Validator.check_str_by_regular(prefix)
new = []
for x in self:
x1 = x.clone(prefix, init)
x1 = x.clone(init)
x1.name = prefix + "." + x1.name
new.append(x1)
return ParameterTuple(new)

View File

@ -20,6 +20,7 @@ import os
from collections import OrderedDict
import numpy
from mindspore import log as logger
from mindspore.common.parameter import PARAMETER_NAME_DEFAULT
from .. import context
from ..common import dtype as mstype
from ..common.api import _executor, _pynative_exec
@ -619,6 +620,8 @@ class Cell(Cell_):
raise KeyError("Duplicated parameter name '{}'.".format(param_name))
if not isinstance(param, Parameter) and param is not None:
raise TypeError("The type of parameter should be 'Parameter' if not None.")
if isinstance(param, Parameter) and param.name == PARAMETER_NAME_DEFAULT:
param.name = param_name
self._params[param_name] = param
def cast_param(self, param):

View File

@ -55,11 +55,11 @@ class DepthWiseConv(nn.Cell):
self.bias_add = P.BiasAdd()
weight_shape = [channel_multiplier, in_planes, kernel_size, kernel_size]
self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape), name='weight')
self.weight = Parameter(initializer(KaimingNormal(mode='fan_out'), weight_shape))
if has_bias:
bias_shape = [channel_multiplier * in_planes]
self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
self.bias = Parameter(initializer('zeros', bias_shape))
else:
self.bias = None

View File

@ -469,12 +469,12 @@ class DepthWiseConv(nn.Cell):
self.depthwise_conv = P.Conv2D(out_channel=in_planes * 1, kernel_size=kernel_size,
stride=stride, pad_mode="same", group=in_planes)
self.weight = Parameter(initializer(
weight_init, [in_planes * 1, 1, kernel_size, kernel_size]), name='depthwise_weight')
weight_init, [in_planes * 1, 1, kernel_size, kernel_size]))
else:
self.depthwise_conv = P.DepthwiseConv2dNative(
channel_multiplier=1, kernel_size=kernel_size, stride=stride, pad_mode='same',)
self.weight = Parameter(initializer(
weight_init, [1, in_planes, kernel_size, kernel_size]), name='depthwise_weight')
weight_init, [1, in_planes, kernel_size, kernel_size]))
def construct(self, x):
x = self.depthwise_conv(x, self.weight)

View File

@ -28,9 +28,8 @@ class DenseNoTranpose(nn.Cell):
def __init__(self, input_channels, output_channels, weight_init):
super(DenseNoTranpose, self).__init__()
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
name="weight")
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
self.matmul = P.MatMul(transpose_b=False)
self.bias_add = P.BiasAdd()

View File

@ -26,9 +26,8 @@ class DenseNoTranpose(nn.Cell):
"""Dense method"""
def __init__(self, input_channels, output_channels, weight_init):
super(DenseNoTranpose, self).__init__()
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
name="weight")
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")
self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16))
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor())
self.matmul = P.MatMul(transpose_b=False)
self.bias_add = P.BiasAdd()

View File

@ -55,7 +55,7 @@ class THOR_GPU(Optimizer):
Validator.check_value_type("momentum", momentum, [float], self.cls_name)
if isinstance(momentum, float) and momentum < 0.0:
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
self.momentum = Parameter(Tensor(momentum, mstype.float32))
self.params = self.parameters
self.use_nesterov = Validator.check_bool(use_nesterov)
self.moments = self.params.clone(prefix="moments", init='zeros')
@ -160,7 +160,7 @@ class THOR(Optimizer):
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
if isinstance(momentum, float) and momentum < 0.0:
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
self.momentum = Parameter(Tensor(momentum, mstype.float32))
self.params = self.parameters
self.moments = self.params.clone(prefix="moments", init='zeros')
self.hyper_map = C.HyperMap()

View File

@ -109,11 +109,10 @@ class _Conv(Cell):
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
weight_init, [out_channels, in_channels // group, *kernel_size]))
if Validator.check_bool(has_bias):
self.bias = Parameter(_initializer(
bias_init, [out_channels]), name='bias')
self.bias = Parameter(initializer(bias_init, [out_channels]))
else:
if bias_init != 'zeros':
logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
@ -174,12 +173,10 @@ class Conv2d_Thor_GPU(_Conv):
split_dim = 128
matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.matrix_A_dim, self.matrix_G_dim, split_dim)
self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32),
name='matrix_A_inv', requires_grad=False)
self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32),
name='matrix_A_inv', requires_grad=False)
self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32), requires_grad=False)
self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32), requires_grad=False)
self.broadcast_to = P.BroadcastTo(matrix_A_shape)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
self.matmul = P.MatMul(transpose_b=True)
self.shape = P.Shape()
@ -195,7 +192,7 @@ class Conv2d_Thor_GPU(_Conv):
self.axis = 0
self.sqrt = P.Sqrt()
self.reduce_mean = P.ReduceMean(keep_dims=False)
self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
self.damping = Parameter(Tensor(damping), requires_grad=False)
self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
@ -301,14 +298,14 @@ class Dense_Thor_GPU(Cell):
weight_init.shape[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.bias = Parameter(initializer(bias_init, [out_channels]))
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
@ -317,12 +314,10 @@ class Dense_Thor_GPU(Cell):
self.activation_flag = self.activation is not None
split_dim = 128
matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)),
name='matrix_A_inv', requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)),
name="matrix_G_inv", requires_grad=False)
self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
self.broadcast_to = P.BroadcastTo(matrix_A_shape)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
@ -331,7 +326,7 @@ class Dense_Thor_GPU(Cell):
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.batch_size = Tensor(batch_size, mstype.float16)
self.getG = P.InsertGradientOf(self.save_gradient)
self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
self.damping = Parameter(Tensor(damping), requires_grad=False)
self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
self.cast = P.Cast()
@ -467,20 +462,20 @@ class Conv2d_Thor(_Conv):
self.matrix_G_device_shape[3])
self.matrix_A_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
name='matrix_A_inv', requires_grad=False)
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
requires_grad=False)
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
self.matrix_G_inv = Parameter(
Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
name="matrix_G_inv", requires_grad=False)
requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
self.fake_G = Tensor(
np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.mul = P.Mul()
self.cast = P.Cast()
self.damping = Tensor(damping)
@ -648,14 +643,14 @@ class Dense_Thor(Cell):
weight_init.shape[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.bias = Parameter(initializer(bias_init, [out_channels]))
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
@ -663,10 +658,8 @@ class Dense_Thor(Cell):
self.activation = get_activation(activation)
self.activation_flag = self.activation is not None
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
requires_grad=False)
self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), requires_grad=False)
self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
self.matmul = P.MatMul(transpose_b=True)
@ -676,7 +669,7 @@ class Dense_Thor(Cell):
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.mul = P.Mul()
self.cast = P.Cast()
self.damping = Tensor(damping)
@ -689,8 +682,8 @@ class Dense_Thor(Cell):
self.assignadd = P.AssignAdd()
self.freq = Tensor(frequency, mstype.int32)
self.axis = 0
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), requires_grad=False)
self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
self.fused_abs_max2 = P.CusFusedAbsMax1()
self.log = P.Log()

View File

@ -33,13 +33,12 @@ class CTCLoss(_Loss):
def __init__(self, max_sequence_length, max_label_length, batch_size):
super(CTCLoss, self).__init__()
self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32),
name="sequence_length")
self.sequence_length = Parameter(Tensor(np.array([max_sequence_length] * batch_size), mstype.int32))
labels_indices = []
for i in range(batch_size):
for j in range(max_label_length):
labels_indices.append([i, j])
self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64), name="labels_indices")
self.labels_indices = Parameter(Tensor(np.array(labels_indices), mstype.int64))
self.reshape = P.Reshape()
self.ctc_loss = P.CTCLoss(ctc_merge_repeated=True)

View File

@ -45,12 +45,10 @@ class StackedRNN(nn.Cell):
self.rnn1 = P.DynamicRNN(forget_bias=0.0)
self.rnn2 = P.DynamicRNN(forget_bias=0.0)
self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16),
name="w1")
self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16),
name="w2")
self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b1")
self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16), name="b2")
self.w1 = Parameter(np.random.uniform(-k, k, (input_size + hidden_size, 4 * hidden_size)).astype(np.float16))
self.w2 = Parameter(np.random.uniform(-k, k, (hidden_size + hidden_size, 4 * hidden_size)).astype(np.float16))
self.b1 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
self.b2 = Parameter(np.random.uniform(-k, k, (4 * hidden_size)).astype(np.float16))
self.h1 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
self.h2 = Tensor(np.zeros(shape=(1, batch_size, hidden_size)).astype(np.float16))
@ -98,7 +96,7 @@ class StackedRNNForGPU(nn.Cell):
self.cast = P.Cast()
k = (1 / hidden_size) ** 0.5
weight_shape = 4 * hidden_size * (input_size + 3 * hidden_size + 4)
self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32), name='weight')
self.weight = Parameter(np.random.uniform(-k, k, (weight_shape, 1, 1)).astype(np.float32))
self.h = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))
self.c = Tensor(np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))

View File

@ -39,7 +39,6 @@ class MeanConv(nn.Cell):
"""
def __init__(self,
name,
feature_in_dim,
feature_out_dim,
activation,
@ -47,8 +46,7 @@ class MeanConv(nn.Cell):
super(MeanConv, self).__init__()
self.out_weight = Parameter(
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
name=name + 'out_weight')
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
if activation == "tanh":
self.act = P.Tanh()
@ -90,15 +88,13 @@ class AttenConv(nn.Cell):
"""
def __init__(self,
name,
feature_in_dim,
feature_out_dim,
dropout=0.2):
super(AttenConv, self).__init__()
self.out_weight = Parameter(
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32),
name=name + 'out_weight')
initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim], dtype=mstype.float32))
self.cast = P.Cast()
self.squeeze = P.Squeeze(1)
self.concat = P.Concat(axis=1)
@ -147,10 +143,8 @@ class BGCF(nn.Cell):
input_dim):
super(BGCF, self).__init__()
self.user_embeddings = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32),
name='user_embed')
self.item_embeddings = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32),
name='item_embed')
self.user_embed = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32))
self.item_embed = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32))
self.cast = P.Cast()
self.tanh = P.Tanh()
self.shape = P.Shape()
@ -163,30 +157,27 @@ class BGCF(nn.Cell):
(self.input_dim, self.num_user, self.num_item) = dataset_argv
self.layer_dim = architect_argv
self.gnew_agg_mean = MeanConv('gnew_agg_mean', self.input_dim, self.layer_dim,
self.gnew_agg_mean = MeanConv(self.input_dim, self.layer_dim,
activation=activation, dropout=neigh_drop_rate[1])
self.gnew_agg_mean.to_float(mstype.float16)
self.gnew_agg_user = AttenConv('gnew_agg_att_user', self.input_dim,
self.layer_dim, dropout=neigh_drop_rate[2])
self.gnew_agg_user = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
self.gnew_agg_user.to_float(mstype.float16)
self.gnew_agg_item = AttenConv('gnew_agg_att_item', self.input_dim,
self.layer_dim, dropout=neigh_drop_rate[2])
self.gnew_agg_item = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
self.gnew_agg_item.to_float(mstype.float16)
self.user_feature_dim = self.input_dim
self.item_feature_dim = self.input_dim
self.final_weight = Parameter(
initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32),
name='final_weight')
initializer("XavierUniform", [self.input_dim * 3, self.input_dim * 3], dtype=mstype.float32))
self.raw_agg_funcs_user = MeanConv('raw_agg_user', self.input_dim, self.layer_dim,
self.raw_agg_funcs_user = MeanConv(self.input_dim, self.layer_dim,
activation=activation, dropout=neigh_drop_rate[0])
self.raw_agg_funcs_user.to_float(mstype.float16)
self.raw_agg_funcs_item = MeanConv('raw_agg_item', self.input_dim, self.layer_dim,
self.raw_agg_funcs_item = MeanConv(self.input_dim, self.layer_dim,
activation=activation, dropout=neigh_drop_rate[0])
self.raw_agg_funcs_item.to_float(mstype.float16)
@ -207,14 +198,14 @@ class BGCF(nn.Cell):
neg_gnew_neighs,
neg_item_num):
"""Aggregate user and item embeddings"""
all_user_embed = self.gather(self.user_embeddings, self.concat_0((u_id, pos_users)), 0)
all_user_embed = self.gather(self.user_embed, self.concat_0((u_id, pos_users)), 0)
u_self_matrix_at_layers = self.gather(self.user_embeddings, u_group_nodes, 0)
u_neigh_matrix_at_layers = self.gather(self.item_embeddings, u_neighs, 0)
u_self_matrix_at_layers = self.gather(self.user_embed, u_group_nodes, 0)
u_neigh_matrix_at_layers = self.gather(self.item_embed, u_neighs, 0)
u_output_mean = self.raw_agg_funcs_user(u_self_matrix_at_layers, u_neigh_matrix_at_layers)
u_gnew_neighs_matrix = self.gather(self.item_embeddings, u_gnew_neighs, 0)
u_gnew_neighs_matrix = self.gather(self.item_embed, u_gnew_neighs, 0)
u_output_from_gnew_mean = self.gnew_agg_mean(u_self_matrix_at_layers, u_gnew_neighs_matrix)
u_output_from_gnew_att = self.gnew_agg_user(u_self_matrix_at_layers,
@ -223,14 +214,14 @@ class BGCF(nn.Cell):
u_output = self.concat_1((u_output_mean, u_output_from_gnew_mean, u_output_from_gnew_att))
all_user_rep = self.tanh(u_output)
all_pos_item_embed = self.gather(self.item_embeddings, self.concat_0((pos_item_id, pos_items)), 0)
all_pos_item_embed = self.gather(self.item_embed, self.concat_0((pos_item_id, pos_items)), 0)
i_self_matrix_at_layers = self.gather(self.item_embeddings, i_group_nodes, 0)
i_neigh_matrix_at_layers = self.gather(self.user_embeddings, i_neighs, 0)
i_self_matrix_at_layers = self.gather(self.item_embed, i_group_nodes, 0)
i_neigh_matrix_at_layers = self.gather(self.user_embed, i_neighs, 0)
i_output_mean = self.raw_agg_funcs_item(i_self_matrix_at_layers, i_neigh_matrix_at_layers)
i_gnew_neighs_matrix = self.gather(self.user_embeddings, i_gnew_neighs, 0)
i_gnew_neighs_matrix = self.gather(self.user_embed, i_gnew_neighs, 0)
i_output_from_gnew_mean = self.gnew_agg_mean(i_self_matrix_at_layers, i_gnew_neighs_matrix)
i_output_from_gnew_att = self.gnew_agg_item(i_self_matrix_at_layers,
@ -239,14 +230,14 @@ class BGCF(nn.Cell):
i_output = self.concat_1((i_output_mean, i_output_from_gnew_mean, i_output_from_gnew_att))
all_pos_item_rep = self.tanh(i_output)
neg_item_embed = self.gather(self.item_embeddings, neg_item_id, 0)
neg_item_embed = self.gather(self.item_embed, neg_item_id, 0)
neg_self_matrix_at_layers = self.gather(self.item_embeddings, neg_group_nodes, 0)
neg_neigh_matrix_at_layers = self.gather(self.user_embeddings, neg_neighs, 0)
neg_self_matrix_at_layers = self.gather(self.item_embed, neg_group_nodes, 0)
neg_neigh_matrix_at_layers = self.gather(self.user_embed, neg_neighs, 0)
neg_output_mean = self.raw_agg_funcs_item(neg_self_matrix_at_layers, neg_neigh_matrix_at_layers)
neg_gnew_neighs_matrix = self.gather(self.user_embeddings, neg_gnew_neighs, 0)
neg_gnew_neighs_matrix = self.gather(self.user_embed, neg_gnew_neighs, 0)
neg_output_from_gnew_mean = self.gnew_agg_mean(neg_self_matrix_at_layers, neg_gnew_neighs_matrix)
neg_output_from_gnew_att = self.gnew_agg_item(neg_self_matrix_at_layers,

View File

@ -80,14 +80,14 @@ class GNNFeatureTransform(nn.Cell):
weight_init.shape[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.bias = Parameter(initializer(bias_init, [out_channels]))
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
@ -280,7 +280,7 @@ class AttentionHead(nn.Cell):
self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
self.matmul = P.MatMul()
self.bias_add = P.BiasAdd()
self.bias = Parameter(initializer('zeros', self.out_channel), name='bias')
self.bias = Parameter(initializer('zeros', self.out_channel))
self.residual = residual
if self.residual:
if in_channel != out_channel:

View File

@ -80,8 +80,8 @@ class BertPretrainEva(nn.Cell):
self.equal = P.Equal()
self.mean = P.ReduceMean()
self.sum = P.ReduceSum()
self.total = Parameter(Tensor([0], mstype.float32), name='total')
self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
self.total = Parameter(Tensor([0], mstype.float32))
self.acc = Parameter(Tensor([0], mstype.float32))
self.reshape = P.Reshape()
self.shape = P.Shape()
self.cast = P.Cast()

View File

@ -52,7 +52,7 @@ class CRF(nn.Cell):
transitions = np.random.normal(size=(self.target_size, self.target_size)).astype(np.float32)
transitions[tag_to_index[self.START_TAG], :] = -10000
transitions[:, tag_to_index[self.STOP_TAG]] = -10000
self.transitions = Parameter(Tensor(transitions), name="transition_matrix")
self.transitions = Parameter(Tensor(transitions))
self.cat = P.Concat(axis=-1)
self.argmax = P.ArgMaxWithValue(axis=-1)
self.log = P.Log()

View File

@ -90,8 +90,7 @@ class BertFinetuneCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
def construct(self,
input_ids,
@ -185,8 +184,8 @@ class BertSquadCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
def construct(self,
input_ids,
input_mask,
@ -306,9 +305,9 @@ class BertSquad(nn.Cell):
self.num_labels = num_labels
self.seq_length = config.seq_length
self.is_training = is_training
self.total_num = Parameter(Tensor([0], mstype.float32), name='total_num')
self.start_num = Parameter(Tensor([0], mstype.float32), name='start_num')
self.end_num = Parameter(Tensor([0], mstype.float32), name='end_num')
self.total_num = Parameter(Tensor([0], mstype.float32))
self.start_num = Parameter(Tensor([0], mstype.float32))
self.end_num = Parameter(Tensor([0], mstype.float32))
self.sum = P.ReduceSum()
self.equal = P.Equal()
self.argmax = P.ArgMaxWithValue(axis=1)

View File

@ -84,8 +84,7 @@ class GetMaskedLMOutput(nn.Cell):
self.output_bias = Parameter(
initializer(
'zero',
config.vocab_size),
name='output_bias')
config.vocab_size))
self.matmul = P.MatMul(transpose_b=True)
self.log_softmax = nn.LogSoftmax(axis=-1)
self.shape_flat_offsets = (-1, 1)
@ -359,8 +358,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,
@ -465,10 +463,10 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
self.enable_global_norm = enable_global_norm
self.one = Tensor(np.array([1]).astype(np.int32))
self.zero = Tensor(np.array([0]).astype(np.int32))
self.local_step = Parameter(initializer(0, [1], mstype.int32), name="local_step")
self.local_step = Parameter(initializer(0, [1], mstype.int32))
self.accu_grads = self.weights.clone(prefix="accu_grads", init='zeros')
self.accu_overflow = Parameter(initializer(0, [1], mstype.int32), name="accu_overflow")
self.loss = Parameter(initializer(0, [1], mstype.float32), name="accu_loss")
self.accu_overflow = Parameter(initializer(0, [1], mstype.int32))
self.accu_loss = Parameter(initializer(0, [1], mstype.float32))
self.grad = C.GradOperation(get_by_list=True, sens_param=True)
self.reducer_flag = False
@ -499,8 +497,7 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,
@ -529,8 +526,8 @@ class BertTrainAccumulateStepsWithLossScaleCell(nn.Cell):
# update accumulation parameters
is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)
self.local_step = self.select(is_accu_step, self.local_step + self.one, self.one)
self.loss = self.select(is_accu_step, self.loss + loss, loss)
mean_loss = self.loss / self.local_step
self.accu_loss = self.select(is_accu_step, self.accu_loss + loss, loss)
mean_loss = self.accu_loss / self.local_step
is_accu_step = self.not_equal(self.local_step, self.accumulation_steps)
# alloc status and clear should be right before gradoperation

View File

@ -110,8 +110,7 @@ class EmbeddingLookup(nn.Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[vocab_size, embedding_size]),
name='embedding_table')
[vocab_size, embedding_size]))
self.expand = P.ExpandDims()
self.shape_flat = (-1,)
self.gather = P.GatherV2()
@ -170,8 +169,7 @@ class EmbeddingPostprocessor(nn.Cell):
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[token_type_vocab_size,
embedding_size]),
name='embedding_table')
embedding_size]))
self.shape_flat = (-1,)
self.one_hot = P.OneHot()
@ -188,8 +186,7 @@ class EmbeddingPostprocessor(nn.Cell):
self.full_position_embeddings = Parameter(initializer
(TruncatedNormal(initializer_range),
[max_position_embeddings,
embedding_size]),
name='full_position_embeddings')
embedding_size]))
def construct(self, token_type_ids, word_embeddings):
"""Postprocessors apply positional and token type embeddings to word embeddings."""
@ -314,8 +311,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
self.embeddings_table = Parameter(
initializer(TruncatedNormal(initializer_range),
[self.vocab_size, self.depth]),
name='embeddings_for_position')
[self.vocab_size, self.depth]))
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
max_relative_position=max_relative_position)

View File

@ -86,8 +86,8 @@ class BertPretrainEva(nn.Cell):
self.equal = P.Equal()
self.mean = P.ReduceMean()
self.sum = P.ReduceSum()
self.total = Parameter(Tensor([0], mstype.float32), name='total')
self.acc = Parameter(Tensor([0], mstype.float32), name='acc')
self.total = Parameter(Tensor([0], mstype.float32))
self.acc = Parameter(Tensor([0], mstype.float32))
self.reshape = P.Reshape()
self.shape = P.Shape()
self.cast = P.Cast()

View File

@ -98,8 +98,7 @@ class GetMaskedLMOutput(nn.Cell):
self.output_bias = Parameter(
initializer(
'zero',
config.vocab_size),
name='output_bias')
config.vocab_size))
self.matmul = P.MatMul(transpose_b=True)
self.log_softmax = nn.LogSoftmax(axis=-1)
self.shape_flat_offsets = (-1, 1)
@ -379,8 +378,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,

View File

@ -136,8 +136,7 @@ class EmbeddingLookup(nn.Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[vocab_size, embedding_size]),
name='embedding_table')
[vocab_size, embedding_size]))
self.expand = P.ExpandDims()
self.shape_flat = (-1,)
self.gather = P.GatherV2()
@ -200,7 +199,6 @@ class EmbeddingPostprocessor(nn.Cell):
embedding_shape=embedding_shape,
use_one_hot_embeddings=use_one_hot_embeddings,
initializer_range=initializer_range,
name='embedding_table',
batch_size=batch_size,
damping=damping,
loss_scale=loss_scale,
@ -224,7 +222,6 @@ class EmbeddingPostprocessor(nn.Cell):
embedding_shape=position_embedding_shape,
use_one_hot_embeddings=use_one_hot_embeddings,
initializer_range=initializer_range,
name='full_position_embeddings',
batch_size=batch_size,
damping=damping,
loss_scale=loss_scale,
@ -363,8 +360,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
self.embeddings_table = Parameter(
initializer(TruncatedNormal(initializer_range),
[self.vocab_size, self.depth]),
name='embeddings_for_position')
[self.vocab_size, self.depth]))
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
max_relative_position=max_relative_position)
@ -944,7 +940,6 @@ class BertModel(nn.Cell):
embedding_shape=output_embedding_shape,
use_one_hot_embeddings=use_one_hot_embeddings,
initializer_range=config.initializer_range,
name='embedding_table',
batch_size=batch_size,
damping=damping,
loss_scale=loss_scale,

View File

@ -94,9 +94,9 @@ class FusedLayerNorm(Cell):
self.begin_norm_axis = begin_norm_axis
self.begin_params_axis = begin_params_axis
self.gamma = Parameter(initializer(
gamma_init, normalized_shape), name="gamma")
gamma_init, normalized_shape))
self.beta = Parameter(initializer(
beta_init, normalized_shape), name="beta")
beta_init, normalized_shape))
self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis)
self.batch_norm = P.BatchNorm(is_training=True, epsilon=1e-5)

View File

@ -52,7 +52,7 @@ class THOR(Optimizer):
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
if isinstance(momentum, float) and momentum < 0.0:
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
self.momentum = Parameter(Tensor(momentum, mstype.float32))
self.params = self.parameters
self.moments = self.params.clone(prefix="moments", init='zeros')
self.hyper_map = C.HyperMap()
@ -80,7 +80,7 @@ class THOR(Optimizer):
self.batch_size = batch_size
self.damping = damping
self.one = Tensor(1, mstype.int32)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
def construct(self, gradients):
"""construct of THOR"""

View File

@ -54,7 +54,7 @@ class THOR(Optimizer):
super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
if isinstance(momentum, float) and momentum < 0.0:
raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
self.momentum = Parameter(Tensor(momentum, mstype.float32))
self.params = self.parameters
self.moments = self.params.clone(prefix="moments", init='zeros')
self.hyper_map = C.HyperMap()
@ -82,7 +82,7 @@ class THOR(Optimizer):
self.batch_size = batch_size
self.damping = damping
self.one = Tensor(1, mstype.int32)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
mean = _get_gradients_mean()
degree = _get_device_num()
self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)

View File

@ -41,7 +41,6 @@ class Embedding_Thor(Cell):
embedding_shape,
use_one_hot_embeddings=False,
initializer_range=0.02,
name='embedding_table',
batch_size=12,
damping=0.03,
loss_scale=1,
@ -52,8 +51,7 @@ class Embedding_Thor(Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[vocab_size, embedding_size]),
name=name)
[vocab_size, embedding_size]))
self.thor = True
self.expand = P.ExpandDims()
self.shape_flat = (-1,)
@ -67,14 +65,13 @@ class Embedding_Thor(Cell):
self.shape = P.Shape()
self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)),
name='matrix_A_inv', requires_grad=False)
self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)), requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
name="matrix_G_inv", requires_grad=False)
requires_grad=False)
self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.freq = Tensor(frequency, mstype.int32)
self.axis = 0
self.damping = damping
@ -169,14 +166,14 @@ class Dense_Thor(Cell):
weight_init.shape()[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.bias = Parameter(initializer(bias_init, [out_channels]))
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()
@ -184,9 +181,9 @@ class Dense_Thor(Cell):
self.activation = get_activation(activation)
self.activation_flag = self.activation is not None
self.matrix_A_inv = Parameter(Tensor(np.zeros([in_channels, in_channels]).astype(np.float16)),
name='matrix_A_inv', requires_grad=False)
requires_grad=False)
self.matrix_G_inv = Parameter(Tensor(np.zeros([out_channels, out_channels]).astype(np.float16)),
name="matrix_G_inv", requires_grad=False)
requires_grad=False)
self.fake_G = Tensor(np.zeros([out_channels, out_channels]).astype(np.float16))
self.matmul = P.MatMul(transpose_b=True)
@ -196,7 +193,7 @@ class Dense_Thor(Cell):
self.shape = P.Shape()
self.reshape = P.Reshape()
self.transpose = P.Transpose()
self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
self.mul = P.Mul()
self.cast = P.Cast()
self.damping = damping

View File

@ -57,11 +57,10 @@ class BahdanauAttention(nn.Cell):
self.normalize = normalize
self.num_units = num_units
self.linear_att = Parameter(Tensor(np.random.uniform(-initializer_range, initializer_range, size=[num_units]),
dtype=mstype.float32), name='linear_att')
dtype=mstype.float32))
if self.normalize:
self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32),
name='normalize_scalar')
self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32), name='normalize_bias')
self.normalize_scalar = Parameter(Tensor(np.array([1.0 / num_units]), dtype=mstype.float32))
self.normalize_bias = Parameter(Tensor(np.zeros(num_units), dtype=mstype.float32))
self.transpose = P.Transpose()
self.transpose_orders = (1, 0, 2)
self.shape_op = P.Shape()

View File

@ -49,10 +49,10 @@ class DynamicRNNCell(nn.Cell):
# w
dynamicRNN_w = np.random.uniform(-initializer_range, initializer_range,
size=[self.input_size + self.hidden_size, 4 * self.hidden_size])
self.dynamicRNN_w = Parameter(Tensor(dynamicRNN_w, mstype.float32), name='weight')
self.dynamicRNN_w = Parameter(Tensor(dynamicRNN_w, mstype.float32))
# b
dynamicRNN_b = np.random.uniform(-initializer_range, initializer_range, size=[4 * self.hidden_size])
self.dynamicRNN_b = Parameter(Tensor(dynamicRNN_b, mstype.float32), name='bias')
self.dynamicRNN_b = Parameter(Tensor(dynamicRNN_b, mstype.float32))
self.dynamicRNN_h = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32)
self.dynamicRNN_c = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32)

View File

@ -48,8 +48,7 @@ class EmbeddingLookup(nn.Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
init_weight = np.random.normal(-initializer_range, initializer_range, size=[vocab_size, embed_dim])
self.embedding_table = Parameter(Tensor(init_weight, mstype.float32),
name='embedding_table')
self.embedding_table = Parameter(Tensor(init_weight, mstype.float32))
self.expand = P.ExpandDims()
self.gather = P.GatherV2()
self.one_hot = P.OneHot()

View File

@ -253,8 +253,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
self.add_flags(has_effect=True)
self.loss_scalar = P.ScalarSummary()

View File

@ -217,8 +217,8 @@ class Adam(Optimizer):
self.beta1 = Tensor(beta1, mstype.float32)
self.beta2 = Tensor(beta2, mstype.float32)
self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power")
self.beta1_power = Parameter(initializer(1, [1], mstype.float32))
self.beta2_power = Parameter(initializer(1, [1], mstype.float32))
self.eps = eps
self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
@ -377,7 +377,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
_check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
_check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name)
# turn them into scalars once ME supports mixed scalar/tensor operations
self.global_step = Parameter(initializer(0, [1]), name="global_step")
self.global_step = Parameter(initializer(0, [1]))
self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32))
self.warmup_flag = False
if warmup_steps > 0:

View File

@ -41,8 +41,8 @@ class LayerNorm(nn.Cell):
"""
def __init__(self, normalized_shape, eps=1e-5):
super(LayerNorm, self).__init__()
self.gamma = Parameter(initializer('ones', normalized_shape), name="gamma")
self.beta = Parameter(initializer('zeros', normalized_shape), name="beta")
self.gamma = Parameter(initializer('ones', normalized_shape))
self.beta = Parameter(initializer('zeros', normalized_shape))
self.mean = P.ReduceMean(keep_dims=True)
self.eps = eps
@ -100,8 +100,8 @@ class Mapping(nn.Cell):
super(Mapping, self).__init__()
self.output_size = output_size
self.input_size = input_size
self.weight = Parameter(initializer(Normal(sigma=0.02*scale), [input_size, output_size]), name="mapping_weight")
self.bias = Parameter(initializer("zeros", [output_size,]), name="mapping_bias")
self.weight = Parameter(initializer(Normal(sigma=0.02*scale), [input_size, output_size]))
self.bias = Parameter(initializer("zeros", [output_size,]))
self.dtype = dtype
self.cast = P.Cast()
@ -194,8 +194,7 @@ class EmbeddingLookup(nn.Cell):
super(EmbeddingLookup, self).__init__()
self.vocab_size = config.vocab_size
self.embedding_size = config.embedding_size
self.embedding_table = Parameter(initializer(TruncatedNormal(0.02), [self.vocab_size, self.embedding_size]),
name="embedding_table")
self.embedding_table = Parameter(initializer(TruncatedNormal(0.02), [self.vocab_size, self.embedding_size]))
self.gather = P.GatherV2()
self.shape = (-1, config.seq_length, config.embedding_size)
def construct(self, input_ids):

View File

@ -106,8 +106,7 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,

View File

@ -44,8 +44,7 @@ class EmbeddingLookup(nn.Cell):
init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32)
# 0 is Padding index, thus init it as 0.
init_weight[0, :] = 0
self.embedding_table = Parameter(Tensor(init_weight),
name='embedding_table')
self.embedding_table = Parameter(Tensor(init_weight))
self.expand = P.ExpandDims()
self.gather = P.GatherV2()
self.one_hot = P.OneHot()

View File

@ -277,8 +277,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
self.add_flags(has_effect=True)
def construct(self,

View File

@ -44,8 +44,7 @@ class EmbeddingLookup(nn.Cell):
init_weight = np.random.normal(0, embed_dim ** -0.5, size=[vocab_size, embed_dim]).astype(np.float32)
# 0 is Padding index, thus init it as 0.
init_weight[0, :] = 0
self.embedding_table = Parameter(Tensor(init_weight),
name='embedding_table')
self.embedding_table = Parameter(Tensor(init_weight))
self.expand = P.ExpandDims()
self.gather = P.GatherV2()
self.one_hot = P.OneHot()

View File

@ -243,8 +243,7 @@ class BertTrainWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,
@ -497,8 +496,7 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,

View File

@ -110,8 +110,7 @@ class EmbeddingLookup(nn.Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[vocab_size, embedding_size]),
name='embedding_table')
[vocab_size, embedding_size]))
self.expand = P.ExpandDims()
self.shape_flat = (-1,)
self.gather = P.GatherV2()
@ -170,8 +169,7 @@ class EmbeddingPostprocessor(nn.Cell):
self.embedding_table = Parameter(initializer
(TruncatedNormal(initializer_range),
[token_type_vocab_size,
embedding_size]),
name='embedding_table')
embedding_size]))
self.shape_flat = (-1,)
self.one_hot = P.OneHot()
self.on_value = Tensor(1.0, mstype.float32)
@ -187,8 +185,7 @@ class EmbeddingPostprocessor(nn.Cell):
self.full_position_embeddings = Parameter(initializer
(TruncatedNormal(initializer_range),
[max_position_embeddings,
embedding_size]),
name='full_position_embeddings')
embedding_size]))
def construct(self, token_type_ids, word_embeddings):
"""embedding postprocessor"""
@ -317,8 +314,7 @@ class RelaPosEmbeddingsGenerator(nn.Cell):
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embeddings_table = Parameter(
initializer(TruncatedNormal(initializer_range),
[self.vocab_size, self.depth]),
name='embeddings_for_position')
[self.vocab_size, self.depth]))
self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
max_relative_position=max_relative_position)
self.reshape = P.Reshape()

View File

@ -291,8 +291,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32))
@C.add_flags(has_effect=True)
def construct(self,

View File

@ -115,8 +115,7 @@ class EmbeddingLookup(nn.Cell):
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.use_one_hot_embeddings = use_one_hot_embeddings
self.embedding_table = Parameter(normal_weight([vocab_size, embedding_size], embedding_size),
name='embedding_table')
self.embedding_table = Parameter(normal_weight([vocab_size, embedding_size], embedding_size))
self.expand = P.ExpandDims()
self.shape_flat = (-1,)
self.gather = P.GatherV2()

View File

@ -47,14 +47,14 @@ class DenseLayer(nn.Cell):
weight_init.shape()[1] != in_channels:
raise ValueError("weight_init shape error")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
if self.has_bias:
if isinstance(bias_init, Tensor):
if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
raise ValueError("bias_init shape error")
self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
self.bias = Parameter(initializer(bias_init, [out_channels]))
self.matmul = P.MatMul(transpose_b=True)
self.bias_add = P.BiasAdd()

View File

@ -35,10 +35,10 @@ class QuanConv(nn.Conv2d):
self.x_upper_bound = Tensor(2 ** 8 - 1, ms.float32)
self.w_lower_bound = Tensor(-2 ** 7 - 1, ms.float32)
self.w_upper_bound = Tensor(2 ** 7, ms.float32)
self.scale_a = Parameter(initializer('ones', [1]), name='scale_a')
self.scale_a = Parameter(initializer('ones', [1]))
self.scale_w = Parameter(initializer(
'ones', [out_channels]), name='scale_w')
self.zp_a = Parameter(initializer('ones', [1]), name='zp_a')
'ones', [out_channels]))
self.zp_a = Parameter(initializer('ones', [1]))
def construct(self, in_data):
r"""construct of QuanConv"""

View File

@ -119,12 +119,12 @@ class DepthwiseConv(nn.Cell):
self.bias_add = P.BiasAdd()
weight_shape = [channel_multiplier, in_planes, *self.kernel_size]
self.weight = Parameter(initializer(
'ones', weight_shape), name='weight')
'ones', weight_shape))
if has_bias:
bias_shape = [channel_multiplier * in_planes]
self.bias = Parameter(initializer(
'zeros', bias_shape), name='bias')
'zeros', bias_shape))
else:
self.bias = None

View File

@ -499,7 +499,7 @@ class DepthWiseConv(nn.Cell):
group=in_planes)
self.weight = Parameter(initializer(weight_init,
[in_planes*1, 1, kernel_size, kernel_size]), name='depthwise_weight')
[in_planes*1, 1, kernel_size, kernel_size]))
else:
self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=1,
@ -508,7 +508,7 @@ class DepthWiseConv(nn.Cell):
pad=int(kernel_size/2))
self.weight = Parameter(initializer(weight_init,
[1, in_planes, kernel_size, kernel_size]), name='depthwise_weight')
[1, in_planes, kernel_size, kernel_size]))
def construct(self, x):
x = self.depthwise_conv(x, self.weight)

View File

@ -31,11 +31,11 @@ class DepthWiseConv(nn.Cell):
self.bias_add = P.BiasAdd()
weight_shape = [channel_multiplier, in_planes, kernel_size[0], kernel_size[1]]
self.weight = Parameter(initializer('ones', weight_shape), name='weight')
self.weight = Parameter(initializer('ones', weight_shape))
if has_bias:
bias_shape = [channel_multiplier * in_planes]
self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
self.bias = Parameter(initializer('zeros', bias_shape))
else:
self.bias = None