From b812b18c028df16b6ff08e456bb435cf50e10442 Mon Sep 17 00:00:00 2001
From: "wangnan39@huawei.com"
Date: Fri, 17 Apr 2020 12:03:50 +0800
Subject: [PATCH] support update parameter for vm

---
 mindspore/common/parameter.py               | 16 ++--
 mindspore/nn/optim/adam.py                  | 12 ---
 mindspore/nn/optim/ftrl.py                  |  2 +-
 mindspore/nn/optim/lars.py                  | 17 -----
 mindspore/nn/optim/momentum.py              | 20 +----
 mindspore/nn/optim/optimizer.py             |  2 +
 mindspore/nn/optim/rmsprop.py               | 21 +-----
 mindspore/nn/optim/sgd.py                   | 20 +----
 mindspore/nn/wrap/cell_wrapper.py           | 30 +-------
 mindspore/train/serialization.py            | 11 ++-
 tests/ut/python/nn/test_cell_wrapper.py     |  4 -
 tests/ut/python/nn/test_parameter.py        | 74 -------------------
 tests/ut/python/ops/test_momentum.py        |  2 +-
 .../python/pynative_mode/test_cell_bprop.py |  2 +-
 tests/vm_impl/nn_ops_vm_impl.py             |  4 +-
 15 files changed, 34 insertions(+), 203 deletions(-)

diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py
index c354bcd2352..5f56d23956e 100644
--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@@ -15,7 +15,6 @@
 """Parameter for cell."""
 from copy import copy, deepcopy
-import numpy as np
 from .initializer import initializer
 from .tensor import Tensor
 from .._checkparam import _check_str_by_regular
@@ -176,14 +175,15 @@ class Parameter:
         return res

     def set_parameter_data(self, data):
-        if isinstance(data, (Tensor, list, int, float,
-                             np.float16, np.float32, np.int32, np.int16, np.ndarray)) and not isinstance(data, bool):
-            if isinstance(data, Tensor):
-                # make a copy of Tensor to init the parameter
-                data = Tensor(data.asnumpy().copy())
-            self.default_input = data
+        """Set `default_input` of current `Parameter`."""
+        if isinstance(data, bool):
+            raise ValueError('Parameter data can not be `bool`')
+        if isinstance(data, Tensor):
+            # make a copy of Tensor to init the parameter
+            data = Tensor(data.asnumpy().copy())
         else:
-            raise ValueError("Parameter data must be tensor or number.")
+            data = Tensor(data)
+        self.default_input = data


 class ParameterTuple(tuple):
diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 65f8ec678be..4e88c3ef93e 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -101,17 +101,6 @@ def _run_opt_with_one_number(opt, lr, beta1_power, beta2_power, beta1, beta2, ep
     return success


-@adam_opt.register("Function", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", "Tensor",
-                   "Tensor")
-def _run_opt_with_two_number(opt, lr, beta1_power, beta2_power, beta1, beta2, eps, gradient, params, moment1,
-                             moment2):
-    """Apply adam optimizer to the weight parameter using Tensor."""
-    success = True
-    success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
-                                    eps, gradient))
-    return success
-
-
 class Adam(Optimizer):
     r"""
     Updates gradients by Adaptive Moment Estimation (Adam) algorithm.
@@ -183,7 +172,6 @@ class Adam(Optimizer):
         self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
         self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')
-        self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
         self.hyper_map = C.HyperMap()
         self.opt = P.Adam(use_locking, use_nesterov)
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index d08dd6cf4c9..2bc329f42d8 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -23,7 +23,7 @@ from mindspore._checkparam import Rel
 from .optimizer import Optimizer, apply_decay, grad_scale

 ftrl_opt = C.MultitypeFuncGraph("ftrl_opt")
-@ftrl_opt.register("Function", "Number", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
+@ftrl_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment):
     """Apply ftrl optimizer to the weight parameter."""
     success = True
diff --git a/mindspore/nn/optim/lars.py b/mindspore/nn/optim/lars.py
index 02538aa61a6..73451f3bf5d 100755
--- a/mindspore/nn/optim/lars.py
+++ b/mindspore/nn/optim/lars.py
@@ -43,23 +43,6 @@ def _tensor_run_opt(lars, weight_decay, learning_rate, gradient, weight, decay_f
     return gradient


-@lars_opt.register("Function", "Number", "Number", "Tensor", "Tensor", "Bool", "Bool")
-def _tensor_run_opt_v2(lars, weight_decay, learning_rate, gradient, weight, decay_flag, lars_flag):
-    """Apply lars optimizer to the weight parameter."""
-    if lars_flag:
-        op_reduce = P.ReduceSum()
-        w_square_sum = op_reduce(F.square(weight))
-        grad_square_sum = op_reduce(F.square(gradient))
-        if decay_flag:
-            grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, weight_decay, learning_rate)
-        else:
-            num_zero = 0.0
-            grad_t = lars(weight, gradient, w_square_sum, grad_square_sum, num_zero, learning_rate)
-        return grad_t
-
-    return gradient
-
-
 class LARS(Optimizer):
     """
     Implements the LARS algorithm with LARSUpdate Operator.
diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py
index bac8e74a42c..c69e226df99 100755
--- a/mindspore/nn/optim/momentum.py
+++ b/mindspore/nn/optim/momentum.py
@@ -15,19 +15,13 @@
 """momentum"""
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common.parameter import Parameter
+from mindspore.common.tensor import Tensor
+import mindspore.common.dtype as mstype
 from .optimizer import Optimizer

 momentum_opt = C.MultitypeFuncGraph("momentum_opt")


-@momentum_opt.register("Function", "Number", "Number", "Tensor", "Tensor", "Tensor")
-def _tensor_run_opt(opt, learning_rate, momentum, gradient, weight, moment):
-    """Apply momentum optimizer to the weight parameter."""
-    success = True
-    success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
-    return success
-
-
 @momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, moment):
     """Apply momentum optimizer to the weight parameter using Tensor."""
@@ -36,14 +30,6 @@ def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, moment):
     return success


-@momentum_opt.register("Function", "Tensor", "Number", "Tensor", "Tensor", "Tensor")
-def _tensor_run_opt_dyn(opt, learning_rate, momentum, gradient, weight, moment):
-    """Apply momentum optimizer to the weight parameter using dynamic learning rate."""
-    success = True
-    success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
-    return success
-
-
 class Momentum(Optimizer):
     """
     Implements the Momentum algorithm.
@@ -86,7 +72,7 @@ class Momentum(Optimizer):
         super(Momentum, self).__init__(learning_rate, params, weight_decay, loss_scale, decay_filter)
         if isinstance(momentum, float) and momentum < 0.0:
             raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-        self.momentum = Parameter(momentum, name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
         self.params = self.parameters
         self.moments = self.params.clone(prefix="moments", init='zeros')
         self.hyper_map = C.HyperMap()
diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py
index 5738044532c..8a7c65e5b23 100755
--- a/mindspore/nn/optim/optimizer.py
+++ b/mindspore/nn/optim/optimizer.py
@@ -22,6 +22,7 @@ from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.nn.cell import Cell
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.common.initializer import initializer
+import mindspore.common.dtype as mstype
 from mindspore._checkparam import Validator as validator
 from mindspore._checkparam import Rel
 from mindspore.common.tensor import Tensor
@@ -64,6 +65,7 @@ class Optimizer(Cell):
             self.assignadd = None
             self.global_step = None
             validator.check_number_range("learning rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name)
+            learning_rate = Tensor(learning_rate, mstype.float32)
         else:
             self.dynamic_lr = True
             self.gather = P.GatherV2()
diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py
index 97d7538a26d..a8f118b7099 100644
--- a/mindspore/nn/optim/rmsprop.py
+++ b/mindspore/nn/optim/rmsprop.py
@@ -21,34 +21,17 @@ rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt")
 centered_rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt")


-@rmsprop_opt.register("Function", "Number", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
-def _rmsprop_opt(opt, learning_rate, decay, epsilon, momentum, weight, ms, mom, grad):
-    """Apply rmsprop optimizer to the weight parameter."""
-    success = True
-    success = F.depend(success, opt(weight, ms, mom, grad, learning_rate, decay, momentum, epsilon))
-    return success
-
-
 @rmsprop_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
-def _rmsprop_opt_dynamic_lr(opt, learning_rate, decay, epsilon, momentum, weight, ms, mom, grad):
+def _rmsprop_opt(opt, learning_rate, decay, epsilon, momentum, weight, ms, mom, grad):
     """Apply rmsprop optimizer to the weight parameter using dynamic learning rate."""
     success = True
     success = F.depend(success, opt(weight, ms, mom, grad, learning_rate, decay, momentum, epsilon))
     return success


-@centered_rmsprop_opt.register("Function", "Number", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor",
-                               "Tensor", "Tensor")
-def _centered_rmsprop_opt(opt, learning_rate, decay, epsilon, momentum, weight, mg, ms, mom, grad):
-    """Apply centered rmsprop optimizer to the weight parameter."""
-    success = True
-    success = F.depend(success, opt(weight, mg, ms, mom, grad, learning_rate, decay, momentum, epsilon))
-    return success
-
-
 @centered_rmsprop_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor",
                                "Tensor", "Tensor")
-def _centered_rmsprop_opt_dynamic_lr(opt, learning_rate, decay, epsilon, momentum, weight, mg, ms, mom, grad):
+def _centered_rmsprop_opt(opt, learning_rate, decay, epsilon, momentum, weight, mg, ms, mom, grad):
     """Apply centered rmsprop optimizer to the weight parameter using dynamic learning rate."""
     success = True
     success = F.depend(success, opt(weight, mg, ms, mom, grad, learning_rate, decay, momentum, epsilon))
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index db0775e023e..cda5aa904a9 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -15,20 +15,14 @@
 """sgd"""
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common.parameter import Parameter
+from mindspore.common.tensor import Tensor
+import mindspore.common.dtype as mstype
 from mindspore._checkparam import Validator as validator
 from .optimizer import Optimizer

 sgd_opt = C.MultitypeFuncGraph("sgd_opt")


-@sgd_opt.register("Function", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
-def _tensor_run_opt(opt, learning_rate, momentum, gradient, weight, accum, stat):
-    """Apply sgd optimizer to the weight parameter."""
-    success = True
-    success = F.depend(success, opt(weight, gradient, learning_rate, accum, momentum, stat))
-    return success
-
-
 @sgd_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, accum, stat):
     """Apply sgd optimizer to the weight parameter using Tensor."""
@@ -37,14 +31,6 @@ def _tensor_run_opt_ext(opt, learning_rate, momentum, gradient, weight, accum, s
     return success


-@sgd_opt.register("Function", "Tensor", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
-def _tensor_run_opt_dyn(opt, learning_rate, momentum, gradient, weight, accum, stat):
-    """Apply sgd optimizer to the weight parameter using dynamic learning rate."""
-    success = True
-    success = F.depend(success, opt(weight, gradient, learning_rate, accum, momentum, stat))
-    return success
-
-
 class SGD(Optimizer):
     """
     Implements stochastic gradient descent (optionally with momentum).
@@ -105,7 +91,7 @@ class SGD(Optimizer):

         self.opt = P.SGD(dampening, weight_decay, nesterov)

-        self.momentum = Parameter(momentum, name="momentum")
+        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
         self.accum = self.parameters.clone(prefix="accum", init='zeros')
         self.stat = self.parameters.clone(prefix="stat", init='ones')
         self.hyper_map = C.HyperMap()
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index 64c382557ae..6c88b7d9574 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -13,17 +13,10 @@
 # limitations under the License.
 # ============================================================================
 """Cell_wrapper."""
-import copy
-
-import numpy as np
-
 from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean,
                                        _get_parallel_mode)
 from mindspore.train.parallel_utils import ParallelMode
-
-from ...common import Tensor
 from ...common import dtype as mstype
-from ...common.initializer import initializer
 from ...common.parameter import Parameter, ParameterTuple
 from ...ops import composite as C
 from ...ops import functional as F
@@ -348,25 +341,8 @@ class ParameterUpdate(Cell):
         super(ParameterUpdate, self).__init__(auto_prefix=False)
         if not isinstance(param, Parameter):
             raise TypeError("`param` must be `Parameter`, but got {}".format(param))
-
-        default_input = param.default_input
-        if isinstance(default_input, Tensor):
-            shape = default_input.shape()
-            zero_dtype = default_input.dtype()
-        elif isinstance(default_input, float):
-            shape = [1]
-            zero_dtype = mstype.float32
-        elif isinstance(default_input, int):
-            shape = [1]
-            zero_dtype = mstype.int32
-        else:
-            raise TypeError("`default_input` in `param` must be Tensor, float or int, but got {}".format(default_input))
-
-        self._param = Parameter(initializer(copy.deepcopy(default_input), shape), param.name)
-        self._param.is_init = True
-        self._zero = Tensor(np.zeros(shape), zero_dtype)
+        self._param = param

     def construct(self, x):
-        zero = self._param + self._zero
-        F.control_depend(zero, F.assign(self._param, x))
-        return zero
+        self._param = x
+        return x
diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index ae17bf81165..e933d406665 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -36,7 +36,6 @@ tensor_to_ms_type = {"Int8": mstype.int8, "Int16": mstype.int16, "Int32": mstype
 tensor_to_np_type = {"Int8": np.int8, "Int16": np.int16, "Int32": np.int32, "Int64": np.int64,
                      "Float16": np.float16, "Float32": np.float32, "Float64": np.float64}

-
 def _special_process_par(par, new_par):
     """
     Processes the special condition.
@@ -182,8 +181,14 @@ def load_checkpoint(ckpoint_file_name, net=None):
             param_data = np.fromstring(data, np_type)
             dims = element.tensor.dims

-            if dims in [[0], [1]]:
-                parameter_dict[element.tag] = Parameter(param_data[0], name=element.tag)
+            if dims == [0]:
+                if 'Float' in data_type:
+                    param_data = float(param_data[0])
+                elif 'Int' in data_type:
+                    param_data = int(param_data[0])
+                parameter_dict[element.tag] = Parameter(Tensor(param_data, ms_type), name=element.tag)
+            elif dims == [1]:
+                parameter_dict[element.tag] = Parameter(Tensor(param_data, ms_type), name=element.tag)
             else:
                 param_dim = []
                 for dim in dims:
diff --git a/tests/ut/python/nn/test_cell_wrapper.py b/tests/ut/python/nn/test_cell_wrapper.py
index 3e163c9e4fb..148d42ab64b 100755
--- a/tests/ut/python/nn/test_cell_wrapper.py
+++ b/tests/ut/python/nn/test_cell_wrapper.py
@@ -94,10 +94,6 @@ def test_parameter_update_float32():
 def test_parameter_update_error():
     """ test_parameter_update """
     input_np = np.array([1])
-    input_parameter = Parameter(np.array([1]), 'input_parameter')

     with pytest.raises(TypeError):
         ParameterUpdate(input_np)
-
-    with pytest.raises(TypeError):
-        ParameterUpdate(input_parameter)
diff --git a/tests/ut/python/nn/test_parameter.py b/tests/ut/python/nn/test_parameter.py
index 49e89e124e3..529af532f7a 100644
--- a/tests/ut/python/nn/test_parameter.py
+++ b/tests/ut/python/nn/test_parameter.py
@@ -52,86 +52,12 @@ def test_parameter_tuple_illegal():


 def test_parameter_init_illegal():
-    import numpy as np
-    dat = np.array([[1, 2, 3], [2, 3, 4]])
-    tensor = Tensor(dat)
-    data_none = None
     data_bool = True
     data_str = "nicai"
-    data_int = 3
-    data_list = [1, "2", True]
-    data_tuple = (1, 2, 3)
-    np_arr_int16 = np.ones([1,1], dtype=np.int16)
-    np_arr_int32 = np.ones([1,1], dtype=np.int32)
-    np_arr_float16 = np.ones([1,1], dtype=np.float16)
-    np_arr_float32 = np.ones([1,1], dtype=np.float32)
-
-# with pytest.raises(ValueError):
-#     Parameter(np_arr_int16[0][0], name=data_str)
-    Parameter(np_arr_int32[0], name=data_str)
-    Parameter(np_arr_float16[0], name=data_str)
-    Parameter(np_arr_float32[0], name=data_str)
-    Parameter(np_arr_float32, name=data_str)
-
-    Parameter(tensor, name=data_str)
-    Parameter(data_int, name=data_str)
-    Parameter(dat, name=data_str)
-    with pytest.raises(ValueError):
-        Parameter(data_none, name=data_str)
     with pytest.raises(ValueError):
         Parameter(data_bool, name=data_str)
-    with pytest.raises(ValueError):
-        Parameter(data_str, name=data_str)
-    Parameter(data_list, name=data_str)
-    with pytest.raises(ValueError):
-        Parameter(data_tuple, name=data_str)
-    Parameter(tensor, name=data_str)
-    Parameter(tensor, name=data_none)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=dat)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=tensor)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=data_bool)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=data_int)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=data_list)
-    with pytest.raises(ValueError):
-        Parameter(tensor, name=data_tuple)
-
-    Parameter(tensor, name=data_str, requires_grad=data_bool)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_none)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=dat)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=tensor)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_str)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_int)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_list)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_tuple)
-
-    Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_bool)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=dat)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=tensor)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_none)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_str)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_int)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_list)
-    with pytest.raises(TypeError):
-        Parameter(tensor, name=data_str, requires_grad=data_bool,layerwise_parallel=data_tuple)


 def test_check_str_by_regular():
diff --git a/tests/ut/python/ops/test_momentum.py b/tests/ut/python/ops/test_momentum.py
index 64b5a9af123..3334f1670a1 100644
--- a/tests/ut/python/ops/test_momentum.py
+++ b/tests/ut/python/ops/test_momentum.py
@@ -31,7 +31,7 @@ from ....mindspore_test_framework.pipeline.forward.compile_forward \
 run_opt = C.MultitypeFuncGraph("run_opt")


-@run_opt.register("Function", "Int", "Number", "Number",
+@run_opt.register("Function", "Tensor", "Tensor", "Tensor",
                   "Tensor", "Tensor", "Tensor")
 def tensor_run_opt(opt, iters, learning_rate, momentum,
diff --git a/tests/ut/python/pynative_mode/test_cell_bprop.py b/tests/ut/python/pynative_mode/test_cell_bprop.py
index da1e14974fc..c69b80412e5 100644
--- a/tests/ut/python/pynative_mode/test_cell_bprop.py
+++ b/tests/ut/python/pynative_mode/test_cell_bprop.py
@@ -51,7 +51,7 @@ class InlineMulADD(nn.Cell):
     def __init__(self):
         super(InlineMulADD, self).__init__()
         self.mul_add = MulAdd()
-        self.param = Parameter(2, 'param')
+        self.param = 2

     def construct(self, x, y):
         return self.mul_add(x, y) + x + self.param * y
diff --git a/tests/vm_impl/nn_ops_vm_impl.py b/tests/vm_impl/nn_ops_vm_impl.py
index 8794acbbd2c..0df4b5fbaa6 100644
--- a/tests/vm_impl/nn_ops_vm_impl.py
+++ b/tests/vm_impl/nn_ops_vm_impl.py
@@ -377,8 +377,8 @@ def vm_impl_momentum(self):
         accumulation = accumulation.asnumpy()
         variable = variable.asnumpy()
         shape = accumulation.shape
-        learning_rate = np.full(shape, learning_rate)
-        momentum = np.full(shape, momentum)
+        learning_rate = np.full(shape, learning_rate.asnumpy())
+        momentum = np.full(shape, momentum.asnumpy())
         accumulation = accumulation * momentum + gradient
         if use_nesterov is True:
             variable -= gradient * learning_rate + accumulation * momentum * learning_rate
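
The core behavioural change is in Parameter.set_parameter_data: anything except bool is now normalised to a Tensor before being stored in default_input. A minimal usage sketch, assuming a MindSpore build with this patch applied (the parameter name is illustrative):

import numpy as np
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor

weight = Parameter(Tensor(np.ones([2, 2], np.float32)), name="weight")
weight.set_parameter_data(0.5)                                   # scalar is wrapped into a Tensor
weight.set_parameter_data(Tensor(np.zeros([2, 2], np.float32)))  # a Tensor input is copied first
# weight.set_parameter_data(True)  # would raise ValueError: bool data is rejected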
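
The ParameterUpdate wrapper is reduced to assigning the new value straight to the wrapped Parameter. A rough usage sketch modelled on tests/ut/python/nn/test_cell_wrapper.py (values and names are illustrative, not part of the patch):

import numpy as np
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.wrap.cell_wrapper import ParameterUpdate

param = Parameter(Tensor(np.array([1.0], np.float32)), name="param")
update = ParameterUpdate(param)               # anything but a Parameter raises TypeError
update(Tensor(np.array([0.1], np.float32)))   # construct() writes the new value into `param`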
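
On the optimizer side, a scalar learning rate and momentum are now converted to float32 Tensors, so a single Tensor-typed MultitypeFuncGraph entry serves both fixed and dynamic learning rates. A sketch of the unchanged user-facing call (nn.Dense is just a stand-in for any network):

import mindspore.nn as nn
from mindspore.nn.optim.momentum import Momentum

net = nn.Dense(3, 2)
opt = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
# internally both hyper-parameters are now held as float32 Tensors rather than Python numbers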
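
Finally, load_checkpoint now rebuilds zero-dimensional checkpoint entries as typed scalar Tensors instead of bare NumPy scalars. The conversion step, extracted here as a standalone illustration (not the library API; the payload and type tag are made up):

import numpy as np
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype

param_data = np.array([3.14], np.float32)   # payload decoded from the checkpoint proto
data_type = "Float32"                       # type tag stored alongside it
scalar = float(param_data[0]) if 'Float' in data_type else int(param_data[0])
restored = Tensor(scalar, mstype.float32)   # 0-d entries come back as typed scalar Tensors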