Change 'order_params' to require the same parameters as the group params

This commit is contained in:
guohongzilong 2020-06-30 13:07:31 +08:00
parent 71fd4321c6
commit 652093642e
7 changed files with 45 additions and 96 deletions

View File

@ -181,8 +181,7 @@ class Adam(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
in the value of 'order_params' but not in any group will use default learning rate and default weight
decay.
in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
@ -220,16 +219,14 @@ class Adam(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
>>> {'params': bias_params, 'lr': 0.01},
>>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.Adam(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
>>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameter order that the optimizer will follow is the value of 'order_params'.
>>> # The parameters which are in the value of 'order_params' but not in any group will use a learning rate
>>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)

View File

@ -109,6 +109,10 @@ class LazyAdam(Optimizer):
- weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay
will be used. If not, the `weight_decay` in the API will be used.
- order_params: Optional. If "order_params" is in the keys, the value should be the order of parameters, and
that order will be followed in the optimizer. There must be no other keys in the `dict`, and every parameter
in the value of 'order_params' should be in one of the group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
use dynamic learning rate, then the i-th step will
@ -146,12 +150,13 @@ class LazyAdam(Optimizer):
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01},
>>> {'params': no_conv_params}]
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
>>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.LazyAdam(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # the conv_params's parameters will use a learning rate of 0.01 and a weight decay of 0.01
>>> # the no_conv_params's parameters don't set learning rate and weight decay. So they will use a
>>> # learning rate of 0.1 and a weight decay of 0.0.
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
>>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameter order that the optimizer will follow is the value of 'order_params'.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)

View File

@ -64,8 +64,7 @@ class Momentum(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
in the value of 'order_params' but not in any group will use default learning rate and default weight
decay.
in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
@ -97,16 +96,14 @@ class Momentum(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
>>> {'params': bias_params, 'lr': 0.01},
>>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
>>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameter order that the optimizer will follow is the value of 'order_params'.
>>> # The parameters which are in the value of 'order_params' but not in any group will use a learning rate
>>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)

View File

@ -77,8 +77,7 @@ class Optimizer(Cell):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
in the value of 'order_params' but not in any group will use default learning rate and default weight
decay.
in the value of 'order_params' should be in one of group parameters.
weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0.
If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0.
@ -351,16 +350,18 @@ class Optimizer(Cell):
self.group_weight_decay.append(weight_decay_)
if self.is_group_params_ordered:
self._order_and_adjust_group_params(ordered_parameters, learning_rate, weight_decay)
self._order_and_adjust_group_params(ordered_parameters)
def _order_and_adjust_group_params(self, ordered_parameters, learning_rate, weight_decay):
def _order_and_adjust_group_params(self, ordered_parameters):
"""
Order group parameter, learning rate and weight decay in group params. And assign the parameters
which in the value of 'order_params' but not in any group to default value.
Order group parameter, learning rate and weight decay in group params.
"""
params_length = len(ordered_parameters)
ordered_learning_rate = [Parameter(learning_rate, name="lr_" + param.name) for param in ordered_parameters]
ordered_weight_decay = [weight_decay * self.loss_scale] * params_length
params_length = len(self.group_params)
if len(ordered_parameters) != len(self.group_params):
raise ValueError(f"The value of 'order_params' should be same with all group parameters.")
ordered_learning_rate = [None] * params_length
ordered_weight_decay = [None] * params_length
params_name = [param.name for param in ordered_parameters]
for param, lr, wd in zip(self.group_params, self.group_lr, self.group_weight_decay):

View File

@ -107,8 +107,7 @@ class RMSProp(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
in the value of 'order_params' but not in any group will use default learning rate and default weight
decay.
in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
@ -140,16 +139,14 @@ class RMSProp(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
>>> {'params': bias_params, 'lr': 0.01},
>>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.RMSProp(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
>>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameter order that the optimizer will follow is the value of 'order_params'.
>>> # The parameters which are in the value of 'order_params' but not in any group will use a learning rate
>>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)

View File

@ -64,8 +64,7 @@ class SGD(Optimizer):
- order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and
the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
in the value of 'order_params' but not in any group will use default learning rate and default weight
decay.
in the value of 'order_params' should be in one of group parameters.
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
@ -98,16 +97,14 @@ class SGD(Optimizer):
>>>
>>> #2) Use parameter groups and set different values
>>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
>>> bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
>>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
>>> group_params = [{'params': conv_params, 'weight_decay': 0.01},
>>> {'params': bias_params, 'lr': 0.01},
>>> {'params': no_conv_params, 'lr': 0.01},
>>> {'order_params': net.trainable_params()}]
>>> opt = nn.SGD(group_params, learning_rate=0.1, weight_decay=0.0)
>>> # The conv_params's parameters will use a learning rate of default value 0.1 and a weight decay of 0.01.
>>> # The bias_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The no_conv_params's parameters will use a learning rate of 0.01 and a weight decay of default value 0.0.
>>> # The final parameter order that the optimizer will follow is the value of 'order_params'.
>>> # The parameters which are in the value of 'order_params' but not in any group will use a learning rate
>>> # of default value 0.1 and a weight decay of default value 0.0.
>>>
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> model = Model(net, loss_fn=loss, optimizer=optim)

View File

@ -250,8 +250,9 @@ def test_get_lr_parameter_with_order_group():
net = LeNet5()
conv_lr = 0.1
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
group_params = [{'params': conv_params, 'lr': conv_lr},
{'order_params': net.trainable_params()}]
{'params': no_conv_params}]
opt = SGD(group_params)
assert opt.is_group_lr is True
for param in opt.parameters:
@ -278,65 +279,19 @@ def test_get_lr_parameter_with_no_group():
opt.get_lr_parameter(params_error)
def test_order_params_lr():
    """
    Check per-parameter learning rates when 'order_params' lists every trainable parameter.

    Only the conv parameters are given an explicit 'lr' in a group. The test expects every other
    parameter to carry the learning rate passed to the optimizer, and the optimizer's parameters
    to appear in the same order as net.trainable_params().
    """
    net = LeNet5()
    conv_lr = 0.01
    default_lr = 0.1
    conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
    conv_group = {'params': conv_params, 'lr': conv_lr}
    ordering = {'order_params': net.trainable_params()}
    opt = SGD([conv_group, ordering], learning_rate=default_lr)

    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True

    triples = zip(opt.learning_rate, opt.parameters, net.trainable_params())
    for lr, param, order_param in triples:
        # Conv parameters keep their group lr; everything else falls back to the default.
        expected_lr = conv_lr if param in conv_params else default_lr
        assert np.all(lr.data.asnumpy() == Tensor(expected_lr, mstype.float32).asnumpy())

        # The optimizer must follow the requested order and name each lr after its parameter.
        assert param.name == order_param.name
        assert lr.name == 'lr_' + param.name
def test_order_params_weight_decay():
    """
    Check per-parameter weight decay when 'order_params' lists every trainable parameter.

    Only the conv parameters are given an explicit 'weight_decay' in a group. The test expects the
    learning rate to stay a single, non-grouped value, every other parameter to use the default
    weight decay with its decay flag off, and the optimizer's parameters to appear in the same
    order as net.trainable_params().
    """
    net = LeNet5()
    conv_weight_decay = 0.01
    default_wd = 0.0
    default_lr = 0.1
    conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
    conv_group = {'params': conv_params, 'weight_decay': conv_weight_decay}
    ordering = {'order_params': net.trainable_params()}
    opt = SGD([conv_group, ordering], learning_rate=default_lr, weight_decay=default_wd)

    assert opt.is_group is True
    assert opt.is_group_lr is False
    assert opt.is_group_params_ordered is True

    # No group sets 'lr', so the learning rate is expected to be one shared parameter.
    assert opt.learning_rate.name == "learning_rate"
    assert np.all(opt.learning_rate.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())

    rows = zip(opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params())
    for weight_decay, decay_flags, param, order_param in rows:
        if param in conv_params:
            expected_wd, expected_flag = conv_weight_decay, True
        else:
            expected_wd, expected_flag = default_wd, False
        assert weight_decay == expected_wd
        assert decay_flags is expected_flag
        assert param.name == order_param.name
def test_order_params_all_1():
def test_order_params_1():
net = LeNet5()
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
group_params = [{'params': conv_params, 'weight_decay': 0.01},
{'params': bias_params, 'lr': 0.01},
{'order_params': net.trainable_params()}]
{'order_params': bias_params+conv_params}]
opt = SGD(group_params, learning_rate=0.1, weight_decay=0.0)
assert opt.is_group is True
assert opt.is_group_lr is True
assert opt.is_group_params_ordered is True
for weight_decay, decay_flags, lr, param, order_param in zip(
opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, bias_params+conv_params):
if param in conv_params:
assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
assert weight_decay == 0.01
@ -354,7 +309,7 @@ def test_order_params_all_1():
assert lr.name == 'lr_' + param.name
def test_order_params_all_2():
def test_order_params_2():
net = LeNet5()
conv_weight_decay = 0.01
fc1_lr = (0.5, 0.4, 0.3)
@ -364,13 +319,13 @@ def test_order_params_all_2():
fc1_params = list(filter(lambda x: 'fc1' in x.name, net.trainable_params()))
group_params = [{'params': fc1_params, 'lr': fc1_lr},
{'params': conv_params, 'weight_decay': conv_weight_decay},
{'order_params': net.trainable_params()}]
{'order_params': fc1_params+conv_params}]
opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
assert opt.is_group is True
assert opt.is_group_lr is True
assert opt.is_group_params_ordered is True
for weight_decay, decay_flags, lr, param, order_param in zip(
opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, fc1_params+conv_params):
if param in conv_params:
assert np.all(lr.data.asnumpy() == Tensor(np.array([default_lr] * 3), mstype.float32).asnumpy())
assert weight_decay == conv_weight_decay
@ -388,7 +343,7 @@ def test_order_params_all_2():
assert lr.name == 'lr_' + param.name
def test_get_order_params_with_not_include():
def test_get_order_params_with_not_same():
net = LeNet5()
conv_weight_decay = 0.8