forked from mindspore-Ecosystem/mindspore
Modify the learning-rate schedule and epoch configuration.
This commit is contained in:
parent
5708bae7e7
commit
dc056b0cd0
|
@ -51,12 +51,4 @@ alexnet_imagenet_cfg = edict({
|
|||
|
||||
# lr
|
||||
'is_dynamic_loss_scale': 0,
|
||||
'label_smooth': 1,
|
||||
'label_smooth_factor': 0.1,
|
||||
'lr_scheduler': 'cosine_annealing',
|
||||
'warmup_epochs': 5,
|
||||
'lr_epochs': [30, 60, 90, 120],
|
||||
'lr_gamma': 0.1,
|
||||
'T_max': 150,
|
||||
'eta_min': 0.0,
|
||||
})
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
# ============================================================================
|
||||
"""learning rate generator"""
|
||||
import math
|
||||
from collections import Counter
|
||||
import numpy as np
|
||||
|
||||
def get_lr_cifar10(current_step, lr_max, total_epochs, steps_per_epoch):
|
||||
|
@ -44,71 +43,18 @@ def get_lr_cifar10(current_step, lr_max, total_epochs, steps_per_epoch):
|
|||
|
||||
return learning_rate
|
||||
|
||||
def get_lr_imagenet(lr, epoch_size, steps_per_epoch):
    """Generate the learning-rate schedule for ImageNet training.

    Delegates to ``warmup_cosine_annealing_lr``; warm-up length, ``T_max``
    and ``eta_min`` fall back to that function's defaults.

    Args:
        lr (float): base (peak) learning rate.
        epoch_size (int): total number of training epochs.
        steps_per_epoch (int): optimizer steps per epoch.

    Returns:
        The per-step schedule produced by ``warmup_cosine_annealing_lr``
        (presumably a float32 array with one entry per step — confirm
        against that function's definition).
    """
    return warmup_cosine_annealing_lr(lr, epoch_size, steps_per_epoch)
|
||||
|
||||
|
||||
|
||||
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Linearly interpolate the learning rate during warm-up.

    Args:
        current_step (int): 1-based index of the current warm-up step.
        warmup_steps (int): total number of warm-up steps.
        base_lr (float): learning rate reached at the end of warm-up.
        init_lr (float): learning rate at the start of warm-up.

    Returns:
        float: the interpolated learning rate for ``current_step``.
    """
    # Per-step increment of the linear ramp from init_lr to base_lr.
    step_delta = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + step_delta * current_step
|
||||
|
||||
def warmup_step_lr(lr, lr_epochs, steps_per_epoch, warmup_epochs, max_epoch, gamma=0.1):
    """Build a per-step schedule: linear warm-up, then stepwise decay.

    Args:
        lr (float): target (peak) learning rate reached at the end of warm-up.
        lr_epochs (list[int]): epochs at which the rate is multiplied by ``gamma``.
        steps_per_epoch (int): optimizer steps per epoch.
        warmup_epochs (int): number of warm-up epochs (0 disables warm-up).
        max_epoch (int): total number of training epochs.
        gamma (float): multiplicative decay factor applied at each milestone.

    Returns:
        numpy.ndarray: float32 array with one learning rate per step.
    """
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    # Counter tolerates duplicate milestones: each occurrence contributes
    # one extra factor of gamma via gamma ** count.
    decay_counts = Counter(epoch * steps_per_epoch for epoch in lr_epochs)

    schedule = []
    current_lr = base_lr
    for step in range(total_steps):
        if step < warmup_steps:
            current_lr = linear_warmup_lr(step + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            current_lr = current_lr * gamma ** decay_counts[step]
        schedule.append(current_lr)

    return np.array(schedule).astype(np.float32)
|
||||
|
||||
def multi_step_lr(lr, milestones, steps_per_epoch, max_epoch, gamma=0.1):
    """Stepwise decay at explicit milestone epochs, with warm-up disabled.

    Thin wrapper over ``warmup_step_lr`` that fixes ``warmup_epochs`` to 0.
    """
    no_warmup = 0
    return warmup_step_lr(lr, milestones, steps_per_epoch, no_warmup, max_epoch, gamma=gamma)
|
||||
|
||||
def step_lr(lr, epoch_size, steps_per_epoch, max_epoch, gamma=0.1):
    """Decay the learning rate by ``gamma`` every ``epoch_size`` epochs.

    Expands the fixed decay period into explicit milestones (multiples of
    ``epoch_size`` strictly below ``max_epoch``) and delegates to
    ``multi_step_lr``.
    """
    milestones = [epoch for epoch in range(1, max_epoch) if epoch % epoch_size == 0]
    return multi_step_lr(lr, milestones, steps_per_epoch, max_epoch, gamma=gamma)
|
||||
|
||||
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
|
||||
def warmup_cosine_annealing_lr(lr, max_epoch, steps_per_epoch, warmup_epochs=5, T_max=150, eta_min=0.0):
|
||||
""" Cosine annealing learning rate"""
|
||||
base_lr = lr
|
||||
warmup_init_lr = 0
|
||||
|
|
|
@ -71,6 +71,8 @@ if __name__ == "__main__":
|
|||
context.set_context(device_id=args.device_id)
|
||||
|
||||
if device_num > 1:
|
||||
cfg.learning_rate = cfg.learning_rate * device_num
|
||||
cfg.epoch_size = cfg.epoch_size * 2
|
||||
context.reset_auto_parallel_context()
|
||||
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
|
||||
gradients_mean=True)
|
||||
|
@ -107,7 +109,7 @@ if __name__ == "__main__":
|
|||
|
||||
elif args.dataset_name == 'imagenet':
|
||||
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
|
||||
lr = Tensor(get_lr_imagenet(cfg, step_per_epoch))
|
||||
lr = Tensor(get_lr_imagenet(cfg.learning_rate, cfg.epoch_size, step_per_epoch))
|
||||
opt = nn.Momentum(params=get_param_groups(network),
|
||||
learning_rate=lr,
|
||||
momentum=cfg.momentum,
|
||||
|
|
Loading…
Reference in New Issue