update nasnet scripts for r1.0
parent a93476ba5d
commit bf3d0b1589
@@ -40,7 +40,7 @@ Parameters for both training and evaluating can be set in config.py
 'rank': 0, # local rank of distributed
 'group_size': 1, # world size of distributed
 'work_nums': 8, # number of workers to read the data
-'epoch_size': 250, # total epoch numbers
+'epoch_size': 500, # total epoch numbers
 'keep_checkpoint_max': 100, # max numbers to keep checkpoints
 'ckpt_path': './checkpoint/', # save checkpoint path
 'is_save_on_master': 1 # save checkpoint on rank0, distributed parameters

@@ -23,9 +23,9 @@ nasnet_a_mobile_config_gpu = edict({
     'rank': 0,
     'group_size': 1,
     'work_nums': 8,
-    'epoch_size': 312,
+    'epoch_size': 500,
     'keep_checkpoint_max': 100,
-    'ckpt_path': './',
+    'ckpt_path': './checkpoint/',
     'is_save_on_master': 0,

     ### Dataset Config

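Note: both values changed above live in the same edict-style config object, so raising epoch_size and pointing ckpt_path at ./checkpoint/ only touches config.py. A minimal, self-contained sketch of that pattern, limited to the fields visible in this hunk (easydict is the only assumed dependency):

    # Minimal sketch of the edict-based config pattern used by these scripts.
    # Only fields visible in the hunk above are included.
    from easydict import EasyDict as edict

    nasnet_a_mobile_config_gpu = edict({
        'rank': 0,                     # local rank of distributed training
        'group_size': 1,               # world size of distributed training
        'work_nums': 8,                # number of dataset reader workers
        'epoch_size': 500,             # total training epochs (raised in this commit)
        'keep_checkpoint_max': 100,    # max checkpoints to keep
        'ckpt_path': './checkpoint/',  # checkpoint directory (changed in this commit)
        'is_save_on_master': 0,        # save checkpoints on rank 0 only
    })

    # Consumers read plain attributes:
    print(nasnet_a_mobile_config_gpu.epoch_size)   # -> 500
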
@@ -23,7 +23,7 @@ import mindspore.ops.functional as F
 import mindspore.ops.composite as C
 import mindspore.common.dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
-from mindspore.train.parallel_utils import ParallelMode
+from mindspore.context import ParallelMode
 from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean


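Review note: in r1.0 ParallelMode moves to mindspore.context, and the private parallel helpers use the new name _get_gradients_mean. A hedged sketch of the pattern these imports usually support inside a custom train cell; build_grad_reducer is an illustrative helper, not a function from this repository:

    # Hedged sketch: how the r1.0 imports above typically feed a gradient
    # reducer for data-parallel training. build_grad_reducer is illustrative.
    from mindspore.context import ParallelMode
    from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
    from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean

    def build_grad_reducer(parameters):
        """Return a DistributedGradReducer when running in parallel, else None."""
        parallel_mode = _get_parallel_mode()
        if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
            mean = _get_gradients_mean()   # r1.0 name for the gradients-mean flag
            degree = _get_device_num()
            return DistributedGradReducer(parameters, mean, degree)
        return None
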
@@ -33,7 +33,6 @@ GRADIENT_CLIP_VALUE = 10.0
 clip_grad = C.MultitypeFuncGraph("clip_grad")


-# pylint: disable=consider-using-in
 @clip_grad.register("Number", "Number", "Tensor")
 def _clip_grad(clip_type, clip_value, grad):
     """
@@ -47,7 +46,7 @@ def _clip_grad(clip_type, clip_value, grad):
     Outputs:
         tuple[Tensor]: clipped gradients.
     """
-    if clip_type != 0 and clip_type != 1:
+    if clip_type not in (0, 1):
         return grad
     dt = F.dtype(grad)
     if clip_type == 0:

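The hunk above only shows the head of _clip_grad (the `not in` rewrite is also why the pylint disable could be dropped). For reference, a self-contained sketch of the whole clipping pattern as it commonly appears in MindSpore model scripts; the ClipGradients cell and the constant values are illustrative, not taken verbatim from this file:

    # Self-contained sketch of the clip_grad pattern: a MultitypeFuncGraph that
    # clips one gradient, plus a small cell that maps it over a gradient tuple.
    import mindspore.nn as nn
    import mindspore.ops.functional as F
    import mindspore.ops.composite as C

    GRADIENT_CLIP_TYPE = 1      # 0: clip by value, 1: clip by L2 norm
    GRADIENT_CLIP_VALUE = 10.0

    clip_grad = C.MultitypeFuncGraph("clip_grad")

    @clip_grad.register("Number", "Number", "Tensor")
    def _clip_grad(clip_type, clip_value, grad):
        """Clip a single gradient tensor by value (type 0) or by norm (type 1)."""
        if clip_type not in (0, 1):
            return grad
        dt = F.dtype(grad)
        if clip_type == 0:
            return C.clip_by_value(grad, F.cast(F.tuple_to_array((-clip_value,)), dt),
                                   F.cast(F.tuple_to_array((clip_value,)), dt))
        return nn.ClipByNorm()(grad, F.cast(F.tuple_to_array((clip_value,)), dt))

    class ClipGradients(nn.Cell):
        """Apply _clip_grad to every gradient produced by GradOperation."""
        def __init__(self):
            super(ClipGradients, self).__init__()
            self.hyper_map = C.HyperMap()

        def construct(self, grads):
            return self.hyper_map(
                F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
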
@@ -18,7 +18,7 @@ import os

 from mindspore import Tensor
 from mindspore import context
-from mindspore import ParallelMode
+from mindspore.context import ParallelMode
 from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.nn.optim.rmsprop import RMSProp
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor

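Same rename on the train.py side: ParallelMode now comes from mindspore.context. A hedged sketch of how these imports are typically wired up for GPU data-parallel runs; the "nccl" backend string and the setup_distributed helper are assumptions, not lines from this script:

    # Hedged sketch of the distributed setup the imports above support on GPU.
    # setup_distributed and the "nccl" backend choice are illustrative.
    from mindspore import context
    from mindspore.context import ParallelMode
    from mindspore.communication.management import init, get_rank, get_group_size

    def setup_distributed(run_distribute=True):
        """Configure the parallel context; return (rank, group_size)."""
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        if not run_distribute:
            return 0, 1
        init("nccl")
        rank, group_size = get_rank(), get_group_size()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                          device_num=group_size,
                                          gradients_mean=True)
        return rank, group_size
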
@@ -28,7 +28,7 @@ from mindspore.common import set_seed

 from src.config import nasnet_a_mobile_config_gpu as cfg
 from src.dataset import create_dataset
-from src.nasnet_a_mobile import NASNetAMobileWithLoss, NASNetAMobileTrainOneStepWithClipGradient
+from src.nasnet_a_mobile import NASNetAMobile, CrossEntropy
 from src.lr_generator import get_lr


@@ -68,10 +68,13 @@ if __name__ == '__main__':
     batches_per_epoch = dataset.get_dataset_size()

     # network
-    net_with_loss = NASNetAMobileWithLoss(cfg)
+    net = NASNetAMobile(cfg.num_classes)
     if args_opt.resume:
         ckpt = load_checkpoint(args_opt.resume)
-        load_param_into_net(net_with_loss, ckpt)
+        load_param_into_net(net, ckpt)

+    #loss
+    loss = CrossEntropy(smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes, factor=cfg.aux_factor)
+
     # learning rate schedule
     lr = get_lr(lr_init=cfg.lr_init, lr_decay_rate=cfg.lr_decay_rate,

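The bundled NASNetAMobileWithLoss wrapper is replaced by the bare backbone plus a standalone criterion, with factor weighting the auxiliary-head logits. A hedged sketch of what such a label-smoothed, aux-weighted cross-entropy generally looks like; this is not the repository's exact CrossEntropy class, and it assumes the backbone returns a (logits, aux_logits) pair in training mode:

    # Hedged sketch of a label-smoothed cross-entropy with an auxiliary term,
    # matching the constructor arguments used above. Not the repo's exact class;
    # assumes the network outputs (logits, aux_logits) during training.
    import mindspore.nn as nn
    import mindspore.ops.operations as P
    import mindspore.ops.functional as F
    import mindspore.common.dtype as mstype
    from mindspore import Tensor

    class CrossEntropySketch(nn.Cell):
        def __init__(self, smooth_factor=0.1, num_classes=1000, factor=0.4):
            super(CrossEntropySketch, self).__init__()
            self.factor = factor
            self.onehot = P.OneHot()
            self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
            self.off_value = Tensor(smooth_factor / (num_classes - 1), mstype.float32)
            self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction='mean')

        def construct(self, outputs, label):
            logits, aux_logits = outputs
            one_hot = self.onehot(label, F.shape(logits)[1], self.on_value, self.off_value)
            return self.ce(logits, one_hot) + self.factor * self.ce(aux_logits, one_hot)
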
@@ -82,20 +85,18 @@ if __name__ == '__main__':
     # optimizer
     decayed_params = []
     no_decayed_params = []
-    for param in net_with_loss.trainable_params():
+    for param in net.trainable_params():
         if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
             decayed_params.append(param)
         else:
             no_decayed_params.append(param)
     group_params = [{'params': decayed_params, 'weight_decay': cfg.weight_decay},
                     {'params': no_decayed_params},
-                    {'order_params': net_with_loss.trainable_params()}]
+                    {'order_params': net.trainable_params()}]
     optimizer = RMSProp(group_params, lr, decay=cfg.rmsprop_decay, weight_decay=cfg.weight_decay,
                         momentum=cfg.momentum, epsilon=cfg.opt_eps, loss_scale=cfg.loss_scale)

-    net_with_grads = NASNetAMobileTrainOneStepWithClipGradient(net_with_loss, optimizer)
-    net_with_grads.set_train()
-    model = Model(net_with_grads)
+    model = Model(net, loss_fn=loss, optimizer=optimizer)

     print("============== Starting Training ==============")
     loss_cb = LossMonitor(per_print_times=batches_per_epoch)
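With the custom NASNetAMobileTrainOneStepWithClipGradient wrapper dropped, the r1.0 script hands the bare network, criterion, and optimizer straight to Model. A hedged sketch of the surrounding wiring; the loss-scale manager, checkpoint prefix, and the is_save_on_master handling are assumptions based on typical model_zoo scripts, not lines from this diff:

    # Hedged sketch of the r1.0-style training wiring that replaces the removed
    # train-one-step wrapper. Checkpoint and loss-scale details are assumptions.
    from mindspore.train import Model
    from mindspore.train.loss_scale_manager import FixedLossScaleManager
    from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor

    def run_training(net, loss, optimizer, dataset, cfg, rank, batches_per_epoch):
        """Build Model from net/loss/optimizer and launch cfg.epoch_size epochs."""
        loss_scale_manager = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False)
        model = Model(net, loss_fn=loss, optimizer=optimizer,
                      loss_scale_manager=loss_scale_manager)

        callbacks = [LossMonitor(per_print_times=batches_per_epoch),
                     TimeMonitor(data_size=batches_per_epoch)]
        if not cfg.is_save_on_master or rank == 0:
            ckpt_cfg = CheckpointConfig(save_checkpoint_steps=batches_per_epoch,
                                        keep_checkpoint_max=cfg.keep_checkpoint_max)
            callbacks.append(ModelCheckpoint(prefix="nasnet-a-mobile",
                                             directory=cfg.ckpt_path, config=ckpt_cfg))
        model.train(cfg.epoch_size, dataset, callbacks=callbacks)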