From 4e3fa200ee20a8a318344f8a4ba9a6de59fd023f Mon Sep 17 00:00:00 2001
From: ms_yan <6576637+ms_yan@user.noreply.gitee.com>
Date: Tue, 28 Jul 2020 23:10:45 +0800
Subject: [PATCH] repair problem in vgg16 cifar10 version

---
 model_zoo/official/cv/vgg16/README.md              |  6 ++--
 model_zoo/official/cv/vgg16/eval.py                | 31 ++++++++-----------
 .../cv/vgg16/scripts/run_distribute_train.sh       |  2 +-
 model_zoo/official/cv/vgg16/src/vgg.py             |  2 +-
 model_zoo/official/cv/vgg16/train.py               |  4 +--
 5 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/model_zoo/official/cv/vgg16/README.md b/model_zoo/official/cv/vgg16/README.md
index 53eb05f66de..9ba1ae49b54 100644
--- a/model_zoo/official/cv/vgg16/README.md
+++ b/model_zoo/official/cv/vgg16/README.md
@@ -38,9 +38,11 @@ epoch: 2 step: 781, loss is 1.827582
 
 ### Evaluation
 
+- Run eval as follows. The dataset type must be specified as "cifar10" or "imagenet2012":
 ```
-python eval.py --data_path=your_data_path --device_id=6 --checkpoint_path=./train_vgg_cifar10-70-781.ckpt > out.eval.log 2>&1 &
+python eval.py --data_path=your_data_path --dataset="cifar10" --pre_trained=./train_vgg_cifar10-70-781.ckpt > out.eval.log 2>&1 &
 ```
+- If the dataset is "imagenet2012", set --dataset="imagenet2012" and point --pre_trained to the matching checkpoint.
 
 The above python command will run in the background. You can view the results through the file `out.eval.log`. You will get the accuracy as follows:
@@ -93,7 +95,7 @@ parameters/options:
     --device_target   the evaluation backend type, default is Ascend.
     --data_path       the storage path of dataset
     --device_id       the device which is used to evaluate the model.
-    --checkpoint_path the checkpoint file path used to evaluate the model.
+    --pre_trained     the checkpoint file path used to evaluate the model.
 ```
 
 ### Distributed Training
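Reviewer note: the renamed `--pre_trained` flag is the thread connecting the README change above and the eval.py hunk below. The sketch that follows condenses the patched cifar10 eval path into a standalone form; the `src.*` import paths and the `cifar_cfg` name are assumptions about the repo layout, and the optimizer is omitted since `model.eval` never steps it.

```python
# Condensed sketch of the patched cifar10 eval flow (not the shipped eval.py).
# Assumes MindSpore is installed and the repo's src/ package is importable.
import argparse

from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net

from src.config import cifar_cfg as cfg     # assumed config name/layout
from src.dataset import vgg_create_dataset  # assumed helper, as used in the hunk
from src.vgg import vgg16

parser = argparse.ArgumentParser('vgg16 eval sketch')
parser.add_argument('--data_path', type=str, default='')
parser.add_argument('--pre_trained', type=str, default='')  # was --checkpoint_path
args = parser.parse_args()

net = vgg16(num_classes=cfg.num_classes, args=cfg)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
model = Model(net, loss_fn=loss, metrics={'acc'})

# Load the checkpoint named by --pre_trained into the network, then evaluate.
load_param_into_net(net, load_checkpoint(args.pre_trained))
net.set_train(False)
dataset = vgg_create_dataset(args.data_path, cfg.image_size, cfg.batch_size, training=False)
print("result:", model.eval(dataset))
```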
diff --git a/model_zoo/official/cv/vgg16/eval.py b/model_zoo/official/cv/vgg16/eval.py
index 504a79207d6..2c42dbea6da 100644
--- a/model_zoo/official/cv/vgg16/eval.py
+++ b/model_zoo/official/cv/vgg16/eval.py
@@ -52,15 +52,15 @@ class ParameterReduce(nn.Cell):
 def parse_args(cloud_args=None):
     """parse_args"""
     parser = argparse.ArgumentParser('mindspore classification test')
-    parser.add_argument('--device_target', type=str, default='GPU', choices=['Ascend', 'GPU'],
+    parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
                         help='device where the code will be implemented. (Default: Ascend)')
     # dataset related
-    parser.add_argument('--dataset', type=str, choices=["cifar10", "imagenet2012"], default="imagenet2012")
+    parser.add_argument('--dataset', type=str, choices=["cifar10", "imagenet2012"], default="cifar10")
     parser.add_argument('--data_path', type=str, default='', help='eval data dir')
     parser.add_argument('--per_batch_size', default=32, type=int, help='batch size for per npu')
     # network related
     parser.add_argument('--graph_ckpt', type=int, default=1, help='graph ckpt or feed ckpt')
-    parser.add_argument('--pretrained', default='', type=str, help='full path of pretrained model to load. '
+    parser.add_argument('--pre_trained', default='', type=str, help='full path of pretrained model to load. '
                         'If it is a directory, it will test all ckpt files')
 
     # logging related
@@ -68,9 +68,6 @@ def parse_args(cloud_args=None):
     parser.add_argument('--rank', type=int, default=0, help='local rank of distributed')
     parser.add_argument('--group_size', type=int, default=1, help='world size of distributed')
 
-    # roma obs
-    parser.add_argument('--train_url', type=str, default="", help='train url')
-
     args_opt = parser.parse_args()
     args_opt = merge_args(args_opt, cloud_args)
 
@@ -82,6 +79,8 @@ def parse_args(cloud_args=None):
     args_opt.image_size = cfg.image_size
     args_opt.num_classes = cfg.num_classes
     args_opt.per_batch_size = cfg.batch_size
+    args_opt.momentum = cfg.momentum
+    args_opt.weight_decay = cfg.weight_decay
     args_opt.buffer_size = cfg.buffer_size
     args_opt.pad_mode = cfg.pad_mode
     args_opt.padding = cfg.padding
@@ -130,23 +129,23 @@ def test(cloud_args=None):
     args.logger.save_args(args)
 
     if args.dataset == "cifar10":
-        net = vgg16(num_classes=args.num_classes)
-        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
+        net = vgg16(num_classes=args.num_classes, args=args)
+        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                        weight_decay=args.weight_decay)
         loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
         model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
 
-        param_dict = load_checkpoint(args.checkpoint_path)
+        param_dict = load_checkpoint(args.pre_trained)
         load_param_into_net(net, param_dict)
         net.set_train(False)
-        dataset = vgg_create_dataset(args.data_path, 1, False)
+        dataset = vgg_create_dataset(args.data_path, args.image_size, args.per_batch_size, training=False)
         res = model.eval(dataset)
         print("result: ", res)
     else:
         # network
         args.logger.important_info('start create network')
-        if os.path.isdir(args.pretrained):
-            models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
+        if os.path.isdir(args.pre_trained):
+            models = list(glob.glob(os.path.join(args.pre_trained, '*.ckpt')))
             print(models)
             if args.graph_ckpt:
                 f = lambda x: -1 * int(os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split('_')[0])
@@ -154,14 +153,10 @@ def test(cloud_args=None):
                 f = lambda x: -1 * int(os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
             args.models = sorted(models, key=f)
         else:
-            args.models = [args.pretrained,]
+            args.models = [args.pre_trained,]
 
     for model in args.models:
-        if args.dataset == "cifar10":
-            dataset = vgg_create_dataset(args.data_path, args.image_size, args.per_batch_size, training=False)
-        else:
-            dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size)
-
+        dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size)
         eval_dataloader = dataset.create_tuple_iterator()
         network = vgg16(args.num_classes, args, phase="test")
diff --git a/model_zoo/official/cv/vgg16/scripts/run_distribute_train.sh b/model_zoo/official/cv/vgg16/scripts/run_distribute_train.sh
index ca4c993deda..600550c4e51 100755
--- a/model_zoo/official/cv/vgg16/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/vgg16/scripts/run_distribute_train.sh
@@ -47,6 +47,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --data_path=$2 --device_id=$i &> log &
+    python train.py --data_path=$2 --device_target="Ascend" --device_id=$i &> log &
     cd ..
done
\ No newline at end of file
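Reviewer note: the checkpoint-sorting lambdas kept in the eval.py hunk above are dense. Unpacked below with a hypothetical file list, assuming MindSpore's default `<prefix>-<epoch>_<step>.ckpt` checkpoint naming (with the hyphenated name shown in the README, the same key would pick up the step number instead).

```python
import os

# Unpacked version of the graph-ckpt sort key from eval.py above.
# MindSpore names checkpoints "<prefix>-<epoch>_<step>.ckpt"; the key
# extracts <epoch> and negates it so sorted() puts the newest epoch first.
def graph_ckpt_key(path):
    stem = os.path.splitext(os.path.split(path)[-1])[0]  # e.g. "train_vgg_cifar10-70_781"
    return -int(stem.split('-')[-1].split('_')[0])       # e.g. -70

models = ["ckpts/train_vgg_cifar10-5_781.ckpt",
          "ckpts/train_vgg_cifar10-70_781.ckpt",
          "ckpts/train_vgg_cifar10-12_781.ckpt"]         # hypothetical files
print(sorted(models, key=graph_ckpt_key))
# ['ckpts/train_vgg_cifar10-70_781.ckpt', 'ckpts/train_vgg_cifar10-12_781.ckpt', ...]
```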
diff --git a/model_zoo/official/cv/vgg16/src/vgg.py b/model_zoo/official/cv/vgg16/src/vgg.py
index 835d2a0b5d6..5b94cc1d2e9 100644
--- a/model_zoo/official/cv/vgg16/src/vgg.py
+++ b/model_zoo/official/cv/vgg16/src/vgg.py
@@ -138,5 +138,5 @@ def vgg16(num_classes=1000, args=None, phase="train"):
     >>> vgg16(num_classes=1000)
     """
-    net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=True, phase=phase)
+    net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase)
     return net
diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py
index 2bd78c4685e..dedb247aa0f 100644
--- a/model_zoo/official/cv/vgg16/train.py
+++ b/model_zoo/official/cv/vgg16/train.py
@@ -109,7 +109,7 @@ class ProgressMonitor(Callback):
 def parse_args(cloud_args=None):
     """parameters"""
     parser = argparse.ArgumentParser('mindspore classification training')
-    parser.add_argument('--device_target', type=str, default='GPU', choices=['Ascend', 'GPU'],
+    parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
                         help='device where the code will be implemented. (Default: Ascend)')
     parser.add_argument('--device_id', type=int, default=1, help='device id of GPU or Ascend. (Default: 1)')
 
@@ -127,7 +127,7 @@ def parse_args(cloud_args=None):
     # logging and checkpoint related
     parser.add_argument('--log_interval', type=int, default=100, help='logging interval')
     parser.add_argument('--ckpt_path', type=str, default='outputs/', help='checkpoint save location')
-    parser.add_argument('--ckpt_interval', type=int, default=5000, help='ckpt_interval')
+    parser.add_argument('--ckpt_interval', type=int, default=2, help='checkpoint save interval')
     parser.add_argument('--is_save_on_master', type=int, default=1, help='save ckpt on master or all rank')
 
     # distributed related
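Reviewer note: the default for `--ckpt_interval` drops from 5000 to 2, which suggests the value is now scaled by steps per epoch inside train.py (the README log above shows 781 steps per cifar10 epoch). Below is a hedged sketch of one plausible wiring through MindSpore's checkpoint callback; the scaling, prefix, and keep_checkpoint_max are illustrative, not lifted from the shipped train.py.

```python
# Sketch: wiring a ckpt_interval argument into MindSpore's checkpoint callback.
# steps_per_epoch and the prefix are illustrative; the shipped train.py may
# compute and name these differently.
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint

ckpt_interval = 2      # new default from the hunk above
steps_per_epoch = 781  # cifar10 steps per epoch, per the README training log

config = CheckpointConfig(save_checkpoint_steps=ckpt_interval * steps_per_epoch,
                          keep_checkpoint_max=10)
ckpt_cb = ModelCheckpoint(prefix='train_vgg_cifar10', directory='outputs/', config=config)
# ckpt_cb is then passed in the callback list to model.train(...)
```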