From d72feda64b327946236eae611d2b3d9e73cbfa06 Mon Sep 17 00:00:00 2001 From: panfengfeng Date: Sun, 7 Feb 2021 16:44:50 +0800 Subject: [PATCH] fix cnn efficientnet bugs --- .../scripts/run_distribute_train_ascend.sh | 8 ++--- .../scripts/run_standalone_eval_ascend.sh | 4 +-- .../scripts/run_standalone_train_ascend.sh | 4 +-- .../official/cv/cnn_direction_model/train.py | 17 ++++++--- model_zoo/official/cv/efficientnet/README.md | 36 +++++++++---------- .../official/cv/efficientnet/src/transform.py | 2 +- 6 files changed, 38 insertions(+), 33 deletions(-) diff --git a/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh b/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh index df5ba8dbb6b..af22cd0235d 100644 --- a/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh +++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_distribute_train_ascend.sh @@ -15,7 +15,7 @@ # ============================================================================ if [ $# != 2 ] && [ $# != 3 ] then - echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" + echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" exit 1 fi @@ -76,13 +76,13 @@ do if [ $# == 2 ] then - python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 > train.log 2>&1 & fi if [ $# == 3 ] then - python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 &> log & + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 > train.log 2>&1 & fi cd .. -done \ No newline at end of file +done diff --git a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh index 40133b78d50..1d40fbf36ad 100644 --- a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh +++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh @@ -57,6 +57,6 @@ cd ./eval || exit echo "start evaluation for device $DEVICE_ID" env > env.log -python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 #&> log & +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 > eval.log 2>&1 & -cd .. \ No newline at end of file +cd .. diff --git a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh index b57f5e70b08..611da1ac26c 100644 --- a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh +++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh @@ -66,7 +66,7 @@ fi if [ $# == 2 ] then - python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 &> log & + python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 > train.log 2>&1 & fi -cd .. \ No newline at end of file +cd .. diff --git a/model_zoo/official/cv/cnn_direction_model/train.py b/model_zoo/official/cv/cnn_direction_model/train.py index 5033032ad43..5e9a0142be0 100644 --- a/model_zoo/official/cv/cnn_direction_model/train.py +++ b/model_zoo/official/cv/cnn_direction_model/train.py @@ -41,6 +41,7 @@ parser.add_argument('--device_num', type=int, default=1, help='Device num.') parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path') +parser.add_argument('--is_save_on_master', type=int, default=1, help='save ckpt on master or all rank') args_opt = parser.parse_args() @@ -70,6 +71,13 @@ if __name__ == '__main__': context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL) init() + args_opt.rank_save_ckpt_flag = 0 + if args_opt.is_save_on_master: + if rank_id == 0: + args_opt.rank_save_ckpt_flag = 1 + else: + args_opt.rank_save_ckpt_flag = 1 + # create dataset dataset_name = config.dataset_name dataset = create_dataset_train(args_opt.dataset_path + "/" + dataset_name + @@ -100,10 +108,11 @@ if __name__ == '__main__': loss_cb = LossMonitor() cb = [time_cb, loss_cb] if config.save_checkpoint: - config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps, - keep_checkpoint_max=config.keep_checkpoint_max) - ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck) - cb += [ckpt_cb] + if args_opt.rank_save_ckpt_flag == 1: + config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps, + keep_checkpoint_max=config.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck) + cb += [ckpt_cb] # train model model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False) diff --git a/model_zoo/official/cv/efficientnet/README.md b/model_zoo/official/cv/efficientnet/README.md index f7d001f2a46..39c82b2efdb 100644 --- a/model_zoo/official/cv/efficientnet/README.md +++ b/model_zoo/official/cv/efficientnet/README.md @@ -18,7 +18,6 @@ # [EfficientNet-B0 Description](#contents) - [Paper](https://arxiv.org/abs/1905.11946): Mingxing Tan, Quoc V. Le. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. 2019. # [Model architecture](#contents) @@ -27,27 +26,25 @@ The overall network architecture of EfficientNet-B0 is show below: [Link](https://arxiv.org/abs/1905.11946) - # [Dataset](#contents) Dataset used: [imagenet](http://www.image-net.org/) - Dataset size: ~125G, 1.2W colorful images in 1000 classes - - Train: 120G, 1.2W images - - Test: 5G, 50000 images + - Train: 120G, 1.2W images + - Test: 5G, 50000 images - Data format: RGB images. - - Note: Data will be processed in src/dataset.py - + - Note: Data will be processed in src/dataset.py # [Environment Requirements](#contents) - Hardware GPU - - Prepare hardware environment with GPU processor. + - Prepare hardware environment with GPU processor. - Framework - - [MindSpore](https://www.mindspore.cn/install/en) + - [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) + - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) # [Script description](#contents) @@ -77,7 +74,7 @@ Dataset used: [imagenet](http://www.image-net.org/) Parameters for both training and evaluating can be set in config.py. -``` +```python 'random_seed': 1, # fix random seed 'model': 'efficientnet_b0', # model name 'drop': 0.2, # dropout rate @@ -106,17 +103,17 @@ Parameters for both training and evaluating can be set in config.py. ## [Training Process](#contents) -#### Usage +### Usage -``` +```python GPU: # distribute training example(8p) - sh run_distribute_train_for_gpu.sh + sh run_distribute_train_for_gpu.sh # standalone training sh run_standalone_train_for_gpu.sh DEVICE_ID DATA_DIR ``` -#### Launch +### Launch ```bash # distributed training example(8p) for GPU @@ -133,7 +130,7 @@ You can find checkpoint file together with result in log. ### Usage -``` +```bash # Evaluation sh run_eval_for_gpu.sh DATA_DIR DEVICE_ID PATH_CHECKPOINT ``` @@ -148,9 +145,9 @@ sh run_eval_for_gpu.sh /dataset/eval ./checkpoint/efficientnet_b0-600_1251.ckpt #### Result -Evaluation result will be stored in the scripts path. Under this, you can find result like the followings in log. +Evaluation result will be stored in the scripts path. Under this, you can find result like the following in log. -``` +```python acc=76.96%(TOP1) ``` @@ -186,7 +183,6 @@ acc=76.96%(TOP1) | outputs | probability | | Accuracy | acc=76.96%(TOP1) | - # [ModelZoo Homepage](#contents) - + Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). diff --git a/model_zoo/official/cv/efficientnet/src/transform.py b/model_zoo/official/cv/efficientnet/src/transform.py index 2cdea209bb2..38ee8de0b6e 100644 --- a/model_zoo/official/cv/efficientnet/src/transform.py +++ b/model_zoo/official/cv/efficientnet/src/transform.py @@ -31,7 +31,7 @@ class RandAugment: self.hparams = hparams def __call__(self, imgs, labels, batchInfo): - # assert the imgs objetc are pil_images + # assert the imgs object are pil_images ret_imgs = [] ret_labels = [] py_to_pil_op = P.ToPIL()