diff --git a/model_zoo/official/cv/maskrcnn/default_config.yaml b/model_zoo/official/cv/maskrcnn/default_config.yaml index 2df11756193..624491f8e50 100644 --- a/model_zoo/official/cv/maskrcnn/default_config.yaml +++ b/model_zoo/official/cv/maskrcnn/default_config.yaml @@ -19,8 +19,8 @@ ann_file: "./annotations/instances_val2017.json" modelarts_dataset_unzip_name: 'cocodataset' need_modelarts_dataset_unzip: True -img_path: '' # "image file path." -result_path: '' # "result file path." +img_path: '' +result_path: '' # Training options img_width: 1280 @@ -30,10 +30,15 @@ flip_ratio: 0.5 expand_ratio: 1.0 max_instance_count: 128 -mask_shape: (28, 28) +mask_shape: [28, 28] # anchor -feature_shapes: [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)] +feature_shapes: +- [192, 320] +- [96, 160] +- [48, 80] +- [24, 40] +- [12, 20] anchor_scales: [8] anchor_ratios: [0.5, 1.0, 2.0] anchor_strides: [4, 8, 16, 32, 64] @@ -72,7 +77,7 @@ activate_num_classes: 2 use_sigmoid_cls: True # roi_align -roi_layer: dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2) +roi_layer: {type: 'RoIAlign', out_size: 7, mask_out_size: 14, sample_num: 2} roi_align_out_channels: 256 roi_align_featmap_strides: [4, 8, 16, 32] roi_align_finest_scale: 56 @@ -127,7 +132,7 @@ base_lr: 0.02 base_step: 58633 total_epoch: 13 warmup_step: 500 -warmup_ratio: 1/3.0 +warmup_ratio: 0.333333 sgd_momentum: 0.9 # train @@ -142,11 +147,11 @@ save_checkpoint_epochs: 1 keep_checkpoint_max: 12 save_checkpoint_path: "./" -mindrecord_dir: "./MindRecord_COCO" # "/home/mask_rcnn/MindRecord_COCO2017_Train" +mindrecord_dir: "./MindRecord_COCO" train_data_type: "train2017" val_data_type: "val2017" instance_set: "annotations/instances_{}.json" -coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', +coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', @@ -160,7 +165,7 @@ coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', - 'teddy bear', 'hair drier', 'toothbrush') + 'teddy bear', 'hair drier', 'toothbrush'] num_classes: 81 only_create_dataset: False @@ -173,7 +178,6 @@ device_num: 1 rank_id: 0 # maskrcnn export -batch_size_export: 2 file_name: "maskrcnn" file_format: "AIR" ckpt_file: '/cache/data/cocodataset/ckpt_maskrcnn/mask_rcnn-12_7393.ckpt' diff --git a/model_zoo/official/cv/maskrcnn/eval.py b/model_zoo/official/cv/maskrcnn/eval.py index d4dc263c6e7..3785b323b7c 100644 --- a/model_zoo/official/cv/maskrcnn/eval.py +++ b/model_zoo/official/cv/maskrcnn/eval.py @@ -16,7 +16,6 @@ """Evaluation for MaskRcnn""" import os import time -import re import numpy as np from src.model_utils.config import config @@ -34,13 +33,6 @@ from mindspore.common import set_seed set_seed(1) -lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes] -config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))] -config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2) -config.warmup_ratio = 1/3.0 -config.mask_shape = (28, 28) - - def maskrcnn_eval(dataset_path, ckpt_path, ann_file): """MaskRcnn evaluation.""" ds = create_maskrcnn_dataset(dataset_path, 
batch_size=config.test_batch_size, is_training=False) diff --git a/model_zoo/official/cv/maskrcnn/export.py b/model_zoo/official/cv/maskrcnn/export.py index bfd06034d54..d65103b96fe 100644 --- a/model_zoo/official/cv/maskrcnn/export.py +++ b/model_zoo/official/cv/maskrcnn/export.py @@ -14,7 +14,6 @@ # ============================================================================ """export checkpoint file into air, onnx, mindir models""" -import re import numpy as np from src.model_utils.config import config from src.model_utils.device_adapter import get_device_id @@ -23,15 +22,6 @@ from src.maskrcnn.mask_rcnn_r50 import MaskRcnn_Infer from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export -lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes] -config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))] -config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2) -config.warmup_ratio = 1/3.0 -config.mask_shape = (28, 28) -train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' '] -config.coco_classes = np.array(train_cls) -config.batch_size = config.batch_size_export - if not config.enable_modelarts: config.ckpt_file = config.ckpt_file_local @@ -45,6 +35,7 @@ def modelarts_process(): @moxing_wrapper(pre_process=modelarts_process) def export_maskrcnn(): """ export_maskrcnn """ + config.test_batch_size = config.batch_size net = MaskRcnn_Infer(config=config) param_dict = load_checkpoint(config.ckpt_file) diff --git a/model_zoo/official/cv/maskrcnn/src/dataset.py b/model_zoo/official/cv/maskrcnn/src/dataset.py index 3a48eb2bb89..f3f3ccfd968 100644 --- a/model_zoo/official/cv/maskrcnn/src/dataset.py +++ b/model_zoo/official/cv/maskrcnn/src/dataset.py @@ -17,7 +17,6 @@ from __future__ import division import os -import re import numpy as np from numpy import random import cv2 @@ -29,8 +28,6 @@ from mindspore.mindrecord import FileWriter from .model_utils.config import config -config.mask_shape = (28, 28) - def bbox_overlaps(bboxes1, bboxes2, mode='iou'): """Calculate the ious between each bbox of bboxes1 and bboxes2. @@ -390,10 +387,7 @@ def create_coco_label(is_training): data_type = config.train_data_type # Classes need to train or test. 
- # train_cls = config.coco_classes - train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' '] - train_cls = np.array(train_cls) - print(train_cls) + train_cls = config.coco_classes train_cls_dict = {} for i, cls in enumerate(train_cls): diff --git a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py index 36255425e5c..b7f714e16df 100644 --- a/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py +++ b/model_zoo/official/cv/maskrcnn/src/maskrcnn/rcnn_cls.py @@ -114,7 +114,7 @@ class RcnnCls(nn.Cell): self.train_batch_size = batch_size self.test_batch_size = cfg.test_batch_size - self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer["out_size"]) + self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer.out_size) self.relu = P.ReLU() self.logicaland = P.LogicalAnd() self.loss_cls = P.SoftmaxCrossEntropyWithLogits() diff --git a/model_zoo/official/cv/maskrcnn/src/maskrcnn/roi_align.py b/model_zoo/official/cv/maskrcnn/src/maskrcnn/roi_align.py index b8b27ff3298..2fef7d73141 100644 --- a/model_zoo/official/cv/maskrcnn/src/maskrcnn/roi_align.py +++ b/model_zoo/official/cv/maskrcnn/src/maskrcnn/roi_align.py @@ -88,9 +88,9 @@ class SingleRoIExtractor(nn.Cell): self.out_channels = out_channels self.featmap_strides = featmap_strides self.num_levels = len(self.featmap_strides) - self.out_size = roi_layer['mask_out_size'] if mask else roi_layer['out_size'] + self.out_size = config.roi_layer.mask_out_size if mask else config.roi_layer.out_size self.mask = mask - self.sample_num = roi_layer['sample_num'] + self.sample_num = config.roi_layer.sample_num self.roi_layers = self.build_roi_layers(self.featmap_strides) self.roi_layers = L.CellList(self.roi_layers) diff --git a/model_zoo/official/cv/maskrcnn/train.py b/model_zoo/official/cv/maskrcnn/train.py index c75523f4829..e5e61bb4a83 100644 --- a/model_zoo/official/cv/maskrcnn/train.py +++ b/model_zoo/official/cv/maskrcnn/train.py @@ -17,7 +17,6 @@ import time import os -import re from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper @@ -40,12 +39,6 @@ from mindspore.communication.management import get_rank, get_group_size set_seed(1) -lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes] -config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))] -config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2) -config.warmup_ratio = 1/3.0 -config.mask_shape = (28, 28) - def modelarts_pre_process(): def unzip(zip_file, save_dir): import zipfile diff --git a/model_zoo/official/nlp/transformer/README.md b/model_zoo/official/nlp/transformer/README.md index 826a6ea544d..185abe006c9 100644 --- a/model_zoo/official/nlp/transformer/README.md +++ b/model_zoo/official/nlp/transformer/README.md @@ -189,7 +189,6 @@ python eval.py > eval.log 2>&1 & ├─__init__.py ├─beam_search.py ├─dataset.py - ├─eval_config.py ├─lr_schedule.py ├─process_output.py ├─tokenization.py @@ -244,15 +243,12 @@ options: #### Running Options ```text -config.py: +default_config.yaml: transformer_network version of Transformer model: base | large, default is large init_loss_scale_value initial value of loss scale: N, default is 2^10 scale_factor factor used to update loss scale: N, default is 2 scale_window steps for once updatation of loss scale: N, default is 2000 optimizer optimizer used in the network: Adam, 
default is "Adam" - -eval_config.py: - transformer_network version of Transformer model: base | large, default is large data_file data file: PATH model_file checkpoint file to be loaded: PATH output_file output file of evaluation: PATH @@ -313,7 +309,7 @@ Parameters for learning rate: ## [Training Process](#contents) -- Set options in `config.py`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about dataset. +- Set options in `default_config.yaml`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about dataset. - Run `run_standalone_train.sh` for non-distributed training of Transformer model. @@ -331,7 +327,7 @@ Parameters for learning rate: ## [Evaluation Process](#contents) -- Set options in `eval_config.py`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own path. +- Set options in `default_config.yaml`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own path. - Run `eval.py` for evaluation of Transformer model. @@ -422,7 +418,7 @@ There are three random situations: - Initialization of some model weights. - Dropout operations. -Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in src/config.py. +Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in default_config.yaml. 
## [ModelZoo Homepage](#contents) diff --git a/model_zoo/official/nlp/transformer/README_CN.md b/model_zoo/official/nlp/transformer/README_CN.md index 052067e3d9d..e0a5f3994ec 100644 --- a/model_zoo/official/nlp/transformer/README_CN.md +++ b/model_zoo/official/nlp/transformer/README_CN.md @@ -195,7 +195,6 @@ python eval.py > eval.log 2>&1 & ├─__init__.py ├─beam_search.py ├─dataset.py - ├─eval_config.py ├─lr_schedule.py ├─process_output.py ├─tokenization.py @@ -250,15 +249,12 @@ options: #### 运行选项 ```text -config.py: +default_config.yaml: transformer_network version of Transformer model: base | large, default is large init_loss_scale_value initial value of loss scale: N, default is 2^10 scale_factor factor used to update loss scale: N, default is 2 scale_window steps for once updatation of loss scale: N, default is 2000 optimizer optimizer used in the network: Adam, default is "Adam" - -eval_config.py: - transformer_network version of Transformer model: base | large, default is large data_file data file: PATH model_file checkpoint file to be loaded: PATH output_file output file of evaluation: PATH @@ -320,7 +316,7 @@ Parameters for learning rate: ### 训练过程 -- 在`config.py`中设置选项,包括loss_scale、学习率和网络超参数。点击[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html)查看更多数据集信息。 +- 在`default_config.yaml`中设置选项,包括loss_scale、学习率和网络超参数。点击[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html)查看更多数据集信息。 - 运行`run_standalone_train.sh`,进行Transformer模型的非分布式训练。 @@ -338,7 +334,7 @@ Parameters for learning rate: ### 评估过程 -- 在`eval_config.py`中设置选项。确保已设置了‘data_file'、'model_file’和'output_file'文件路径。 +- 在`default_config.yaml`中设置选项。确保已设置了‘data_file'、'model_file’和'output_file'文件路径。 - 运行`eval.py`,评估Transformer模型。 @@ -429,7 +425,7 @@ bash run_infer_310.sh [MINDIR_PATH] [NEED_PREPROCESS] [DEVICE_ID] - 初始化部分模型权重 - 随机失活运行 -train.py已经设置了一些种子,避免数据集轮换和权重初始化的随机性。若需关闭随机失活,将src/config.py中相应的dropout_prob参数设置为0。 +train.py已经设置了一些种子,避免数据集轮换和权重初始化的随机性。若需关闭随机失活,将default_config.yaml中相应的dropout_prob参数设置为0。 ## ModelZoo主页 diff --git a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh index c2204c67f0a..41be7cdef41 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh @@ -16,8 +16,8 @@ if [ $# != 5 ] ; then echo "==============================================================================================================" echo "Please run the script as: " -echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH" -echo "for example: sh run_distribute_pretrain.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml" +echo "sh run_distribute_train_ascend.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH" +echo "for example: sh run_distribute_train_ascend.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml" echo "It is better to use absolute path." 
echo "==============================================================================================================" exit 1; diff --git a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend_multi_machines.sh b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend_multi_machines.sh index 47f3acdec32..14fd9d12d6b 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend_multi_machines.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend_multi_machines.sh @@ -64,8 +64,7 @@ do --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ - --data_path=$DATA_PATH \ - --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 & + --data_path=$DATA_PATH > log.txt 2>&1 & cd ../ done cd .. \ No newline at end of file diff --git a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_gpu.sh b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_gpu.sh index ef1337951e6..ceafe417d4c 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_gpu.sh @@ -17,7 +17,7 @@ if [ $# != 4 ] ; then echo "==============================================================================================================" echo "Please run the script as: " echo "sh run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE DATA_PATH CONFIG_PATH" -echo "for example: sh run_distribute_pretrain.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml" +echo "for example: sh run_distribute_train_gpu.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml" echo "It is better to use absolute path." echo "==============================================================================================================" exit 1; @@ -47,5 +47,4 @@ mpirun -n $RANK_SIZE \ --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ - --data_path=$DATA_PATH \ - --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 & + --data_path=$DATA_PATH > log.txt 2>&1 & diff --git a/model_zoo/official/nlp/transformer/scripts/run_standalone_train.sh b/model_zoo/official/nlp/transformer/scripts/run_standalone_train.sh index 41980e36b7f..50a7779ee8f 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_standalone_train.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_standalone_train.sh @@ -48,8 +48,7 @@ if [ $DEVICE_TARGET == 'Ascend' ];then --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ - --data_path=$DATA_PATH \ - --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 & + --data_path=$DATA_PATH > log.txt 2>&1 & elif [ $DEVICE_TARGET == 'GPU' ];then export CUDA_VISIBLE_DEVICES="$2" @@ -64,8 +63,7 @@ elif [ $DEVICE_TARGET == 'GPU' ];then --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ - --data_path=$DATA_PATH \ - --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 & + --data_path=$DATA_PATH > log.txt 2>&1 & else echo "Not supported device target." fi