This commit is contained in:
huchunmei 2021-07-05 16:22:23 +08:00
parent fb6ec96862
commit 3713ef59bb
13 changed files with 34 additions and 72 deletions

View File

@ -19,8 +19,8 @@ ann_file: "./annotations/instances_val2017.json"
modelarts_dataset_unzip_name: 'cocodataset' modelarts_dataset_unzip_name: 'cocodataset'
need_modelarts_dataset_unzip: True need_modelarts_dataset_unzip: True
img_path: '' # "image file path." img_path: ''
result_path: '' # "result file path." result_path: ''
# Training options # Training options
img_width: 1280 img_width: 1280
@ -30,10 +30,15 @@ flip_ratio: 0.5
expand_ratio: 1.0 expand_ratio: 1.0
max_instance_count: 128 max_instance_count: 128
mask_shape: (28, 28) mask_shape: [28, 28]
# anchor # anchor
feature_shapes: [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)] feature_shapes:
- [192, 320]
- [96, 160]
- [48, 80]
- [24, 40]
- [12, 20]
anchor_scales: [8] anchor_scales: [8]
anchor_ratios: [0.5, 1.0, 2.0] anchor_ratios: [0.5, 1.0, 2.0]
anchor_strides: [4, 8, 16, 32, 64] anchor_strides: [4, 8, 16, 32, 64]
@ -72,7 +77,7 @@ activate_num_classes: 2
use_sigmoid_cls: True use_sigmoid_cls: True
# roi_align # roi_align
roi_layer: dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2) roi_layer: {type: 'RoIAlign', out_size: 7, mask_out_size: 14, sample_num: 2}
roi_align_out_channels: 256 roi_align_out_channels: 256
roi_align_featmap_strides: [4, 8, 16, 32] roi_align_featmap_strides: [4, 8, 16, 32]
roi_align_finest_scale: 56 roi_align_finest_scale: 56
@ -127,7 +132,7 @@ base_lr: 0.02
base_step: 58633 base_step: 58633
total_epoch: 13 total_epoch: 13
warmup_step: 500 warmup_step: 500
warmup_ratio: 1/3.0 warmup_ratio: 0.333333
sgd_momentum: 0.9 sgd_momentum: 0.9
# train # train
@ -142,11 +147,11 @@ save_checkpoint_epochs: 1
keep_checkpoint_max: 12 keep_checkpoint_max: 12
save_checkpoint_path: "./" save_checkpoint_path: "./"
mindrecord_dir: "./MindRecord_COCO" # "/home/mask_rcnn/MindRecord_COCO2017_Train" mindrecord_dir: "./MindRecord_COCO"
train_data_type: "train2017" train_data_type: "train2017"
val_data_type: "val2017" val_data_type: "val2017"
instance_set: "annotations/instances_{}.json" instance_set: "annotations/instances_{}.json"
coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
@ -160,7 +165,7 @@ coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush') 'teddy bear', 'hair drier', 'toothbrush']
num_classes: 81 num_classes: 81
only_create_dataset: False only_create_dataset: False
@ -173,7 +178,6 @@ device_num: 1
rank_id: 0 rank_id: 0
# maskrcnn export # maskrcnn export
batch_size_export: 2
file_name: "maskrcnn" file_name: "maskrcnn"
file_format: "AIR" file_format: "AIR"
ckpt_file: '/cache/data/cocodataset/ckpt_maskrcnn/mask_rcnn-12_7393.ckpt' ckpt_file: '/cache/data/cocodataset/ckpt_maskrcnn/mask_rcnn-12_7393.ckpt'

View File

@ -16,7 +16,6 @@
"""Evaluation for MaskRcnn""" """Evaluation for MaskRcnn"""
import os import os
import time import time
import re
import numpy as np import numpy as np
from src.model_utils.config import config from src.model_utils.config import config
@ -34,13 +33,6 @@ from mindspore.common import set_seed
set_seed(1) set_seed(1)
lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
config.warmup_ratio = 1/3.0
config.mask_shape = (28, 28)
def maskrcnn_eval(dataset_path, ckpt_path, ann_file): def maskrcnn_eval(dataset_path, ckpt_path, ann_file):
"""MaskRcnn evaluation.""" """MaskRcnn evaluation."""
ds = create_maskrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False) ds = create_maskrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False)

View File

@ -14,7 +14,6 @@
# ============================================================================ # ============================================================================
"""export checkpoint file into air, onnx, mindir models""" """export checkpoint file into air, onnx, mindir models"""
import re
import numpy as np import numpy as np
from src.model_utils.config import config from src.model_utils.config import config
from src.model_utils.device_adapter import get_device_id from src.model_utils.device_adapter import get_device_id
@ -23,15 +22,6 @@ from src.maskrcnn.mask_rcnn_r50 import MaskRcnn_Infer
from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
config.warmup_ratio = 1/3.0
config.mask_shape = (28, 28)
train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' ']
config.coco_classes = np.array(train_cls)
config.batch_size = config.batch_size_export
if not config.enable_modelarts: if not config.enable_modelarts:
config.ckpt_file = config.ckpt_file_local config.ckpt_file = config.ckpt_file_local
@ -45,6 +35,7 @@ def modelarts_process():
@moxing_wrapper(pre_process=modelarts_process) @moxing_wrapper(pre_process=modelarts_process)
def export_maskrcnn(): def export_maskrcnn():
""" export_maskrcnn """ """ export_maskrcnn """
config.test_batch_size = config.batch_size
net = MaskRcnn_Infer(config=config) net = MaskRcnn_Infer(config=config)
param_dict = load_checkpoint(config.ckpt_file) param_dict = load_checkpoint(config.ckpt_file)

View File

@ -17,7 +17,6 @@
from __future__ import division from __future__ import division
import os import os
import re
import numpy as np import numpy as np
from numpy import random from numpy import random
import cv2 import cv2
@ -29,8 +28,6 @@ from mindspore.mindrecord import FileWriter
from .model_utils.config import config from .model_utils.config import config
config.mask_shape = (28, 28)
def bbox_overlaps(bboxes1, bboxes2, mode='iou'): def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
"""Calculate the ious between each bbox of bboxes1 and bboxes2. """Calculate the ious between each bbox of bboxes1 and bboxes2.
@ -390,10 +387,7 @@ def create_coco_label(is_training):
data_type = config.train_data_type data_type = config.train_data_type
# Classes need to train or test. # Classes need to train or test.
# train_cls = config.coco_classes train_cls = config.coco_classes
train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' ']
train_cls = np.array(train_cls)
print(train_cls)
train_cls_dict = {} train_cls_dict = {}
for i, cls in enumerate(train_cls): for i, cls in enumerate(train_cls):

View File

@ -114,7 +114,7 @@ class RcnnCls(nn.Cell):
self.train_batch_size = batch_size self.train_batch_size = batch_size
self.test_batch_size = cfg.test_batch_size self.test_batch_size = cfg.test_batch_size
self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer["out_size"]) self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer.out_size)
self.relu = P.ReLU() self.relu = P.ReLU()
self.logicaland = P.LogicalAnd() self.logicaland = P.LogicalAnd()
self.loss_cls = P.SoftmaxCrossEntropyWithLogits() self.loss_cls = P.SoftmaxCrossEntropyWithLogits()

View File

@ -88,9 +88,9 @@ class SingleRoIExtractor(nn.Cell):
self.out_channels = out_channels self.out_channels = out_channels
self.featmap_strides = featmap_strides self.featmap_strides = featmap_strides
self.num_levels = len(self.featmap_strides) self.num_levels = len(self.featmap_strides)
self.out_size = roi_layer['mask_out_size'] if mask else roi_layer['out_size'] self.out_size = config.roi_layer.mask_out_size if mask else config.roi_layer.out_size
self.mask = mask self.mask = mask
self.sample_num = roi_layer['sample_num'] self.sample_num = config.roi_layer.sample_num
self.roi_layers = self.build_roi_layers(self.featmap_strides) self.roi_layers = self.build_roi_layers(self.featmap_strides)
self.roi_layers = L.CellList(self.roi_layers) self.roi_layers = L.CellList(self.roi_layers)

View File

@ -17,7 +17,6 @@
import time import time
import os import os
import re
from src.model_utils.config import config from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper from src.model_utils.moxing_adapter import moxing_wrapper
@ -40,12 +39,6 @@ from mindspore.communication.management import get_rank, get_group_size
set_seed(1) set_seed(1)
lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
config.warmup_ratio = 1/3.0
config.mask_shape = (28, 28)
def modelarts_pre_process(): def modelarts_pre_process():
def unzip(zip_file, save_dir): def unzip(zip_file, save_dir):
import zipfile import zipfile

View File

@ -189,7 +189,6 @@ python eval.py > eval.log 2>&1 &
├─__init__.py ├─__init__.py
├─beam_search.py ├─beam_search.py
├─dataset.py ├─dataset.py
├─eval_config.py
├─lr_schedule.py ├─lr_schedule.py
├─process_output.py ├─process_output.py
├─tokenization.py ├─tokenization.py
@ -244,15 +243,12 @@ options:
#### Running Options #### Running Options
```text ```text
config.py: default_config.yaml:
transformer_network version of Transformer model: base | large, default is large transformer_network version of Transformer model: base | large, default is large
init_loss_scale_value initial value of loss scale: N, default is 2^10 init_loss_scale_value initial value of loss scale: N, default is 2^10
scale_factor factor used to update loss scale: N, default is 2 scale_factor factor used to update loss scale: N, default is 2
scale_window number of steps between loss scale updates: N, default is 2000 scale_window number of steps between loss scale updates: N, default is 2000
optimizer optimizer used in the network: Adam, default is "Adam" optimizer optimizer used in the network: Adam, default is "Adam"
eval_config.py:
transformer_network version of Transformer model: base | large, default is large
data_file data file: PATH data_file data file: PATH
model_file checkpoint file to be loaded: PATH model_file checkpoint file to be loaded: PATH
output_file output file of evaluation: PATH output_file output file of evaluation: PATH
@ -313,7 +309,7 @@ Parameters for learning rate:
## [Training Process](#contents) ## [Training Process](#contents)
- Set options in `config.py`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about dataset. - Set options in `default_config.yaml`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about dataset.
- Run `run_standalone_train.sh` for non-distributed training of Transformer model. - Run `run_standalone_train.sh` for non-distributed training of Transformer model.
@ -331,7 +327,7 @@ Parameters for learning rate:
## [Evaluation Process](#contents) ## [Evaluation Process](#contents)
- Set options in `eval_config.py`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own path. - Set options in `default_config.yaml`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own path.
- Run `eval.py` for evaluation of Transformer model. - Run `eval.py` for evaluation of Transformer model.
@ -422,7 +418,7 @@ There are three random situations:
- Initialization of some model weights. - Initialization of some model weights.
- Dropout operations. - Dropout operations.
Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in src/config.py. Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in default_config.yaml.
## [ModelZoo Homepage](#contents) ## [ModelZoo Homepage](#contents)

View File

@ -195,7 +195,6 @@ python eval.py > eval.log 2>&1 &
├─__init__.py ├─__init__.py
├─beam_search.py ├─beam_search.py
├─dataset.py ├─dataset.py
├─eval_config.py
├─lr_schedule.py ├─lr_schedule.py
├─process_output.py ├─process_output.py
├─tokenization.py ├─tokenization.py
@ -250,15 +249,12 @@ options:
#### 运行选项 #### 运行选项
```text ```text
config.py: default_config.yaml:
transformer_network version of Transformer model: base | large, default is large transformer_network version of Transformer model: base | large, default is large
init_loss_scale_value initial value of loss scale: N, default is 2^10 init_loss_scale_value initial value of loss scale: N, default is 2^10
scale_factor factor used to update loss scale: N, default is 2 scale_factor factor used to update loss scale: N, default is 2
scale_window number of steps between loss scale updates: N, default is 2000 scale_window number of steps between loss scale updates: N, default is 2000
optimizer optimizer used in the network: Adam, default is "Adam" optimizer optimizer used in the network: Adam, default is "Adam"
eval_config.py:
transformer_network version of Transformer model: base | large, default is large
data_file data file: PATH data_file data file: PATH
model_file checkpoint file to be loaded: PATH model_file checkpoint file to be loaded: PATH
output_file output file of evaluation: PATH output_file output file of evaluation: PATH
@ -320,7 +316,7 @@ Parameters for learning rate:
### 训练过程 ### 训练过程
- 在`config.py`中设置选项包括loss_scale、学习率和网络超参数。点击[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html)查看更多数据集信息。 - 在`default_config.yaml`中设置选项包括loss_scale、学习率和网络超参数。点击[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html)查看更多数据集信息。
- 运行`run_standalone_train.sh`进行Transformer模型的非分布式训练。 - 运行`run_standalone_train.sh`进行Transformer模型的非分布式训练。
@ -338,7 +334,7 @@ Parameters for learning rate:
### 评估过程 ### 评估过程
- 在`eval_config.py`中设置选项。确保已设置了'data_file'、'model_file'和'output_file'文件路径。 - 在`default_config.yaml`中设置选项。确保已设置了'data_file'、'model_file'和'output_file'文件路径。
- 运行`eval.py`评估Transformer模型。 - 运行`eval.py`评估Transformer模型。
@ -429,7 +425,7 @@ bash run_infer_310.sh [MINDIR_PATH] [NEED_PREPROCESS] [DEVICE_ID]
- 初始化部分模型权重 - 初始化部分模型权重
- 随机失活运行 - 随机失活运行
train.py已经设置了一些种子避免数据集轮换和权重初始化的随机性。若需关闭随机失活，请将src/config.py中相应的dropout_prob参数设置为0。 train.py已经设置了一些种子避免数据集轮换和权重初始化的随机性。若需关闭随机失活，请将default_config.yaml中相应的dropout_prob参数设置为0。
## ModelZoo主页 ## ModelZoo主页

View File

@ -16,8 +16,8 @@
if [ $# != 5 ] ; then if [ $# != 5 ] ; then
echo "==============================================================================================================" echo "=============================================================================================================="
echo "Please run the script as: " echo "Please run the script as: "
echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH" echo "sh run_distribute_train_ascend.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH"
echo "for example: sh run_distribute_pretrain.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml" echo "for example: sh run_distribute_train_ascend.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml"
echo "It is better to use absolute path." echo "It is better to use absolute path."
echo "==============================================================================================================" echo "=============================================================================================================="
exit 1; exit 1;

View File

@ -64,8 +64,7 @@ do
--checkpoint_path="" \ --checkpoint_path="" \
--save_checkpoint_steps=2500 \ --save_checkpoint_steps=2500 \
--save_checkpoint_num=30 \ --save_checkpoint_num=30 \
--data_path=$DATA_PATH \ --data_path=$DATA_PATH > log.txt 2>&1 &
--bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
cd ../ cd ../
done done
cd .. cd ..

View File

@ -17,7 +17,7 @@ if [ $# != 4 ] ; then
echo "==============================================================================================================" echo "=============================================================================================================="
echo "Please run the script as: " echo "Please run the script as: "
echo "sh run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE DATA_PATH CONFIG_PATH" echo "sh run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE DATA_PATH CONFIG_PATH"
echo "for example: sh run_distribute_pretrain.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml" echo "for example: sh run_distribute_train_gpu.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml"
echo "It is better to use absolute path." echo "It is better to use absolute path."
echo "==============================================================================================================" echo "=============================================================================================================="
exit 1; exit 1;
@ -47,5 +47,4 @@ mpirun -n $RANK_SIZE \
--checkpoint_path="" \ --checkpoint_path="" \
--save_checkpoint_steps=2500 \ --save_checkpoint_steps=2500 \
--save_checkpoint_num=30 \ --save_checkpoint_num=30 \
--data_path=$DATA_PATH \ --data_path=$DATA_PATH > log.txt 2>&1 &
--bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &

View File

@ -48,8 +48,7 @@ if [ $DEVICE_TARGET == 'Ascend' ];then
--checkpoint_path="" \ --checkpoint_path="" \
--save_checkpoint_steps=2500 \ --save_checkpoint_steps=2500 \
--save_checkpoint_num=30 \ --save_checkpoint_num=30 \
--data_path=$DATA_PATH \ --data_path=$DATA_PATH > log.txt 2>&1 &
--bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
elif [ $DEVICE_TARGET == 'GPU' ];then elif [ $DEVICE_TARGET == 'GPU' ];then
export CUDA_VISIBLE_DEVICES="$2" export CUDA_VISIBLE_DEVICES="$2"
@ -64,8 +63,7 @@ elif [ $DEVICE_TARGET == 'GPU' ];then
--checkpoint_path="" \ --checkpoint_path="" \
--save_checkpoint_steps=2500 \ --save_checkpoint_steps=2500 \
--save_checkpoint_num=30 \ --save_checkpoint_num=30 \
--data_path=$DATA_PATH \ --data_path=$DATA_PATH > log.txt 2>&1 &
--bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
else else
echo "Not supported device target." echo "Not supported device target."
fi fi