!19388 repair maskrcnn export & transformer

Merge pull request !19388 from huchunmei/maskrcnn
i-robot 2021-07-07 02:06:01 +00:00 committed by Gitee
commit 1f2080f860
13 changed files with 34 additions and 72 deletions

View File

@@ -19,8 +19,8 @@ ann_file: "./annotations/instances_val2017.json"
modelarts_dataset_unzip_name: 'cocodataset'
need_modelarts_dataset_unzip: True
-img_path: '' # "image file path."
-result_path: '' # "result file path."
+img_path: ''
+result_path: ''
# Training options
img_width: 1280
@@ -30,10 +30,15 @@ flip_ratio: 0.5
expand_ratio: 1.0
max_instance_count: 128
-mask_shape: (28, 28)
+mask_shape: [28, 28]
# anchor
-feature_shapes: [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)]
+feature_shapes:
+- [192, 320]
+- [96, 160]
+- [48, 80]
+- [24, 40]
+- [12, 20]
anchor_scales: [8]
anchor_ratios: [0.5, 1.0, 2.0]
anchor_strides: [4, 8, 16, 32, 64]
@@ -72,7 +77,7 @@ activate_num_classes: 2
use_sigmoid_cls: True
# roi_align
-roi_layer: dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
+roi_layer: {type: 'RoIAlign', out_size: 7, mask_out_size: 14, sample_num: 2}
roi_align_out_channels: 256
roi_align_featmap_strides: [4, 8, 16, 32]
roi_align_finest_scale: 56
@@ -127,7 +132,7 @@ base_lr: 0.02
base_step: 58633
total_epoch: 13
warmup_step: 500
-warmup_ratio: 1/3.0
+warmup_ratio: 0.333333
sgd_momentum: 0.9
# train
@@ -142,11 +147,11 @@ save_checkpoint_epochs: 1
keep_checkpoint_max: 12
save_checkpoint_path: "./"
-mindrecord_dir: "./MindRecord_COCO" # "/home/mask_rcnn/MindRecord_COCO2017_Train"
+mindrecord_dir: "./MindRecord_COCO"
train_data_type: "train2017"
val_data_type: "val2017"
instance_set: "annotations/instances_{}.json"
-coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
@@ -160,7 +165,7 @@ coco_classes: ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors',
-              'teddy bear', 'hair drier', 'toothbrush')
+              'teddy bear', 'hair drier', 'toothbrush']
num_classes: 81
only_create_dataset: False
@@ -173,7 +178,6 @@ device_num: 1
rank_id: 0
# maskrcnn export
-batch_size_export: 2
file_name: "maskrcnn"
file_format: "AIR"
ckpt_file: '/cache/data/cocodataset/ckpt_maskrcnn/mask_rcnn-12_7393.ckpt'
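The thread running through this config diff: the old values (`(28, 28)`, `dict(...)`, `1/3.0`, the parenthesized tuple of class names) are Python literals, which a YAML loader can only hand back as plain strings, so every entry script had to re-parse them. The new values are native YAML lists, mappings, and floats. A minimal standalone sketch of the difference, assuming a PyYAML-style loader behind `src/model_utils/config.py` (that loader is not shown in this diff):

```python
# Minimal sketch (not part of the PR): Python-literal values survive YAML
# loading only as strings; native YAML values come back as real objects.
import yaml

old = yaml.safe_load("mask_shape: (28, 28)\nwarmup_ratio: 1/3.0")
print(old)  # {'mask_shape': '(28, 28)', 'warmup_ratio': '1/3.0'} -- both strings

new = yaml.safe_load(
    "mask_shape: [28, 28]\n"
    "warmup_ratio: 0.333333\n"
    "roi_layer: {type: 'RoIAlign', out_size: 7, mask_out_size: 14, sample_num: 2}\n"
)
print(new["mask_shape"])             # [28, 28] -- a real list of ints
print(new["roi_layer"]["out_size"])  # 7 -- a real int
```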

View File

@@ -16,7 +16,6 @@
"""Evaluation for MaskRcnn"""
import os
import time
-import re
import numpy as np
from src.model_utils.config import config
@@ -34,13 +33,6 @@ from mindspore.common import set_seed
set_seed(1)
-lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
-config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
-config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
-config.warmup_ratio = 1/3.0
-config.mask_shape = (28, 28)
def maskrcnn_eval(dataset_path, ckpt_path, ann_file):
    """MaskRcnn evaluation."""
    ds = create_maskrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False)
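The deleted module-level block was a workaround for the stringly-typed config: YAML splits the old flow sequence `feature_shapes: [(192, 320), ...]` on every comma, so the shapes arrived as number fragments that had to be regex-scraped and re-paired, and `roi_layer`, `warmup_ratio`, and `mask_shape` were then re-assigned by hand. A standalone sketch of what the deleted parsing actually did (values inlined for illustration):

```python
# Standalone sketch (not from the PR). The old config line
#   feature_shapes: [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)]
# loads as ten string fragments, because YAML breaks the flow sequence at
# every comma:
import re

feature_shapes = ['(192', '320)', '(96', '160)', '(48', '80)', '(24', '40)', '(12', '20)']

# Scrape the number out of each fragment...
lss = [int(re.findall(r'[0-9]+', i)[0]) for i in feature_shapes]
# ...and re-pair the flat list into (height, width) tuples:
shapes = [(lss[2 * i], lss[2 * i + 1]) for i in range(len(lss) // 2)]
print(shapes)  # [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)]
```

With `feature_shapes` now a native list of `[height, width]` pairs, the same block can simply disappear here and in export.py and train.py below.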

View File

@@ -14,7 +14,6 @@
# ============================================================================
"""export checkpoint file into air, onnx, mindir models"""
-import re
import numpy as np
from src.model_utils.config import config
from src.model_utils.device_adapter import get_device_id
@@ -23,15 +22,6 @@ from src.maskrcnn.mask_rcnn_r50 import MaskRcnn_Infer
from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
-lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
-config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
-config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
-config.warmup_ratio = 1/3.0
-config.mask_shape = (28, 28)
-train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' ']
-config.coco_classes = np.array(train_cls)
-config.batch_size = config.batch_size_export
if not config.enable_modelarts:
    config.ckpt_file = config.ckpt_file_local
@@ -45,6 +35,7 @@ def modelarts_process():
@moxing_wrapper(pre_process=modelarts_process)
def export_maskrcnn():
    """ export_maskrcnn """
+    config.test_batch_size = config.batch_size
    net = MaskRcnn_Infer(config=config)
    param_dict = load_checkpoint(config.ckpt_file)
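Besides dropping the regex workaround, export.py no longer overrides `config.batch_size` from the now-deleted `batch_size_export` key; instead it aligns `test_batch_size` with the configured `batch_size` before building the inference net. For orientation, a hedged sketch of how the rest of `export_maskrcnn` typically continues, using the imports kept above; the dummy input shapes and dtypes are illustrative assumptions, not lifted from the PR (`export`'s `file_name`/`file_format` keywords are the MindSpore API):

```python
# Hedged sketch of the remainder of export_maskrcnn (shapes are assumptions):
load_param_into_net(net, param_dict)
net.set_train(False)

# Dummy inputs sized from the config; a Mask R-CNN infer net takes an image
# tensor plus per-image metadata.
img = Tensor(np.zeros([config.batch_size, 3, config.img_height, config.img_width], np.float16))
img_metas = Tensor(np.zeros([config.batch_size, 4], np.float16))

# Serialize the cell to AIR/ONNX/MINDIR according to config.file_format.
export(net, img, img_metas, file_name=config.file_name, file_format=config.file_format)
```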

View File

@@ -17,7 +17,6 @@
from __future__ import division
import os
-import re
import numpy as np
from numpy import random
import cv2
@@ -29,8 +28,6 @@ from mindspore.mindrecord import FileWriter
from .model_utils.config import config
-config.mask_shape = (28, 28)
def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.
@@ -390,10 +387,7 @@ def create_coco_label(is_training):
    data_type = config.train_data_type
    # Classes need to train or test.
-    # train_cls = config.coco_classes
-    train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', config.coco_classes) if i != ' ']
-    train_cls = np.array(train_cls)
-    print(train_cls)
+    train_cls = config.coco_classes
    train_cls_dict = {}
    for i, cls in enumerate(train_cls):
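Here the old multi-line tuple value of `coco_classes` loaded from YAML as one long string, so the class names had to be scraped back out with a regex; with the new YAML list, `config.coco_classes` is already the sequence the loop above needs. A standalone sketch of the scraping being deleted (value truncated for brevity, not from the PR):

```python
# Standalone sketch of the deleted regex scraping. The old YAML tuple arrived
# as ONE string like this:
import re

coco_classes = "('background', 'person', 'bicycle', 'traffic light')"

# Letter/space runs recover the names; the filter drops the bare separator
# spaces while keeping multi-word names such as 'traffic light'.
train_cls = [i for i in re.findall(r'[a-zA-Z\s]+', coco_classes) if i != ' ']
print(train_cls)  # ['background', 'person', 'bicycle', 'traffic light']
```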

View File

@@ -114,7 +114,7 @@ class RcnnCls(nn.Cell):
        self.train_batch_size = batch_size
        self.test_batch_size = cfg.test_batch_size
-        self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer["out_size"])
+        self.fpn_cls = FpnCls(self.in_channels, self.rcnn_fc_out_channels, self.num_classes, cfg.roi_layer.out_size)
        self.relu = P.ReLU()
        self.logicaland = P.LogicalAnd()
        self.loss_cls = P.SoftmaxCrossEntropyWithLogits()

View File

@@ -88,9 +88,9 @@ class SingleRoIExtractor(nn.Cell):
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.num_levels = len(self.featmap_strides)
-        self.out_size = roi_layer['mask_out_size'] if mask else roi_layer['out_size']
+        self.out_size = config.roi_layer.mask_out_size if mask else config.roi_layer.out_size
        self.mask = mask
-        self.sample_num = roi_layer['sample_num']
+        self.sample_num = config.roi_layer.sample_num
        self.roi_layers = self.build_roi_layers(self.featmap_strides)
        self.roi_layers = L.CellList(self.roi_layers)
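These two files switch from subscripting `roi_layer` like a dict to attribute access on `config.roi_layer`, which works because the repo's config loader wraps parsed YAML mappings in an attribute-style object. A minimal sketch of such a wrapper; the real class in `src/model_utils/config.py` is assumed to behave along these lines:

```python
# Minimal sketch of an attribute-style config wrapper (assumption: the loader
# in src/model_utils/config.py does something equivalent for nested mappings).
class Config:
    def __init__(self, mapping):
        for key, value in mapping.items():
            setattr(self, key, Config(value) if isinstance(value, dict) else value)

cfg = Config({"roi_layer": {"type": "RoIAlign", "out_size": 7,
                            "mask_out_size": 14, "sample_num": 2}})
print(cfg.roi_layer.out_size)       # 7  -- attribute access, as in the new code
print(cfg.roi_layer.mask_out_size)  # 14
```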

View File

@@ -17,7 +17,6 @@
import time
import os
-import re
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
@@ -40,12 +39,6 @@ from mindspore.communication.management import get_rank, get_group_size
set_seed(1)
-lss = [int(re.findall(r'[0-9]+', i)[0]) for i in config.feature_shapes]
-config.feature_shapes = [(lss[2*i], lss[2*i+1]) for i in range(int(len(lss)/2))]
-config.roi_layer = dict(type='RoIAlign', out_size=7, mask_out_size=14, sample_num=2)
-config.warmup_ratio = 1/3.0
-config.mask_shape = (28, 28)
def modelarts_pre_process():
    def unzip(zip_file, save_dir):
        import zipfile

View File

@@ -189,7 +189,6 @@ python eval.py > eval.log 2>&1 &
  ├─__init__.py
  ├─beam_search.py
  ├─dataset.py
- ├─eval_config.py
  ├─lr_schedule.py
  ├─process_output.py
  ├─tokenization.py
@@ -244,15 +243,12 @@ options:
#### Running Options
```text
-config.py:
+default_config.yaml:
  transformer_network        version of Transformer model: base | large, default is large
  init_loss_scale_value      initial value of loss scale: N, default is 2^10
  scale_factor               factor used to update loss scale: N, default is 2
  scale_window               steps for one update of loss scale: N, default is 2000
  optimizer                  optimizer used in the network: Adam, default is "Adam"
-eval_config.py:
-  transformer_network        version of Transformer model: base | large, default is large
  data_file                  data file: PATH
  model_file                 checkpoint file to be loaded: PATH
  output_file                output file of evaluation: PATH
@@ -313,7 +309,7 @@ Parameters for learning rate:
## [Training Process](#contents)
-- Set options in `config.py`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about the dataset.
+- Set options in `default_config.yaml`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about the dataset.
- Run `run_standalone_train.sh` for non-distributed training of the Transformer model.
@@ -331,7 +327,7 @@ Parameters for learning rate:
## [Evaluation Process](#contents)
-- Set options in `eval_config.py`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own paths.
+- Set options in `default_config.yaml`. Make sure the 'data_file', 'model_file' and 'output_file' are set to your own paths.
- Run `eval.py` for evaluation of the Transformer model.
@@ -422,7 +418,7 @@ There are three random situations:
- Initialization of some model weights.
- Dropout operations.
-Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in src/config.py.
+Some seeds have already been set in train.py to avoid the randomness of dataset shuffle and weight initialization. If you want to disable dropout, please set the corresponding dropout_prob parameter to 0 in default_config.yaml.
## [ModelZoo Homepage](#contents)

View File

@@ -195,7 +195,6 @@ python eval.py > eval.log 2>&1 &
  ├─__init__.py
  ├─beam_search.py
  ├─dataset.py
- ├─eval_config.py
  ├─lr_schedule.py
  ├─process_output.py
  ├─tokenization.py
@@ -250,15 +249,12 @@ options:
#### Running Options
```text
-config.py:
+default_config.yaml:
  transformer_network        version of Transformer model: base | large, default is large
  init_loss_scale_value      initial value of loss scale: N, default is 2^10
  scale_factor               factor used to update loss scale: N, default is 2
  scale_window               steps for one update of loss scale: N, default is 2000
  optimizer                  optimizer used in the network: Adam, default is "Adam"
-eval_config.py:
-  transformer_network        version of Transformer model: base | large, default is large
  data_file                  data file: PATH
  model_file                 checkpoint file to be loaded: PATH
  output_file                output file of evaluation: PATH
@@ -320,7 +316,7 @@ Parameters for learning rate:
### Training Process
-- Set options in `config.py`, including loss_scale, learning rate, and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about the dataset.
+- Set options in `default_config.yaml`, including loss_scale, learning rate, and network hyperparameters. Click [here](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/data_preparation.html) for more information about the dataset.
- Run `run_standalone_train.sh` for non-distributed training of the Transformer model.
@@ -338,7 +334,7 @@ Parameters for learning rate:
### Evaluation Process
-- Set options in `eval_config.py`. Make sure the 'data_file', 'model_file' and 'output_file' paths are set.
+- Set options in `default_config.yaml`. Make sure the 'data_file', 'model_file' and 'output_file' paths are set.
- Run `eval.py` to evaluate the Transformer model.
@@ -429,7 +425,7 @@ bash run_infer_310.sh [MINDIR_PATH] [NEED_PREPROCESS] [DEVICE_ID]
- Initialization of some model weights.
- Dropout operations.
-Some seeds have already been set in train.py to avoid randomness from dataset shuffling and weight initialization. To disable dropout, set the corresponding dropout_prob parameter to 0 in src/config.py.
+Some seeds have already been set in train.py to avoid randomness from dataset shuffling and weight initialization. To disable dropout, set the corresponding dropout_prob parameter to 0 in default_config.yaml.
## ModelZoo Homepage

View File

@@ -16,8 +16,8 @@
if [ $# != 5 ] ; then
  echo "=============================================================================================================="
  echo "Please run the script as: "
- echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH"
- echo "for example: sh run_distribute_pretrain.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml"
+ echo "sh run_distribute_train_ascend.sh DEVICE_NUM EPOCH_SIZE DATA_PATH RANK_TABLE_FILE CONFIG_PATH"
+ echo "for example: sh run_distribute_train_ascend.sh 8 52 /path/ende-l128-mindrecord00 /path/hccl.json ./default_config_large.yaml"
  echo "It is better to use absolute path."
  echo "=============================================================================================================="
exit 1;

View File

@@ -64,8 +64,7 @@ do
        --checkpoint_path="" \
        --save_checkpoint_steps=2500 \
        --save_checkpoint_num=30 \
-        --data_path=$DATA_PATH \
-        --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
+        --data_path=$DATA_PATH > log.txt 2>&1 &
    cd ../
done
cd ..

View File

@@ -17,7 +17,7 @@ if [ $# != 4 ] ; then
  echo "=============================================================================================================="
  echo "Please run the script as: "
  echo "sh run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE DATA_PATH CONFIG_PATH"
- echo "for example: sh run_distribute_pretrain.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml"
+ echo "for example: sh run_distribute_train_gpu.sh 8 55 /path/ende-l128-mindrecord00 ./default_config_large_gpu.yaml"
  echo "It is better to use absolute path."
  echo "=============================================================================================================="
  exit 1;
@@ -47,5 +47,4 @@ mpirun -n $RANK_SIZE \
    --checkpoint_path="" \
    --save_checkpoint_steps=2500 \
    --save_checkpoint_num=30 \
-    --data_path=$DATA_PATH \
-    --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
+    --data_path=$DATA_PATH > log.txt 2>&1 &

View File

@@ -48,8 +48,7 @@ if [ $DEVICE_TARGET == 'Ascend' ];then
        --checkpoint_path="" \
        --save_checkpoint_steps=2500 \
        --save_checkpoint_num=30 \
-        --data_path=$DATA_PATH \
-        --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
+        --data_path=$DATA_PATH > log.txt 2>&1 &
elif [ $DEVICE_TARGET == 'GPU' ];then
    export CUDA_VISIBLE_DEVICES="$2"
@@ -64,8 +63,7 @@ elif [ $DEVICE_TARGET == 'GPU' ];then
        --checkpoint_path="" \
        --save_checkpoint_steps=2500 \
        --save_checkpoint_num=30 \
-        --data_path=$DATA_PATH \
-        --bucket_boundaries=[16,32,48,64,128] > log.txt 2>&1 &
+        --data_path=$DATA_PATH > log.txt 2>&1 &
else
    echo "Not supported device target."
fi