!18458 Faster_RCNN-Resnet(With Resnet50,101,152 backbone)
Merge pull request !18458 from 关文聪/master
This commit is contained in:
commit
c8037bac0e
|
@ -70,7 +70,7 @@ Dataset used: [COCO2017](<https://cocodataset.org/>)
|
|||
pip install mmcv==0.2.14
|
||||
```
|
||||
|
||||
And change the COCO_ROOT and other settings you need in `config.py`. The directory structure is as follows:
|
||||
Then change COCO_ROOT and any other settings you need in `config_50.yaml`, `config_101.yaml` or `config_152.yaml`, depending on the backbone you use. The directory structure is as follows:
|
||||
|
||||
```path
|
||||
.
|
||||
|
@ -90,7 +90,7 @@ Dataset used: [COCO2017](<https://cocodataset.org/>)
|
|||
train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
|
||||
```
|
||||
|
||||
Each row is an image annotation which split by space, the first column is a relative path of image, the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. We read image from an image path joined by the `IMAGE_DIR`(dataset directory) and the relative path in `ANNO_PATH`(the TXT file path), `IMAGE_DIR` and `ANNO_PATH` are setting in `config.py`.
|
||||
Each row is an image annotation split by spaces: the first column is the relative path of the image, and the remaining columns are box and class information in the format [xmin,ymin,xmax,ymax,class]. Each image is read from the path formed by joining `IMAGE_DIR` (the dataset directory) with the relative path listed in `ANNO_PATH` (the TXT file path). `IMAGE_DIR` and `ANNO_PATH` are set in `config_50.yaml`, `config_101.yaml` or `config_152.yaml`.
|
||||
|
||||
# Quick Start
|
||||
|
||||
|
@ -110,13 +110,13 @@ Note:
|
|||
python convert_checkpoint.py --ckpt_file=[BACKBONE_MODEL]
|
||||
|
||||
# standalone training
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# distributed training
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# eval
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
|
||||
# inference
|
||||
sh run_infer_310.sh [AIR_PATH] [DATA_PATH] [ANN_FILE_PATH]
|
||||
|
@ -130,13 +130,13 @@ sh run_infer_310.sh [AIR_PATH] [DATA_PATH] [ANN_FILE_PATH]
|
|||
python convert_checkpoint.py --ckpt_file=[BACKBONE_MODEL]
|
||||
|
||||
# standalone training
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# distributed training
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# eval
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
|
||||
```
|
||||
|
||||
|
@ -160,17 +160,17 @@ bash scripts/docker_start.sh fasterrcnn:20.1.0 [DATA_DIR] [MODEL_DIR]
|
|||
|
||||
```shell
|
||||
# standalone training
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# distributed training
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] [BACKBONE]
|
||||
```
|
||||
|
||||
4. Eval
|
||||
|
||||
```shell
|
||||
# eval
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
5. Inference
|
||||
|
@ -203,14 +203,19 @@ sh run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANN_FILE] [DEVICE_ID]
|
|||
├─anchor_generator.py // anchor generator
|
||||
├─bbox_assign_sample.py // first stage sampler
|
||||
├─bbox_assign_sample_stage2.py // second stage sampler
|
||||
├─faster_rcnn_r50.py // fasterrcnn network
|
||||
├─faster_rcnn_resnet.py // fasterrcnn network
|
||||
├─faster_rcnn_resnet50v1.py //fasterrcnn network for ResNet50v1.0
|
||||
├─fpn_neck.py //feature pyramid network
|
||||
├─proposal_generator.py // proposal generator
|
||||
├─rcnn.py // rcnn network
|
||||
├─resnet50.py // backbone network
|
||||
├─resnet.py // backbone network
|
||||
├─resnet50v1.py // backbone network for ResNet50v1.0
|
||||
├─roi_align.py // roi align network
|
||||
└─rpn.py // region proposal network
|
||||
├─config.py // total config
|
||||
├─config.py // config for yaml parsing
|
||||
├─config_50.yaml // config for ResNet50
|
||||
├─config_101.yaml // config for ResNet101
|
||||
├─config_152.yaml // config for ResNet152
|
||||
├─dataset.py // create dataset and process dataset
|
||||
├─lr_schedule.py // learning rate generator
|
||||
├─network_define.py // network define for fasterrcnn
|
||||
|
@ -239,10 +244,10 @@ sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
|||
|
||||
```shell
|
||||
# standalone training on gpu
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# distributed training on gpu
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL] [BACKBONE]
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
@ -274,7 +279,7 @@ Notes:
|
|||
load_param_into_net(net, param_dict)
|
||||
```
|
||||
|
||||
3. The original dataset path needs to be in the config.py,you can select "coco_root" or "image_dir".
|
||||
3. The original dataset path needs to be set in `config_50.yaml`, `config_101.yaml` or `config_152.yaml`; you can select "coco_root" or "image_dir".
|
||||
|
||||
### Result
|
||||
|
||||
|
@ -299,14 +304,14 @@ epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.005
|
|||
|
||||
```shell
|
||||
# eval on ascend
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
#### on GPU
|
||||
|
||||
```shell
|
||||
# eval on GPU
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
> checkpoint can be produced in training process.
|
||||
|
@ -335,7 +340,7 @@ Eval result will be stored in the example path, whose folder name is "eval". Und
|
|||
## Model Export
|
||||
|
||||
```shell
|
||||
python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_format[EXPORT_FORMAT]
|
||||
python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_format [EXPORT_FORMAT] --backbone [BACKBONE]
|
||||
```
|
||||
|
||||
`EXPORT_FORMAT` should be in ["AIR", "MINDIR"]
|
||||
|
|
|
@ -71,7 +71,7 @@ Faster R-CNN是一个两阶段目标检测网络,该网络采用RPN,可以
|
|||
pip install mmcv==0.2.14
|
||||
```
|
||||
|
||||
在`config.py`中更改COCO_ROOT和其他您需要的设置。目录结构如下:
|
||||
根据模型运行需要,对应地在`config_50.yaml、config_101.yaml或config_152.yaml`中更改COCO_ROOT和其他需要的设置。目录结构如下:
|
||||
|
||||
```path
|
||||
.
|
||||
|
@ -91,7 +91,7 @@ Faster R-CNN是一个两阶段目标检测网络,该网络采用RPN,可以
|
|||
train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
|
||||
```
|
||||
|
||||
每行是按空间分割的图像标注,第一列是图像的相对路径,其余为[xmin,ymin,xmax,ymax,class]格式的框和类信息。从`IMAGE_DIR`(数据集目录)图像路径以及`ANNO_PATH`(TXT文件路径)的相对路径中读取图像。`IMAGE_DIR`和`ANNO_PATH`可在`config.py`中设置。
|
||||
每行是按空格分割的图像标注,第一列是图像的相对路径,其余为[xmin,ymin,xmax,ymax,class]格式的框和类信息。从`IMAGE_DIR`(数据集目录)图像路径以及`ANNO_PATH`(TXT文件路径)的相对路径中读取图像。`IMAGE_DIR`和`ANNO_PATH`可在`config_50.yaml、config_101.yaml或config_152.yaml`中设置。
|
||||
|
||||
# 快速入门
|
||||
|
||||
|
@ -111,13 +111,13 @@ Faster R-CNN是一个两阶段目标检测网络,该网络采用RPN,可以
|
|||
python convert_checkpoint.py --ckpt_file=[BACKBONE_MODEL]
|
||||
|
||||
# 单机训练
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# 分布式训练
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# 评估
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
|
||||
#推理
|
||||
sh run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANN_FILE] [DEVICE_ID]
|
||||
|
@ -131,13 +131,13 @@ sh run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANN_FILE] [DEVICE_ID]
|
|||
python convert_checkpoint.py --ckpt_file=[BACKBONE_MODEL]
|
||||
|
||||
# 单机训练
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# 分布式训练
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# 评估
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
|
||||
```
|
||||
|
||||
|
@ -161,17 +161,17 @@ bash scripts/docker_start.sh fasterrcnn:20.1.0 [DATA_DIR] [MODEL_DIR]
|
|||
|
||||
```shell
|
||||
# 单机训练
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# 分布式训练
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] [BACKBONE]
|
||||
```
|
||||
|
||||
4. 评估
|
||||
|
||||
```shell
|
||||
# 评估
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
5. 推理
|
||||
|
@ -204,14 +204,19 @@ sh run_infer_310.sh [AIR_PATH] [DATA_PATH] [ANN_FILE_PATH] [DEVICE_ID]
|
|||
├─anchor_generator.py // 锚点生成器
|
||||
├─bbox_assign_sample.py // 第一阶段采样器
|
||||
├─bbox_assign_sample_stage2.py // 第二阶段采样器
|
||||
├─faster_rcnn_r50.py // Faster R-CNN网络
|
||||
├─faster_rcnn_resnet.py // Faster R-CNN网络
|
||||
├─faster_rcnn_resnet50v1.py //以Resnet50v1.0作为backbone的Faster R-CNN网络
|
||||
├─fpn_neck.py // 特征金字塔网络
|
||||
├─proposal_generator.py // 候选生成器
|
||||
├─rcnn.py // R-CNN网络
|
||||
├─resnet50.py // 骨干网络
|
||||
├─resnet.py // 骨干网络
|
||||
├─resnet50v1.py // Resnet50v1.0骨干网络
|
||||
├─roi_align.py // ROI对齐网络
|
||||
└─rpn.py // 区域候选网络
|
||||
├─config.py // 总配置
|
||||
├─config.py // 读取yaml配置的config类
|
||||
├─config_50.yaml // Resnet50相关配置
|
||||
├─config_101.yaml // Resnet101相关配置
|
||||
├─config_152.yaml // Resnet152相关配置
|
||||
├─dataset.py // 创建并处理数据集
|
||||
├─lr_schedule.py // 学习率生成器
|
||||
├─network_define.py // Faster R-CNN网络定义
|
||||
|
@ -230,20 +235,20 @@ sh run_infer_310.sh [AIR_PATH] [DATA_PATH] [ANN_FILE_PATH] [DEVICE_ID]
|
|||
|
||||
```shell
|
||||
# Ascend单机训练
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_ascend.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# Ascend分布式训练
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] [BACKBONE]
|
||||
```
|
||||
|
||||
#### 在GPU上运行
|
||||
|
||||
```shell
|
||||
# GPU单机训练
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL]
|
||||
sh run_standalone_train_gpu.sh [PRETRAINED_MODEL] [BACKBONE]
|
||||
|
||||
# GPU分布式训练
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL]
|
||||
sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_MODEL] [BACKBONE]
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
@ -275,7 +280,7 @@ Notes:
|
|||
load_param_into_net(net, param_dict)
|
||||
```
|
||||
|
||||
3. config.py中包含原数据集路径,可以选择“coco_root”或“image_dir”。
|
||||
3. config_50.yaml、config_101.yaml、config_152.yaml中包含原数据集路径,可以选择“coco_root”或“image_dir”。
|
||||
|
||||
### 结果
|
||||
|
||||
|
@ -300,14 +305,14 @@ epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.005
|
|||
|
||||
```shell
|
||||
# Ascend评估
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
#### 在GPU上运行
|
||||
|
||||
```shell
|
||||
# GPU评估
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
||||
sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]
|
||||
```
|
||||
|
||||
> 在训练过程中生成检查点。
|
||||
|
@ -336,7 +341,7 @@ sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
|
|||
## 模型导出
|
||||
|
||||
```shell
|
||||
python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_format[EXPORT_FORMAT]
|
||||
python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_format [EXPORT_FORMAT] --backbone [BACKBONE]
|
||||
```
|
||||
|
||||
`EXPORT_FORMAT` 可选 ["AIR", "MINDIR"]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
|
|
@ -24,10 +24,9 @@ from mindspore import context
|
|||
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||
from mindspore.common import set_seed, Parameter
|
||||
|
||||
from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50
|
||||
from src.config import config
|
||||
from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset
|
||||
from src.util import coco_eval, bbox2result_1image, results2json
|
||||
import src.config as cfg
|
||||
|
||||
set_seed(1)
|
||||
|
||||
|
@ -38,14 +37,29 @@ parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoi
|
|||
parser.add_argument("--device_target", type=str, default="Ascend",
|
||||
help="device where the code will be implemented, default is Ascend")
|
||||
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
|
||||
parser.add_argument("--backbone", type=str, required=True, \
|
||||
help="backbone network name, options:resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152")
|
||||
args_opt = parser.parse_args()
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=args_opt.device_id)
|
||||
|
||||
if args_opt.backbone in ("resnet_v1.5_50", "resnet_v1_101", "resnet_v1_152"):
|
||||
from src.FasterRcnn.faster_rcnn_resnet import Faster_Rcnn_Resnet
|
||||
if args_opt.backbone == "resnet_v1.5_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_101":
|
||||
config = cfg.get_config("./src/config_101.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_152":
|
||||
config = cfg.get_config("./src/config_152.yaml")
|
||||
|
||||
elif args_opt.backbone == "resnet_v1_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
from src.FasterRcnn.faster_rcnn_resnet50v1 import Faster_Rcnn_Resnet
|
||||
|
||||
def fasterrcnn_eval(dataset_path, ckpt_path, ann_file):
|
||||
"""FasterRcnn evaluation."""
|
||||
ds = create_fasterrcnn_dataset(dataset_path, batch_size=config.test_batch_size, is_training=False)
|
||||
net = Faster_Rcnn_Resnet50(config)
|
||||
ds = create_fasterrcnn_dataset(config, dataset_path, batch_size=config.test_batch_size, is_training=False)
|
||||
net = Faster_Rcnn_Resnet(config)
|
||||
param_dict = load_checkpoint(ckpt_path)
|
||||
if args_opt.device_target == "GPU":
|
||||
for key, value in param_dict.items():
|
||||
|
@ -123,14 +137,14 @@ if __name__ == '__main__':
|
|||
if args_opt.dataset == "coco":
|
||||
if os.path.isdir(config.coco_root):
|
||||
print("Create Mindrecord. It may take some time.")
|
||||
data_to_mindrecord_byte_image("coco", False, prefix, file_num=1)
|
||||
data_to_mindrecord_byte_image(config, "coco", False, prefix, file_num=1)
|
||||
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
|
||||
else:
|
||||
print("coco_root not exits.")
|
||||
else:
|
||||
if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH):
|
||||
print("Create Mindrecord. It may take some time.")
|
||||
data_to_mindrecord_byte_image("other", False, prefix, file_num=1)
|
||||
data_to_mindrecord_byte_image(config, "other", False, prefix, file_num=1)
|
||||
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
|
||||
else:
|
||||
print("IMAGE_DIR or ANNO_PATH not exits.")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -16,12 +16,9 @@
|
|||
import argparse
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.common.dtype as mstype
|
||||
from mindspore import Tensor, load_checkpoint, load_param_into_net, export, context
|
||||
|
||||
from src.FasterRcnn.faster_rcnn_r50 import FasterRcnn_Infer
|
||||
from src.config import config
|
||||
import src.config as cfg
|
||||
|
||||
parser = argparse.ArgumentParser(description='fasterrcnn_export')
|
||||
parser.add_argument("--device_id", type=int, default=0, help="Device id")
|
||||
|
@ -30,12 +27,27 @@ parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"]
|
|||
parser.add_argument("--device_target", type=str, choices=["Ascend", "GPU", "CPU"], default="Ascend",
|
||||
help="device target")
|
||||
parser.add_argument('--ckpt_file', type=str, default='', help='fasterrcnn ckpt file.')
|
||||
parser.add_argument("--backbone", type=str, required=True, \
|
||||
help="backbone network name, options:resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152")
|
||||
args = parser.parse_args()
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
|
||||
if args.device_target == "Ascend":
|
||||
context.set_context(device_id=args.device_id)
|
||||
|
||||
if args.backbone in ("resnet_v1.5_50", "resnet_v1_101", "resnet_v1_152"):
|
||||
from src.FasterRcnn.faster_rcnn_resnet import FasterRcnn_Infer
|
||||
if args.backbone == "resnet_v1.5_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
elif args.backbone == "resnet_v1_101":
|
||||
config = cfg.get_config("./src/config_101.yaml")
|
||||
elif args.backbone == "resnet_v1_152":
|
||||
config = cfg.get_config("./src/config_152.yaml")
|
||||
|
||||
elif args.backbone == "resnet_v1_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
from src.FasterRcnn.faster_rcnn_resnet50v1 import FasterRcnn_Infer
|
||||
|
||||
if __name__ == '__main__':
|
||||
net = FasterRcnn_Infer(config=config)
|
||||
|
||||
|
@ -51,7 +63,7 @@ if __name__ == '__main__':
|
|||
if device_type == "Ascend":
|
||||
net.to_float(mstype.float16)
|
||||
|
||||
img = Tensor(np.zeros([config.test_batch_size, 3, config.img_height, config.img_width]), ms.float32)
|
||||
img_metas = Tensor(np.random.uniform(0.0, 1.0, size=[config.test_batch_size, 4]), ms.float32)
|
||||
img = Tensor(np.zeros([config.test_batch_size, 3, config.img_height, config.img_width]), mstype.float32)
|
||||
img_metas = Tensor(np.random.uniform(0.0, 1.0, size=[config.test_batch_size, 4]), mstype.float32)
|
||||
|
||||
export(net, img, img_metas, file_name=args.file_name, file_format=args.file_format)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -13,10 +13,28 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""hub config."""
|
||||
from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50
|
||||
from src.config import config
|
||||
import argparse
|
||||
import src.config as cfg
|
||||
|
||||
parser = argparse.ArgumentParser(description="FasterRcnn")
|
||||
parser.add_argument("--backbone", type=str, required=True, \
|
||||
help="backbone network name, options:resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152")
|
||||
args_opt = parser.parse_args()
|
||||
|
||||
if args_opt.backbone in ("resnet_v1.5_50", "resnet_v1_101", "resnet_v1_152"):
|
||||
from src.FasterRcnn.faster_rcnn_resnet import Faster_Rcnn_Resnet
|
||||
if args_opt.backbone == "resnet_v1.5_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_101":
|
||||
config = cfg.get_config("./src/config_101.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_152":
|
||||
config = cfg.get_config("./src/config_152.yaml")
|
||||
|
||||
elif args_opt.backbone == "resnet_v1_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
from src.FasterRcnn.faster_rcnn_resnet50v1 import Faster_Rcnn_Resnet
|
||||
|
||||
def create_network(name, *args, **kwargs):
|
||||
if name == "faster_rcnn":
|
||||
return Faster_Rcnn_Resnet50(config=config)
|
||||
return Faster_Rcnn_Resnet(config=config)
|
||||
raise NotImplementedError(f"{name} is not implemented in the repo")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -18,8 +18,8 @@ import argparse
|
|||
import numpy as np
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
from src.config import config
|
||||
from src.util import coco_eval, bbox2result_1image, results2json
|
||||
import src.config as cfg
|
||||
|
||||
dst_width = 1280
|
||||
dst_height = 768
|
||||
|
@ -28,6 +28,7 @@ parser = argparse.ArgumentParser(description="FasterRcnn inference")
|
|||
parser.add_argument("--ann_file", type=str, required=True, help="ann file.")
|
||||
parser.add_argument("--result_path", type=str, required=True, help="result file path.")
|
||||
args = parser.parse_args()
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
|
||||
def get_eval_result(ann_file, result_path):
|
||||
""" get evaluation result of faster rcnn"""
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,9 +14,15 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# -ne 2 ]
|
||||
if [ $# -ne 3 ]
|
||||
then
|
||||
echo "Usage: sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_PATH]"
|
||||
echo "Usage: sh run_distribute_train_ascend.sh [RANK_TABLE_FILE] [PRETRAINED_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $3 != "resnet_v1_50" ] && [ $3 != "resnet_v1.5_50" ] && [ $3 != "resnet_v1_101" ] && [ $3 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -63,6 +69,6 @@ do
|
|||
cd ./train_parallel$i || exit
|
||||
echo "start training for rank $RANK_ID, device $DEVICE_ID"
|
||||
env > env.log
|
||||
python train.py --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM --pre_trained=$PATH2 &> log &
|
||||
python train.py --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM --pre_trained=$PATH2 --backbone=$3 &> log &
|
||||
cd ..
|
||||
done
|
||||
done
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -16,14 +16,20 @@
|
|||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the script as: "
|
||||
echo "sh run_distribute_train_gpu.sh DEVICE_NUM PRETRAINED_PATH"
|
||||
echo "for example: sh run_distribute_train_gpu.sh 8 /path/pretrain.ckpt"
|
||||
echo "sh run_distribute_train_gpu.sh DEVICE_NUM PRETRAINED_PATH BACKBONE"
|
||||
echo "for example: sh run_distribute_train_gpu.sh 8 /path/pretrain.ckpt resnet_v1_50"
|
||||
echo "It is better to use absolute path."
|
||||
echo "=============================================================================================================="
|
||||
|
||||
if [ $# != 2 ]
|
||||
if [ $# != 3 ]
|
||||
then
|
||||
echo "Usage: sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_PATH]"
|
||||
echo "Usage: sh run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $3 != "resnet_v1_50" ] && [ $3 != "resnet_v1.5_50" ] && [ $3 != "resnet_v1_101" ] && [ $3 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -41,4 +47,5 @@ mpirun -n $RANK_SIZE \
|
|||
--run_distribute=True \
|
||||
--device_target="GPU" \
|
||||
--device_num=$RANK_SIZE \
|
||||
--pre_trained=$PRETRAINED_PATH > log 2>&1 &
|
||||
--pre_trained=$PRETRAINED_PATH \
|
||||
--backbone=$3 > log 2>&1 &
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,9 +14,15 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# != 2 ]
|
||||
if [ $# != 3 ]
|
||||
then
|
||||
echo "Usage: sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]"
|
||||
echo "Usage: sh run_eval_ascend.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $3 != "resnet_v1_50" ] && [ $3 != "resnet_v1.5_50" ] && [ $3 != "resnet_v1_101" ] && [ $3 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -61,5 +67,5 @@ cp -r ../src ./eval
|
|||
cd ./eval || exit
|
||||
env > env.log
|
||||
echo "start eval for device $DEVICE_ID"
|
||||
python eval.py --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 &> log &
|
||||
cd ..
|
||||
python eval.py --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 --backbone=$3 &> log &
|
||||
cd ..
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,9 +14,15 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# != 2 ]
|
||||
if [ $# != 3 ]
|
||||
then
|
||||
echo "Usage: sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]"
|
||||
echo "Usage: sh run_eval_gpu.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $3 != "resnet_v1_50" ] && [ $3 != "resnet_v1.5_50" ] && [ $3 != "resnet_v1_101" ] && [ $3 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -42,7 +48,7 @@ if [ ! -f $PATH2 ]
|
|||
then
|
||||
echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
export DEVICE_NUM=1
|
||||
export RANK_SIZE=$DEVICE_NUM
|
||||
|
@ -60,5 +66,5 @@ cp -r ../src ./eval
|
|||
cd ./eval || exit
|
||||
env > env.log
|
||||
echo "start eval for device $DEVICE_ID"
|
||||
python eval.py --device_target="GPU" --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 &> log &
|
||||
python eval.py --device_target="GPU" --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 --backbone=$3 &> log &
|
||||
cd ..
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,9 +14,15 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# -ne 1 ]
|
||||
if [ $# -ne 2 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train_ascend.sh [PRETRAINED_PATH]"
|
||||
echo "Usage: sh run_standalone_train_ascend.sh [PRETRAINED_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $2 != "resnet_v1_50" ] && [ $2 != "resnet_v1.5_50" ] && [ $2 != "resnet_v1_101" ] && [ $2 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -54,5 +60,5 @@ cp -r ../src ./train
|
|||
cd ./train || exit
|
||||
echo "start training for device $DEVICE_ID"
|
||||
env > env.log
|
||||
python train.py --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log &
|
||||
cd ..
|
||||
python train.py --device_id=$DEVICE_ID --pre_trained=$PATH1 --backbone=$2 &> log &
|
||||
cd ..
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,9 +14,15 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# -ne 1 ]
|
||||
if [ $# -ne 2 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [PRETRAINED_PATH]"
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [PRETRAINED_PATH] [BACKBONE]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $2 != "resnet_v1_50" ] && [ $2 != "resnet_v1.5_50" ] && [ $2 != "resnet_v1_101" ] && [ $2 != "resnet_v1_152" ]
|
||||
then
|
||||
echo "error: the selected backbone must be resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -54,5 +60,5 @@ cp -r ../src ./train
|
|||
cd ./train || exit
|
||||
echo "start training for device $DEVICE_ID"
|
||||
env > env.log
|
||||
python train.py --device_id=$DEVICE_ID --pre_trained=$PATH1 --device_target="GPU" &> log &
|
||||
python train.py --device_id=$DEVICE_ID --pre_trained=$PATH1 --device_target="GPU" --backbone=$2 &> log &
|
||||
cd ..
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -14,7 +14,8 @@
|
|||
# ============================================================================
|
||||
"""FasterRcnn Init."""
|
||||
|
||||
from .resnet50 import ResNetFea, ResidualBlockUsing
|
||||
from .resnet import ResNetFea, ResidualBlockUsing
|
||||
from .resnet50v1 import ResidualBlockUsing_V1
|
||||
from .bbox_assign_sample import BboxAssignSample
|
||||
from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn
|
||||
from .fpn_neck import FeatPyramidNeck
|
||||
|
@ -27,5 +28,5 @@ from .anchor_generator import AnchorGenerator
|
|||
__all__ = [
|
||||
"ResNetFea", "BboxAssignSample", "BboxAssignSampleForRcnn",
|
||||
"FeatPyramidNeck", "Proposal", "Rcnn",
|
||||
"RPN", "SingleRoIExtractor", "AnchorGenerator", "ResidualBlockUsing"
|
||||
"RPN", "SingleRoIExtractor", "AnchorGenerator", "ResidualBlockUsing", "ResidualBlockUsing_V1"
|
||||
]
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""FasterRcnn based on ResNet50."""
|
||||
"""FasterRcnn based on ResNet."""
|
||||
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
|
@ -21,7 +21,7 @@ from mindspore.ops import operations as P
|
|||
from mindspore.common.tensor import Tensor
|
||||
import mindspore.common.dtype as mstype
|
||||
from mindspore.ops import functional as F
|
||||
from .resnet50 import ResNetFea, ResidualBlockUsing
|
||||
from .resnet import ResNetFea, ResidualBlockUsing
|
||||
from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn
|
||||
from .fpn_neck import FeatPyramidNeck
|
||||
from .proposal_generator import Proposal
|
||||
|
@ -31,12 +31,12 @@ from .roi_align import SingleRoIExtractor
|
|||
from .anchor_generator import AnchorGenerator
|
||||
|
||||
|
||||
class Faster_Rcnn_Resnet50(nn.Cell):
|
||||
class Faster_Rcnn_Resnet(nn.Cell):
|
||||
"""
|
||||
FasterRcnn Network.
|
||||
|
||||
Note:
|
||||
backbone = resnet50
|
||||
backbone = resnet
|
||||
|
||||
Returns:
|
||||
Tuple, tuple of output tensor.
|
||||
|
@ -48,10 +48,10 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
rcnn_reg_loss: Scalar, Regression loss of RCNN subnet.
|
||||
|
||||
Examples:
|
||||
net = Faster_Rcnn_Resnet50()
|
||||
net = Faster_Rcnn_Resnet()
|
||||
"""
|
||||
def __init__(self, config):
|
||||
super(Faster_Rcnn_Resnet50, self).__init__()
|
||||
super(Faster_Rcnn_Resnet, self).__init__()
|
||||
self.dtype = np.float32
|
||||
self.ms_type = mstype.float32
|
||||
self.train_batch_size = config.batch_size
|
||||
|
@ -79,7 +79,7 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
|
||||
self.anchor_list = self.get_anchors(featmap_sizes)
|
||||
|
||||
# Backbone resnet50
|
||||
# Backbone resnet
|
||||
self.backbone = ResNetFea(ResidualBlockUsing,
|
||||
config.resnet_block,
|
||||
config.resnet_in_channels,
|
||||
|
@ -121,7 +121,7 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
self.roi_init(config)
|
||||
|
||||
# Rcnn
|
||||
self.rcnn = Rcnn(config, config.rcnn_in_channels * config.roi_layer['out_size'] * config.roi_layer['out_size'],
|
||||
self.rcnn = Rcnn(config, config.rcnn_in_channels * config.roi_layer.out_size * config.roi_layer.out_size,
|
||||
self.train_batch_size, self.num_classes)
|
||||
|
||||
# Op declare
|
||||
|
@ -148,6 +148,19 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
self.device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others"
|
||||
|
||||
def roi_init(self, config):
|
||||
"""
|
||||
Initialize roi from the config file
|
||||
|
||||
Args:
|
||||
config (file): config file.
|
||||
roi_layer (dict): Numbers of block in different layers.
|
||||
roi_align_out_channels (int): Out channel in each layer.
|
||||
config.roi_align_featmap_strides (list): featmap_strides in each layer.
|
||||
roi_align_finest_scale (int): finest_scale in roi.
|
||||
|
||||
Examples:
|
||||
self.roi_init(config)
|
||||
"""
|
||||
self.roi_align = SingleRoIExtractor(config,
|
||||
config.roi_layer,
|
||||
config.roi_align_out_channels,
|
||||
|
@ -164,6 +177,19 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
self.roi_align_test.set_train_local(config, False)
|
||||
|
||||
def test_mode_init(self, config):
|
||||
"""
|
||||
Initialize test_mode from the config file.
|
||||
|
||||
Args:
|
||||
config (file): config file.
|
||||
test_batch_size (int): Size of test batch.
|
||||
rpn_max_num (int): max num of rpn.
|
||||
test_score_thresh (float): threshold of test score.
|
||||
test_iou_thr (float): threshold of test iou.
|
||||
|
||||
Examples:
|
||||
self.test_mode_init(config)
|
||||
"""
|
||||
self.test_batch_size = config.test_batch_size
|
||||
self.split = P.Split(axis=0, output_num=self.test_batch_size)
|
||||
self.split_shape = P.Split(axis=0, output_num=4)
|
||||
|
@ -195,6 +221,7 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
self.test_num_proposal = self.test_batch_size * self.rpn_max_num
|
||||
|
||||
def init_tensor(self, config):
|
||||
|
||||
roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i,
|
||||
dtype=self.dtype) for i in range(self.train_batch_size)]
|
||||
|
||||
|
@ -205,6 +232,19 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test))
|
||||
|
||||
def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids):
|
||||
"""
|
||||
construct the FasterRcnn Network.
|
||||
|
||||
Args:
|
||||
img_data: input image data.
|
||||
img_metas: meta label of img.
|
||||
gt_bboxes (Tensor): get the value of bboxes.
|
||||
gt_labels (Tensor): get the value of labels.
|
||||
gt_valids (Tensor): get the valid part of bboxes.
|
||||
|
||||
Returns:
|
||||
Tuple,tuple of output tensor
|
||||
"""
|
||||
x = self.backbone(img_data)
|
||||
x = self.fpn_ncek(x)
|
||||
|
||||
|
@ -440,7 +480,7 @@ class Faster_Rcnn_Resnet50(nn.Cell):
|
|||
class FasterRcnn_Infer(nn.Cell):
|
||||
def __init__(self, config):
|
||||
super(FasterRcnn_Infer, self).__init__()
|
||||
self.network = Faster_Rcnn_Resnet50(config)
|
||||
self.network = Faster_Rcnn_Resnet(config)
|
||||
self.network.set_train(False)
|
||||
|
||||
def construct(self, img_data, img_metas):
|
|
@ -0,0 +1,488 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""FasterRcnn based on ResNet50v1.0."""
|
||||
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
from mindspore import context
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.common.tensor import Tensor
|
||||
import mindspore.common.dtype as mstype
|
||||
from mindspore.ops import functional as F
|
||||
from .resnet import ResNetFea
|
||||
from .resnet50v1 import ResidualBlockUsing_V1
|
||||
from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn
|
||||
from .fpn_neck import FeatPyramidNeck
|
||||
from .proposal_generator import Proposal
|
||||
from .rcnn import Rcnn
|
||||
from .rpn import RPN
|
||||
from .roi_align import SingleRoIExtractor
|
||||
from .anchor_generator import AnchorGenerator
|
||||
|
||||
|
||||
class Faster_Rcnn_Resnet(nn.Cell):
    """
    FasterRcnn Network.

    Note:
        The backbone is a ``ResNetFea`` built from ``ResidualBlockUsing_V1`` blocks,
        followed by an FPN neck, an RPN head, RoI align and an RCNN head.

    Args:
        config: configuration object; its fields are read as attributes
            (for example ``config.batch_size`` and ``config.roi_layer.out_size``).

    Returns:
        Tuple, tuple of output tensor.

        In training mode:
            rpn_loss: Scalar, Total loss of RPN subnet.
            rcnn_loss: Scalar, Total loss of RCNN subnet.
            rpn_cls_loss: Scalar, Classification loss of RPN subnet.
            rpn_reg_loss: Scalar, Regression loss of RPN subnet.
            rcnn_cls_loss: Scalar, Classification loss of RCNN subnet.
            rcnn_reg_loss: Scalar, Regression loss of RCNN subnet.

        Otherwise the result of ``get_det_bboxes``: (all_bboxes, all_labels, all_masks).

    Examples:
        net = Faster_Rcnn_Resnet(config)
    """
    def __init__(self, config):
        super(Faster_Rcnn_Resnet, self).__init__()
        self.dtype = np.float32
        self.ms_type = mstype.float32
        self.train_batch_size = config.batch_size
        self.num_classes = config.num_classes
        self.anchor_scales = config.anchor_scales
        self.anchor_ratios = config.anchor_ratios
        self.anchor_strides = config.anchor_strides
        self.target_means = tuple(config.rcnn_target_means)
        self.target_stds = tuple(config.rcnn_target_stds)

        # Anchor generator: one generator per stride, the stride doubles as the base size.
        self.anchor_base_sizes = list(self.anchor_strides)

        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios))

        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)

        featmap_sizes = config.feature_shapes
        assert len(featmap_sizes) == len(self.anchor_generators)

        # Anchors are computed once here and reused for every batch.
        self.anchor_list = self.get_anchors(featmap_sizes)

        # Backbone resnet
        self.backbone = ResNetFea(ResidualBlockUsing_V1,
                                  config.resnet_block,
                                  config.resnet_in_channels,
                                  config.resnet_out_channels,
                                  False)

        # Fpn
        # NOTE(review): `fpn_ncek` looks like a typo of `fpn_neck`; the name is kept
        # because saved parameter names presumably derive from it - confirm before renaming.
        self.fpn_ncek = FeatPyramidNeck(config.fpn_in_channels,
                                        config.fpn_out_channels,
                                        config.fpn_num_outs)

        # Rpn and rpn loss
        self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8))
        self.rpn_with_loss = RPN(config,
                                 self.train_batch_size,
                                 config.rpn_in_channels,
                                 config.rpn_feat_channels,
                                 config.num_anchors,
                                 config.rpn_cls_out_channels)

        # Proposal: separate generators for training and test batch sizes.
        self.proposal_generator = Proposal(config,
                                           self.train_batch_size,
                                           config.activate_num_classes,
                                           config.use_sigmoid_cls)
        self.proposal_generator.set_train_local(config, True)
        self.proposal_generator_test = Proposal(config,
                                                config.test_batch_size,
                                                config.activate_num_classes,
                                                config.use_sigmoid_cls)
        self.proposal_generator_test.set_train_local(config, False)

        # Assign and sampler stage two
        self.bbox_assigner_sampler_for_rcnn = BboxAssignSampleForRcnn(config, self.train_batch_size,
                                                                      config.num_bboxes_stage2, True)
        self.decode = P.BoundingBoxDecode(max_shape=(config.img_height, config.img_width),
                                          means=self.target_means,
                                          stds=self.target_stds)
        # Roi
        self.roi_init(config)

        # Rcnn
        self.rcnn = Rcnn(config, config.rcnn_in_channels * config.roi_layer.out_size * config.roi_layer.out_size,
                         self.train_batch_size, self.num_classes)

        # Op declare
        self.squeeze = P.Squeeze()
        self.cast = P.Cast()

        self.concat = P.Concat(axis=0)
        self.concat_1 = P.Concat(axis=1)
        self.concat_2 = P.Concat(axis=2)
        self.reshape = P.Reshape()
        self.select = P.Select()
        self.greater = P.Greater()
        self.transpose = P.Transpose()

        # Improve speed: multiclass_nms concatenates per-class results in two slices
        # split at concat_start.
        self.concat_start = min(self.num_classes - 2, 55)
        self.concat_end = (self.num_classes - 1)

        # Test mode
        self.test_mode_init(config)

        # Init tensor
        self.init_tensor(config)
        self.device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others"

    def roi_init(self, config):
        """
        Build the RoI extractors used in training and in test mode.

        Two ``SingleRoIExtractor`` instances are created from the same settings;
        the training one uses ``self.train_batch_size`` and the test one a batch size of 1.

        Args:
            config: configuration object. The fields read here are
                ``roi_layer``, ``roi_align_out_channels``,
                ``roi_align_featmap_strides`` and ``roi_align_finest_scale``.

        Examples:
            self.roi_init(config)
        """
        self.roi_align = SingleRoIExtractor(config,
                                            config.roi_layer,
                                            config.roi_align_out_channels,
                                            config.roi_align_featmap_strides,
                                            self.train_batch_size,
                                            config.roi_align_finest_scale)
        self.roi_align.set_train_local(config, True)
        self.roi_align_test = SingleRoIExtractor(config,
                                                 config.roi_layer,
                                                 config.roi_align_out_channels,
                                                 config.roi_align_featmap_strides,
                                                 1,
                                                 config.roi_align_finest_scale)
        self.roi_align_test.set_train_local(config, False)

    def test_mode_init(self, config):
        """
        Create the operators and constant tensors used by the inference path.

        Args:
            config: configuration object. The fields read here are
                ``test_batch_size``, ``rpn_max_num``, ``test_score_thr``,
                ``test_iou_thr`` and ``test_max_per_img``.

        Examples:
            self.test_mode_init(config)
        """
        self.test_batch_size = config.test_batch_size
        self.split = P.Split(axis=0, output_num=self.test_batch_size)
        self.split_shape = P.Split(axis=0, output_num=4)
        self.split_scores = P.Split(axis=1, output_num=self.num_classes)
        self.split_cls = P.Split(axis=0, output_num=self.num_classes-1)
        self.tile = P.Tile()
        self.gather = P.GatherNd()

        self.rpn_max_num = config.rpn_max_num

        self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(self.dtype))
        # np.bool_ is used because the plain `np.bool` alias no longer exists in recent NumPy.
        self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool_)
        self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool_)
        # Column pattern (True, False, True, False): get_det_bboxes uses it to take
        # columns 0 and 2 from the width-scaled boxes and columns 1 and 3 from the
        # height-scaled boxes.
        self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask,
                                                self.ones_mask, self.zeros_mask), axis=1))
        self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask,
                                                   self.ones_mask, self.ones_mask, self.zeros_mask), axis=1))

        self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.dtype) * config.test_score_thr)
        self.test_score_zeros = Tensor(np.zeros((self.rpn_max_num, 1), dtype=self.dtype))
        # Filler value (-1) for boxes that are masked out in multiclass_nms.
        self.test_box_zeros = Tensor(np.full((self.rpn_max_num, 4), -1, dtype=self.dtype))
        self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.dtype) * config.test_iou_thr)
        self.test_max_per_img = config.test_max_per_img
        self.nms_test = P.NMSWithMask(config.test_iou_thr)
        self.softmax = P.Softmax(axis=1)
        self.logicand = P.LogicalAnd()
        self.oneslike = P.OnesLike()
        self.test_topk = P.TopK(sorted=True)
        self.test_num_proposal = self.test_batch_size * self.rpn_max_num

    def init_tensor(self, config):
        """
        Build the batch-index columns that are prepended to the RoI boxes.

        For every image index ``i`` a column filled with ``i`` is created; the columns
        are stacked along axis 0. The training column has
        ``num_expected_pos_stage2 + num_expected_neg_stage2`` rows per image, the test
        column has ``rpn_max_num`` rows per image.

        Args:
            config: configuration object providing the three fields named above.
        """
        rois_per_train_img = config.num_expected_pos_stage2 + config.num_expected_neg_stage2
        roi_align_index = [np.full((rois_per_train_img, 1), i, dtype=self.dtype)
                           for i in range(self.train_batch_size)]

        roi_align_index_test = [np.full((config.rpn_max_num, 1), i, dtype=self.dtype)
                                for i in range(self.test_batch_size)]

        self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index))
        self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test))

    def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids):
        """
        Run the FasterRcnn network.

        Args:
            img_data: input image batch fed to the backbone.
            img_metas: per-image meta information; forwarded to the RPN and, in
                inference, used by ``get_det_bboxes`` to rescale boxes.
            gt_bboxes (Tensor): ground-truth boxes, indexed per image along axis 0.
            gt_labels (Tensor): ground-truth labels, indexed per image along axis 0.
            gt_valids (Tensor): validity flags of the ground-truth boxes.

        Returns:
            Tuple. In training mode
            (rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, rcnn_cls_loss, rcnn_reg_loss);
            otherwise the output of ``get_det_bboxes``.
        """
        x = self.backbone(img_data)
        x = self.fpn_ncek(x)

        rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(x,
                                                                                           img_metas,
                                                                                           self.anchor_list,
                                                                                           gt_bboxes,
                                                                                           self.gt_labels_stage1,
                                                                                           gt_valids)

        if self.training:
            proposal, proposal_mask = self.proposal_generator(cls_score, bbox_pred, self.anchor_list)
        else:
            proposal, proposal_mask = self.proposal_generator_test(cls_score, bbox_pred, self.anchor_list)

        gt_labels = self.cast(gt_labels, mstype.int32)
        gt_valids = self.cast(gt_valids, mstype.int32)
        bboxes_tuple = ()
        deltas_tuple = ()
        labels_tuple = ()
        mask_tuple = ()
        if self.training:
            # Assign and sample proposals image by image.
            for i in range(self.train_batch_size):
                gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::])

                gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::])
                gt_labels_i = self.cast(gt_labels_i, mstype.uint8)

                gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::])
                gt_valids_i = self.cast(gt_valids_i, mstype.bool_)

                bboxes, deltas, labels, mask = self.bbox_assigner_sampler_for_rcnn(gt_bboxes_i,
                                                                                   gt_labels_i,
                                                                                   proposal_mask[i],
                                                                                   proposal[i][::, 0:4:1],
                                                                                   gt_valids_i)
                bboxes_tuple += (bboxes,)
                deltas_tuple += (deltas,)
                labels_tuple += (labels,)
                mask_tuple += (mask,)

            bbox_targets = self.concat(deltas_tuple)
            rcnn_labels = self.concat(labels_tuple)
            bbox_targets = F.stop_gradient(bbox_targets)
            rcnn_labels = F.stop_gradient(rcnn_labels)
            rcnn_labels = self.cast(rcnn_labels, mstype.int32)
        else:
            # No targets in inference: the proposal mask stands in for targets and labels.
            mask_tuple += proposal_mask
            bbox_targets = proposal_mask
            rcnn_labels = proposal_mask
            for p_i in proposal:
                bboxes_tuple += (p_i[::, 0:4:1],)

        if self.training:
            if self.train_batch_size > 1:
                bboxes_all = self.concat(bboxes_tuple)
            else:
                bboxes_all = bboxes_tuple[0]
            rois = self.concat_1((self.roi_align_index_tensor, bboxes_all))
        else:
            if self.test_batch_size > 1:
                bboxes_all = self.concat(bboxes_tuple)
            else:
                bboxes_all = bboxes_tuple[0]
            if self.device_type == "Ascend":
                bboxes_all = self.cast(bboxes_all, mstype.float16)
            rois = self.concat_1((self.roi_align_index_test_tensor, bboxes_all))

        rois = self.cast(rois, mstype.float32)
        rois = F.stop_gradient(rois)

        if self.training:
            roi_feats = self.roi_align(rois,
                                       self.cast(x[0], mstype.float32),
                                       self.cast(x[1], mstype.float32),
                                       self.cast(x[2], mstype.float32),
                                       self.cast(x[3], mstype.float32))
        else:
            roi_feats = self.roi_align_test(rois,
                                            self.cast(x[0], mstype.float32),
                                            self.cast(x[1], mstype.float32),
                                            self.cast(x[2], mstype.float32),
                                            self.cast(x[3], mstype.float32))

        roi_feats = self.cast(roi_feats, self.ms_type)
        rcnn_masks = self.concat(mask_tuple)
        rcnn_masks = F.stop_gradient(rcnn_masks)
        rcnn_mask_squeeze = self.squeeze(self.cast(rcnn_masks, mstype.bool_))
        rcnn_loss, rcnn_cls_loss, rcnn_reg_loss, _ = self.rcnn(roi_feats,
                                                               bbox_targets,
                                                               rcnn_labels,
                                                               rcnn_mask_squeeze)

        output = ()
        if self.training:
            output += (rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, rcnn_cls_loss, rcnn_reg_loss)
        else:
            # In inference the second and third RCNN outputs are passed on as class
            # logits and box regression outputs (see get_det_bboxes parameters).
            # NOTE(review): presumably Rcnn returns raw predictions in these slots when
            # not training - confirm in rcnn.py.
            output = self.get_det_bboxes(rcnn_cls_loss, rcnn_reg_loss, rcnn_masks, bboxes_all, img_metas)

        return output

    def get_det_bboxes(self, cls_logits, reg_logits, mask_logits, rois, img_metas):
        """
        Get the actual detection box.

        Softmax is applied to ``cls_logits``, the regression outputs are decoded
        against ``rois`` class by class, the decoded boxes are divided by the scale
        values taken from ``img_metas`` and the result is passed to ``multiclass_nms``.

        Args:
            cls_logits: class logits for every RoI.
            reg_logits: box regression outputs; four consecutive columns per class.
            mask_logits: validity mask of the RoIs.
            rois: RoI boxes used as decoding reference.
            img_metas: per-image meta information; entries 2 and 3 of each image are
                used as the height and width scale.

        Returns:
            Tuple, the output of ``multiclass_nms``.
        """
        scores = self.softmax(cls_logits)

        boxes_all = ()
        for i in range(self.num_classes):
            k = i * 4
            reg_logits_i = self.squeeze(reg_logits[::, k:k+4:1])
            out_boxes_i = self.decode(rois, reg_logits_i)
            boxes_all += (out_boxes_i,)

        img_metas_all = self.split(img_metas)
        scores_all = self.split(scores)
        mask_all = self.split(self.cast(mask_logits, mstype.int32))

        boxes_all_with_batchsize = ()
        for i in range(self.test_batch_size):
            scale = self.split_shape(self.squeeze(img_metas_all[i]))
            scale_h = scale[2]
            scale_w = scale[3]
            boxes_tuple = ()
            for j in range(self.num_classes):
                boxes_tmp = self.split(boxes_all[j])
                out_boxes_h = boxes_tmp[i] / scale_h
                out_boxes_w = boxes_tmp[i] / scale_w
                # bbox_mask selects the width-scaled value for columns 0 and 2 and the
                # height-scaled value for columns 1 and 3.
                boxes_tuple += (self.select(self.bbox_mask, out_boxes_w, out_boxes_h),)
            boxes_all_with_batchsize += (boxes_tuple,)

        output = self.multiclass_nms(boxes_all_with_batchsize, scores_all, mask_all)

        return output

    def multiclass_nms(self, boxes_all, scores_all, mask_all):
        """
        Per-class score filtering and NMS for every test image.

        Class index 0 is skipped; for each remaining class the boxes are filtered by
        ``self.test_score_thresh``, sorted by score, and passed through ``self.nms_test``.

        Args:
            boxes_all: for each image, a tuple holding the boxes of every class.
            scores_all: for each image, the class scores.
            mask_all: for each image, the validity mask of the boxes.

        Returns:
            Tuple (all_bboxes, all_labels, all_masks), each concatenated over the test
            batch. Boxes have 5 columns (4 box values followed by the score); labels are
            the class index minus one.
        """
        all_bboxes = ()
        all_labels = ()
        all_masks = ()

        for i in range(self.test_batch_size):
            bboxes = boxes_all[i]
            scores = scores_all[i]
            masks = self.cast(mask_all[i], mstype.bool_)

            res_boxes_tuple = ()
            res_labels_tuple = ()
            res_masks_tuple = ()

            for j in range(self.num_classes - 1):
                k = j + 1
                _cls_scores = scores[::, k:k + 1:1]
                _bboxes = self.squeeze(bboxes[k])
                _mask_o = self.reshape(masks, (self.rpn_max_num, 1))

                cls_mask = self.greater(_cls_scores, self.test_score_thresh)
                _mask = self.logicand(_mask_o, cls_mask)

                _reg_mask = self.cast(self.tile(self.cast(_mask, mstype.int32), (1, 4)), mstype.bool_)

                _bboxes = self.select(_reg_mask, _bboxes, self.test_box_zeros)
                _cls_scores = self.select(_mask, _cls_scores, self.test_score_zeros)
                __cls_scores = self.squeeze(_cls_scores)
                scores_sorted, topk_inds = self.test_topk(__cls_scores, self.rpn_max_num)
                topk_inds = self.reshape(topk_inds, (self.rpn_max_num, 1))
                scores_sorted = self.reshape(scores_sorted, (self.rpn_max_num, 1))
                _bboxes_sorted = self.gather(_bboxes, topk_inds)
                _mask_sorted = self.gather(_mask, topk_inds)

                # The score column is tiled to 4 columns, appended to the boxes and the
                # result is sliced back to 5 columns (box values + one score).
                scores_sorted = self.tile(scores_sorted, (1, 4))
                cls_dets = self.concat_1((_bboxes_sorted, scores_sorted))
                cls_dets = P.Slice()(cls_dets, (0, 0), (self.rpn_max_num, 5))

                cls_dets, _index, _mask_nms = self.nms_test(cls_dets)
                _index = self.reshape(_index, (self.rpn_max_num, 1))
                _mask_nms = self.reshape(_mask_nms, (self.rpn_max_num, 1))

                _mask_n = self.gather(_mask_sorted, _index)

                _mask_n = self.logicand(_mask_n, _mask_nms)
                cls_labels = self.oneslike(_index) * j
                res_boxes_tuple += (cls_dets,)
                res_labels_tuple += (cls_labels,)
                res_masks_tuple += (_mask_n,)

            # Concatenate the per-class results in two slices split at concat_start.
            res_boxes_start = self.concat(res_boxes_tuple[:self.concat_start])
            res_labels_start = self.concat(res_labels_tuple[:self.concat_start])
            res_masks_start = self.concat(res_masks_tuple[:self.concat_start])

            res_boxes_end = self.concat(res_boxes_tuple[self.concat_start:self.concat_end])
            res_labels_end = self.concat(res_labels_tuple[self.concat_start:self.concat_end])
            res_masks_end = self.concat(res_masks_tuple[self.concat_start:self.concat_end])

            res_boxes = self.concat((res_boxes_start, res_boxes_end))
            res_labels = self.concat((res_labels_start, res_labels_end))
            res_masks = self.concat((res_masks_start, res_masks_end))

            reshape_size = (self.num_classes - 1) * self.rpn_max_num
            res_boxes = self.reshape(res_boxes, (1, reshape_size, 5))
            res_labels = self.reshape(res_labels, (1, reshape_size, 1))
            res_masks = self.reshape(res_masks, (1, reshape_size, 1))

            all_bboxes += (res_boxes,)
            all_labels += (res_labels,)
            all_masks += (res_masks,)

        all_bboxes = self.concat(all_bboxes)
        all_labels = self.concat(all_labels)
        all_masks = self.concat(all_masks)
        return all_bboxes, all_labels, all_masks

    def get_anchors(self, featmap_sizes):
        """Get anchors according to feature map sizes.

        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes, one entry per
                anchor generator.

        Returns:
            tuple[Tensor]: one anchor tensor per feature level, cast to ``self.dtype``.
        """
        num_levels = len(featmap_sizes)

        # since feature map sizes of all images are the same, we only compute
        # anchors for one time
        multi_level_anchors = ()
        for i in range(num_levels):
            anchors = self.anchor_generators[i].grid_anchors(
                featmap_sizes[i], self.anchor_strides[i])
            multi_level_anchors += (Tensor(anchors.astype(self.dtype)),)

        return multi_level_anchors
|
||||
|
||||
class FasterRcnn_Infer(nn.Cell):
    """
    Inference wrapper around ``Faster_Rcnn_Resnet``.

    The wrapped network is created from ``config`` and switched to
    non-training mode; ``construct`` only needs the image data and meta
    information.
    """
    def __init__(self, config):
        super(FasterRcnn_Infer, self).__init__()
        detector = Faster_Rcnn_Resnet(config)
        self.network = detector
        self.network.set_train(False)

    def construct(self, img_data, img_metas):
        # The three ground-truth inputs are passed as None on the inference path.
        return self.network(img_data, img_metas, None, None, None)
|
|
@ -1,244 +1,262 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Resnet50 backbone."""
|
||||
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.ops import functional as F
|
||||
|
||||
|
||||
def weight_init_ones(shape):
    """Return a float32 Tensor of the given shape with every element set to 0.01."""
    # Despite the function name, the fill value is 0.01, not 1.
    filled = np.full(shape, 0.01, dtype=np.float32)
    return Tensor(filled)
|
||||
|
||||
|
||||
def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
    """
    Build a bias-free square-kernel ``nn.Conv2d``.

    The weight is initialised by ``weight_init_ones`` with shape
    (out_channels, in_channels, kernel_size, kernel_size).
    """
    init_weight = weight_init_ones((out_channels, in_channels, kernel_size, kernel_size))
    conv_kwargs = dict(kernel_size=kernel_size,
                       stride=stride,
                       padding=padding,
                       pad_mode=pad_mode,
                       weight_init=init_weight,
                       has_bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_kwargs)
|
||||
|
||||
|
||||
def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True):
    """
    Build an ``nn.BatchNorm2d`` with explicit float32 initial values.

    gamma and the moving variance start at one, beta and the moving mean at zero.
    """
    def _ones():
        # A fresh Tensor per call so that no two parameters share an initialiser.
        return Tensor(np.ones(out_chls, dtype=np.float32))

    def _zeros():
        return Tensor(np.zeros(out_chls, dtype=np.float32))

    gamma_init = _ones()
    beta_init = _zeros()
    moving_mean_init = _zeros()
    moving_var_init = _ones()
    return nn.BatchNorm2d(out_chls,
                          momentum=momentum,
                          affine=affine,
                          gamma_init=gamma_init,
                          beta_init=beta_init,
                          moving_mean_init=moving_mean_init,
                          moving_var_init=moving_var_init,
                          use_batch_statistics=use_batch_statistics)
|
||||
|
||||
|
||||
class ResNetFea(nn.Cell):
    """
    ResNet feature extractor.

    A 7x7 stride-2 convolution, batch norm, ReLU and max pooling are followed by
    four stages of residual blocks. ``construct`` returns the outputs of all four
    stages.

    Args:
        block (Cell): Residual block class used to build every stage.
        layer_nums (list): Number of blocks in each of the four stages.
        in_channels (list): Input channel of each stage.
        out_channels (list): Output channel of each stage.
        weights_update (bool): Whether the stem convolution and the first stage are
            trainable. Stages two to four are always built with weights_update=True.

    Returns:
        Tuple of four tensors, the outputs of stage one to stage four.

    Raises:
        ValueError: If the three lists do not all have length 4.

    Examples:
        >>> ResNetFea(ResidualBlockUsing,
        >>>           [3, 4, 6, 3],
        >>>           [64, 256, 512, 1024],
        >>>           [256, 512, 1024, 2048],
        >>>           False)
    """
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 weights_update=False):
        super(ResNetFea, self).__init__()

        lengths_match = len(layer_nums) == len(in_channels) == len(out_channels) == 4
        if not lengths_match:
            raise ValueError("the length of "
                             "layer_num, inchannel, outchannel list must be 4!")

        # Batch norm layers are built in inference mode (no batch statistics).
        bn_training = False
        self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad')
        self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training)
        self.relu = P.ReLU()
        self.maxpool = P.MaxPool(kernel_size=3, strides=2, pad_mode="SAME")
        self.weights_update = weights_update

        if not self.weights_update:
            self.conv1.weight.requires_grad = False

        # Stage one keeps the resolution and follows the weights_update flag.
        self.layer1 = self._make_layer(block, layer_nums[0],
                                       in_channel=in_channels[0], out_channel=out_channels[0],
                                       stride=1, training=bn_training,
                                       weights_update=self.weights_update)
        # Stages two to four halve the resolution and are always trainable.
        self.layer2 = self._make_layer(block, layer_nums[1],
                                       in_channel=in_channels[1], out_channel=out_channels[1],
                                       stride=2, training=bn_training,
                                       weights_update=True)
        self.layer3 = self._make_layer(block, layer_nums[2],
                                       in_channel=in_channels[2], out_channel=out_channels[2],
                                       stride=2, training=bn_training,
                                       weights_update=True)
        self.layer4 = self._make_layer(block, layer_nums[3],
                                       in_channel=in_channels[3], out_channel=out_channels[3],
                                       stride=2, training=bn_training,
                                       weights_update=True)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False):
        """Build one stage: a first block that may down-sample, then layer_num - 1 plain blocks."""
        needs_down_sample = stride != 1 or in_channel != out_channel
        first_block = block(in_channel,
                            out_channel,
                            stride=stride,
                            down_sample=needs_down_sample,
                            training=training,
                            weights_update=weights_update)
        stage = [first_block]

        remaining = layer_num - 1
        while remaining > 0:
            stage.append(block(out_channel, out_channel, stride=1,
                               training=training, weights_update=weights_update))
            remaining -= 1

        return nn.SequentialCell(stage)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        # When the first stage is frozen, gradients are stopped at its output.
        if self.weights_update:
            identity = c2
        else:
            identity = F.stop_gradient(c2)
        c3 = self.layer2(identity)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        return identity, c3, c4, c5
|
||||
|
||||
|
||||
class ResidualBlockUsing(nn.Cell):
    """
    ResNet V1 residual block definition (1x1 -> 3x3 -> 1x1 bottleneck).

    Args:
        in_channels (int) - Input channel.
        out_channels (int) - Output channel.
        stride (int) - Stride of the 3x3 convolution and of the shortcut projection. Default: 1.
        down_sample (bool) - If to do the downsample in block. Default: False.
        momentum (float) - Momentum for batchnorm layer. Default: 0.1.
        training (bool) - Training flag, forwarded as `use_batch_statistics`. Default: False.
        weights_update (bool) - Weights update flag. When False the convolution weights get
            requires_grad=False and the batchnorm layers are built with affine=False. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        ResidualBlockUsing(3, 256, stride=2, down_sample=True)
    """
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False,
                 momentum=0.1,
                 training=False,
                 weights_update=False):
        super(ResidualBlockUsing, self).__init__()

        self.affine = weights_update

        def batch_norm(channels):
            """Create one batchnorm layer, switched to train mode when `training` is set."""
            norm = _BatchNorm2dInit(channels, momentum=momentum, affine=self.affine,
                                    use_batch_statistics=training)
            return norm.set_train() if training else norm

        # The bottleneck width is a quarter of the block's output width.
        mid_channels = out_channels // self.expansion

        self.conv1 = _conv(in_channels, mid_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = batch_norm(mid_channels)

        self.conv2 = _conv(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = batch_norm(mid_channels)

        self.conv3 = _conv(mid_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn3 = batch_norm(out_channels)

        if not weights_update:
            for frozen in (self.conv1, self.conv2, self.conv3):
                frozen.weight.requires_grad = False

        self.relu = P.ReLU()
        self.downsample = down_sample
        if self.downsample:
            # Projection shortcut so the identity branch matches the main branch's shape.
            self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)
            self.bn_down_sample = batch_norm(out_channels)
            if not weights_update:
                self.conv_down_sample.weight.requires_grad = False
        self.add = P.Add()

    def construct(self, x):
        """
        construct the ResNet V1 residual block

        Args:
            x: input feature data.

        Returns:
            Tensor, output tensor.
        """
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample:
            shortcut = self.bn_down_sample(self.conv_down_sample(x))

        return self.relu(self.add(out, shortcut))
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Resnet backbone."""
|
||||
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.ops import functional as F
|
||||
|
||||
|
||||
def weight_init_ones(shape):
    """Return a float32 Tensor of `shape` with every element set to 0.01.

    Note that, despite the function name, the fill value is 0.01 rather than 1.
    """
    filled = np.full(shape, 0.01)
    return Tensor(filled.astype(np.float32))
|
||||
|
||||
|
||||
def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
    """Create a bias-free nn.Conv2d with a square kernel and constant initial weights.

    The weight tensor has shape (out_channels, in_channels, kernel_size, kernel_size)
    and is produced by `weight_init_ones`.
    """
    kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
    conv_options = dict(kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                        pad_mode=pad_mode,
                        weight_init=weight_init_ones(kernel_shape),
                        has_bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_options)
|
||||
|
||||
|
||||
def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True):
    """Create an nn.BatchNorm2d with explicit float32 initial values.

    gamma and the moving variance start at one, beta and the moving mean start
    at zero; all other arguments are forwarded to nn.BatchNorm2d unchanged.
    """
    def ones():
        return Tensor(np.ones(out_chls).astype(np.float32))

    def zeros():
        return Tensor(np.zeros(out_chls).astype(np.float32))

    initial_values = dict(gamma_init=ones(),
                          beta_init=zeros(),
                          moving_mean_init=zeros(),
                          moving_var_init=ones())
    return nn.BatchNorm2d(out_chls, momentum=momentum, affine=affine,
                          use_batch_statistics=use_batch_statistics, **initial_values)
|
||||
|
||||
|
||||
class ResNetFea(nn.Cell):
    """
    ResNet architecture used as a feature extractor.

    Runs a 7x7 stem convolution, batchnorm, ReLU and max-pool, then four
    residual stages, and returns the output of every stage.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        weights_update (bool): Weight update flag. It is forwarded to the blocks of the
            first stage; when False the stem convolution weight gets requires_grad=False and
            the first stage's output is wrapped in stop_gradient. Stages two to four are
            always built with weights_update=True. Default: False.

    Returns:
        Tuple of four Tensors, the outputs of layer1, layer2, layer3 and layer4.

    Raises:
        ValueError: If layer_nums, in_channels and out_channels are not all of length 4.

    Examples:
        >>> ResNetFea(ResidualBlockUsing,
        >>>           [3, 4, 6, 3],
        >>>           [64, 256, 512, 1024],
        >>>           [256, 512, 1024, 2048],
        >>>           False)
    """
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 weights_update=False):
        super(ResNetFea, self).__init__()

        if any(len(spec) != 4 for spec in (layer_nums, in_channels, out_channels)):
            raise ValueError("the length of layer_num, inchannel, outchannel list must be 4!")

        # Batchnorm in the backbone is always built in inference mode.
        bn_training = False
        self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad')
        self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training)
        self.relu = P.ReLU()
        self.maxpool = P.MaxPool(kernel_size=3, strides=2, pad_mode="SAME")
        self.weights_update = weights_update

        if not self.weights_update:
            self.conv1.weight.requires_grad = False

        def stage(index, stride, update):
            """Build the residual stage described by position `index` of the three lists."""
            return self._make_layer(block,
                                    layer_nums[index],
                                    in_channel=in_channels[index],
                                    out_channel=out_channels[index],
                                    stride=stride,
                                    training=bn_training,
                                    weights_update=update)

        # Only the first stage follows the caller's flag; the deeper stages always update.
        self.layer1 = stage(0, 1, self.weights_update)
        self.layer2 = stage(1, 2, True)
        self.layer3 = stage(2, 2, True)
        self.layer4 = stage(3, 2, True)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False):
        """Make block layer: `layer_num` blocks, the first one carrying the stride."""
        # The first block needs a projection shortcut whenever it changes resolution or width.
        needs_projection = stride != 1 or in_channel != out_channel

        blocks = [block(in_channel,
                        out_channel,
                        stride=stride,
                        down_sample=needs_projection,
                        training=training,
                        weights_update=weights_update)]
        blocks.extend(block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update)
                      for _ in range(1, layer_num))

        return nn.SequentialCell(blocks)

    def construct(self, x):
        """
        construct the ResNet Network

        Args:
            x: input feature data.

        Returns:
            Tuple of four Tensors, the outputs of layer1 to layer4.
        """
        stem = self.relu(self.bn1(self.conv1(x)))
        c2 = self.layer1(self.maxpool(stem))

        # With a frozen first stage, block gradients from flowing back into it.
        if self.weights_update:
            identity = c2
        else:
            identity = F.stop_gradient(c2)

        c3 = self.layer2(identity)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        return identity, c3, c4, c5
|
||||
|
||||
|
||||
class ResidualBlockUsing(nn.Cell):
    """
    ResNet V1 residual block definition (1x1 -> 3x3 -> 1x1 bottleneck).

    Args:
        in_channels (int) - Input channel.
        out_channels (int) - Output channel.
        stride (int) - Stride of the 3x3 convolution and of the shortcut projection. Default: 1.
        down_sample (bool) - If to do the downsample in block. Default: False.
        momentum (float) - Momentum for batchnorm layer. Default: 0.1.
        training (bool) - Training flag, forwarded as `use_batch_statistics`. Default: False.
        weights_update (bool) - Weights update flag. When False the convolution weights get
            requires_grad=False and the batchnorm layers are built with affine=False. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        ResidualBlockUsing(3, 256, stride=2, down_sample=True)
    """
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False,
                 momentum=0.1,
                 training=False,
                 weights_update=False):
        super(ResidualBlockUsing, self).__init__()

        self.affine = weights_update

        def batch_norm(channels):
            """Create one batchnorm layer, switched to train mode when `training` is set."""
            norm = _BatchNorm2dInit(channels, momentum=momentum, affine=self.affine,
                                    use_batch_statistics=training)
            return norm.set_train() if training else norm

        # The bottleneck width is a quarter of the block's output width.
        mid_channels = out_channels // self.expansion

        self.conv1 = _conv(in_channels, mid_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = batch_norm(mid_channels)

        self.conv2 = _conv(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = batch_norm(mid_channels)

        self.conv3 = _conv(mid_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn3 = batch_norm(out_channels)

        if not weights_update:
            for frozen in (self.conv1, self.conv2, self.conv3):
                frozen.weight.requires_grad = False

        self.relu = P.ReLU()
        self.downsample = down_sample
        if self.downsample:
            # Projection shortcut so the identity branch matches the main branch's shape.
            self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)
            self.bn_down_sample = batch_norm(out_channels)
            if not weights_update:
                self.conv_down_sample.weight.requires_grad = False
        self.add = P.Add()

    def construct(self, x):
        """
        construct the ResNet V1 residual block

        Args:
            x: input feature data.

        Returns:
            Tensor, output tensor.
        """
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample:
            shortcut = self.bn_down_sample(self.conv_down_sample(x))

        return self.relu(self.add(out, shortcut))
|
|
@ -0,0 +1,264 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Resnet50v1.0 backbone."""
|
||||
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.ops import functional as F
|
||||
|
||||
|
||||
def weight_init_ones(shape):
    """Return a float32 Tensor of `shape` with every element set to 0.01.

    Note that, despite the function name, the fill value is 0.01 rather than 1.
    """
    filled = np.full(shape, 0.01)
    return Tensor(filled.astype(np.float32))
|
||||
|
||||
|
||||
def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
    """Create a bias-free nn.Conv2d with a square kernel and constant initial weights.

    The weight tensor has shape (out_channels, in_channels, kernel_size, kernel_size)
    and is produced by `weight_init_ones`.
    """
    kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
    conv_options = dict(kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                        pad_mode=pad_mode,
                        weight_init=weight_init_ones(kernel_shape),
                        has_bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_options)
|
||||
|
||||
|
||||
def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True):
    """Create an nn.BatchNorm2d with explicit float32 initial values.

    gamma and the moving variance start at one, beta and the moving mean start
    at zero; all other arguments are forwarded to nn.BatchNorm2d unchanged.
    """
    def ones():
        return Tensor(np.ones(out_chls).astype(np.float32))

    def zeros():
        return Tensor(np.zeros(out_chls).astype(np.float32))

    initial_values = dict(gamma_init=ones(),
                          beta_init=zeros(),
                          moving_mean_init=zeros(),
                          moving_var_init=ones())
    return nn.BatchNorm2d(out_chls, momentum=momentum, affine=affine,
                          use_batch_statistics=use_batch_statistics, **initial_values)
|
||||
|
||||
|
||||
class ResNetFea(nn.Cell):
    """
    ResNet architecture used as a feature extractor.

    Runs a 7x7 stem convolution, batchnorm, ReLU and max-pool, then four
    residual stages, and returns the output of every stage.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        weights_update (bool): Weight update flag. It is forwarded to the blocks of the
            first stage; when False the stem convolution weight gets requires_grad=False and
            the first stage's output is wrapped in stop_gradient. Stages two to four are
            always built with weights_update=True. Default: False.

    Returns:
        Tuple of four Tensors, the outputs of layer1, layer2, layer3 and layer4.

    Raises:
        ValueError: If layer_nums, in_channels and out_channels are not all of length 4.

    Examples:
        >>> ResNetFea(ResidualBlockUsing_V1,
        >>>           [3, 4, 6, 3],
        >>>           [64, 256, 512, 1024],
        >>>           [256, 512, 1024, 2048],
        >>>           False)
    """
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 weights_update=False):
        super(ResNetFea, self).__init__()

        if any(len(spec) != 4 for spec in (layer_nums, in_channels, out_channels)):
            raise ValueError("the length of layer_num, inchannel, outchannel list must be 4!")

        # Batchnorm in the backbone is always built in inference mode.
        bn_training = False
        self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad')
        self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training)
        self.relu = P.ReLU()
        self.maxpool = P.MaxPool(kernel_size=3, strides=2, pad_mode="SAME")
        self.weights_update = weights_update

        if not self.weights_update:
            self.conv1.weight.requires_grad = False

        def stage(index, stride, update):
            """Build the residual stage described by position `index` of the three lists."""
            return self._make_layer(block,
                                    layer_nums[index],
                                    in_channel=in_channels[index],
                                    out_channel=out_channels[index],
                                    stride=stride,
                                    training=bn_training,
                                    weights_update=update)

        # Only the first stage follows the caller's flag; the deeper stages always update.
        self.layer1 = stage(0, 1, self.weights_update)
        self.layer2 = stage(1, 2, True)
        self.layer3 = stage(2, 2, True)
        self.layer4 = stage(3, 2, True)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False):
        """Make block layer: `layer_num` blocks, the first one carrying the stride."""
        # The first block needs a projection shortcut whenever it changes resolution or width.
        needs_projection = stride != 1 or in_channel != out_channel

        blocks = [block(in_channel,
                        out_channel,
                        stride=stride,
                        down_sample=needs_projection,
                        training=training,
                        weights_update=weights_update)]
        blocks.extend(block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update)
                      for _ in range(1, layer_num))

        return nn.SequentialCell(blocks)

    def construct(self, x):
        """
        construct the ResNet Network

        Args:
            x: input feature data.

        Returns:
            Tuple of four Tensors, the outputs of layer1 to layer4.
        """
        stem = self.relu(self.bn1(self.conv1(x)))
        c2 = self.layer1(self.maxpool(stem))

        # With a frozen first stage, block gradients from flowing back into it.
        if self.weights_update:
            identity = c2
        else:
            identity = F.stop_gradient(c2)

        c3 = self.layer2(identity)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        return identity, c3, c4, c5
|
||||
|
||||
|
||||
class ResidualBlockUsing_V1(nn.Cell):
    """
    ResNet V1 residual block definition (1x1 -> 3x3 -> 1x1 bottleneck).

    In this variant the stride is applied by the first 1x1 convolution; the
    3x3 convolution always uses stride 1.

    Args:
        in_channels (int) - Input channel.
        out_channels (int) - Output channel.
        stride (int) - Stride of the first 1x1 convolution and of the shortcut projection. Default: 1.
        down_sample (bool) - If to do the downsample in block. Default: False.
        momentum (float) - Momentum for batchnorm layer. Default: 0.1.
        training (bool) - Training flag, forwarded as `use_batch_statistics`. Default: False.
        weights_update (bool) - Weights update flag. When False the convolution weights get
            requires_grad=False and the batchnorm layers are built with affine=False. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        ResidualBlockUsing_V1(3, 256, stride=2, down_sample=True)
    """
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False,
                 momentum=0.1,
                 training=False,
                 weights_update=False):
        super(ResidualBlockUsing_V1, self).__init__()

        self.affine = weights_update

        def batch_norm(channels):
            """Create one batchnorm layer, switched to train mode when `training` is set."""
            norm = _BatchNorm2dInit(channels, momentum=momentum, affine=self.affine,
                                    use_batch_statistics=training)
            return norm.set_train() if training else norm

        # The bottleneck width is a quarter of the block's output width.
        mid_channels = out_channels // self.expansion

        # Spatial reduction happens here, in the first 1x1 convolution.
        self.conv1 = _conv(in_channels, mid_channels, kernel_size=1, stride=stride, padding=0)
        self.bn1 = batch_norm(mid_channels)

        self.conv2 = _conv(mid_channels, mid_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = batch_norm(mid_channels)

        self.conv3 = _conv(mid_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn3 = batch_norm(out_channels)

        if not weights_update:
            for frozen in (self.conv1, self.conv2, self.conv3):
                frozen.weight.requires_grad = False

        self.relu = P.ReLU()
        self.downsample = down_sample
        if self.downsample:
            # Projection shortcut so the identity branch matches the main branch's shape.
            self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)
            self.bn_down_sample = batch_norm(out_channels)
            if not weights_update:
                self.conv_down_sample.weight.requires_grad = False
        self.add = P.Add()

    def construct(self, x):
        """
        construct the ResNet V1 residual block

        Args:
            x: input feature data.

        Returns:
            Tensor, output tensor.
        """
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample:
            shortcut = self.bn_down_sample(self.conv_down_sample(x))

        return self.relu(self.add(out, shortcut))
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -84,8 +84,8 @@ class SingleRoIExtractor(nn.Cell):
|
|||
self.out_channels = out_channels
|
||||
self.featmap_strides = featmap_strides
|
||||
self.num_levels = len(self.featmap_strides)
|
||||
self.out_size = roi_layer['out_size']
|
||||
self.sample_num = roi_layer['sample_num']
|
||||
self.out_size = config.roi_layer.out_size
|
||||
self.sample_num = config.roi_layer.sample_num
|
||||
self.roi_layers = self.build_roi_layers(self.featmap_strides)
|
||||
self.roi_layers = L.CellList(self.roi_layers)
|
||||
|
||||
|
|
|
@ -11,147 +11,51 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
"""
|
||||
network config setting, will be used in train.py and eval.py
|
||||
"""
|
||||
from easydict import EasyDict as ed
|
||||
# ============================================================================
|
||||
|
||||
config = ed({
|
||||
"img_width": 1280,
|
||||
"img_height": 768,
|
||||
"keep_ratio": True,
|
||||
"flip_ratio": 0.5,
|
||||
"expand_ratio": 1.0,
|
||||
"""Parse arguments"""
|
||||
|
||||
# anchor
|
||||
"feature_shapes": [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)],
|
||||
"anchor_scales": [8],
|
||||
"anchor_ratios": [0.5, 1.0, 2.0],
|
||||
"anchor_strides": [4, 8, 16, 32, 64],
|
||||
"num_anchors": 3,
|
||||
from pprint import pprint, pformat
|
||||
import yaml
|
||||
|
||||
# resnet
|
||||
"resnet_block": [3, 4, 6, 3],
|
||||
"resnet_in_channels": [64, 256, 512, 1024],
|
||||
"resnet_out_channels": [256, 512, 1024, 2048],
|
||||
class Config:
    """
    Configuration namespace. Convert dictionary to members.

    Every key of the dictionary becomes an attribute. Nested dictionaries are
    converted to nested Config objects; dictionaries that sit directly inside
    a list or tuple are converted as well, and the sequence itself is stored
    as a list.

    Args:
        cfg_dict (dict): Mapping of option names to values.
    """
    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                # Only one level of the sequence is inspected; deeper nesting is kept as is.
                setattr(self, key, [Config(item) if isinstance(item, dict) else item for item in value])
            else:
                setattr(self, key, Config(value) if isinstance(value, dict) else value)

    def __str__(self):
        """Return the pretty-printed attribute dictionary."""
        return pformat(self.__dict__)

    def __repr__(self):
        """Return the same text as __str__ so nested configs print readably."""
        return self.__str__()
|
||||
|
||||
# bbox_assign_sampler
|
||||
"neg_iou_thr": 0.3,
|
||||
"pos_iou_thr": 0.7,
|
||||
"min_pos_iou": 0.3,
|
||||
"num_bboxes": 245520,
|
||||
"num_gts": 128,
|
||||
"num_expected_neg": 256,
|
||||
"num_expected_pos": 128,
|
||||
def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        The object built from the single YAML document in the file.

    Raises:
        ValueError: If the content cannot be parsed as YAML, or if the file
            does not contain exactly one YAML document.
    """
    with open(yaml_path, 'r') as fin:
        content = fin.read()

    try:
        # NOTE(review): FullLoader is kept for compatibility with existing config files;
        # it should only ever be pointed at trusted configuration.
        documents = list(yaml.load_all(content, Loader=yaml.FullLoader))
    except Exception as err:
        # Keep the ValueError contract, but let KeyboardInterrupt/SystemExit propagate
        # and preserve the underlying cause for debugging.
        raise ValueError("Failed to parse yaml") from err

    if len(documents) != 1:
        raise ValueError("Failed to parse yaml: expected exactly one document in {}, found {}"
                         .format(yaml_path, len(documents)))
    return documents[0]
|
||||
|
||||
# roi_align
|
||||
"roi_layer": dict(type='RoIAlign', out_size=7, sample_num=2),
|
||||
"roi_align_out_channels": 256,
|
||||
"roi_align_featmap_strides": [4, 8, 16, 32],
|
||||
"roi_align_finest_scale": 56,
|
||||
"roi_sample_num": 640,
|
||||
|
||||
# bbox_assign_sampler_stage2
|
||||
"neg_iou_thr_stage2": 0.5,
|
||||
"pos_iou_thr_stage2": 0.5,
|
||||
"min_pos_iou_stage2": 0.5,
|
||||
"num_bboxes_stage2": 2000,
|
||||
"num_expected_pos_stage2": 128,
|
||||
"num_expected_neg_stage2": 512,
|
||||
"num_expected_total_stage2": 512,
|
||||
|
||||
# rcnn
|
||||
"rcnn_num_layers": 2,
|
||||
"rcnn_in_channels": 256,
|
||||
"rcnn_fc_out_channels": 1024,
|
||||
"rcnn_loss_cls_weight": 1,
|
||||
"rcnn_loss_reg_weight": 1,
|
||||
"rcnn_target_means": [0., 0., 0., 0.],
|
||||
"rcnn_target_stds": [0.1, 0.1, 0.2, 0.2],
|
||||
|
||||
# train proposal
|
||||
"rpn_proposal_nms_across_levels": False,
|
||||
"rpn_proposal_nms_pre": 2000,
|
||||
"rpn_proposal_nms_post": 2000,
|
||||
"rpn_proposal_max_num": 2000,
|
||||
"rpn_proposal_nms_thr": 0.7,
|
||||
"rpn_proposal_min_bbox_size": 0,
|
||||
|
||||
# test proposal
|
||||
"rpn_nms_across_levels": False,
|
||||
"rpn_nms_pre": 1000,
|
||||
"rpn_nms_post": 1000,
|
||||
"rpn_max_num": 1000,
|
||||
"rpn_nms_thr": 0.7,
|
||||
"rpn_min_bbox_min_size": 0,
|
||||
"test_score_thr": 0.05,
|
||||
"test_iou_thr": 0.5,
|
||||
"test_max_per_img": 100,
|
||||
"test_batch_size": 2,
|
||||
|
||||
"rpn_head_use_sigmoid": True,
|
||||
"rpn_head_weight": 1.0,
|
||||
|
||||
# LR
|
||||
"base_lr": 0.04,
|
||||
"warmup_step": 500,
|
||||
"warmup_ratio": 1/16.0,
|
||||
"sgd_step": [8, 11],
|
||||
"sgd_momentum": 0.9,
|
||||
|
||||
# train
|
||||
"batch_size": 2,
|
||||
"loss_scale": 256,
|
||||
"momentum": 0.91,
|
||||
"weight_decay": 1e-5,
|
||||
"epoch_size": 12,
|
||||
"save_checkpoint": True,
|
||||
"save_checkpoint_epochs": 1,
|
||||
"keep_checkpoint_max": 10,
|
||||
"save_checkpoint_path": "./",
|
||||
|
||||
# Number of threads used to process the dataset in parallel
|
||||
"num_parallel_workers": 8,
|
||||
# Parallelize Python operations with multiple worker processes
|
||||
"python_multiprocessing": True,
|
||||
"mindrecord_dir": "../MindRecord_COCO_TRAIN",
|
||||
"coco_root": "./cocodataset/",
|
||||
"train_data_type": "train2017",
|
||||
"val_data_type": "val2017",
|
||||
"instance_set": "annotations/instances_{}.json",
|
||||
"coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
|
||||
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
|
||||
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
|
||||
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
|
||||
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
|
||||
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
|
||||
'kite', 'baseball bat', 'baseball glove', 'skateboard',
|
||||
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
|
||||
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
|
||||
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
|
||||
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
|
||||
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
|
||||
'refrigerator', 'book', 'clock', 'vase', 'scissors',
|
||||
'teddy bear', 'hair drier', 'toothbrush'),
|
||||
"num_classes": 81
|
||||
})
|
||||
def get_config(config_path):
    """
    Get Config according to the yaml file.

    The parsed values are pretty-printed to stdout before being wrapped.

    Args:
        config_path: Path to the yaml config.

    Returns:
        Config, namespace built from the parsed yaml content.
    """
    parsed = parse_yaml(config_path)
    pprint(parsed)
    namespace = Config(parsed)
    return namespace
|
||||
|
|
|
@ -0,0 +1,156 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
|
||||
img_width: 1280
|
||||
img_height: 768
|
||||
keep_ratio: True
|
||||
flip_ratio: 0.5
|
||||
expand_ratio: 1.0
|
||||
|
||||
# anchor
|
||||
feature_shapes:
|
||||
- [192, 320]
|
||||
- [96, 160]
|
||||
- [48, 80]
|
||||
- [24, 40]
|
||||
- [12, 20]
|
||||
anchor_scales: [8]
|
||||
anchor_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_strides: [4, 8, 16, 32, 64]
|
||||
num_anchors: 3
|
||||
|
||||
# resnet
|
||||
resnet_block: [3, 4, 23, 3]
|
||||
resnet_in_channels: [64, 256, 512, 1024]
|
||||
resnet_out_channels: [256, 512, 1024, 2048]
|
||||
|
||||
# fpn
|
||||
fpn_in_channels: [256, 512, 1024, 2048]
|
||||
fpn_out_channels: 256
|
||||
fpn_num_outs: 5
|
||||
|
||||
# rpn
|
||||
rpn_in_channels: 256
|
||||
rpn_feat_channels: 256
|
||||
rpn_loss_cls_weight: 1.0
|
||||
rpn_loss_reg_weight: 1.0
|
||||
rpn_cls_out_channels: 1
|
||||
rpn_target_means: [0., 0., 0., 0.]
|
||||
rpn_target_stds: [1.0, 1.0, 1.0, 1.0]
|
||||
|
||||
# bbox_assign_sampler
|
||||
neg_iou_thr: 0.3
|
||||
pos_iou_thr: 0.7
|
||||
min_pos_iou: 0.3
|
||||
num_bboxes: 245520
|
||||
num_gts: 128
|
||||
num_expected_neg: 256
|
||||
num_expected_pos: 128
|
||||
|
||||
# proposal
|
||||
activate_num_classes: 2
|
||||
use_sigmoid_cls: True
|
||||
|
||||
# roi_align
|
||||
roi_layer: {type: 'RoIAlign', out_size: 7, sample_num: 2}
|
||||
roi_align_out_channels: 256
|
||||
roi_align_featmap_strides: [4, 8, 16, 32]
|
||||
roi_align_finest_scale: 56
|
||||
roi_sample_num: 640
|
||||
|
||||
# bbox_assign_sampler_stage2
|
||||
neg_iou_thr_stage2: 0.5
|
||||
pos_iou_thr_stage2: 0.5
|
||||
min_pos_iou_stage2: 0.5
|
||||
num_bboxes_stage2: 2000
|
||||
num_expected_pos_stage2: 128
|
||||
num_expected_neg_stage2: 512
|
||||
num_expected_total_stage2: 512
|
||||
|
||||
# rcnn
|
||||
rcnn_num_layers: 2
|
||||
rcnn_in_channels: 256
|
||||
rcnn_fc_out_channels: 1024
|
||||
rcnn_loss_cls_weight: 1
|
||||
rcnn_loss_reg_weight: 1
|
||||
rcnn_target_means: [0., 0., 0., 0.]
|
||||
rcnn_target_stds: [0.1, 0.1, 0.2, 0.2]
|
||||
|
||||
# train proposal
|
||||
rpn_proposal_nms_across_levels: False
|
||||
rpn_proposal_nms_pre: 2000
|
||||
rpn_proposal_nms_post: 2000
|
||||
rpn_proposal_max_num: 2000
|
||||
rpn_proposal_nms_thr: 0.7
|
||||
rpn_proposal_min_bbox_size: 0
|
||||
|
||||
# test proposal
|
||||
rpn_nms_across_levels: False
|
||||
rpn_nms_pre: 1000
|
||||
rpn_nms_post: 1000
|
||||
rpn_max_num: 1000
|
||||
rpn_nms_thr: 0.7
|
||||
rpn_min_bbox_min_size: 0
|
||||
test_score_thr: 0.05
|
||||
test_iou_thr: 0.5
|
||||
test_max_per_img: 100
|
||||
test_batch_size: 2
|
||||
|
||||
rpn_head_use_sigmoid: True
|
||||
rpn_head_weight: 1.0
|
||||
|
||||
# LR
|
||||
base_lr: 0.02
|
||||
warmup_step: 500
|
||||
warmup_ratio: 0.0625
|
||||
sgd_step: [8, 11]
|
||||
sgd_momentum: 0.9
|
||||
|
||||
# train
|
||||
batch_size: 2
|
||||
loss_scale: 256
|
||||
momentum: 0.91
|
||||
weight_decay: 0.00001
|
||||
epoch_size: 20
|
||||
save_checkpoint: True
|
||||
save_checkpoint_epochs: 1
|
||||
keep_checkpoint_max: 20
|
||||
save_checkpoint_path: "./"
|
||||
|
||||
# Number of threads used to process the dataset in parallel
|
||||
num_parallel_workers: 8
|
||||
# Parallelize Python operations with multiple worker processes
|
||||
python_multiprocessing: True
|
||||
mindrecord_dir: "/disk2/dataset/COCO2017/MindRecord_COCO_TRAIN"
|
||||
coco_root: "/disk2/dataset/COCO2017"
|
||||
train_data_type: "train2017"
|
||||
val_data_type: "val2017"
|
||||
instance_set: "annotations/instances_{}.json"
|
||||
coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
|
||||
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
|
||||
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
|
||||
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
|
||||
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
|
||||
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
|
||||
'kite', 'baseball bat', 'baseball glove', 'skateboard',
|
||||
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
|
||||
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
|
||||
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
|
||||
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
|
||||
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
|
||||
'refrigerator', 'book', 'clock', 'vase', 'scissors',
|
||||
'teddy bear', 'hair drier', 'toothbrush']
|
||||
num_classes: 81
|
|
@ -0,0 +1,156 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
|
||||
img_width: 1280
|
||||
img_height: 768
|
||||
keep_ratio: True
|
||||
flip_ratio: 0.5
|
||||
expand_ratio: 1.0
|
||||
|
||||
# anchor
|
||||
feature_shapes:
|
||||
- [192, 320]
|
||||
- [96, 160]
|
||||
- [48, 80]
|
||||
- [24, 40]
|
||||
- [12, 20]
|
||||
anchor_scales: [8]
|
||||
anchor_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_strides: [4, 8, 16, 32, 64]
|
||||
num_anchors: 3
|
||||
|
||||
# resnet
|
||||
resnet_block: [3, 8, 36, 3]
|
||||
resnet_in_channels: [64, 256, 512, 1024]
|
||||
resnet_out_channels: [256, 512, 1024, 2048]
|
||||
|
||||
# fpn
|
||||
fpn_in_channels: [256, 512, 1024, 2048]
|
||||
fpn_out_channels: 256
|
||||
fpn_num_outs: 5
|
||||
|
||||
# rpn
|
||||
rpn_in_channels: 256
|
||||
rpn_feat_channels: 256
|
||||
rpn_loss_cls_weight: 1.0
|
||||
rpn_loss_reg_weight: 1.0
|
||||
rpn_cls_out_channels: 1
|
||||
rpn_target_means: [0., 0., 0., 0.]
|
||||
rpn_target_stds: [1.0, 1.0, 1.0, 1.0]
|
||||
|
||||
# bbox_assign_sampler
|
||||
neg_iou_thr: 0.3
|
||||
pos_iou_thr: 0.7
|
||||
min_pos_iou: 0.3
|
||||
num_bboxes: 245520
|
||||
num_gts: 128
|
||||
num_expected_neg: 256
|
||||
num_expected_pos: 128
|
||||
|
||||
# proposal
|
||||
activate_num_classes: 2
|
||||
use_sigmoid_cls: True
|
||||
|
||||
# roi_align
|
||||
roi_layer: {type: 'RoIAlign', out_size: 7, sample_num: 2}
|
||||
roi_align_out_channels: 256
|
||||
roi_align_featmap_strides: [4, 8, 16, 32]
|
||||
roi_align_finest_scale: 56
|
||||
roi_sample_num: 640
|
||||
|
||||
# bbox_assign_sampler_stage2
|
||||
neg_iou_thr_stage2: 0.5
|
||||
pos_iou_thr_stage2: 0.5
|
||||
min_pos_iou_stage2: 0.5
|
||||
num_bboxes_stage2: 2000
|
||||
num_expected_pos_stage2: 128
|
||||
num_expected_neg_stage2: 512
|
||||
num_expected_total_stage2: 512
|
||||
|
||||
# rcnn
|
||||
rcnn_num_layers: 2
|
||||
rcnn_in_channels: 256
|
||||
rcnn_fc_out_channels: 1024
|
||||
rcnn_loss_cls_weight: 1
|
||||
rcnn_loss_reg_weight: 1
|
||||
rcnn_target_means: [0., 0., 0., 0.]
|
||||
rcnn_target_stds: [0.1, 0.1, 0.2, 0.2]
|
||||
|
||||
# train proposal
|
||||
rpn_proposal_nms_across_levels: False
|
||||
rpn_proposal_nms_pre: 2000
|
||||
rpn_proposal_nms_post: 2000
|
||||
rpn_proposal_max_num: 2000
|
||||
rpn_proposal_nms_thr: 0.7
|
||||
rpn_proposal_min_bbox_size: 0
|
||||
|
||||
# test proposal
|
||||
rpn_nms_across_levels: False
|
||||
rpn_nms_pre: 1000
|
||||
rpn_nms_post: 1000
|
||||
rpn_max_num: 1000
|
||||
rpn_nms_thr: 0.7
|
||||
rpn_min_bbox_min_size: 0
|
||||
test_score_thr: 0.05
|
||||
test_iou_thr: 0.5
|
||||
test_max_per_img: 100
|
||||
test_batch_size: 2
|
||||
|
||||
rpn_head_use_sigmoid: True
|
||||
rpn_head_weight: 1.0
|
||||
|
||||
# LR
|
||||
base_lr: 0.02
|
||||
warmup_step: 500
|
||||
warmup_ratio: 0.0625
|
||||
sgd_step: [8, 11]
|
||||
sgd_momentum: 0.9
|
||||
|
||||
# train
|
||||
batch_size: 2
|
||||
loss_scale: 256
|
||||
momentum: 0.91
|
||||
weight_decay: 0.00001
|
||||
epoch_size: 20
|
||||
save_checkpoint: True
|
||||
save_checkpoint_epochs: 1
|
||||
keep_checkpoint_max: 20
|
||||
save_checkpoint_path: "./"
|
||||
|
||||
# Number of threads used to process the dataset in parallel
|
||||
num_parallel_workers: 8
|
||||
# Parallelize Python operations with multiple worker processes
|
||||
python_multiprocessing: True
|
||||
mindrecord_dir: "/disk2/dataset/COCO2017/MindRecord_COCO_TRAIN"
|
||||
coco_root: "/disk2/dataset/COCO2017"
|
||||
train_data_type: "train2017"
|
||||
val_data_type: "val2017"
|
||||
instance_set: "annotations/instances_{}.json"
|
||||
coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
|
||||
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
|
||||
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
|
||||
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
|
||||
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
|
||||
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
|
||||
'kite', 'baseball bat', 'baseball glove', 'skateboard',
|
||||
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
|
||||
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
|
||||
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
|
||||
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
|
||||
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
|
||||
'refrigerator', 'book', 'clock', 'vase', 'scissors',
|
||||
'teddy bear', 'hair drier', 'toothbrush']
|
||||
num_classes: 81
|
|
@ -0,0 +1,156 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
|
||||
img_width: 1280
|
||||
img_height: 768
|
||||
keep_ratio: True
|
||||
flip_ratio: 0.5
|
||||
expand_ratio: 1.0
|
||||
|
||||
# anchor
|
||||
feature_shapes:
|
||||
- [192, 320]
|
||||
- [96, 160]
|
||||
- [48, 80]
|
||||
- [24, 40]
|
||||
- [12, 20]
|
||||
anchor_scales: [8]
|
||||
anchor_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_strides: [4, 8, 16, 32, 64]
|
||||
num_anchors: 3
|
||||
|
||||
# resnet
|
||||
resnet_block: [3, 4, 6, 3]
|
||||
resnet_in_channels: [64, 256, 512, 1024]
|
||||
resnet_out_channels: [256, 512, 1024, 2048]
|
||||
|
||||
# fpn
|
||||
fpn_in_channels: [256, 512, 1024, 2048]
|
||||
fpn_out_channels: 256
|
||||
fpn_num_outs: 5
|
||||
|
||||
# rpn
|
||||
rpn_in_channels: 256
|
||||
rpn_feat_channels: 256
|
||||
rpn_loss_cls_weight: 1.0
|
||||
rpn_loss_reg_weight: 1.0
|
||||
rpn_cls_out_channels: 1
|
||||
rpn_target_means: [0., 0., 0., 0.]
|
||||
rpn_target_stds: [1.0, 1.0, 1.0, 1.0]
|
||||
|
||||
# bbox_assign_sampler
|
||||
neg_iou_thr: 0.3
|
||||
pos_iou_thr: 0.7
|
||||
min_pos_iou: 0.3
|
||||
num_bboxes: 245520
|
||||
num_gts: 128
|
||||
num_expected_neg: 256
|
||||
num_expected_pos: 128
|
||||
|
||||
# proposal
|
||||
activate_num_classes: 2
|
||||
use_sigmoid_cls: True
|
||||
|
||||
# roi_align
|
||||
roi_layer: {type: 'RoIAlign', out_size: 7, sample_num: 2}
|
||||
roi_align_out_channels: 256
|
||||
roi_align_featmap_strides: [4, 8, 16, 32]
|
||||
roi_align_finest_scale: 56
|
||||
roi_sample_num: 640
|
||||
|
||||
# bbox_assign_sampler_stage2
|
||||
neg_iou_thr_stage2: 0.5
|
||||
pos_iou_thr_stage2: 0.5
|
||||
min_pos_iou_stage2: 0.5
|
||||
num_bboxes_stage2: 2000
|
||||
num_expected_pos_stage2: 128
|
||||
num_expected_neg_stage2: 512
|
||||
num_expected_total_stage2: 512
|
||||
|
||||
# rcnn
|
||||
rcnn_num_layers: 2
|
||||
rcnn_in_channels: 256
|
||||
rcnn_fc_out_channels: 1024
|
||||
rcnn_loss_cls_weight: 1
|
||||
rcnn_loss_reg_weight: 1
|
||||
rcnn_target_means: [0., 0., 0., 0.]
|
||||
rcnn_target_stds: [0.1, 0.1, 0.2, 0.2]
|
||||
|
||||
# train proposal
|
||||
rpn_proposal_nms_across_levels: False
|
||||
rpn_proposal_nms_pre: 2000
|
||||
rpn_proposal_nms_post: 2000
|
||||
rpn_proposal_max_num: 2000
|
||||
rpn_proposal_nms_thr: 0.7
|
||||
rpn_proposal_min_bbox_size: 0
|
||||
|
||||
# test proposal
|
||||
rpn_nms_across_levels: False
|
||||
rpn_nms_pre: 1000
|
||||
rpn_nms_post: 1000
|
||||
rpn_max_num: 1000
|
||||
rpn_nms_thr: 0.7
|
||||
rpn_min_bbox_min_size: 0
|
||||
test_score_thr: 0.05
|
||||
test_iou_thr: 0.5
|
||||
test_max_per_img: 100
|
||||
test_batch_size: 2
|
||||
|
||||
rpn_head_use_sigmoid: True
|
||||
rpn_head_weight: 1.0
|
||||
|
||||
# LR
|
||||
base_lr: 0.04
|
||||
warmup_step: 500
|
||||
warmup_ratio: 0.0625
|
||||
sgd_step: [8, 11]
|
||||
sgd_momentum: 0.9
|
||||
|
||||
# train
|
||||
batch_size: 2
|
||||
loss_scale: 256
|
||||
momentum: 0.91
|
||||
weight_decay: 0.00001
|
||||
epoch_size: 20
|
||||
save_checkpoint: True
|
||||
save_checkpoint_epochs: 1
|
||||
keep_checkpoint_max: 20
|
||||
save_checkpoint_path: "./"
|
||||
|
||||
# Number of threads used to process the dataset in parallel
|
||||
num_parallel_workers: 8
|
||||
# Parallelize Python operations with multiple worker processes
|
||||
python_multiprocessing: True
|
||||
mindrecord_dir: "/disk2/dataset/COCO2017/MindRecord_COCO_TRAIN"
|
||||
coco_root: "/disk2/dataset/COCO2017"
|
||||
train_data_type: "train2017"
|
||||
val_data_type: "val2017"
|
||||
instance_set: "annotations/instances_{}.json"
|
||||
coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
|
||||
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
|
||||
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
|
||||
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
|
||||
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
|
||||
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
|
||||
'kite', 'baseball bat', 'baseball glove', 'skateboard',
|
||||
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
|
||||
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
|
||||
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
|
||||
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
|
||||
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
|
||||
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
|
||||
'refrigerator', 'book', 'clock', 'vase', 'scissors',
|
||||
'teddy bear', 'hair drier', 'toothbrush']
|
||||
num_classes: 81
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
"""
|
||||
convert resnet50 pretrain model to faster_rcnn backbone pretrain model
|
||||
convert resnet pretrain model to faster_rcnn backbone pretrain model
|
||||
"""
|
||||
import argparse
|
||||
from mindspore.train.serialization import load_checkpoint, save_checkpoint
|
||||
|
@ -26,10 +26,10 @@ parser.add_argument('--ckpt_file', type=str, default='', help='ckpt file path')
|
|||
args_opt = parser.parse_args()
|
||||
def load_weights(model_path, use_fp16_weight):
|
||||
"""
|
||||
load resnet50 pretrain checkpoint file.
|
||||
load resnet pretrain checkpoint file.
|
||||
|
||||
Args:
|
||||
model_path (str): resnet50 pretrain checkpoint file .
|
||||
model_path (str): resnet pretrain checkpoint file.
|
||||
use_fp16_weight (bool): whether to save the weights as float16.
|
||||
|
||||
Returns:
|
||||
|
@ -61,4 +61,4 @@ def load_weights(model_path, use_fp16_weight):
|
|||
|
||||
if __name__ == "__main__":
|
||||
parameter_list = load_weights(args_opt.ckpt_file, use_fp16_weight=False)
|
||||
save_checkpoint(parameter_list, "resnet50_backbone.ckpt")
|
||||
save_checkpoint(parameter_list, "resnet_backbone.ckpt")
|
||||
|
|
|
@ -25,8 +25,6 @@ import mmcv
|
|||
import mindspore.dataset as de
|
||||
import mindspore.dataset.vision.c_transforms as C
|
||||
from mindspore.mindrecord import FileWriter
|
||||
from src.config import config
|
||||
|
||||
|
||||
def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
|
||||
"""Calculate the ious between each bbox of bboxes1 and bboxes2.
|
||||
|
@ -160,7 +158,7 @@ class Expand:
|
|||
return img, boxes, labels
|
||||
|
||||
|
||||
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
||||
def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
|
||||
"""rescale operation for image"""
|
||||
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
|
||||
if img_data.shape[0] > config.img_height:
|
||||
|
@ -183,7 +181,7 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
|||
|
||||
return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)
|
||||
|
||||
def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
|
||||
def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config):
|
||||
"""rescale operation for image of eval"""
|
||||
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
|
||||
if img_data.shape[0] > config.img_height:
|
||||
|
@ -203,7 +201,7 @@ def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
|
|||
return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)
|
||||
|
||||
|
||||
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
||||
def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
|
||||
"""resize operation for image"""
|
||||
img_data = img
|
||||
img_data, w_scale, h_scale = mmcv.imresize(
|
||||
|
@ -221,7 +219,7 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
|||
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
|
||||
|
||||
|
||||
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
|
||||
def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config):
|
||||
"""resize operation for image of eval"""
|
||||
img_data = img
|
||||
img_data, w_scale, h_scale = mmcv.imresize(
|
||||
|
@ -239,7 +237,7 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
|
|||
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
|
||||
|
||||
|
||||
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
||||
def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
|
||||
"""impad operation for image"""
|
||||
img_data = mmcv.impad(img, (config.img_height, config.img_width))
|
||||
img_data = img_data.astype(np.float32)
|
||||
|
@ -294,16 +292,16 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
|
|||
return (img, img_shape, gt_bboxes, gt_label, gt_num)
|
||||
|
||||
|
||||
def preprocess_fn(image, box, is_training):
|
||||
def preprocess_fn(image, box, is_training, config):
|
||||
"""Preprocess function for dataset."""
|
||||
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
|
||||
image_shape = image_shape[:2]
|
||||
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
|
||||
|
||||
if config.keep_ratio:
|
||||
input_data = rescale_column_test(*input_data)
|
||||
input_data = rescale_column_test(*input_data, config=config)
|
||||
else:
|
||||
input_data = resize_column_test(*input_data)
|
||||
input_data = resize_column_test(*input_data, config=config)
|
||||
input_data = imnormalize_column(*input_data)
|
||||
|
||||
output_data = transpose_column(*input_data)
|
||||
|
@ -336,9 +334,9 @@ def preprocess_fn(image, box, is_training):
|
|||
if expand:
|
||||
input_data = expand_column(*input_data)
|
||||
if config.keep_ratio:
|
||||
input_data = rescale_column(*input_data)
|
||||
input_data = rescale_column(*input_data, config=config)
|
||||
else:
|
||||
input_data = resize_column(*input_data)
|
||||
input_data = resize_column(*input_data, config=config)
|
||||
input_data = imnormalize_column(*input_data)
|
||||
if flip:
|
||||
input_data = flip_column(*input_data)
|
||||
|
@ -349,7 +347,7 @@ def preprocess_fn(image, box, is_training):
|
|||
return _data_aug(image, box, is_training)
|
||||
|
||||
|
||||
def create_coco_label(is_training):
|
||||
def create_coco_label(is_training, config):
|
||||
"""Get image path and annotation from COCO."""
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
|
@ -431,13 +429,13 @@ def filter_valid_data(image_dir, anno_path):
|
|||
return image_files, image_anno_dict
|
||||
|
||||
|
||||
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
|
||||
def data_to_mindrecord_byte_image(config, dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
|
||||
"""Create MindRecord file."""
|
||||
mindrecord_dir = config.mindrecord_dir
|
||||
mindrecord_path = os.path.join(mindrecord_dir, prefix)
|
||||
writer = FileWriter(mindrecord_path, file_num)
|
||||
if dataset == "coco":
|
||||
image_files, image_anno_dict = create_coco_label(is_training)
|
||||
image_files, image_anno_dict = create_coco_label(is_training, config=config)
|
||||
else:
|
||||
image_files, image_anno_dict = filter_valid_data(config.IMAGE_DIR, config.ANNO_PATH)
|
||||
|
||||
|
@ -456,7 +454,7 @@ def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fast
|
|||
writer.commit()
|
||||
|
||||
|
||||
def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id=0, is_training=True,
|
||||
def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num=1, rank_id=0, is_training=True,
|
||||
num_parallel_workers=8, python_multiprocessing=False):
|
||||
"""Create FasterRcnn dataset with MindDataset."""
|
||||
cv2.setNumThreads(0)
|
||||
|
@ -465,7 +463,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_
|
|||
num_parallel_workers=4, shuffle=is_training)
|
||||
decode = C.Decode()
|
||||
ds = ds.map(input_columns=["image"], operations=decode)
|
||||
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))
|
||||
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training, config=config))
|
||||
|
||||
if is_training:
|
||||
ds = ds.map(input_columns=["image", "annotation"],
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
|
@ -31,11 +31,10 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
|||
from mindspore.nn import SGD
|
||||
from mindspore.common import set_seed
|
||||
|
||||
from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50
|
||||
from src.network_define import LossCallBack, WithLossCell, TrainOneStepCell, LossNet
|
||||
from src.config import config
|
||||
from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset
|
||||
from src.lr_schedule import dynamic_lr
|
||||
import src.config as cfg
|
||||
|
||||
set_seed(1)
|
||||
|
||||
|
@ -48,10 +47,25 @@ parser.add_argument("--device_target", type=str, default="Ascend",
|
|||
parser.add_argument("--device_id", type=int, default=0, help="Device id, default: 0.")
|
||||
parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default: 1.")
|
||||
parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default: 0.")
|
||||
parser.add_argument("--backbone", type=str, required=True, \
|
||||
help="backbone network name, options:resnet_v1_50, resnet_v1.5_50, resnet_v1_101, resnet_v1_152")
|
||||
args_opt = parser.parse_args()
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=args_opt.device_id)
|
||||
|
||||
if args_opt.backbone in ("resnet_v1.5_50", "resnet_v1_101", "resnet_v1_152"):
|
||||
from src.FasterRcnn.faster_rcnn_resnet import Faster_Rcnn_Resnet
|
||||
if args_opt.backbone == "resnet_v1.5_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_101":
|
||||
config = cfg.get_config("./src/config_101.yaml")
|
||||
elif args_opt.backbone == "resnet_v1_152":
|
||||
config = cfg.get_config("./src/config_152.yaml")
|
||||
|
||||
elif args_opt.backbone == "resnet_v1_50":
|
||||
config = cfg.get_config("./src/config_50.yaml")
|
||||
from src.FasterRcnn.faster_rcnn_resnet50v1 import Faster_Rcnn_Resnet
|
||||
|
||||
if __name__ == '__main__':
|
||||
if args_opt.device_target == "GPU":
|
||||
context.set_context(enable_graph_kernel=True)
|
||||
|
@ -91,7 +105,7 @@ if __name__ == '__main__':
|
|||
print("Please make sure config:coco_root is valid.")
|
||||
raise ValueError(config.coco_root)
|
||||
print("Create Mindrecord. It may take some time.")
|
||||
data_to_mindrecord_byte_image("coco", True, prefix)
|
||||
data_to_mindrecord_byte_image(config, "coco", True, prefix)
|
||||
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
|
||||
else:
|
||||
print("coco_root not exits.")
|
||||
|
@ -101,7 +115,7 @@ if __name__ == '__main__':
|
|||
print("Please make sure config:image_dir is valid.")
|
||||
raise ValueError(config.image_dir)
|
||||
print("Create Mindrecord. It may take some time.")
|
||||
data_to_mindrecord_byte_image("other", True, prefix)
|
||||
data_to_mindrecord_byte_image(config, "other", True, prefix)
|
||||
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
|
||||
else:
|
||||
print("image_dir or anno_path not exits.")
|
||||
|
@ -114,7 +128,7 @@ if __name__ == '__main__':
|
|||
loss_scale = float(config.loss_scale)
|
||||
|
||||
# When creating the MindDataset, use the first mindrecord file, such as FasterRcnn.mindrecord0.
|
||||
dataset = create_fasterrcnn_dataset(mindrecord_file, batch_size=config.batch_size,
|
||||
dataset = create_fasterrcnn_dataset(config, mindrecord_file, batch_size=config.batch_size,
|
||||
device_num=device_num, rank_id=rank,
|
||||
num_parallel_workers=config.num_parallel_workers,
|
||||
python_multiprocessing=config.python_multiprocessing)
|
||||
|
@ -122,7 +136,7 @@ if __name__ == '__main__':
|
|||
dataset_size = dataset.get_dataset_size()
|
||||
print("Create dataset done!")
|
||||
|
||||
net = Faster_Rcnn_Resnet50(config=config)
|
||||
net = Faster_Rcnn_Resnet(config=config)
|
||||
net = net.set_train()
|
||||
|
||||
load_path = args_opt.pre_trained
|
||||
|
|
Loading…
Reference in New Issue