!16549 modify model_zoo ssd_ghostnet for cloud

From: @Somnus2020
Reviewed-by: @oacjiewen,@wuxuejian
Signed-off-by: @wuxuejian
This commit is contained in:
mindspore-ci-bot 2021-05-19 11:30:01 +08:00 committed by Gitee
commit 5e1e4f3ba8
30 changed files with 553 additions and 244 deletions

View File

@ -45,7 +45,7 @@ def test_net(data_dir,
load_param_into_net(net, param_dict)
net = UnetEval(net)
if hasattr(config, "dataset") and config.dataset != "ISBI":
split = config.split if hasattr(config, "dataset") else 0.8
split = config.split if hasattr(config, "split") else 0.8
valid_dataset = create_multi_class_dataset(data_dir, config.image_size, 1, 1,
num_classes=config.num_classes, is_train=False,
eval_resize=config.eval_resize, split=split,

View File

@ -109,7 +109,7 @@ If you want to run in modelarts, please check the official documentation of [mod
├── src
│ ├── dataset.py // Processing dataset
│ ├── textcnn.py // textcnn architecture
├── utils
├── model_utils
│ ├──device_adapter.py // device adapter
│ ├──local_adapter.py // local adapter
│ ├──moxing_adapter.py // moxing adapter

View File

@ -22,9 +22,9 @@ from mindspore import context
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
from src.model_utils.config import config
from model_utils.moxing_adapter import moxing_wrapper
from model_utils.device_adapter import get_device_id
from model_utils.config import config
from src.textcnn import TextCNN
from src.dataset import MovieReview, SST2, Subjectivity

View File

@ -20,7 +20,7 @@ import numpy as np
from mindspore import Tensor, load_checkpoint, load_param_into_net, export, context
from src.model_utils.config import config
from model_utils.config import config
from src.textcnn import TextCNN
from src.dataset import MovieReview, SST2, Subjectivity
@ -42,7 +42,7 @@ if __name__ == '__main__':
net = TextCNN(vocab_len=instance.get_dict_len(), word_len=config.word_len,
num_classes=config.num_classes, vec_length=config.vec_length)
param_dict = load_checkpoint(config.ckpt_file)
param_dict = load_checkpoint(config.checkpoint_file_path)
load_param_into_net(net, param_dict)
input_arr = Tensor(np.ones([config.batch_size, config.word_len], np.int32))

View File

@ -0,0 +1,27 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Device adapter for ModelArts"""
from model_utils.config import config
if config.enable_modelarts:
from model_utils.moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
from model_utils.local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
__all__ = [
"get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]

View File

@ -0,0 +1,115 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Moxing adapter for ModelArts"""
import os
import functools
from mindspore import context
from model_utils.config import config
# Counts sync_data() calls made in this process; each call appends the current value to its lock-file name.
_global_sync_count = 0
def get_device_id():
    """Return the ``DEVICE_ID`` environment variable as an int (0 when it is unset)."""
    return int(os.environ.get('DEVICE_ID', '0'))
def get_device_num():
    """Return the ``RANK_SIZE`` environment variable as an int (1 when it is unset)."""
    return int(os.environ.get('RANK_SIZE', '1'))
def get_rank_id():
    """Return the ``RANK_ID`` environment variable as an int (0 when it is unset)."""
    return int(os.environ.get('RANK_ID', '0'))
def get_job_id():
    """
    Return the job id taken from the ``JOB_ID`` environment variable.

    Returns:
        str, the value of ``JOB_ID``, or "default" when the variable is unset or empty.
    """
    # os.getenv gives None for a missing variable; `or` maps both None and "" to the
    # fallback so callers always receive a string.
    return os.getenv('JOB_ID') or "default"
def sync_data(from_path, to_path):
    """
    Copy data between remote obs and a local directory with moxing.

    Download data from remote obs to local directory if the first url is remote url and the
    second one is local path; upload data from local directory to remote obs in contrast.
    The call returns only after the lock file belonging to this call exists.
    """
    import moxing as mox
    import time
    global _global_sync_count
    lock_file = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices at most.
    device_id = get_device_id()
    is_copier = device_id % min(get_device_num(), 8) == 0
    if is_copier and not os.path.exists(lock_file):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            os.mknod(lock_file)
        except IOError:
            pass
        print("===save flag===")

    # Poll once per second until the lock file shows up.
    while not os.path.exists(lock_file):
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))
def moxing_wrapper(pre_process=None, post_process=None):
"""
Moxing wrapper to download dataset and upload outputs.

Builds a decorator for an entry-point function. Around the call of the decorated function it
uses sync_data() to fetch config.data_url, config.checkpoint_url and config.train_url (each only
when non-empty) and afterwards to copy config.output_path to config.train_url.

Args:
pre_process: Optional callable, invoked without arguments before the decorated function.
post_process: Optional callable, invoked without arguments after the decorated function.

NOTE(review): the value returned by the decorated function is not handed back to the caller.
"""
def wrapper(run_func):
@functools.wraps(run_func)
def wrapped_func(*args, **kwargs):
# Download data from data_url
if config.enable_modelarts:
if config.data_url:
sync_data(config.data_url, config.data_path)
print("Dataset downloaded: ", os.listdir(config.data_path))
if config.checkpoint_url:
sync_data(config.checkpoint_url, config.load_path)
print("Preload downloaded: ", os.listdir(config.load_path))
if config.train_url:
sync_data(config.train_url, config.output_path)
print("Workspace downloaded: ", os.listdir(config.output_path))
# Saved graphs go to a sub-directory of the output path named after the rank id.
context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
# Replace the configured device count/id with the values read from the environment.
config.device_num = get_device_num()
config.device_id = get_device_id()
if not os.path.exists(config.output_path):
os.makedirs(config.output_path)
if pre_process:
pre_process()
run_func(*args, **kwargs)
# Upload data to train_url
if config.enable_modelarts:
if post_process:
post_process()
if config.train_url:
print("Start to copy output directory")
sync_data(config.output_path, config.train_url)
return wrapped_func
return wrapper

View File

@ -31,9 +31,8 @@ result_path: './preprocess_Result/'
# Export options
device_id: 0
ckpt_file: ""
file_name: ""
file_format: ""
file_name: "textcnn"
file_format: "AIR"
---
# Help description for each configuration

View File

@ -18,7 +18,7 @@
import os
import numpy as np
from mindspore.nn.metrics import Accuracy
from src.model_utils.config import config
from model_utils.config import config
if __name__ == '__main__':

View File

@ -17,7 +17,7 @@
"""
import os
import numpy as np
from src.model_utils.config import config
from model_utils.config import config
from src.dataset import MovieReview, SST2, Subjectivity
if __name__ == '__main__':

View File

@ -31,9 +31,8 @@ result_path: './preprocess_Result/'
# Export options
device_id: 0
ckpt_file: ""
file_name: ""
file_format: ""
file_name: "textcnn"
file_format: "AIR"
---
# Help description for each configuration

View File

@ -31,9 +31,8 @@ result_path: './preprocess_Result/'
# Export options
device_id: 0
ckpt_file: ""
file_name: ""
file_format: ""
file_name: "textcnn"
file_format: "AIR"
---
# Help description for each configuration

View File

@ -26,9 +26,9 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMoni
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id, get_rank_id
from src.model_utils.config import config
from model_utils.moxing_adapter import moxing_wrapper
from model_utils.device_adapter import get_device_id, get_rank_id
from model_utils.config import config
from src.textcnn import TextCNN
from src.textcnn import SoftmaxCrossEntropyExpand
from src.dataset import MovieReview, SST2, Subjectivity

View File

@ -83,6 +83,37 @@ sh run_distribute_train_ghostnet.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] [RA
python eval.py --device_id 0 --dataset coco --checkpoint_path LOG4/ssd-500_458.ckpt
```
If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training and evaluation as follows:
```python
# run distributed training on modelarts example
# (1) First, Perform a or b.
# a. Set "enable_modelarts=True" on yaml file.
# Set other parameters on yaml file you need.
# b. Add "enable_modelarts=True" on the website UI interface.
# Add other parameters on the website UI interface.
# (2) Set the Dataset directory in config file.
# (3) Set the code directory to "/path/ssd_ghostnet" on the website UI interface.
# (4) Set the startup file to "train.py" on the website UI interface.
# (5) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface.
# (6) Create your job.
# run evaluation on modelarts example
# (1) Copy or upload your trained model to S3 bucket.
# (2) Perform a or b.
# a. Set "enable_modelarts=True" on yaml file.
# Set "checkpoint_file_path='/cache/checkpoint_path/model.ckpt'" on yaml file.
# Set "checkpoint_url=/The path of checkpoint in S3/" on yaml file.
# b. Add "enable_modelarts=True" on the website UI interface.
# Add "checkpoint_file_path='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
# Add "checkpoint_url=/The path of checkpoint in S3/" on the website UI interface.
# (3) Set the Dataset directory in config file.
# (4) Set the code directory to "/path/ssd_ghostnet" on the website UI interface.
# (5) Set the startup file to "eval.py" on the website UI interface.
# (6) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface.
# (7) Create your job.
```
# [Script Description](#contents)
## [Script and Sample Code](#contents)
@ -96,20 +127,25 @@ python eval.py --device_id 0 --dataset coco --checkpoint_path LOG4/ssd-500_458.c
├── src
├─ box_util.py ## bbox utils
├─ coco_eval.py ## coco metrics utils
├─ config_ghostnet_13x.py ## total config
├─ dataset.py ## create dataset and process dataset
├─ init_params.py ## parameters utils
├─ lr_schedule.py ## learning rate generator
└─ ssd_ghostnet.py ## ssd architecture
├── model_utils
│ ├── config.py ## parameter configuration
│ ├── device_adapter.py ## device adapter
│ ├── local_adapter.py ## local adapter
│ ├── moxing_adapter.py ## moxing adapter
├── default_config.yaml ## parameter configuration
├── eval.py ## eval scripts
├── train.py ## train scripts
├── mindspore_hub_conf.py # export model for hub
├── mindspore_hub_conf.py ## export model for hub
```
## [Script Parameters](#contents)
```python
Major parameters in train.py and config_ghostnet_13x.py as follows:
Major parameters in train.py and default_config.yaml as follows:
"device_num": 1 # Use device nums
"lr": 0.05 # Learning rate init value

View File

@ -0,0 +1,108 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# Url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# Path for local
run_distribute: False
enable_profiling: False
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path/"
device_target: 'Ascend'
checkpoint_path: './checkpoint/'
checkpoint_file_path: 'ssd-500_458.ckpt'
# ==============================================================================
# Training options
sink_mode: "sink"
dataset: "coco"
device_id: 0
device_num: 8
only_create_dataset: False
img_shape: [300, 300]
num_ssd_boxes: 1917
neg_pre_positive: 3
match_thershold: 0.5
nms_thershold: 0.6
min_score: 0.1
max_boxes: 100
epoch_size: 500
batch_size: 32
loss_scale: 1024
#learning rate settings
lr: 0.5
global_step: 0
lr_init: 0.001
lr_end_rate: 0.001
warmup_epochs: 2
momentum: 0.9
weight_decay: 0.00015
filter_weight: False
# network
num_default: [3, 6, 6, 6, 6, 6]
extras_in_channels: [256, 864, 1248, 512, 256, 256]
extras_out_channels: [864, 1248, 512, 256, 256, 128]
extras_srides: [1, 1, 2, 2, 2, 2]
extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
feature_size: [19, 10, 5, 3, 2, 1]
min_scale: 0.2
max_scale: 0.95
aspect_ratios: [[2,], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
steps: [16, 32, 64, 100, 150, 300]
prior_scaling: [0.1, 0.2]
gamma: 2.0
alpha: 0.75
pre_trained: ""
pre_trained_epoch_size: 0
save_checkpoint_epochs: 10
file_name: "ssd_ghostnet"
file_format: "AIR"
# `mindrecord_dir` and `coco_root` are better to use absolute path.
train_data_type: "train2017"
val_data_type: "val2017"
instances_set: "annotations/instances_{}.json"
coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
num_classes: 81
# The annotation.json position of voc validation dataset.
voc_root: ""
# voc original dataset.
voc_dir: ""
# if coco or voc used, `image_dir` and `anno_path` are useless.
image_dir: ""
anno_path: ""
---
# Help description for each configuration
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
checkpoint_url: 'The location of checkpoint for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'
load_path: 'The location of checkpoint for obs'
device_target: 'Target device type, available: [Ascend, GPU, CPU]'
enable_profiling: 'Whether enable profiling while training, default: False'
num_classes: 'Class for dataset'
batch_size: "Batch size for training and evaluation"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."
checkpoint_file_path: "The location of the checkpoint file."

View File

@ -16,15 +16,16 @@
"""Evaluation for SSD"""
import os
import argparse
import time
import numpy as np
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.ssd_ghostnet import SSD300, ssd_ghostnet
from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image, voc_data_to_mindrecord
from src.config_ghostnet_13x import config
from src.coco_eval import metrics
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
def ssd_eval(dataset_path, ckpt_path):
@ -32,7 +33,7 @@ def ssd_eval(dataset_path, ckpt_path):
batch_size = 1
ds = create_ssd_dataset(
dataset_path, batch_size=batch_size, repeat_num=1, is_training=False)
net = SSD300(ssd_ghostnet(), config, is_training=False)
net = SSD300(ssd_ghostnet(), is_training=False)
print("Load Checkpoint!")
param_dict = load_checkpoint(ckpt_path)
net.init_parameters_data()
@ -68,35 +69,29 @@ def ssd_eval(dataset_path, ckpt_path):
print(f"mAP: {mAP}")
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='SSD evaluation')
parser.add_argument("--device_id", type=int, default=0,
help="Device id, default is 0.")
parser.add_argument("--dataset", type=str, default="coco",
help="Dataset, default is coco.")
parser.add_argument("--checkpoint_path", type=str,
required=True, help="Checkpoint file path.")
args_opt = parser.parse_args()
@moxing_wrapper()
def eval_net():
"""eval net"""
context.set_context(mode=context.GRAPH_MODE,
device_target="Ascend", device_id=args_opt.device_id)
device_target="Ascend", device_id=get_device_id())
prefix = "ssd_eval.mindrecord"
mindrecord_dir = config.mindrecord_dir
mindrecord_dir = os.path.join(config.data_path, "MindRecord_COCO")
mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
if args_opt.dataset == "voc":
config.coco_root = config.voc_root
if config.dataset == "voc":
coco_root = config.voc_root
if not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco":
if os.path.isdir(config.coco_root):
if config.dataset == "coco":
coco_root = os.path.join(config.data_path, "coco_ori")
if os.path.isdir(coco_root):
print("Create Mindrecord.")
data_to_mindrecord_byte_image("coco", False, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else:
print("coco_root not exits.")
elif args_opt.dataset == "voc":
elif config.dataset == "voc":
if os.path.isdir(config.voc_dir) and os.path.isdir(config.voc_root):
print("Create Mindrecord.")
voc_data_to_mindrecord(mindrecord_dir, False, prefix)
@ -112,4 +107,7 @@ if __name__ == '__main__':
print("IMAGE_DIR or ANNO_PATH not exits.")
print("Start Eval!")
ssd_eval(mindrecord_file, args_opt.checkpoint_path)
ssd_eval(mindrecord_file, config.checkpoint_file_path)
if __name__ == '__main__':
eval_net()

View File

@ -14,35 +14,24 @@
# ============================================================================
"""export"""
import argparse
import numpy as np
from mindspore import Tensor
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
from src.ssd_ghostnet import SSD300, ssd_ghostnet
from src.config_ghostnet_13x import config
from src.model_utils.config import config
parser = argparse.ArgumentParser(description="openpose export")
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--file_name", type=str, default="ssd_ghostnet", help="output file name.")
parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="AIR", help="file format")
parser.add_argument("--device_target", type=str, default="Ascend",
choices=["Ascend", "GPU", "CPU"], help="device target (default: Ascend)")
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
if __name__ == "__main__":
context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
# define net
net = SSD300(ssd_ghostnet(), config, is_training=False)
net = SSD300(ssd_ghostnet(), is_training=False)
# load checkpoint
param_dict = load_checkpoint(args.ckpt_file)
param_dict = load_checkpoint(config.checkpoint_file_path)
load_param_into_net(net, param_dict)
input_shape = config["img_shape"]
inputs = np.ones([args.batch_size, 3, input_shape[0], input_shape[1]]).astype(np.float32)
export(net, Tensor(inputs), file_name=args.file_name, file_format=args.file_format)
input_shape = config.img_shape
inputs = np.ones([config.batch_size, 3, input_shape[0], input_shape[1]]).astype(np.float32)
export(net, Tensor(inputs), file_name=config.file_name, file_format=config.file_format)

View File

@ -14,9 +14,8 @@
# ============================================================================
"""hub config."""
from src.ssd_ghostnet import SSD300, ssd_ghostnet
from src.config_ghostnet_13x import config
def create_network(name, *args, **kwargs):
if name == 'ghostnet_ssd':
return SSD300(ssd_ghostnet(), config, **kwargs)
return SSD300(ssd_ghostnet(), **kwargs)
raise NotImplementedError(f"{name} is not implemented in the repo")

View File

@ -50,6 +50,7 @@ do
mkdir ./LOG$i
cp ./*.py ./LOG$i
cp -r ./src ./LOG$i
cp -r ./*yaml ./LOG$i
cd ./LOG$i || exit
export RANK_ID=$i
echo "start training for rank $i, device $DEVICE_ID"
@ -57,25 +58,27 @@ do
if [ $# == 5 ]
then
python train.py \
--distribute=True \
--run_distribute=True \
--lr=$LR \
--dataset=$DATASET \
--device_num=$RANK_SIZE \
--device_id=$DEVICE_ID \
--epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
--epoch_size=$EPOCH_SIZE \
--output_path './output' > log.txt 2>&1 &
fi
if [ $# == 7 ]
then
python train.py \
--distribute=True \
--run_distribute=True \
--lr=$LR \
--dataset=$DATASET \
--device_num=$RANK_SIZE \
--device_id=$DEVICE_ID \
--pre_trained=$PRE_TRAINED \
--pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
--epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
--epoch_size=$EPOCH_SIZE \
--output_path './output' > log.txt 2>&1 &
fi
cd ../

View File

@ -18,7 +18,7 @@
import math
import itertools as it
import numpy as np
from .config_ghostnet_13x import config
from src.model_utils.config import config
class GeneratDefaultBoxes():

View File

@ -17,7 +17,7 @@
import os
import json
import numpy as np
from .config_ghostnet_13x import config
from src.model_utils.config import config
from .box_utils import ssd_bboxes_decode
@ -62,7 +62,7 @@ def metrics(pred_data):
from pycocotools.cocoeval import COCOeval
num_classes = config.num_classes
coco_root = config.coco_root
coco_root = os.path.join(config.data_path, "coco_ori")
data_type = config.val_data_type
# Classes need to train or test.

View File

@ -1,81 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# " ============================================================================
"""Config parameters for SSD models."""
from easydict import EasyDict as ed
config = ed({
"img_shape": [300, 300],
"num_ssd_boxes": 1917,
"neg_pre_positive": 3,
"match_thershold": 0.5,
"nms_thershold": 0.6,
"min_score": 0.1,
"max_boxes": 100,
# learing rate settings
"global_step": 0,
"lr_init": 0.001,
"lr_end_rate": 0.001,
"warmup_epochs": 2,
"momentum": 0.9,
"weight_decay": 1.5e-4,
# network
"num_default": [3, 6, 6, 6, 6, 6],
"extras_in_channels": [256, 864, 1248, 512, 256, 256],
"extras_out_channels": [864, 1248, 512, 256, 256, 128],
"extras_srides": [1, 1, 2, 2, 2, 2],
"extras_ratio": [0.2, 0.2, 0.2, 0.25, 0.5, 0.25],
"feature_size": [19, 10, 5, 3, 2, 1],
"min_scale": 0.2,
"max_scale": 0.95,
"aspect_ratios": [(2,), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)],
"steps": (16, 32, 64, 100, 150, 300),
"prior_scaling": (0.1, 0.2),
"gamma": 2.0,
"alpha": 0.75,
# `mindrecord_dir` and `coco_root` are better to use absolute path.
"mindrecord_dir": "/ssd0/liuchuanjian/mscoco2017/MindRecord_COCO",
"coco_root": "/ssd0/liuchuanjian/mscoco2017",
"train_data_type": "train2017",
"val_data_type": "val2017",
"instances_set": "annotations/instances_{}.json",
"coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush'),
"num_classes": 81,
# The annotation.json position of voc validation dataset.
"voc_root": "",
# voc original dataset.
"voc_dir": "",
# if coco or voc used, `image_dir` and `anno_path` are useless.
"image_dir": "",
"anno_path": "",
})

View File

@ -26,7 +26,7 @@ import cv2
import mindspore.dataset as de
import mindspore.dataset.vision.c_transforms as C2
from mindspore.mindrecord import FileWriter
from .config_ghostnet_13x import config
from src.model_utils.config import config
from .box_utils import jaccard_numpy, ssd_bboxes_encode
@ -252,7 +252,7 @@ def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO
coco_root = config.coco_root
coco_root = os.path.join(config.data_path, "coco_ori")
data_type = config.val_data_type
if is_training:
data_type = config.train_data_type
@ -364,7 +364,7 @@ def voc_data_to_mindrecord(mindrecord_dir, is_training, prefix="ssd.mindrecord",
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
mindrecord_dir = os.path.join(config.data_path, "MindRecord_COCO")
mindrecord_path = os.path.join(mindrecord_dir, prefix)
writer = FileWriter(mindrecord_path, file_num)
if dataset == "coco":

View File

@ -0,0 +1,124 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Parse arguments"""
import os
import ast
import argparse
from pprint import pformat
import yaml
_config_path = "./default_config.yaml"
class Config:
    """
    Configuration namespace. Convert dictionary to members.

    Every key becomes an attribute. A dict value becomes a nested ``Config``; a list or
    tuple value becomes a list whose dict elements are converted the same way.
    """

    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                converted = [Config(elem) if isinstance(elem, dict) else elem for elem in value]
            elif isinstance(value, dict):
                converted = Config(value)
            else:
                converted = value
            setattr(self, key, converted)

    def __str__(self):
        # Pretty-print the attribute dictionary.
        attributes = self.__dict__
        return pformat(attributes)

    def __repr__(self):
        return self.__str__()
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    One ``--<key>`` option is registered for every top-level entry of ``cfg`` whose value is
    neither a list nor a dict; the yaml value is used as the option's default.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description.
        choices: Mapping from option name to the values allowed for that option.
        cfg_path: Path to the default yaml config.

    Returns:
        argparse.Namespace, the parsed command line arguments.
    """
    parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item in cfg:
        default = cfg[item]
        if isinstance(default, (list, dict)):
            continue
        help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
        choice = choices[item] if item in choices else None
        if isinstance(default, bool):
            # bool("False") is True, so booleans are parsed as Python literals instead.
            arg_type = ast.literal_eval
        elif default is None:
            # An empty yaml value loads as None, and type(None) cannot convert a command
            # line string, so accept the raw string.
            arg_type = str
        else:
            arg_type = type(default)
        parser.add_argument("--" + item, type=arg_type, default=default, choices=choice,
                            help=help_description)
    return parser.parse_args()
def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        tuple, ``(cfg, cfg_helper)``: the first yaml document and the second one; the
        helper is an empty dict when the file holds a single document.

    Raises:
        ValueError: If the content is not valid yaml, or the file does not hold exactly
            one or two documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # Catch only parser errors so the document-count error below keeps its message.
            raise ValueError("Failed to parse yaml") from err
    if len(cfgs) == 1:
        cfg_helper = {}
        cfg = cfgs[0]
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
    else:
        raise ValueError("At most 2 docs (config and help description for help) are supported in config yaml")
    print(cfg_helper)
    return cfg, cfg_helper
def merge(args, cfg):
    """
    Merge the base config from yaml file and command line arguments.

    Every attribute of ``args`` is written into ``cfg``, overwriting an existing entry of
    the same name.

    Args:
        args: Command line arguments.
        cfg: Base configuration.

    Returns:
        The same ``cfg`` object, updated in place.
    """
    for name, value in vars(args).items():
        cfg[name] = value
    return cfg
def get_config():
    """
    Get Config according to the yaml file and cli arguments.

    The yaml file is taken from ``--config_path`` (default: ``../../default_config.yaml``
    relative to this file). Its entries become command line options, and the parsed
    command line values are merged back over the yaml values.

    Returns:
        Config, the merged configuration.
    """
    parser = argparse.ArgumentParser(description="default name", add_help=False)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../default_config.yaml"),
                        help="Config file path")
    # Only --config_path is read here; the remaining options are parsed after the yaml is loaded.
    path_args, _ = parser.parse_known_args()
    default, helper = parse_yaml(path_args.config_path)
    # Pass the path by keyword: the fourth positional parameter of parse_cli_to_yaml is
    # `choices`, not `cfg_path`.
    args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, cfg_path=path_args.config_path)
    final_config = merge(args, default)
    return Config(final_config)
# Module-level configuration object; get_config() runs (reading the yaml and the command line) when this module is imported.
config = get_config()

View File

@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Local adapter"""
import os
def get_device_id():
    """Return the device id from the ``DEVICE_ID`` environment variable, 0 if it is not set."""
    return int(os.getenv('DEVICE_ID', '0'))
def get_device_num():
    """Return the device count from the ``RANK_SIZE`` environment variable, 1 if it is not set."""
    return int(os.getenv('RANK_SIZE', '1'))
def get_rank_id():
    """Return the rank id from the ``RANK_ID`` environment variable, 0 if it is not set."""
    return int(os.getenv('RANK_ID', '0'))
def get_job_id():
    """Return the fixed job id used for local runs."""
    local_job_id = "Local Job"
    return local_job_id

View File

@ -26,6 +26,7 @@ from mindspore.communication.management import get_group_size
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from src.model_utils.config import config
def _make_divisible(x, divisor=4):
@ -364,14 +365,11 @@ class FlattenConcat(nn.Cell):
"""
Concatenate predictions into a single tensor.
Args:
config (dict): The default config of SSD.
Returns:
Tensor, flatten predictions.
"""
def __init__(self, config):
def __init__(self):
super(FlattenConcat, self).__init__()
self.num_ssd_boxes = config.num_ssd_boxes
self.concat = P.Concat(axis=1)
@ -391,15 +389,12 @@ class MultiBox(nn.Cell):
"""
Multibox conv layers. Each multibox layer contains class conf scores and localization predictions.
Args:
config (dict): The default config of SSD.
Returns:
Tensor, localization predictions.
Tensor, class conf scores.
"""
def __init__(self, config):
def __init__(self):
super(MultiBox, self).__init__()
num_classes = config.num_classes
out_channels = config.extras_out_channels
@ -415,7 +410,7 @@ class MultiBox(nn.Cell):
self.multi_loc_layers = nn.layer.CellList(loc_layers)
self.multi_cls_layers = nn.layer.CellList(cls_layers)
self.flatten_concat = FlattenConcat(config)
self.flatten_concat = FlattenConcat()
def construct(self, inputs):
loc_outputs = ()
@ -432,18 +427,16 @@ class SSD300(nn.Cell):
Args:
backbone (Cell): Backbone Network.
config (dict): The default config of SSD.
Returns:
Tensor, localization predictions.
Tensor, class conf scores.
Examples:backbone
SSD300(backbone=resnet34(num_classes=None),
config=config).
SSD300(backbone=resnet34(num_classes=None)).
"""
def __init__(self, backbone, config, is_training=True, **kwargs):
def __init__(self, backbone, is_training=True, **kwargs):
super(SSD300, self).__init__()
self.backbone = backbone
@ -457,7 +450,7 @@ class SSD300(nn.Cell):
expand_ratio=ratios[i], last_relu=True)
residual_list.append(residual)
self.multi_residual = nn.layer.CellList(residual_list)
self.multi_box = MultiBox(config)
self.multi_box = MultiBox()
self.is_training = is_training
if not is_training:
self.activation = P.Sigmoid()
@ -520,13 +513,12 @@ class SSDWithLossCell(nn.Cell):
Args:
network (Cell): The training network.
config (dict): SSD config.
Returns:
Tensor, the loss of the network.
"""
def __init__(self, network, config):
def __init__(self, network):
super(SSDWithLossCell, self).__init__()
self.network = network
self.less = P.Less()

View File

@ -16,8 +16,6 @@
"""Train SSD and get checkpoint files."""
import os
import argparse
import ast
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.communication.management import init
@ -26,83 +24,50 @@ from mindspore.train import Model
from mindspore.context import ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.ssd_ghostnet import SSD300, SSDWithLossCell, TrainingWrapper, ssd_ghostnet
from src.config_ghostnet_13x import config
from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image, voc_data_to_mindrecord
from src.lr_schedule import get_lr
from src.init_params import init_net_param, filter_checkpoint_parameter
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
def get_args():
"""
Parse arguments
"""
parser = argparse.ArgumentParser(description="SSD training")
parser.add_argument("--only_create_dataset", type=ast.literal_eval, default=False,
help="If set it true, only create Mindrecord, default is False.")
parser.add_argument("--distribute", type=ast.literal_eval, default=False,
help="Run distribute, default is False.")
parser.add_argument("--device_id", type=int, default=4,
help="Device id, default is 0.")
parser.add_argument("--device_num", type=int, default=1,
help="Use device nums, default is 1.")
parser.add_argument("--lr", type=float, default=0.05,
help="Learning rate, default is 0.05.")
parser.add_argument("--mode", type=str, default="sink",
help="Run sink mode or not, default is sink.")
parser.add_argument("--dataset", type=str, default="coco",
help="Dataset, default is coco.")
parser.add_argument("--epoch_size", type=int, default=500,
help="Epoch size, default is 500.")
parser.add_argument("--batch_size", type=int, default=32,
help="Batch size, default is 32.")
parser.add_argument("--pre_trained", type=str, default=None,
help="Pretrained Checkpoint file path.")
parser.add_argument("--pre_trained_epoch_size", type=int,
default=0, help="Pretrained epoch size.")
parser.add_argument("--save_checkpoint_epochs", type=int,
default=10, help="Save checkpoint epochs, default is 10.")
parser.add_argument("--loss_scale", type=int, default=1024,
help="Loss scale, default is 1024.")
parser.add_argument("--filter_weight", type=ast.literal_eval, default=False,
help="Filter weight parameters, default is False.")
args_opt = parser.parse_args()
return args_opt
def main():
args_opt = get_args()
@moxing_wrapper()
def train_net():
"""train net"""
context.set_context(mode=context.GRAPH_MODE,
device_target="Ascend", device_id=args_opt.device_id)
device_target="Ascend", device_id=config.device_id)
if args_opt.distribute:
device_num = args_opt.device_num
if config.run_distribute:
device_num = config.device_num
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
device_num=device_num)
init()
rank = args_opt.device_id % device_num
rank = config.device_id % device_num
else:
rank = 0
device_num = 1
print("Start create dataset!")
# It will generate mindrecord file in args_opt.mindrecord_dir,
# It will generate mindrecord files in mindrecord_dir (the "MindRecord_COCO" folder under config.data_path),
# and the file name is ssd.mindrecord0, 1, ... file_num.
prefix = "ssd.mindrecord"
mindrecord_dir = config.mindrecord_dir
mindrecord_dir = os.path.join(config.data_path, "MindRecord_COCO")
mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
if not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco":
if os.path.isdir(config.coco_root):
if config.dataset == "coco":
coco_root = os.path.join(config.data_path, "coco_ori")
if os.path.isdir(coco_root):
print("Create Mindrecord.")
data_to_mindrecord_byte_image("coco", True, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else:
print("coco_root not exits.")
elif args_opt.dataset == "voc":
elif config.dataset == "voc":
if os.path.isdir(config.voc_dir):
print("Create Mindrecord.")
voc_data_to_mindrecord(mindrecord_dir, True, prefix)
@ -117,40 +82,41 @@ def main():
else:
print("image_dir or anno_path not exits.")
if not args_opt.only_create_dataset:
loss_scale = float(args_opt.loss_scale)
if not config.only_create_dataset:
loss_scale = float(config.loss_scale)
# When creating the MindDataset, use the first mindrecord file, such as ssd.mindrecord0.
dataset = create_ssd_dataset(mindrecord_file, repeat_num=1,
batch_size=args_opt.batch_size, device_num=device_num, rank=rank)
batch_size=config.batch_size, device_num=device_num, rank=rank)
dataset_size = dataset.get_dataset_size()
print("Create dataset done!")
backbone = ssd_ghostnet()
ssd = SSD300(backbone=backbone, config=config)
net = SSDWithLossCell(ssd, config)
ssd = SSD300(backbone=backbone)
net = SSDWithLossCell(ssd)
init_net_param(net)
# checkpoint
ckpt_save_dir = os.path.join(config.output_path, config.checkpoint_path)
ckpt_config = CheckpointConfig(
save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs, keep_checkpoint_max=60)
save_checkpoint_steps=dataset_size * config.save_checkpoint_epochs, keep_checkpoint_max=60)
ckpoint_cb = ModelCheckpoint(
prefix="ssd", directory=None, config=ckpt_config)
prefix="ssd", directory=ckpt_save_dir, config=ckpt_config)
if args_opt.pre_trained:
if args_opt.pre_trained_epoch_size <= 0:
if config.pre_trained:
if config.pre_trained_epoch_size <= 0:
raise KeyError(
"pre_trained_epoch_size must be greater than 0.")
param_dict = load_checkpoint(args_opt.pre_trained)
if args_opt.filter_weight:
param_dict = load_checkpoint(config.pre_trained)
if config.filter_weight:
filter_checkpoint_parameter(param_dict)
load_param_into_net(net, param_dict)
lr = Tensor(get_lr(global_step=config.global_step,
lr_init=config.lr_init, lr_end=config.lr_end_rate * args_opt.lr, lr_max=args_opt.lr,
lr_init=config.lr_init, lr_end=config.lr_end_rate * config.lr, lr_max=config.lr,
warmup_epochs=config.warmup_epochs,
total_epochs=args_opt.epoch_size,
total_epochs=config.epoch_size,
steps_per_epoch=dataset_size))
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
config.momentum, config.weight_decay, loss_scale)
@ -161,13 +127,13 @@ def main():
model = Model(net)
dataset_sink_mode = False
if args_opt.mode == "sink":
if config.sink_mode == "sink":
print("In sink mode, one epoch return a loss.")
dataset_sink_mode = True
print("Start train SSD, the first epoch will be slower because of the graph compilation.")
model.train(args_opt.epoch_size, dataset,
model.train(config.epoch_size, dataset,
callbacks=callback, dataset_sink_mode=dataset_sink_mode)
if __name__ == '__main__':
main()
train_net()