forked from mindspore-Ecosystem/mindspore
!16792 modify FaceDetection network for cloud
From: @zhanghuiyao Reviewed-by: @c_34,@wuxuejian Signed-off-by: @c_34
This commit is contained in:
commit
12033501d9
|
@ -84,7 +84,7 @@ Dataset used: [COCO2014](https://cocodataset.org/#download)
|
|||
- Pretrained_backbone can use src/convert_weight.py, convert darknet53.conv.74 to mindspore ckpt.
|
||||
|
||||
```
|
||||
python convert_weight.py --input_file ./darknet53.conv.74
|
||||
python src/convert_weight.py --input_file ./darknet53.conv.74
|
||||
```
|
||||
|
||||
darknet53.conv.74 can be downloaded from [here](https://pjreddie.com/media/files/darknet53.conv.74).
|
||||
|
|
|
@ -88,7 +88,7 @@ YOLOv3使用DarkNet53执行特征提取,这是YOLOv2中的Darknet-19和残差
|
|||
- 使用src路径下的convert_weight.py脚本将darknet53.conv.74转换成mindspore ckpt格式。
|
||||
|
||||
```command
|
||||
python convert_weight.py --input_file ./darknet53.conv.74
|
||||
python src/convert_weight.py --input_file ./darknet53.conv.74
|
||||
```
|
||||
|
||||
可以从网站[下载](https://pjreddie.com/media/files/darknet53.conv.74) darknet53.conv.74文件。
|
||||
|
|
|
@ -14,12 +14,12 @@
|
|||
# ============================================================================
|
||||
"""Convert weight to mindspore ckpt."""
|
||||
import os
|
||||
import argparse
|
||||
import numpy as np
|
||||
from mindspore.train.serialization import save_checkpoint
|
||||
from mindspore import Tensor
|
||||
|
||||
from src.yolo import YOLOV3DarkNet53
|
||||
from model_utils.config import config
|
||||
|
||||
def load_weight(weights_file):
|
||||
"""Loads pre-trained weights."""
|
||||
|
@ -72,9 +72,4 @@ def convert(weights_file, output_file):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="yolov3 weight convert.")
|
||||
parser.add_argument("--input_file", type=str, default="./darknet53.conv.74", help="input file path.")
|
||||
parser.add_argument("--output_file", type=str, default="./backbone_darknet53.ckpt", help="output file path.")
|
||||
args_opt = parser.parse_args()
|
||||
|
||||
convert(args_opt.input_file, args_opt.output_file)
|
||||
convert(config.input_file, config.output_file)
|
||||
|
|
|
@ -75,6 +75,10 @@ file_name: "yolov3_darknet53"
|
|||
file_format: "AIR" # ["AIR", "ONNX", "MINDIR"]
|
||||
|
||||
|
||||
# convert weight option
|
||||
input_file: "./darknet53.conv.74"
|
||||
output_file: "./backbone_darknet53.ckpt"
|
||||
|
||||
# Other default config
|
||||
hue: 0.1
|
||||
saturation: 1.5
|
||||
|
@ -165,4 +169,8 @@ batch_size: "batch size"
|
|||
ckpt_file: "Checkpoint file path."
|
||||
file_name: "output file name."
|
||||
file_format: "file format choices in ['AIR', 'ONNX', 'MINDIR']"
|
||||
device_target: "device target. choices in ['Ascend', 'GPU'] for train. choices in ['Ascend', 'GPU', 'CPU'] for export."
|
||||
device_target: "device target. choices in ['Ascend', 'GPU'] for train. choices in ['Ascend', 'GPU', 'CPU'] for export."
|
||||
|
||||
# convert weight option
|
||||
input_file: "input file path."
|
||||
output_file: "output file path."
|
|
@ -83,10 +83,16 @@ We use about 13K images as training dataset and 3K as evaluating dataset in this
|
|||
|
||||
The entire code structure is as follows:
|
||||
|
||||
```python
|
||||
```text
|
||||
.
|
||||
└─ Face Detection
|
||||
├─ README.md
|
||||
├─ model_utils
|
||||
├─ __init__.py # init file
|
||||
├─ config.py # Parse arguments
|
||||
├─ device_adapter.py # Device adapter for ModelArts
|
||||
├─ local_adapter.py # Local adapter
|
||||
└─ moxing_adapter.py # Moxing adapter for ModelArts
|
||||
├─ scripts
|
||||
├─ run_standalone_train.sh # launch standalone training(1p) in ascend
|
||||
├─ run_distribute_train.sh # launch distributed training(8p) in ascend
|
||||
|
@ -98,7 +104,6 @@ The entire code structure is as following:
|
|||
├─ yolo_loss.py # loss function
|
||||
├─ yolo_postprocess.py # post process
|
||||
└─ yolov3.py # network
|
||||
├─ config.py # parameter configuration
|
||||
├─ data_preprocess.py # preprocess
|
||||
├─ logging.py # log function
|
||||
├─ lrsche_factory.py # generate learning rate
|
||||
|
@ -107,6 +112,7 @@ The entire code structure is as following:
|
|||
├─ data_to_mindrecord_train.py # convert dataset to mindrecord for training
|
||||
├─ data_to_mindrecord_train_append.py # add dataset to an existed mindrecord for training
|
||||
└─ data_to_mindrecord_eval.py # convert dataset to mindrecord for evaluating
|
||||
├─ default_config.yaml # default configurations
|
||||
├─ train.py # training scripts
|
||||
├─ eval.py # evaluation scripts
|
||||
└─ export.py # export air model
|
||||
|
@ -158,20 +164,84 @@ The entire code structure is as following:
|
|||
bash run_distribute_train.sh /home/train.mindrecord ./rank_table_8p.json /home/a.ckpt
|
||||
```
|
||||
|
||||
*Distribute mode doesn't support running on CPU*. You will get the loss value of each step as following in "./output/[TIME]/[TIME].log" or "./scripts/device0/train.log":
|
||||
*Distribute mode doesn't support running on CPU*. You will get the loss value of each step as follows in "./scripts/device0/output/[TIME]/[TIME].log" or "./scripts/device0/train.log":
|
||||
|
||||
```python
|
||||
rank[0], iter[0], loss[318555.8], overflow:False, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[1], loss[95394.28], overflow:True, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[2], loss[81332.92], overflow:True, loss_scale:512.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[3], loss[27250.805], overflow:True, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
...
|
||||
```python
|
||||
rank[0], iter[0], loss[318555.8], overflow:False, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[1], loss[95394.28], overflow:True, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[2], loss[81332.92], overflow:True, loss_scale:512.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[3], loss[27250.805], overflow:True, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
...
|
||||
rank[0], iter[62496], loss[2218.6282], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62497], loss[3788.5146], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62498], loss[3427.5479], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62499], loss[4294.194], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
```
|
||||
|
||||
rank[0], iter[62496], loss[2218.6282], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62497], loss[3788.5146], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62498], loss[3427.5479], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
rank[0], iter[62499], loss[4294.194], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
|
||||
```
|
||||
- Train on [ModelArts](https://support.huaweicloud.com/modelarts/)
|
||||
|
||||
```python
|
||||
# Train 8p with Ascend
|
||||
# (1) Perform a or b.
|
||||
# a. Set "enable_modelarts=True" on base_config.yaml file.
|
||||
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on default_config.yaml file.
|
||||
# (optional)Set "checkpoint_url='s3://dir_to_your_pretrain/'" on default_config.yaml file.
|
||||
# (optional)Set "pretrained='/cache/checkpoint_path/model.ckpt'" on default_config.yaml file.
|
||||
# Set other parameters on default_config.yaml file you need.
|
||||
# b. Add "enable_modelarts=True" on the website UI interface.
|
||||
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on the website UI interface.
|
||||
# (optional)Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
|
||||
# (optional)Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
|
||||
# Add other parameters on the website UI interface.
|
||||
# (3) (optional) Upload or copy your pretrained model to S3 bucket.
|
||||
# (4) Upload a zip dataset to S3 bucket. (you could also upload the origin dataset, but it can be so slow.)
|
||||
# (5) Set the code directory to "/path/FaceDetection" on the website UI interface.
|
||||
# (6) Set the startup file to "train.py" on the website UI interface.
|
||||
# (7) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface.
|
||||
# (8) Create your job.
|
||||
#
|
||||
# Train 1p with Ascend
|
||||
# (1) Perform a or b.
|
||||
# a. Set "enable_modelarts=True" on base_config.yaml file.
|
||||
# Set "run_platform='Ascend'" on default_config.yaml file.
|
||||
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on default_config.yaml file.
|
||||
# (optional)Set "checkpoint_url='s3://dir_to_your_pretrain/'" on default_config.yaml file.
|
||||
# (optional)Set "pretrained='/cache/checkpoint_path/model.ckpt'" on default_config.yaml file.
|
||||
# Set other parameters on default_config.yaml file you need.
|
||||
# b. Add "enable_modelarts=True" on the website UI interface.
|
||||
# Add "run_platform='Ascend'" on the website UI interface.
|
||||
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on the website UI interface.
|
||||
# (optional)Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
|
||||
# (optional)Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
|
||||
# Add other parameters on the website UI interface.
|
||||
# (3) (optional) Upload or copy your pretrained model to S3 bucket.
|
||||
# (4) Upload a zip dataset to S3 bucket. (you could also upload the origin dataset, but it can be so slow.)
|
||||
# (5) Set the code directory to "/path/FaceDetection" on the website UI interface.
|
||||
# (6) Set the startup file to "train.py" on the website UI interface.
|
||||
# (7) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface.
|
||||
# (8) Create your job.
|
||||
#
|
||||
# Eval 1p with Ascend
|
||||
# (1) Perform a or b.
|
||||
# a. Set "enable_modelarts=True" on base_config.yaml file.
|
||||
# Set "run_platform='Ascend'" on default_config.yaml file.
|
||||
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_test/data.mindrecord'" on default_config.yaml file.
|
||||
# Set "checkpoint_url='s3://dir_to_your_pretrain/'" on default_config.yaml file.
|
||||
# Set "pretrained='/cache/checkpoint_path/model.ckpt'" on default_config.yaml file.
|
||||
# Set other parameters on default_config.yaml file you need.
|
||||
# b. Add "enable_modelarts=True" on the website UI interface.
|
||||
# Add "run_platform='Ascend'" on the website UI interface.
|
||||
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_test/data.mindrecord'" on the website UI interface.
|
||||
# Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
|
||||
# Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
|
||||
# Add other parameters on the website UI interface.
|
||||
# (3) Upload or copy your pretrained model to S3 bucket.
|
||||
# (4) Upload a zip dataset to S3 bucket. (you could also upload the origin dataset, but it can be so slow.)
|
||||
# (5) Set the code directory to "/path/FaceDetection" on the website UI interface.
|
||||
# (6) Set the startup file to "eval.py" on the website UI interface.
|
||||
# (7) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface.
|
||||
# (8) Create your job.
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
|
||||
|
@ -214,7 +284,7 @@ bash run_export.sh [PLATFORM] [BATCH_SIZE] [USE_DEVICE_ID] [PRETRAINED_BACKBONE]
|
|||
| Parameters | Face Detection |
|
||||
| -------------------------- | ----------------------------------------------------------- |
|
||||
| Model Version | V1 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
|
||||
| uploaded Date | 09/30/2020 (month/day/year) |
|
||||
| MindSpore Version | 1.0.0 |
|
||||
| Dataset | 13K images |
|
||||
|
@ -231,7 +301,7 @@ bash run_export.sh [PLATFORM] [BATCH_SIZE] [USE_DEVICE_ID] [PRETRAINED_BACKBONE]
|
|||
| Parameters | Face Detection |
|
||||
| ------------------- | --------------------------- |
|
||||
| Model Version | V1 |
|
||||
| Resource | Ascend 910; OS Euler2.8 |
|
||||
| Resource | Ascend 910; OS Euler2.8 |
|
||||
| Uploaded Date | 09/30/2020 (month/day/year) |
|
||||
| MindSpore Version | 1.0.0 |
|
||||
| Dataset | 3K images |
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
|
||||
enable_modelarts: False
|
||||
# Url for modelarts
|
||||
data_url: ""
|
||||
train_url: ""
|
||||
checkpoint_url: ""
|
||||
# Path for local
|
||||
data_path: "/cache/data"
|
||||
output_path: "/cache/train"
|
||||
load_path: "/cache/checkpoint_path"
|
||||
need_modelarts_dataset_unzip: True
|
||||
modelarts_dataset_unzip_name: "face_detect_dataset"
|
||||
|
||||
# ==============================================================================
|
||||
# train options
|
||||
run_platform: "Ascend" # choices in ("Ascend", "CPU")
|
||||
mindrecord_path: ""
|
||||
pretrained: ""
|
||||
use_loss_scale: True
|
||||
|
||||
# default options
|
||||
batch_size: 64
|
||||
warmup_lr: 0.0004
|
||||
lr_rates: [0.002, 0.004, 0.002, 0.0008, 0.0004, 0.0002, 0.00008, 0.00004, 0.000004]
|
||||
lr_steps: [1000, 10000, 40000, 60000, 80000, 100000, 130000, 160000, 190000]
|
||||
gamma: 0.5
|
||||
weight_decay: 0.0005
|
||||
momentum: 0.5
|
||||
max_epoch: 2500
|
||||
|
||||
log_interval: 10
|
||||
ckpt_path: "../../output"
|
||||
ckpt_interval: 1000
|
||||
result_path: "../../results"
|
||||
|
||||
input_shape: [768, 448]
|
||||
jitter: 0.3
|
||||
flip: 0.5
|
||||
hue: 0.1
|
||||
sat: 1.5
|
||||
val: 1.5
|
||||
num_classes: 1
|
||||
anchors: [[3, 4],
|
||||
[5, 6],
|
||||
[7, 9],
|
||||
[10, 13],
|
||||
[15, 19],
|
||||
[21, 26],
|
||||
[28, 36],
|
||||
[38, 49],
|
||||
[54, 71],
|
||||
[77, 102],
|
||||
[122, 162],
|
||||
[207, 268]]
|
||||
|
||||
anchors_mask: [[8, 9, 10, 11], [4, 5, 6, 7], [0, 1, 2, 3]]
|
||||
|
||||
conf_thresh: 0.1
|
||||
nms_thresh: 0.45
|
||||
|
||||
---
|
||||
|
||||
# Help description for each configuration
|
||||
# train options
|
||||
run_platform: "run platform, support Ascend and CPU."
|
||||
mindrecord_path: "dataset path, e.g. /home/data.mindrecord"
|
||||
pretrained: "pretrained model to load"
|
||||
local_rank: "current rank to support distributed"
|
||||
use_loss_scale: "Whether use dynamic loss scale, default is True."
|
|
@ -14,7 +14,7 @@
|
|||
# ============================================================================
|
||||
"""Face detection eval."""
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from mindspore import context
|
||||
|
@ -24,50 +24,104 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
|||
from mindspore.common import dtype as mstype
|
||||
import mindspore.dataset as de
|
||||
|
||||
|
||||
|
||||
|
||||
from src.data_preprocess import SingleScaleTrans
|
||||
from src.config import config
|
||||
from src.FaceDetection.yolov3 import HwYolov3 as backbone_HwYolov3
|
||||
from src.FaceDetection import voc_wrapper
|
||||
from src.network_define import BuildTestNetwork, get_bounding_boxes, tensor_to_brambox, \
|
||||
parse_gt_from_anno, parse_rets, calc_recall_precision_ap
|
||||
|
||||
from model_utils.config import config
|
||||
from model_utils.moxing_adapter import moxing_wrapper
|
||||
from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
|
||||
|
||||
|
||||
plt.switch_backend('agg')
|
||||
|
||||
def parse_args():
|
||||
'''parse_args'''
|
||||
parser = argparse.ArgumentParser('Yolov3 Face Detection')
|
||||
parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
|
||||
help="run platform, support Ascend and CPU.")
|
||||
parser.add_argument('--mindrecord_path', type=str, default='', help='dataset path, e.g. /home/data.mindrecord')
|
||||
parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
|
||||
parser.add_argument('--local_rank', type=int, default=0, help='current rank to support distributed')
|
||||
parser.add_argument('--world_size', type=int, default=1, help='current process number to support distributed')
|
||||
def load_pretrain(net, cfg):
|
||||
'''load pretrain model'''
|
||||
if os.path.isfile(cfg.pretrained):
|
||||
param_dict = load_checkpoint(cfg.pretrained)
|
||||
param_dict_new = {}
|
||||
for key, values in param_dict.items():
|
||||
if key.startswith('moments.'):
|
||||
continue
|
||||
elif key.startswith('network.'):
|
||||
param_dict_new[key[8:]] = values
|
||||
else:
|
||||
param_dict_new[key] = values
|
||||
load_param_into_net(net, param_dict_new)
|
||||
print('load model {} success'.format(cfg.pretrained))
|
||||
else:
|
||||
print('load model {} failed, please check the path of model, evaluating end'.format(cfg.pretrained))
|
||||
exit(0)
|
||||
|
||||
arg, _ = parser.parse_known_args()
|
||||
return net
|
||||
|
||||
return arg
|
||||
def modelarts_pre_process():
    '''Prepare the ModelArts environment before evaluation.

    When ``config.need_modelarts_dataset_unzip`` is set, the device whose id is
    a multiple of ``min(device_num, 8)`` extracts
    ``<config.data_path>/<config.modelarts_dataset_unzip_name>.zip`` into
    ``config.data_path`` and then creates a lock file; every device waits until
    that lock file exists. Finally ``config.result_path`` is redirected to
    ``<config.output_path>/results``.
    '''
    def unzip(zip_file, save_dir):
        '''Extract ``zip_file`` into ``save_dir`` unless the target folder already exists.'''
        import zipfile
        s_time = time.time()
        if not os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
            zip_isexist = zipfile.is_zipfile(zip_file)
            if zip_isexist:
                # The context manager closes the archive handle even when
                # extraction fails part-way through.
                with zipfile.ZipFile(zip_file, 'r') as fz:
                    names = fz.namelist()
                    data_num = len(names)
                    print("Extract Start...")
                    print("unzip file num: {}".format(data_num))
                    # Report progress roughly once per percent of the archive.
                    data_print = int(data_num / 100) if data_num > 100 else 1
                    for i, file in enumerate(names):
                        if i % data_print == 0:
                            print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
                        fz.extract(file, save_dir)
                print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60),
                                                     int(int(time.time() - s_time) % 60)))
                print("Extract Done.")
            else:
                print("This is not zip.")
        else:
            print("Zip has been extracted.")

    if config.need_modelarts_dataset_unzip:
        zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
        save_dir_1 = os.path.join(config.data_path)

        sync_lock = "/tmp/unzip_sync.lock"

        # Each server contains 8 devices at most.
        if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
            print("Zip file path: ", zip_file_1)
            print("Unzip file save dir: ", save_dir_1)
            unzip(zip_file_1, save_dir_1)
            print("===Finish extract data synchronization===")
            try:
                os.mknod(sync_lock)
            except IOError:
                # Best effort: failing to create the lock file is deliberately ignored here.
                pass

        # Every device polls until the lock file appears.
        while True:
            if os.path.exists(sync_lock):
                break
            time.sleep(1)

        print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))

    config.result_path = os.path.join(config.output_path, "results")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
|
||||
@moxing_wrapper(pre_process=modelarts_pre_process)
|
||||
def run_eval():
|
||||
'''run eval'''
|
||||
config.world_size = get_device_num()
|
||||
config.local_rank = get_rank_id()
|
||||
devid = get_device_id() if config.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
|
||||
print('=============yolov3 start evaluating==================')
|
||||
|
||||
# logger
|
||||
args.batch_size = config.batch_size
|
||||
args.input_shape = config.input_shape
|
||||
args.result_path = config.result_path
|
||||
args.conf_thresh = config.conf_thresh
|
||||
args.nms_thresh = config.nms_thresh
|
||||
|
||||
context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE, device_num=args.world_size,
|
||||
context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE, device_num=config.world_size,
|
||||
gradients_mean=True)
|
||||
mindrecord_path = args.mindrecord_path
|
||||
print('Loading data from {}'.format(mindrecord_path))
|
||||
|
||||
num_classes = config.num_classes
|
||||
if num_classes > 1:
|
||||
|
@ -84,34 +138,18 @@ if __name__ == "__main__":
|
|||
classes = {0: 'face'}
|
||||
|
||||
# dataloader
|
||||
ds = de.MindDataset(mindrecord_path + "0", columns_list=["image", "annotation", "image_name", "image_size"])
|
||||
print('Loading data from {}'.format(config.mindrecord_path))
|
||||
ds = de.MindDataset(config.mindrecord_path + "0", columns_list=["image", "annotation", "image_name", "image_size"])
|
||||
|
||||
single_scale_trans = SingleScaleTrans(resize=args.input_shape)
|
||||
|
||||
ds = ds.batch(args.batch_size, per_batch_map=single_scale_trans,
|
||||
single_scale_trans = SingleScaleTrans(resize=config.input_shape)
|
||||
ds = ds.batch(config.batch_size, per_batch_map=single_scale_trans,
|
||||
input_columns=["image", "annotation", "image_name", "image_size"], num_parallel_workers=8)
|
||||
|
||||
args.steps_per_epoch = ds.get_dataset_size()
|
||||
config.steps_per_epoch = ds.get_dataset_size()
|
||||
|
||||
# backbone
|
||||
network = backbone_HwYolov3(num_classes, num_anchors_list, args)
|
||||
|
||||
# load pretrain model
|
||||
if os.path.isfile(args.pretrained):
|
||||
param_dict = load_checkpoint(args.pretrained)
|
||||
param_dict_new = {}
|
||||
for key, values in param_dict.items():
|
||||
if key.startswith('moments.'):
|
||||
continue
|
||||
elif key.startswith('network.'):
|
||||
param_dict_new[key[8:]] = values
|
||||
else:
|
||||
param_dict_new[key] = values
|
||||
load_param_into_net(network, param_dict_new)
|
||||
print('load model {} success'.format(args.pretrained))
|
||||
else:
|
||||
print('load model {} failed, please check the path of model, evaluating end'.format(args.pretrained))
|
||||
exit(0)
|
||||
network = backbone_HwYolov3(num_classes, num_anchors_list, config)
|
||||
network = load_pretrain(network, config)
|
||||
|
||||
ds = ds.repeat(1)
|
||||
|
||||
|
@ -119,30 +157,25 @@ if __name__ == "__main__":
|
|||
img_size = {}
|
||||
img_anno = {}
|
||||
|
||||
model_name = args.pretrained.split('/')[-1].replace('.ckpt', '')
|
||||
result_path = os.path.join(args.result_path, model_name)
|
||||
model_name = config.pretrained.split('/')[-1].replace('.ckpt', '')
|
||||
result_path = os.path.join(config.result_path, model_name)
|
||||
if os.path.exists(result_path):
|
||||
pass
|
||||
if not os.path.isdir(result_path):
|
||||
os.makedirs(result_path, exist_ok=True)
|
||||
|
||||
# result file
|
||||
ret_files_set = {
|
||||
'face': os.path.join(result_path, 'comp4_det_test_face_rm5050.txt'),
|
||||
}
|
||||
ret_files_set = {'face': os.path.join(result_path, 'comp4_det_test_face_rm5050.txt'),}
|
||||
|
||||
test_net = BuildTestNetwork(network, reduction_0, reduction_1, reduction_2, anchors, anchors_mask, num_classes,
|
||||
args)
|
||||
config)
|
||||
|
||||
print('conf_thresh:', args.conf_thresh)
|
||||
print('conf_thresh:', config.conf_thresh)
|
||||
|
||||
eval_times = 0
|
||||
|
||||
for data in ds.create_tuple_iterator(output_numpy=True):
|
||||
batch_images = data[0]
|
||||
batch_labels = data[1]
|
||||
batch_image_name = data[2]
|
||||
batch_image_size = data[3]
|
||||
batch_images, batch_labels, batch_image_name, batch_image_size = data[0:4]
|
||||
eval_times += 1
|
||||
|
||||
img_tensor = Tensor(batch_images, mstype.float32)
|
||||
|
@ -153,11 +186,11 @@ if __name__ == "__main__":
|
|||
coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2, cls_scores_2 = test_net(img_tensor)
|
||||
|
||||
boxes_0, boxes_1, boxes_2 = get_bounding_boxes(coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2,
|
||||
cls_scores_2, args.conf_thresh, args.input_shape,
|
||||
cls_scores_2, config.conf_thresh, config.input_shape,
|
||||
num_classes)
|
||||
|
||||
converted_boxes_0, converted_boxes_1, converted_boxes_2 = tensor_to_brambox(boxes_0, boxes_1, boxes_2,
|
||||
args.input_shape, labels)
|
||||
config.input_shape, labels)
|
||||
|
||||
tdets.append(converted_boxes_0)
|
||||
tdets.append(converted_boxes_1)
|
||||
|
@ -175,11 +208,11 @@ if __name__ == "__main__":
|
|||
img_anno.update({batch_image_name[k].decode('UTF-8'): v for k, v in enumerate(batch_labels)})
|
||||
|
||||
print('eval times:', eval_times)
|
||||
print('batch size: ', args.batch_size)
|
||||
print('batch size: ', config.batch_size)
|
||||
|
||||
netw, neth = args.input_shape
|
||||
netw, neth = config.input_shape
|
||||
reorg_dets = voc_wrapper.reorg_detection(det, netw, neth, img_size)
|
||||
voc_wrapper.gen_results(reorg_dets, result_path, img_size, args.nms_thresh)
|
||||
voc_wrapper.gen_results(reorg_dets, result_path, img_size, config.nms_thresh)
|
||||
|
||||
# compute mAP
|
||||
ground_truth = parse_gt_from_anno(img_anno, classes)
|
||||
|
@ -208,3 +241,6 @@ if __name__ == "__main__":
|
|||
plt.savefig(ap_save_path)
|
||||
|
||||
print('=============yolov3 evaluating finished==================')
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_eval()
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
# ============================================================================
|
||||
"""Convert ckpt to air."""
|
||||
import os
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
from mindspore import context
|
||||
|
@ -22,22 +21,22 @@ from mindspore import Tensor
|
|||
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
|
||||
|
||||
from src.FaceDetection.yolov3 import HwYolov3 as backbone_HwYolov3
|
||||
from src.config import config
|
||||
from model_utils.config import config
|
||||
|
||||
def save_air(args):
|
||||
def save_air():
|
||||
'''save air'''
|
||||
print('============= yolov3 start save air ==================')
|
||||
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
|
||||
devid = int(os.getenv('DEVICE_ID', '0')) if config.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
|
||||
|
||||
num_classes = config.num_classes
|
||||
anchors_mask = config.anchors_mask
|
||||
num_anchors_list = [len(x) for x in anchors_mask]
|
||||
|
||||
network = backbone_HwYolov3(num_classes, num_anchors_list, args)
|
||||
network = backbone_HwYolov3(num_classes, num_anchors_list, config)
|
||||
|
||||
if os.path.isfile(args.pretrained):
|
||||
param_dict = load_checkpoint(args.pretrained)
|
||||
if os.path.isfile(config.pretrained):
|
||||
param_dict = load_checkpoint(config.pretrained)
|
||||
param_dict_new = {}
|
||||
for key, values in param_dict.items():
|
||||
if key.startswith('moments.'):
|
||||
|
@ -47,23 +46,16 @@ def save_air(args):
|
|||
else:
|
||||
param_dict_new[key] = values
|
||||
load_param_into_net(network, param_dict_new)
|
||||
print('load model {} success'.format(args.pretrained))
|
||||
print('load model {} success'.format(config.pretrained))
|
||||
|
||||
input_data = np.random.uniform(low=0, high=1.0, size=(args.batch_size, 3, 448, 768)).astype(np.float32)
|
||||
input_data = np.random.uniform(low=0, high=1.0, size=(config.batch_size, 3, 448, 768)).astype(np.float32)
|
||||
|
||||
tensor_input_data = Tensor(input_data)
|
||||
export(network, tensor_input_data,
|
||||
file_name=args.pretrained.replace('.ckpt', '_' + str(args.batch_size) + 'b.air'), file_format='AIR')
|
||||
file_name=config.pretrained.replace('.ckpt', '_' + str(config.batch_size) + 'b.air'), file_format='AIR')
|
||||
|
||||
print("export model success.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Convert ckpt to air')
|
||||
parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
|
||||
help="run platform, support Ascend and CPU.")
|
||||
parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
|
||||
parser.add_argument('--batch_size', type=int, default=8, help='batch size')
|
||||
|
||||
arg = parser.parse_args()
|
||||
save_air(arg)
|
||||
save_air()
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Parse arguments"""
|
||||
|
||||
import os
|
||||
import ast
|
||||
import argparse
|
||||
from pprint import pformat
|
||||
import yaml
|
||||
|
||||
class Config:
    """
    Attribute-style namespace built from a configuration dictionary.

    Each key becomes an attribute. Dictionary values are wrapped in ``Config``
    recursively; list/tuple values become lists whose dictionary elements are
    wrapped as well.
    """
    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                wrapped = [Config(elem) if isinstance(elem, dict) else elem for elem in value]
            elif isinstance(value, dict):
                wrapped = Config(value)
            else:
                wrapped = value
            setattr(self, key, wrapped)

    def __str__(self):
        # Pretty-print the attribute dictionary.
        return pformat(vars(self))

    def __repr__(self):
        return str(self)
|
||||
|
||||
|
||||
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    Every entry of ``cfg`` that is neither a list nor a dict becomes a
    ``--<name>`` option whose default is the yaml value. List and dict entries
    are not exposed on the command line.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description.
        choices: Allowed values per option name.
        cfg_path: Path to the default yaml config.

    Returns:
        The namespace produced by ``parse_args()`` on the process command line.
    """
    parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item in cfg:
        value = cfg[item]
        if isinstance(value, (list, dict)):
            continue
        help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
        choice = choices[item] if item in choices else None
        if isinstance(value, bool):
            # bool("False") is True, so booleans are parsed as Python literals instead.
            arg_type = ast.literal_eval
        elif value is None:
            # A yaml null default has no usable type (NoneType cannot convert a
            # string), so accept the command line value as plain text.
            arg_type = str
        else:
            arg_type = type(value)
        parser.add_argument("--" + item, type=arg_type, default=value, choices=choice,
                            help=help_description)
    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    The file may contain up to three yaml documents, in this order: the
    configuration, the help descriptions and the allowed choices.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        Tuple ``(cfg, cfg_helper, cfg_choices)``. ``cfg_helper`` and
        ``cfg_choices`` are empty dicts when their document is absent.

    Raises:
        ValueError: If the content is not valid yaml, or the file does not
            contain between one and three documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            # load_all is lazy: materialise the documents inside the try block
            # so that yaml syntax errors are caught here.
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # Keep ValueError for callers, but preserve the original cause.
            raise ValueError("Failed to parse yaml") from err

    if len(cfgs) == 1:
        cfg = cfgs[0]
        cfg_helper = {}
        cfg_choices = {}
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
        cfg_choices = {}
    elif len(cfgs) == 3:
        cfg, cfg_helper, cfg_choices = cfgs
    else:
        raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml")
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices
|
||||
|
||||
|
||||
def merge(args, cfg):
    """
    Overlay command line arguments on the base configuration.

    Args:
        args: Command line arguments.
        cfg: Base configuration; it is updated in place.

    Returns:
        The same ``cfg`` object, with every parsed argument written into it.
    """
    for name, value in vars(args).items():
        cfg[name] = value
    return cfg
|
||||
|
||||
|
||||
def get_config():
    """
    Build the final configuration from the yaml file and the cli arguments.

    ``--config_path`` is read first (defaulting to ``../default_config.yaml``
    relative to this file), the yaml it points to supplies the defaults, and
    the remaining command line options override them.

    Returns:
        A ``Config`` holding the merged settings.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    default_yaml = os.path.join(here, "../default_config.yaml")

    parser = argparse.ArgumentParser(description="default name", add_help=False)
    parser.add_argument("--config_path", type=str, default=default_yaml,
                        help="Config file path")
    # Only --config_path is known at this point; ignore everything else.
    known_args, _ = parser.parse_known_args()
    yaml_path = known_args.config_path

    default, helper, choices = parse_yaml(yaml_path)
    cli_args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=yaml_path)
    return Config(merge(cli_args, default))
|
||||
|
||||
# Module-level configuration object, built once when this module is imported.
config = get_config()
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Device adapter for ModelArts"""
|
||||
|
||||
from .config import config
|
||||
|
||||
# Re-export the device helpers from the adapter that matches the run environment.
if not config.enable_modelarts:
    from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
    from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

__all__ = [
    "get_device_id",
    "get_device_num",
    "get_rank_id",
    "get_job_id",
]
|
|
@ -0,0 +1,36 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Local adapter"""
|
||||
|
||||
import os
|
||||
|
||||
def get_device_id():
    """Return the ``DEVICE_ID`` environment variable as an int (0 when unset)."""
    return int(os.environ.get('DEVICE_ID', '0'))
|
||||
|
||||
|
||||
def get_device_num():
    """Return the ``RANK_SIZE`` environment variable as an int (1 when unset)."""
    return int(os.environ.get('RANK_SIZE', '1'))
|
||||
|
||||
|
||||
def get_rank_id():
    """Return the ``RANK_ID`` environment variable as an int (0 when unset)."""
    return int(os.environ.get('RANK_ID', '0'))
|
||||
|
||||
|
||||
def get_job_id():
    """Return the fixed job name used for runs outside ModelArts."""
    return "Local Job"
|
|
@ -0,0 +1,116 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Moxing adapter for ModelArts"""
|
||||
|
||||
import os
|
||||
import functools
|
||||
from mindspore import context
|
||||
from .config import config
|
||||
|
||||
_global_sync_count = 0
|
||||
|
||||
def get_device_id():
    """Return the ``DEVICE_ID`` environment variable as an int (0 when unset)."""
    return int(os.environ.get('DEVICE_ID', '0'))
|
||||
|
||||
|
||||
def get_device_num():
    """Return the ``RANK_SIZE`` environment variable as an int (1 when unset)."""
    return int(os.environ.get('RANK_SIZE', '1'))
|
||||
|
||||
|
||||
def get_rank_id():
    """Return the ``RANK_ID`` environment variable as an int (0 when unset)."""
    return int(os.environ.get('RANK_ID', '0'))
|
||||
|
||||
|
||||
def get_job_id():
    """
    Return the job identifier from the ``JOB_ID`` environment variable.

    Returns:
        The value of ``JOB_ID``, or ``"default"`` when the variable is unset
        or empty.
    """
    # os.getenv returns None for a missing variable; treat that like an empty value.
    return os.getenv('JOB_ID') or "default"
|
||||
|
||||
def sync_data(from_path, to_path):
    """
    Copy ``from_path`` to ``to_path`` with ``mox.file.copy_parallel``.

    Download data from remote obs to local directory if the first url is remote url and the second one is local path
    Upload data from local directory to remote obs in contrast.

    Each call uses its own lock file, numbered by a module-level counter. The
    process whose device id is a multiple of ``min(device_num, 8)`` performs
    the copy and then creates the lock file; every process then polls once per
    second until the lock file exists.

    NOTE(review): a failure of ``os.mknod`` is ignored, in which case the
    polling loop only ends if something else creates the lock file.
    """
    import moxing as mox
    import time
    global _global_sync_count
    lock_file = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains at most 8 devices.
    is_copier = get_device_id() % min(get_device_num(), 8) == 0
    if is_copier and not os.path.exists(lock_file):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            os.mknod(lock_file)
        except IOError:
            pass
        print("===save flag===")

    # Wait for the copying process to publish the lock file.
    while not os.path.exists(lock_file):
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))
|
||||
|
||||
|
||||
def moxing_wrapper(pre_process=None, post_process=None):
|
||||
"""
|
||||
Moxing wrapper to download dataset and upload outputs.
Returns a decorator. The decorated function calls sync_data for the
data_url, checkpoint_url and train_url settings before run_func, and
copies config.output_path to config.train_url afterwards.
pre_process and post_process, when given, are called without arguments
before and after run_func respectively.
The value returned by run_func is not returned by the decorated function.
NOTE(review): indentation is not preserved in this view; confirm which
statements sit under the config.enable_modelarts checks.
|
||||
"""
|
||||
def wrapper(run_func):
|
||||
@functools.wraps(run_func)
|
||||
def wrapped_func(*args, **kwargs):
|
||||
# Download data from data_url
|
||||
if config.enable_modelarts:
|
||||
if config.data_url:
|
||||
sync_data(config.data_url, config.data_path)
|
||||
print("Dataset downloaded: ", os.listdir(config.data_path))
|
||||
if config.checkpoint_url:
|
||||
sync_data(config.checkpoint_url, config.load_path)
|
||||
print("Preload downloaded: ", os.listdir(config.load_path))
|
||||
if config.train_url:
|
||||
sync_data(config.train_url, config.output_path)
|
||||
print("Workspace downloaded: ", os.listdir(config.output_path))
|
||||
|
||||
# Graphs are saved under a per-rank sub-directory of the output path.
context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
|
||||
# Record the device count and device id on the shared config object.
config.device_num = get_device_num()
|
||||
config.device_id = get_device_id()
|
||||
if not os.path.exists(config.output_path):
|
||||
os.makedirs(config.output_path)
|
||||
|
||||
if pre_process:
|
||||
pre_process()
|
||||
|
||||
# Run the main function
# Its return value is discarded.
|
||||
run_func(*args, **kwargs)
|
||||
|
||||
# Upload data to train_url
|
||||
if config.enable_modelarts:
|
||||
if post_process:
|
||||
post_process()
|
||||
|
||||
if config.train_url:
|
||||
print("Start to copy output directory")
|
||||
sync_data(config.output_path, config.train_url)
|
||||
return wrapped_func
|
||||
return wrapper
|
|
@ -60,10 +60,10 @@ echo $PRETRAINED_BACKBONE
|
|||
|
||||
echo 'start evaluating'
|
||||
export RANK_ID=0
|
||||
rm -rf ${current_exec_path}/device$USE_DEVICE_ID
|
||||
rm -rf ${current_exec_path}/eval
|
||||
echo 'start device '$USE_DEVICE_ID
|
||||
mkdir ${current_exec_path}/device$USE_DEVICE_ID
|
||||
cd ${current_exec_path}/device$USE_DEVICE_ID || exit
|
||||
mkdir ${current_exec_path}/eval
|
||||
cd ${current_exec_path}/eval || exit
|
||||
dev=`expr $USE_DEVICE_ID + 0`
|
||||
export DEVICE_ID=$dev
|
||||
python ${dirname_path}/${SCRIPT_NAME} \
|
||||
|
|
|
@ -73,7 +73,6 @@ dev=`expr $USE_DEVICE_ID + 0`
|
|||
export DEVICE_ID=$dev
|
||||
python ${dirname_path}/${SCRIPT_NAME} \
|
||||
--run_platform=$PLATFORM \
|
||||
--world_size=1 \
|
||||
--mindrecord_path=$MINDRECORD_FILE \
|
||||
--pretrained=$PRETRAINED_BACKBONE > train.log 2>&1 &
|
||||
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ===========================================================================
|
||||
"""Network config setting, will be used in train.py and eval.py"""
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
# Static settings for train.py and eval.py, exposed through attribute access.
config = ed(dict(
    # batch size, learning-rate schedule and optimizer settings
    batch_size=64,
    warmup_lr=0.0004,
    lr_rates=[0.002, 0.004, 0.002, 0.0008, 0.0004, 0.0002, 0.00008, 0.00004, 0.000004],
    lr_steps=[1000, 10000, 40000, 60000, 80000, 100000, 130000, 160000, 190000],
    gamma=0.5,
    weight_decay=0.0005,
    momentum=0.5,
    max_epoch=2500,

    # logging interval, checkpoint and result locations
    log_interval=10,
    ckpt_path='../../output',
    ckpt_interval=1000,
    result_path='../../results',

    # input shape and augmentation settings
    input_shape=[768, 448],
    jitter=0.3,
    flip=0.5,
    hue=0.1,
    sat=1.5,
    val=1.5,

    # classes and anchors: 12 anchor pairs, referenced by index in anchors_mask
    num_classes=1,
    anchors=[
        [3, 4], [5, 6], [7, 9], [10, 13],
        [15, 19], [21, 26], [28, 36], [38, 49],
        [54, 71], [77, 102], [122, 162], [207, 268],
    ],
    anchors_mask=[(8, 9, 10, 11), (4, 5, 6, 7), (0, 1, 2, 3)],

    # thresholds
    conf_thresh=0.1,
    nms_thresh=0.45,
))
|
|
@ -19,7 +19,7 @@ import mindspore.dataset.vision.py_transforms as P
|
|||
import mindspore.dataset as de
|
||||
|
||||
from src.transforms import RandomCropLetterbox, RandomFlip, HSVShift, ResizeLetterbox
|
||||
from src.config import config
|
||||
from model_utils.config import config
|
||||
|
||||
|
||||
class SingleScaleTrans:
|
||||
|
|
|
@ -14,16 +14,14 @@
|
|||
# ============================================================================
|
||||
"""Face detection train."""
|
||||
import os
|
||||
import ast
|
||||
import time
|
||||
import datetime
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
from mindspore import context
|
||||
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
|
||||
from mindspore import Tensor
|
||||
from mindspore.communication.management import init, get_rank, get_group_size
|
||||
from mindspore.communication.management import init
|
||||
from mindspore.context import ParallelMode
|
||||
from mindspore.train.callback import ModelCheckpoint, RunContext
|
||||
from mindspore.train.callback import _InternalCallbackParam, CheckpointConfig
|
||||
|
@ -31,75 +29,104 @@ from mindspore.common import dtype as mstype
|
|||
|
||||
from src.logging import get_logger
|
||||
from src.data_preprocess import create_dataset
|
||||
from src.config import config
|
||||
from src.network_define import define_network
|
||||
|
||||
def parse_args():
    '''Build the training arguments from the command line and the static config.

    Command line options are parsed first (unknown options are ignored), then
    the settings of ``config`` are copied onto the namespace and the derived
    values (lr_steps, weight_decay, outputs_dir, num_anchors_list) are added.
    '''
    cli = argparse.ArgumentParser('Yolov3 Face Detection')
    cli.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
                     help="run platform, support Ascend and CPU.")
    cli.add_argument('--mindrecord_path', type=str, default='', help='dataset path, e.g. /home/data.mindrecord')
    cli.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
    cli.add_argument('--local_rank', type=int, default=0, help='current rank to support distributed')
    cli.add_argument('--world_size', type=int, default=8, help='current process number to support distributed')
    cli.add_argument("--use_loss_scale", type=ast.literal_eval, default=True,
                     help="Whether use dynamic loss scale, default is True.")
    args, _ = cli.parse_known_args()

    # Settings taken over unchanged from the static config.
    copied = ('batch_size', 'warmup_lr', 'lr_rates', 'gamma', 'momentum', 'max_epoch',
              'log_interval', 'ckpt_path', 'ckpt_interval', 'num_classes', 'anchors', 'anchors_mask')
    for name in copied:
        setattr(args, name, getattr(config, name))

    # On CPU the run is forced to a single process without loss scaling.
    if args.run_platform == "CPU":
        args.use_loss_scale = False
        args.world_size = 1
        args.local_rank = 0

    # The configured lr steps are used as-is for 8 processes and rescaled otherwise.
    if args.world_size == 8:
        args.lr_steps = config.lr_steps
    else:
        args.lr_steps = [i * 8 // args.world_size for i in config.lr_steps]

    # Weight decay is switched off when running with a single process.
    if args.world_size == 1:
        args.weight_decay = 0.
    else:
        args.weight_decay = config.weight_decay

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')
    args.outputs_dir = os.path.join(args.ckpt_path, timestamp)
    print('args.outputs_dir', args.outputs_dir)

    args.num_anchors_list = [len(x) for x in args.anchors_mask]
    return args
|
||||
from model_utils.config import config
|
||||
from model_utils.moxing_adapter import moxing_wrapper
|
||||
from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
|
||||
|
||||
|
||||
def train(args):
|
||||
def modelarts_pre_process():
    '''modelarts pre process function.

    When ``config.need_modelarts_dataset_unzip`` is set, the archive
    ``<data_path>/<modelarts_dataset_unzip_name>.zip`` is extracted into
    ``config.data_path`` by the process whose device id is a multiple of
    ``min(device_num, 8)``; every process then waits for the lock file
    ``/tmp/unzip_sync.lock``. Finally ``config.ckpt_path`` is pointed at
    ``<output_path>/output``.
    '''
    def unzip(zip_file, save_dir):
        """Extract ``zip_file`` into ``save_dir`` unless the target directory already exists."""
        import zipfile
        s_time = time.time()
        if os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
            print("Zip has been extracted.")
            return
        if not zipfile.is_zipfile(zip_file):
            print("This is not zip.")
            return
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(zip_file, 'r') as fz:
            names = fz.namelist()
            data_num = len(names)
            print("Extract Start...")
            print("unzip file num: {}".format(data_num))
            # Report progress roughly every 1% of the entries.
            data_print = int(data_num / 100) if data_num > 100 else 1
            for i, name in enumerate(names):
                if i % data_print == 0:
                    print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
                fz.extract(name, save_dir)
        print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60),
                                             int(int(time.time() - s_time) % 60)))
        print("Extract Done.")

    if config.need_modelarts_dataset_unzip:
        zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
        save_dir_1 = os.path.join(config.data_path)

        sync_lock = "/tmp/unzip_sync.lock"

        # Each server contains at most 8 devices.
        if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
            print("Zip file path: ", zip_file_1)
            print("Unzip file save dir: ", save_dir_1)
            unzip(zip_file_1, save_dir_1)
            print("===Finish extract data synchronization===")
            try:
                os.mknod(sync_lock)
            except IOError:
                pass

        # Wait until the extracting process has created the lock file.
        while True:
            if os.path.exists(sync_lock):
                break
            time.sleep(1)

        print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))

    config.ckpt_path = os.path.join(config.output_path, "output")
|
||||
|
||||
|
||||
@moxing_wrapper(pre_process=modelarts_pre_process)
|
||||
def run_train():
|
||||
'''train'''
|
||||
config.world_size = get_device_num()
|
||||
config.local_rank = get_rank_id()
|
||||
if config.run_platform == "CPU":
|
||||
config.use_loss_scale = False
|
||||
config.world_size = 1
|
||||
config.local_rank = 0
|
||||
if config.world_size != 8:
|
||||
config.lr_steps = [i * 8 // config.world_size for i in config.lr_steps]
|
||||
config.weight_decay = config.weight_decay if config.world_size != 1 else 0.
|
||||
config.outputs_dir = os.path.join(config.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
|
||||
print('config.outputs_dir', config.outputs_dir)
|
||||
config.num_anchors_list = [len(x) for x in config.anchors_mask]
|
||||
print('=============yolov3 start trainging==================')
|
||||
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
|
||||
devid = int(os.getenv('DEVICE_ID', '0')) if config.run_platform != 'CPU' else 0
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
|
||||
# init distributed
|
||||
if args.world_size != 1:
|
||||
if config.world_size != 1:
|
||||
init()
|
||||
args.local_rank = get_rank()
|
||||
args.world_size = get_group_size()
|
||||
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=args.world_size,
|
||||
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=config.world_size,
|
||||
gradients_mean=True)
|
||||
args.logger = get_logger(args.outputs_dir, args.local_rank)
|
||||
config.logger = get_logger(config.outputs_dir, config.local_rank)
|
||||
|
||||
# dataloader
|
||||
ds = create_dataset(args)
|
||||
ds = create_dataset(config)
|
||||
|
||||
args.logger.important_info('start create network')
|
||||
config.logger.important_info('start create network')
|
||||
create_network_start = time.time()
|
||||
|
||||
train_net = define_network(args)
|
||||
train_net = define_network(config)
|
||||
|
||||
# checkpoint
|
||||
ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
|
||||
train_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
|
||||
ckpt_cb = ModelCheckpoint(config=train_config, directory=args.outputs_dir, prefix='{}'.format(args.local_rank))
|
||||
ckpt_max_num = config.max_epoch * config.steps_per_epoch // config.ckpt_interval
|
||||
train_config = CheckpointConfig(save_checkpoint_steps=config.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
|
||||
ckpt_cb = ModelCheckpoint(config=train_config, directory=config.outputs_dir, prefix='{}'.format(config.local_rank))
|
||||
cb_params = _InternalCallbackParam()
|
||||
cb_params.train_network = train_net
|
||||
cb_params.epoch_num = ckpt_max_num
|
||||
|
@ -112,7 +139,7 @@ def train(args):
|
|||
t_epoch = time.time()
|
||||
old_progress = -1
|
||||
i = 0
|
||||
if args.use_loss_scale:
|
||||
if config.use_loss_scale:
|
||||
scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)
|
||||
for data in ds.create_tuple_iterator(output_numpy=True):
|
||||
batch_images = data[0]
|
||||
|
@ -120,7 +147,7 @@ def train(args):
|
|||
input_list = [Tensor(batch_images, mstype.float32)]
|
||||
for idx in range(2, 26):
|
||||
input_list.append(Tensor(data[idx], mstype.float32))
|
||||
if args.use_loss_scale:
|
||||
if config.use_loss_scale:
|
||||
scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
|
||||
loss0, overflow, _ = train_net(*input_list, scaling_sens)
|
||||
overflow = np.all(overflow.asnumpy())
|
||||
|
@ -128,50 +155,49 @@ def train(args):
|
|||
scale_manager.update_loss_scale(overflow)
|
||||
else:
|
||||
scale_manager.update_loss_scale(False)
|
||||
args.logger.info('rank[{}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
|
||||
'batch_labels:{}'.format(args.local_rank, i, loss0, overflow, scaling_sens, args.lr[i],
|
||||
batch_images.shape, batch_labels.shape))
|
||||
config.logger.info('rank[{:d}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
|
||||
'batch_labels:{}'.format(config.local_rank, i, loss0, overflow, scaling_sens,
|
||||
config.lr[i], batch_images.shape, batch_labels.shape))
|
||||
else:
|
||||
loss0 = train_net(*input_list)
|
||||
args.logger.info('rank[{}], iter[{}], loss[{}], lr:{}, batch_images:{}, '
|
||||
'batch_labels:{}'.format(args.local_rank, i, loss0, args.lr[i],
|
||||
batch_images.shape, batch_labels.shape))
|
||||
config.logger.info('rank[{:d}], iter[{}], loss[{}], lr:{}, batch_images:{}, '
|
||||
'batch_labels:{}'.format(config.local_rank, i, loss0,
|
||||
config.lr[i], batch_images.shape, batch_labels.shape))
|
||||
# save ckpt
|
||||
cb_params.cur_step_num = i + 1 # current step number
|
||||
cb_params.batch_num = i + 2
|
||||
if args.local_rank == 0:
|
||||
if config.local_rank == 0:
|
||||
ckpt_cb.step_end(run_context)
|
||||
|
||||
# save Log
|
||||
if i == 0:
|
||||
time_for_graph_compile = time.time() - create_network_start
|
||||
args.logger.important_info('Yolov3, graph compile time={:.2f}s'.format(time_for_graph_compile))
|
||||
config.logger.important_info('Yolov3, graph compile time={:.2f}s'.format(time_for_graph_compile))
|
||||
|
||||
if i % args.steps_per_epoch == 0:
|
||||
if i % config.steps_per_epoch == 0:
|
||||
cb_params.cur_epoch_num += 1
|
||||
|
||||
if i % args.log_interval == 0 and args.local_rank == 0:
|
||||
if i % config.log_interval == 0 and config.local_rank == 0:
|
||||
time_used = time.time() - t_end
|
||||
epoch = int(i / args.steps_per_epoch)
|
||||
fps = args.batch_size * (i - old_progress) * args.world_size / time_used
|
||||
args.logger.info('epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(epoch, i, loss0, fps))
|
||||
epoch = int(i / config.steps_per_epoch)
|
||||
fps = config.batch_size * (i - old_progress) * config.world_size / time_used
|
||||
config.logger.info('epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(epoch, i, loss0, fps))
|
||||
t_end = time.time()
|
||||
old_progress = i
|
||||
|
||||
if i % args.steps_per_epoch == 0 and args.local_rank == 0:
|
||||
if i % config.steps_per_epoch == 0 and config.local_rank == 0:
|
||||
epoch_time_used = time.time() - t_epoch
|
||||
epoch = int(i / args.steps_per_epoch)
|
||||
fps = args.batch_size * args.world_size * args.steps_per_epoch / epoch_time_used
|
||||
args.logger.info('=================================================')
|
||||
args.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(epoch, i, fps))
|
||||
args.logger.info('=================================================')
|
||||
epoch = int(i / config.steps_per_epoch)
|
||||
fps = config.batch_size * config.world_size * config.steps_per_epoch / epoch_time_used
|
||||
config.logger.info('=================================================')
|
||||
config.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(epoch, i, fps))
|
||||
config.logger.info('=================================================')
|
||||
t_epoch = time.time()
|
||||
|
||||
i = i + 1
|
||||
|
||||
args.logger.info('=============yolov3 training finished==================')
|
||||
config.logger.info('=============yolov3 training finished==================')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
arg = parse_args()
|
||||
train(arg)
|
||||
run_train()
|
||||
|
|
|
@ -28,9 +28,9 @@ def test_FaceDetection_WIDER():
|
|||
model_name = "FaceDetection"
|
||||
utils.copy_files(model_path, cur_path, model_name)
|
||||
cur_model_path = os.path.join(cur_path, model_name)
|
||||
old_list = ["'max_epoch': 2500,"]
|
||||
new_list = ["'max_epoch': 1,"]
|
||||
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "src/config.py"))
|
||||
old_list = ["max_epoch: 2500"]
|
||||
new_list = ["max_epoch: 1"]
|
||||
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "default_config.yaml"))
|
||||
dataset_path = os.path.join(utils.data_root, "widerface/mindrecord_train/data.mindrecord")
|
||||
device_id = int(os.environ.get("DEVICE_ID", "0"))
|
||||
model_train_command = "cd {}/scripts;sh run_standalone_train.sh Ascend {} {}"\
|
||||
|
|
Loading…
Reference in New Issue