modify FaceDetection net for cloud

This commit is contained in:
zhanghuiyao 2021-05-22 10:02:09 +08:00
parent 659fb1dbbb
commit b62a3f9116
19 changed files with 702 additions and 260 deletions

View File

@ -84,7 +84,7 @@ Dataset used: [COCO2014](https://cocodataset.org/#download)
- The pretrained backbone can be produced with src/convert_weight.py, which converts darknet53.conv.74 to a MindSpore ckpt.
```
python convert_weight.py --input_file ./darknet53.conv.74
python src/convert_weight.py --input_file ./darknet53.conv.74
```
darknet53.conv.74 can be obtained from [download](https://pjreddie.com/media/files/darknet53.conv.74).
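Since the conversion script now reads its options through model_utils/config.py, the output checkpoint path can also be overridden on the command line. A minimal invocation sketch, assuming the `input_file`/`output_file` options added to default_config.yaml in this commit:
```
python src/convert_weight.py --input_file ./darknet53.conv.74 --output_file ./backbone_darknet53.ckpt
```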

View File

@ -88,7 +88,7 @@ YOLOv3 uses DarkNet53 for feature extraction, a hybrid of Darknet-19 from YOLOv2 and residual
- Use the convert_weight.py script under the src directory to convert darknet53.conv.74 into MindSpore ckpt format.
```command
python convert_weight.py --input_file ./darknet53.conv.74
python src/convert_weight.py --input_file ./darknet53.conv.74
```
The darknet53.conv.74 file can be [downloaded](https://pjreddie.com/media/files/darknet53.conv.74) from the website.

View File

@ -75,6 +75,10 @@ file_name: "yolov3_darknet53"
file_format: "AIR" # ["AIR", "ONNX", "MINDIR"]
# convert weight option
input_file: "./darknet53.conv.74"
output_file: "./backbone_darknet53.ckpt"
# Other default config
hue: 0.1
saturation: 1.5
@ -165,4 +169,8 @@ batch_size: "batch size"
ckpt_file: "Checkpoint file path."
file_name: "output file name."
file_format: "file format choices in ['AIR', 'ONNX', 'MINDIR']"
device_target: "device target. choices in ['Ascend', 'GPU'] for train. choices in ['Ascend', 'GPU', 'CPU'] for export."
# convert weight option
input_file: "input file path."
output_file: "output file path."

View File

@ -14,12 +14,12 @@
# ============================================================================
"""Convert weight to mindspore ckpt."""
import os
import argparse
import numpy as np
from mindspore.train.serialization import save_checkpoint
from mindspore import Tensor
from src.yolo import YOLOV3DarkNet53
from model_utils.config import config
def load_weight(weights_file):
"""Loads pre-trained weights."""
@ -72,9 +72,4 @@ def convert(weights_file, output_file):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="yolov3 weight convert.")
parser.add_argument("--input_file", type=str, default="./darknet53.conv.74", help="input file path.")
parser.add_argument("--output_file", type=str, default="./backbone_darknet53.ckpt", help="output file path.")
args_opt = parser.parse_args()
convert(args_opt.input_file, args_opt.output_file)
convert(config.input_file, config.output_file)

View File

@ -83,10 +83,16 @@ We use about 13K images as training dataset and 3K as evaluating dataset in this
The entire code structure is as follows:
```python
```text
.
└─ Face Detection
├─ README.md
├─ model_utils
├─ __init__.py # init file
├─ config.py # Parse arguments
├─ device_adapter.py # Device adapter for ModelArts
├─ local_adapter.py # Local adapter
└─ moxing_adapter.py # Moxing adapter for ModelArts
├─ scripts
├─ run_standalone_train.sh # launch standalone training(1p) in ascend
├─ run_distribute_train.sh # launch distributed training(8p) in ascend
@ -98,7 +104,6 @@ The entire code structure is as following:
├─ yolo_loss.py # loss function
├─ yolo_postprocess.py # post process
└─ yolov3.py # network
├─ config.py # parameter configuration
├─ data_preprocess.py # preprocess
├─ logging.py # log function
├─ lrsche_factory.py # generate learning rate
@ -107,6 +112,7 @@ The entire code structure is as following:
├─ data_to_mindrecord_train.py # convert dataset to mindrecord for training
├─ data_to_mindrecord_train_append.py # add dataset to an existing mindrecord for training
└─ data_to_mindrecord_eval.py # convert dataset to mindrecord for evaluating
├─ default_config.yaml # default configurations
├─ train.py # training scripts
├─ eval.py # evaluation scripts
└─ export.py # export air model
@ -158,20 +164,84 @@ The entire code structure is as follows:
bash run_distribute_train.sh /home/train.mindrecord ./rank_table_8p.json /home/a.ckpt
```
*Distributed mode doesn't support running on CPU*. You will get the loss value of each step as follows in "./output/[TIME]/[TIME].log" or "./scripts/device0/train.log":
*Distributed mode doesn't support running on CPU*. You will get the loss value of each step as follows in "./scripts/device0/output/[TIME]/[TIME].log" or "./scripts/device0/train.log":
```python
rank[0], iter[0], loss[318555.8], overflow:False, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[1], loss[95394.28], overflow:True, loss_scale:1024.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[2], loss[81332.92], overflow:True, loss_scale:512.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[3], loss[27250.805], overflow:True, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
...
rank[0], iter[62496], loss[2218.6282], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[62497], loss[3788.5146], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[62498], loss[3427.5479], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
rank[0], iter[62499], loss[4294.194], overflow:False, loss_scale:256.0, lr:6.24999984211172e-06, batch_images:(64, 3, 448, 768), batch_labels:(64, 200, 6)
```
- Train on [ModelArts](https://support.huaweicloud.com/modelarts/)
```python
# Train 8p with Ascend
# (1) Perform a or b.
# a. Set "enable_modelarts=True" in default_config.yaml.
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" in default_config.yaml.
# (optional) Set "checkpoint_url='s3://dir_to_your_pretrain/'" in default_config.yaml.
# (optional) Set "pretrained='/cache/checkpoint_path/model.ckpt'" in default_config.yaml.
# Set any other parameters you need in default_config.yaml.
# b. Add "enable_modelarts=True" on the website UI interface.
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on the website UI interface.
# (optional) Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
# (optional) Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
# Add any other parameters on the website UI interface.
# (2) (optional) Upload or copy your pretrained model to the S3 bucket.
# (3) Upload a zip dataset to the S3 bucket. (You could also upload the original dataset, but that can be very slow.)
# (4) Set the code directory to "/path/FaceDetection" on the website UI interface.
# (5) Set the startup file to "train.py" on the website UI interface.
# (6) Set the "Dataset path", "Output file path" and "Job log path" to your own paths on the website UI interface.
# (7) Create your job.
#
# Train 1p with Ascend
# (1) Perform a or b.
# a. Set "enable_modelarts=True" in default_config.yaml.
# Set "run_platform='Ascend'" in default_config.yaml.
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" in default_config.yaml.
# (optional) Set "checkpoint_url='s3://dir_to_your_pretrain/'" in default_config.yaml.
# (optional) Set "pretrained='/cache/checkpoint_path/model.ckpt'" in default_config.yaml.
# Set any other parameters you need in default_config.yaml.
# b. Add "enable_modelarts=True" on the website UI interface.
# Add "run_platform='Ascend'" on the website UI interface.
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord'" on the website UI interface.
# (optional) Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
# (optional) Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
# Add any other parameters on the website UI interface.
# (2) (optional) Upload or copy your pretrained model to the S3 bucket.
# (3) Upload a zip dataset to the S3 bucket. (You could also upload the original dataset, but that can be very slow.)
# (4) Set the code directory to "/path/FaceDetection" on the website UI interface.
# (5) Set the startup file to "train.py" on the website UI interface.
# (6) Set the "Dataset path", "Output file path" and "Job log path" to your own paths on the website UI interface.
# (7) Create your job.
#
# Eval 1p with Ascend
# (1) Perform a or b.
# a. Set "enable_modelarts=True" in default_config.yaml.
# Set "run_platform='Ascend'" in default_config.yaml.
# Set "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_test/data.mindrecord'" in default_config.yaml.
# Set "checkpoint_url='s3://dir_to_your_pretrain/'" in default_config.yaml.
# Set "pretrained='/cache/checkpoint_path/model.ckpt'" in default_config.yaml.
# Set any other parameters you need in default_config.yaml.
# b. Add "enable_modelarts=True" on the website UI interface.
# Add "run_platform='Ascend'" on the website UI interface.
# Add "mindrecord_path='/cache/data/face_detect_dataset/mindrecord_test/data.mindrecord'" on the website UI interface.
# Add "checkpoint_url='s3://dir_to_your_pretrain/'" on the website UI interface.
# Add "pretrained='/cache/checkpoint_path/model.ckpt'" on the website UI interface.
# Add any other parameters on the website UI interface.
# (2) Upload or copy your pretrained model to the S3 bucket.
# (3) Upload a zip dataset to the S3 bucket. (You could also upload the original dataset, but that can be very slow.)
# (4) Set the code directory to "/path/FaceDetection" on the website UI interface.
# (5) Set the startup file to "eval.py" on the website UI interface.
# (6) Set the "Dataset path", "Output file path" and "Job log path" to your own paths on the website UI interface.
# (7) Create your job.
```
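For reference, the ModelArts-related keys these steps refer to map onto default_config.yaml roughly as follows; a hedged sketch with illustrative values (the s3:// bucket path is a placeholder):
```yaml
enable_modelarts: True
need_modelarts_dataset_unzip: True
modelarts_dataset_unzip_name: "face_detect_dataset"
run_platform: "Ascend"
mindrecord_path: "/cache/data/face_detect_dataset/mindrecord_train/data.mindrecord"
checkpoint_url: "s3://dir_to_your_pretrain/"     # optional, remote pretrained model
pretrained: "/cache/checkpoint_path/model.ckpt"  # optional, local path after sync
```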
### Evaluation
@ -214,7 +284,7 @@ bash run_export.sh [PLATFORM] [BATCH_SIZE] [USE_DEVICE_ID] [PRETRAINED_BACKBONE]
| Parameters | Face Detection |
| -------------------------- | ----------------------------------------------------------- |
| Model Version | V1 |
| Resource | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 |
| Uploaded Date | 09/30/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | 13K images |
@ -231,7 +301,7 @@ bash run_export.sh [PLATFORM] [BATCH_SIZE] [USE_DEVICE_ID] [PRETRAINED_BACKBONE]
| Parameters | Face Detection |
| ------------------- | --------------------------- |
| Model Version | V1 |
| Resource | Ascend 910; OS Euler2.8 |
| Uploaded Date | 09/30/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | 3K images |

View File

@ -0,0 +1,69 @@
# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# Url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# Path for local
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
need_modelarts_dataset_unzip: True
modelarts_dataset_unzip_name: "face_detect_dataset"
# ==============================================================================
# train options
run_platform: "Ascend" # choices in ("Ascend", "CPU")
mindrecord_path: ""
pretrained: ""
use_loss_scale: True
# default options
batch_size: 64
warmup_lr: 0.0004
lr_rates: [0.002, 0.004, 0.002, 0.0008, 0.0004, 0.0002, 0.00008, 0.00004, 0.000004]
lr_steps: [1000, 10000, 40000, 60000, 80000, 100000, 130000, 160000, 190000]
gamma: 0.5
weight_decay: 0.0005
momentum: 0.5
max_epoch: 2500
log_interval: 10
ckpt_path: "../../output"
ckpt_interval: 1000
result_path: "../../results"
input_shape: [768, 448]
jitter: 0.3
flip: 0.5
hue: 0.1
sat: 1.5
val: 1.5
num_classes: 1
anchors: [[3, 4],
[5, 6],
[7, 9],
[10, 13],
[15, 19],
[21, 26],
[28, 36],
[38, 49],
[54, 71],
[77, 102],
[122, 162],
[207, 268]]
anchors_mask: [[8, 9, 10, 11], [4, 5, 6, 7], [0, 1, 2, 3]]
conf_thresh: 0.1
nms_thresh: 0.45
---
# Help description for each configuration
# train options
run_platform: "run platform, support Ascend and CPU."
mindrecord_path: "dataset path, e.g. /home/data.mindrecord"
pretrained: "pretrained model to load"
local_rank: "current rank to support distributed"
use_loss_scale: "Whether use dynamic loss scale, default is True."
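Because model_utils/config.py registers every scalar key of this file as a command-line flag, the values above can also be overridden at launch time; list-valued keys such as `lr_steps` or `anchors` can only be changed in the yaml itself. A minimal sketch, assuming the defaults above:
```
python train.py --run_platform=Ascend --mindrecord_path=/home/data.mindrecord --pretrained=/home/a.ckpt --batch_size=64
```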

View File

@ -14,7 +14,7 @@
# ============================================================================
"""Face detection eval."""
import os
import argparse
import time
import matplotlib.pyplot as plt
from mindspore import context
@ -24,50 +24,104 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype
import mindspore.dataset as de
from src.data_preprocess import SingleScaleTrans
from src.config import config
from src.FaceDetection.yolov3 import HwYolov3 as backbone_HwYolov3
from src.FaceDetection import voc_wrapper
from src.network_define import BuildTestNetwork, get_bounding_boxes, tensor_to_brambox, \
parse_gt_from_anno, parse_rets, calc_recall_precision_ap
from model_utils.config import config
from model_utils.moxing_adapter import moxing_wrapper
from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
plt.switch_backend('agg')
def parse_args():
'''parse_args'''
parser = argparse.ArgumentParser('Yolov3 Face Detection')
parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
help="run platform, support Ascend and CPU.")
parser.add_argument('--mindrecord_path', type=str, default='', help='dataset path, e.g. /home/data.mindrecord')
parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
parser.add_argument('--local_rank', type=int, default=0, help='current rank to support distributed')
parser.add_argument('--world_size', type=int, default=1, help='current process number to support distributed')
def load_pretrain(net, cfg):
'''load pretrain model'''
if os.path.isfile(cfg.pretrained):
param_dict = load_checkpoint(cfg.pretrained)
param_dict_new = {}
for key, values in param_dict.items():
if key.startswith('moments.'):
continue
elif key.startswith('network.'):
param_dict_new[key[8:]] = values
else:
param_dict_new[key] = values
load_param_into_net(net, param_dict_new)
print('load model {} success'.format(cfg.pretrained))
else:
print('load model {} failed, please check the path of model, evaluating end'.format(cfg.pretrained))
exit(0)
arg, _ = parser.parse_known_args()
return net
return arg
def modelarts_pre_process():
'''modelarts pre process function.'''
def unzip(zip_file, save_dir):
import zipfile
s_time = time.time()
if not os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
zip_isexist = zipfile.is_zipfile(zip_file)
if zip_isexist:
fz = zipfile.ZipFile(zip_file, 'r')
data_num = len(fz.namelist())
print("Extract Start...")
print("unzip file num: {}".format(data_num))
data_print = int(data_num / 100) if data_num > 100 else 1
i = 0
for file in fz.namelist():
if i % data_print == 0:
print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
i += 1
fz.extract(file, save_dir)
print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60),
int(int(time.time() - s_time) % 60)))
print("Extract Done.")
else:
print("This is not zip.")
else:
print("Zip has been extracted.")
if config.need_modelarts_dataset_unzip:
zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
save_dir_1 = os.path.join(config.data_path)
sync_lock = "/tmp/unzip_sync.lock"
# Each server contains at most 8 devices.
if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
print("Zip file path: ", zip_file_1)
print("Unzip file save dir: ", save_dir_1)
unzip(zip_file_1, save_dir_1)
print("===Finish extract data synchronization===")
try:
os.mknod(sync_lock)
except IOError:
pass
while True:
if os.path.exists(sync_lock):
break
time.sleep(1)
print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))
config.result_path = os.path.join(config.output_path, "results")
if __name__ == "__main__":
args = parse_args()
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
@moxing_wrapper(pre_process=modelarts_pre_process)
def run_eval():
'''run eval'''
config.world_size = get_device_num()
config.local_rank = get_rank_id()
devid = get_device_id() if config.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
print('=============yolov3 start evaluating==================')
# logger
args.batch_size = config.batch_size
args.input_shape = config.input_shape
args.result_path = config.result_path
args.conf_thresh = config.conf_thresh
args.nms_thresh = config.nms_thresh
context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE, device_num=args.world_size,
context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE, device_num=config.world_size,
gradients_mean=True)
mindrecord_path = args.mindrecord_path
print('Loading data from {}'.format(mindrecord_path))
num_classes = config.num_classes
if num_classes > 1:
@ -84,34 +138,18 @@ if __name__ == "__main__":
classes = {0: 'face'}
# dataloader
ds = de.MindDataset(mindrecord_path + "0", columns_list=["image", "annotation", "image_name", "image_size"])
print('Loading data from {}'.format(config.mindrecord_path))
ds = de.MindDataset(config.mindrecord_path + "0", columns_list=["image", "annotation", "image_name", "image_size"])
single_scale_trans = SingleScaleTrans(resize=args.input_shape)
ds = ds.batch(args.batch_size, per_batch_map=single_scale_trans,
single_scale_trans = SingleScaleTrans(resize=config.input_shape)
ds = ds.batch(config.batch_size, per_batch_map=single_scale_trans,
input_columns=["image", "annotation", "image_name", "image_size"], num_parallel_workers=8)
args.steps_per_epoch = ds.get_dataset_size()
config.steps_per_epoch = ds.get_dataset_size()
# backbone
network = backbone_HwYolov3(num_classes, num_anchors_list, args)
# load pretrain model
if os.path.isfile(args.pretrained):
param_dict = load_checkpoint(args.pretrained)
param_dict_new = {}
for key, values in param_dict.items():
if key.startswith('moments.'):
continue
elif key.startswith('network.'):
param_dict_new[key[8:]] = values
else:
param_dict_new[key] = values
load_param_into_net(network, param_dict_new)
print('load model {} success'.format(args.pretrained))
else:
print('load model {} failed, please check the path of model, evaluating end'.format(args.pretrained))
exit(0)
network = backbone_HwYolov3(num_classes, num_anchors_list, config)
network = load_pretrain(network, config)
ds = ds.repeat(1)
@ -119,30 +157,25 @@ if __name__ == "__main__":
img_size = {}
img_anno = {}
model_name = args.pretrained.split('/')[-1].replace('.ckpt', '')
result_path = os.path.join(args.result_path, model_name)
model_name = config.pretrained.split('/')[-1].replace('.ckpt', '')
result_path = os.path.join(config.result_path, model_name)
if os.path.exists(result_path):
pass
if not os.path.isdir(result_path):
os.makedirs(result_path, exist_ok=True)
# result file
ret_files_set = {
'face': os.path.join(result_path, 'comp4_det_test_face_rm5050.txt'),
}
ret_files_set = {'face': os.path.join(result_path, 'comp4_det_test_face_rm5050.txt')}
test_net = BuildTestNetwork(network, reduction_0, reduction_1, reduction_2, anchors, anchors_mask, num_classes,
args)
config)
print('conf_thresh:', args.conf_thresh)
print('conf_thresh:', config.conf_thresh)
eval_times = 0
for data in ds.create_tuple_iterator(output_numpy=True):
batch_images = data[0]
batch_labels = data[1]
batch_image_name = data[2]
batch_image_size = data[3]
batch_images, batch_labels, batch_image_name, batch_image_size = data[0:4]
eval_times += 1
img_tensor = Tensor(batch_images, mstype.float32)
@ -153,11 +186,11 @@ if __name__ == "__main__":
coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2, cls_scores_2 = test_net(img_tensor)
boxes_0, boxes_1, boxes_2 = get_bounding_boxes(coords_0, cls_scores_0, coords_1, cls_scores_1, coords_2,
cls_scores_2, args.conf_thresh, args.input_shape,
cls_scores_2, config.conf_thresh, config.input_shape,
num_classes)
converted_boxes_0, converted_boxes_1, converted_boxes_2 = tensor_to_brambox(boxes_0, boxes_1, boxes_2,
args.input_shape, labels)
config.input_shape, labels)
tdets.append(converted_boxes_0)
tdets.append(converted_boxes_1)
@ -175,11 +208,11 @@ if __name__ == "__main__":
img_anno.update({batch_image_name[k].decode('UTF-8'): v for k, v in enumerate(batch_labels)})
print('eval times:', eval_times)
print('batch size: ', args.batch_size)
print('batch size: ', config.batch_size)
netw, neth = args.input_shape
netw, neth = config.input_shape
reorg_dets = voc_wrapper.reorg_detection(det, netw, neth, img_size)
voc_wrapper.gen_results(reorg_dets, result_path, img_size, args.nms_thresh)
voc_wrapper.gen_results(reorg_dets, result_path, img_size, config.nms_thresh)
# compute mAP
ground_truth = parse_gt_from_anno(img_anno, classes)
@ -208,3 +241,6 @@ if __name__ == "__main__":
plt.savefig(ap_save_path)
print('=============yolov3 evaluating finished==================')
if __name__ == "__main__":
run_eval()

View File

@ -14,7 +14,6 @@
# ============================================================================
"""Convert ckpt to air."""
import os
import argparse
import numpy as np
from mindspore import context
@ -22,22 +21,22 @@ from mindspore import Tensor
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from src.FaceDetection.yolov3 import HwYolov3 as backbone_HwYolov3
from src.config import config
from model_utils.config import config
def save_air(args):
def save_air():
'''save air'''
print('============= yolov3 start save air ==================')
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
devid = int(os.getenv('DEVICE_ID', '0')) if config.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
num_classes = config.num_classes
anchors_mask = config.anchors_mask
num_anchors_list = [len(x) for x in anchors_mask]
network = backbone_HwYolov3(num_classes, num_anchors_list, args)
network = backbone_HwYolov3(num_classes, num_anchors_list, config)
if os.path.isfile(args.pretrained):
param_dict = load_checkpoint(args.pretrained)
if os.path.isfile(config.pretrained):
param_dict = load_checkpoint(config.pretrained)
param_dict_new = {}
for key, values in param_dict.items():
if key.startswith('moments.'):
@ -47,23 +46,16 @@ def save_air(args):
else:
param_dict_new[key] = values
load_param_into_net(network, param_dict_new)
print('load model {} success'.format(args.pretrained))
print('load model {} success'.format(config.pretrained))
input_data = np.random.uniform(low=0, high=1.0, size=(args.batch_size, 3, 448, 768)).astype(np.float32)
input_data = np.random.uniform(low=0, high=1.0, size=(config.batch_size, 3, 448, 768)).astype(np.float32)
tensor_input_data = Tensor(input_data)
export(network, tensor_input_data,
file_name=args.pretrained.replace('.ckpt', '_' + str(args.batch_size) + 'b.air'), file_format='AIR')
file_name=config.pretrained.replace('.ckpt', '_' + str(config.batch_size) + 'b.air'), file_format='AIR')
print("export model success.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert ckpt to air')
parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
help="run platform, support Ascend and CPU.")
parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
parser.add_argument('--batch_size', type=int, default=8, help='batch size')
arg = parser.parse_args()
save_air(arg)
save_air()

View File

@ -0,0 +1,126 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Parse arguments"""
import os
import ast
import argparse
from pprint import pformat
import yaml
class Config:
"""
Configuration namespace. Convert dictionary to members.
"""
def __init__(self, cfg_dict):
for k, v in cfg_dict.items():
if isinstance(v, (list, tuple)):
setattr(self, k, [Config(x) if isinstance(x, dict) else x for x in v])
else:
setattr(self, k, Config(v) if isinstance(v, dict) else v)
def __str__(self):
return pformat(self.__dict__)
def __repr__(self):
return self.__str__()
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
"""
Parse command line arguments to the configuration according to the default yaml.
Args:
parser: Parent parser.
cfg: Base configuration.
helper: Helper description.
cfg_path: Path to the default yaml config.
"""
parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
parents=[parser])
helper = {} if helper is None else helper
choices = {} if choices is None else choices
for item in cfg:
if not isinstance(cfg[item], list) and not isinstance(cfg[item], dict):
help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
choice = choices[item] if item in choices else None
if isinstance(cfg[item], bool):
parser.add_argument("--" + item, type=ast.literal_eval, default=cfg[item], choices=choice,
help=help_description)
else:
parser.add_argument("--" + item, type=type(cfg[item]), default=cfg[item], choices=choice,
help=help_description)
args = parser.parse_args()
return args
def parse_yaml(yaml_path):
"""
Parse the yaml config file.
Args:
yaml_path: Path to the yaml config.
"""
with open(yaml_path, 'r') as fin:
try:
cfgs = yaml.load_all(fin.read(), Loader=yaml.FullLoader)
cfgs = [x for x in cfgs]
if len(cfgs) == 1:
cfg_helper = {}
cfg = cfgs[0]
cfg_choices = {}
elif len(cfgs) == 2:
cfg, cfg_helper = cfgs
cfg_choices = {}
elif len(cfgs) == 3:
cfg, cfg_helper, cfg_choices = cfgs
else:
raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml")
print(cfg_helper)
except yaml.YAMLError:
raise ValueError("Failed to parse yaml")
return cfg, cfg_helper, cfg_choices
def merge(args, cfg):
"""
Merge the base config from yaml file and command line arguments.
Args:
args: Command line arguments.
cfg: Base configuration.
"""
args_var = vars(args)
for item in args_var:
cfg[item] = args_var[item]
return cfg
def get_config():
"""
Get Config according to the yaml file and cli arguments.
"""
parser = argparse.ArgumentParser(description="default name", add_help=False)
current_dir = os.path.dirname(os.path.abspath(__file__))
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../default_config.yaml"),
help="Config file path")
path_args, _ = parser.parse_known_args()
default, helper, choices = parse_yaml(path_args.config_path)
args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path)
final_config = merge(args, default)
return Config(final_config)
config = get_config()
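Downstream scripts then just import the ready-built `config` object instead of defining their own argparse parsers; a minimal usage sketch, assuming the repository layout of this commit (default_config.yaml one level above model_utils):
```python
from model_utils.config import config

# Scalar values come from default_config.yaml or a --flag override.
print(config.batch_size)    # -> 64 by default
# List values stay plain Python lists.
print(config.anchors_mask)  # -> [[8, 9, 10, 11], [4, 5, 6, 7], [0, 1, 2, 3]]
```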

View File

@ -0,0 +1,27 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Device adapter for ModelArts"""
from .config import config
if config.enable_modelarts:
from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
__all__ = [
"get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]

View File

@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Local adapter"""
import os
def get_device_id():
device_id = os.getenv('DEVICE_ID', '0')
return int(device_id)
def get_device_num():
device_num = os.getenv('RANK_SIZE', '1')
return int(device_num)
def get_rank_id():
global_rank_id = os.getenv('RANK_ID', '0')
return int(global_rank_id)
def get_job_id():
return "Local Job"

View File

@ -0,0 +1,116 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Moxing adapter for ModelArts"""
import os
import functools
from mindspore import context
from .config import config
_global_sync_count = 0
def get_device_id():
device_id = os.getenv('DEVICE_ID', '0')
return int(device_id)
def get_device_num():
device_num = os.getenv('RANK_SIZE', '1')
return int(device_num)
def get_rank_id():
global_rank_id = os.getenv('RANK_ID', '0')
return int(global_rank_id)
def get_job_id():
job_id = os.getenv('JOB_ID')
job_id = job_id if job_id else "default"
return job_id
def sync_data(from_path, to_path):
"""
Download data from remote OBS to a local directory if the first path is a remote URL and the second is a local path.
Upload data from a local directory to remote OBS in the reverse case.
"""
import moxing as mox
import time
global _global_sync_count
sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
_global_sync_count += 1
# Each server contains at most 8 devices.
if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
print("from path: ", from_path)
print("to path: ", to_path)
mox.file.copy_parallel(from_path, to_path)
print("===finish data synchronization===")
try:
os.mknod(sync_lock)
except IOError:
pass
print("===save flag===")
while True:
if os.path.exists(sync_lock):
break
time.sleep(1)
print("Finish sync data from {} to {}.".format(from_path, to_path))
def moxing_wrapper(pre_process=None, post_process=None):
"""
Moxing wrapper to download dataset and upload outputs.
"""
def wrapper(run_func):
@functools.wraps(run_func)
def wrapped_func(*args, **kwargs):
# Download data from data_url
if config.enable_modelarts:
if config.data_url:
sync_data(config.data_url, config.data_path)
print("Dataset downloaded: ", os.listdir(config.data_path))
if config.checkpoint_url:
sync_data(config.checkpoint_url, config.load_path)
print("Preload downloaded: ", os.listdir(config.load_path))
if config.train_url:
sync_data(config.train_url, config.output_path)
print("Workspace downloaded: ", os.listdir(config.output_path))
context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
config.device_num = get_device_num()
config.device_id = get_device_id()
if not os.path.exists(config.output_path):
os.makedirs(config.output_path)
if pre_process:
pre_process()
# Run the main function
run_func(*args, **kwargs)
# Upload data to train_url
if config.enable_modelarts:
if post_process:
post_process()
if config.train_url:
print("Start to copy output directory")
sync_data(config.output_path, config.train_url)
return wrapped_func
return wrapper
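The wrapper is consumed the same way eval.py and train.py in this commit use it; a minimal sketch, assuming the ModelArts keys (`enable_modelarts`, `data_url`, `train_url`) are set in the config:
```python
from model_utils.moxing_adapter import moxing_wrapper

def prepare():
    # Optional pre-processing, e.g. unzipping the dataset copied to config.data_path.
    pass

@moxing_wrapper(pre_process=prepare)
def main():
    # Training/evaluation body: inputs have already been synced from OBS to the
    # local cache, and anything written to config.output_path is uploaded after.
    pass

if __name__ == "__main__":
    main()
```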

View File

@ -60,10 +60,10 @@ echo $PRETRAINED_BACKBONE
echo 'start evaluating'
export RANK_ID=0
rm -rf ${current_exec_path}/device$USE_DEVICE_ID
rm -rf ${current_exec_path}/eval
echo 'start device '$USE_DEVICE_ID
mkdir ${current_exec_path}/device$USE_DEVICE_ID
cd ${current_exec_path}/device$USE_DEVICE_ID || exit
mkdir ${current_exec_path}/eval
cd ${current_exec_path}/eval || exit
dev=`expr $USE_DEVICE_ID + 0`
export DEVICE_ID=$dev
python ${dirname_path}/${SCRIPT_NAME} \

View File

@ -73,7 +73,6 @@ dev=`expr $USE_DEVICE_ID + 0`
export DEVICE_ID=$dev
python ${dirname_path}/${SCRIPT_NAME} \
--run_platform=$PLATFORM \
--world_size=1 \
--mindrecord_path=$MINDRECORD_FILE \
--pretrained=$PRETRAINED_BACKBONE > train.log 2>&1 &

View File

@ -1,58 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
"""Network config setting, will be used in train.py and eval.py"""
from easydict import EasyDict as ed
config = ed({
'batch_size': 64,
'warmup_lr': 0.0004,
'lr_rates': [0.002, 0.004, 0.002, 0.0008, 0.0004, 0.0002, 0.00008, 0.00004, 0.000004],
'lr_steps': [1000, 10000, 40000, 60000, 80000, 100000, 130000, 160000, 190000],
'gamma': 0.5,
'weight_decay': 0.0005,
'momentum': 0.5,
'max_epoch': 2500,
'log_interval': 10,
'ckpt_path': '../../output',
'ckpt_interval': 1000,
'result_path': '../../results',
'input_shape': [768, 448],
'jitter': 0.3,
'flip': 0.5,
'hue': 0.1,
'sat': 1.5,
'val': 1.5,
'num_classes': 1,
'anchors': [
[3, 4],
[5, 6],
[7, 9],
[10, 13],
[15, 19],
[21, 26],
[28, 36],
[38, 49],
[54, 71],
[77, 102],
[122, 162],
[207, 268],
],
'anchors_mask': [(8, 9, 10, 11), (4, 5, 6, 7), (0, 1, 2, 3)],
'conf_thresh': 0.1,
'nms_thresh': 0.45,
})

View File

@ -19,7 +19,7 @@ import mindspore.dataset.vision.py_transforms as P
import mindspore.dataset as de
from src.transforms import RandomCropLetterbox, RandomFlip, HSVShift, ResizeLetterbox
from src.config import config
from model_utils.config import config
class SingleScaleTrans:

View File

@ -14,16 +14,14 @@
# ============================================================================
"""Face detection train."""
import os
import ast
import time
import datetime
import argparse
import numpy as np
from mindspore import context
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore import Tensor
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.communication.management import init
from mindspore.context import ParallelMode
from mindspore.train.callback import ModelCheckpoint, RunContext
from mindspore.train.callback import _InternalCallbackParam, CheckpointConfig
@ -31,75 +29,104 @@ from mindspore.common import dtype as mstype
from src.logging import get_logger
from src.data_preprocess import create_dataset
from src.config import config
from src.network_define import define_network
def parse_args():
'''parse_args'''
parser = argparse.ArgumentParser('Yolov3 Face Detection')
parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend", "CPU"),
help="run platform, support Ascend and CPU.")
parser.add_argument('--mindrecord_path', type=str, default='', help='dataset path, e.g. /home/data.mindrecord')
parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
parser.add_argument('--local_rank', type=int, default=0, help='current rank to support distributed')
parser.add_argument('--world_size', type=int, default=8, help='current process number to support distributed')
parser.add_argument("--use_loss_scale", type=ast.literal_eval, default=True,
help="Whether use dynamic loss scale, default is True.")
args, _ = parser.parse_known_args()
args.batch_size = config.batch_size
args.warmup_lr = config.warmup_lr
args.lr_rates = config.lr_rates
if args.run_platform == "CPU":
args.use_loss_scale = False
args.world_size = 1
args.local_rank = 0
if args.world_size != 8:
args.lr_steps = [i * 8 // args.world_size for i in config.lr_steps]
else:
args.lr_steps = config.lr_steps
args.gamma = config.gamma
args.weight_decay = config.weight_decay if args.world_size != 1 else 0.
args.momentum = config.momentum
args.max_epoch = config.max_epoch
args.log_interval = config.log_interval
args.ckpt_path = config.ckpt_path
args.ckpt_interval = config.ckpt_interval
args.outputs_dir = os.path.join(args.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
print('args.outputs_dir', args.outputs_dir)
args.num_classes = config.num_classes
args.anchors = config.anchors
args.anchors_mask = config.anchors_mask
args.num_anchors_list = [len(x) for x in args.anchors_mask]
return args
from model_utils.config import config
from model_utils.moxing_adapter import moxing_wrapper
from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
def train(args):
def modelarts_pre_process():
'''modelarts pre process function.'''
def unzip(zip_file, save_dir):
import zipfile
s_time = time.time()
if not os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
zip_isexist = zipfile.is_zipfile(zip_file)
if zip_isexist:
fz = zipfile.ZipFile(zip_file, 'r')
data_num = len(fz.namelist())
print("Extract Start...")
print("unzip file num: {}".format(data_num))
data_print = int(data_num / 100) if data_num > 100 else 1
i = 0
for file in fz.namelist():
if i % data_print == 0:
print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
i += 1
fz.extract(file, save_dir)
print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60),
int(int(time.time() - s_time) % 60)))
print("Extract Done.")
else:
print("This is not zip.")
else:
print("Zip has been extracted.")
if config.need_modelarts_dataset_unzip:
zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
save_dir_1 = os.path.join(config.data_path)
sync_lock = "/tmp/unzip_sync.lock"
# Each server contains at most 8 devices.
if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
print("Zip file path: ", zip_file_1)
print("Unzip file save dir: ", save_dir_1)
unzip(zip_file_1, save_dir_1)
print("===Finish extract data synchronization===")
try:
os.mknod(sync_lock)
except IOError:
pass
while True:
if os.path.exists(sync_lock):
break
time.sleep(1)
print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))
config.ckpt_path = os.path.join(config.output_path, "output")
@moxing_wrapper(pre_process=modelarts_pre_process)
def run_train():
'''train'''
config.world_size = get_device_num()
config.local_rank = get_rank_id()
if config.run_platform == "CPU":
config.use_loss_scale = False
config.world_size = 1
config.local_rank = 0
if config.world_size != 8:
config.lr_steps = [i * 8 // config.world_size for i in config.lr_steps]
config.weight_decay = config.weight_decay if config.world_size != 1 else 0.
config.outputs_dir = os.path.join(config.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
print('config.outputs_dir', config.outputs_dir)
config.num_anchors_list = [len(x) for x in config.anchors_mask]
print('=============yolov3 start training==================')
devid = int(os.getenv('DEVICE_ID', '0')) if args.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=args.run_platform, save_graphs=False, device_id=devid)
devid = int(os.getenv('DEVICE_ID', '0')) if config.run_platform != 'CPU' else 0
context.set_context(mode=context.GRAPH_MODE, device_target=config.run_platform, save_graphs=False, device_id=devid)
# init distributed
if args.world_size != 1:
if config.world_size != 1:
init()
args.local_rank = get_rank()
args.world_size = get_group_size()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=args.world_size,
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=config.world_size,
gradients_mean=True)
args.logger = get_logger(args.outputs_dir, args.local_rank)
config.logger = get_logger(config.outputs_dir, config.local_rank)
# dataloader
ds = create_dataset(args)
ds = create_dataset(config)
args.logger.important_info('start create network')
config.logger.important_info('start create network')
create_network_start = time.time()
train_net = define_network(args)
train_net = define_network(config)
# checkpoint
ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
train_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
ckpt_cb = ModelCheckpoint(config=train_config, directory=args.outputs_dir, prefix='{}'.format(args.local_rank))
ckpt_max_num = config.max_epoch * config.steps_per_epoch // config.ckpt_interval
train_config = CheckpointConfig(save_checkpoint_steps=config.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
ckpt_cb = ModelCheckpoint(config=train_config, directory=config.outputs_dir, prefix='{}'.format(config.local_rank))
cb_params = _InternalCallbackParam()
cb_params.train_network = train_net
cb_params.epoch_num = ckpt_max_num
@ -112,7 +139,7 @@ def train(args):
t_epoch = time.time()
old_progress = -1
i = 0
if args.use_loss_scale:
if config.use_loss_scale:
scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)
for data in ds.create_tuple_iterator(output_numpy=True):
batch_images = data[0]
@ -120,7 +147,7 @@ def train(args):
input_list = [Tensor(batch_images, mstype.float32)]
for idx in range(2, 26):
input_list.append(Tensor(data[idx], mstype.float32))
if args.use_loss_scale:
if config.use_loss_scale:
scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
loss0, overflow, _ = train_net(*input_list, scaling_sens)
overflow = np.all(overflow.asnumpy())
@ -128,50 +155,49 @@ def train(args):
scale_manager.update_loss_scale(overflow)
else:
scale_manager.update_loss_scale(False)
args.logger.info('rank[{}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
'batch_labels:{}'.format(args.local_rank, i, loss0, overflow, scaling_sens, args.lr[i],
batch_images.shape, batch_labels.shape))
config.logger.info('rank[{:d}], iter[{}], loss[{}], overflow:{}, loss_scale:{}, lr:{}, batch_images:{}, '
'batch_labels:{}'.format(config.local_rank, i, loss0, overflow, scaling_sens,
config.lr[i], batch_images.shape, batch_labels.shape))
else:
loss0 = train_net(*input_list)
args.logger.info('rank[{}], iter[{}], loss[{}], lr:{}, batch_images:{}, '
'batch_labels:{}'.format(args.local_rank, i, loss0, args.lr[i],
batch_images.shape, batch_labels.shape))
config.logger.info('rank[{:d}], iter[{}], loss[{}], lr:{}, batch_images:{}, '
'batch_labels:{}'.format(config.local_rank, i, loss0,
config.lr[i], batch_images.shape, batch_labels.shape))
# save ckpt
cb_params.cur_step_num = i + 1 # current step number
cb_params.batch_num = i + 2
if args.local_rank == 0:
if config.local_rank == 0:
ckpt_cb.step_end(run_context)
# save Log
if i == 0:
time_for_graph_compile = time.time() - create_network_start
args.logger.important_info('Yolov3, graph compile time={:.2f}s'.format(time_for_graph_compile))
config.logger.important_info('Yolov3, graph compile time={:.2f}s'.format(time_for_graph_compile))
if i % args.steps_per_epoch == 0:
if i % config.steps_per_epoch == 0:
cb_params.cur_epoch_num += 1
if i % args.log_interval == 0 and args.local_rank == 0:
if i % config.log_interval == 0 and config.local_rank == 0:
time_used = time.time() - t_end
epoch = int(i / args.steps_per_epoch)
fps = args.batch_size * (i - old_progress) * args.world_size / time_used
args.logger.info('epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(epoch, i, loss0, fps))
epoch = int(i / config.steps_per_epoch)
fps = config.batch_size * (i - old_progress) * config.world_size / time_used
config.logger.info('epoch[{}], iter[{}], loss:[{}], {:.2f} imgs/sec'.format(epoch, i, loss0, fps))
t_end = time.time()
old_progress = i
if i % args.steps_per_epoch == 0 and args.local_rank == 0:
if i % config.steps_per_epoch == 0 and config.local_rank == 0:
epoch_time_used = time.time() - t_epoch
epoch = int(i / args.steps_per_epoch)
fps = args.batch_size * args.world_size * args.steps_per_epoch / epoch_time_used
args.logger.info('=================================================')
args.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(epoch, i, fps))
args.logger.info('=================================================')
epoch = int(i / config.steps_per_epoch)
fps = config.batch_size * config.world_size * config.steps_per_epoch / epoch_time_used
config.logger.info('=================================================')
config.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(epoch, i, fps))
config.logger.info('=================================================')
t_epoch = time.time()
i = i + 1
args.logger.info('=============yolov3 training finished==================')
config.logger.info('=============yolov3 training finished==================')
if __name__ == "__main__":
arg = parse_args()
train(arg)
run_train()

View File

@ -28,9 +28,9 @@ def test_FaceDetection_WIDER():
model_name = "FaceDetection"
utils.copy_files(model_path, cur_path, model_name)
cur_model_path = os.path.join(cur_path, model_name)
old_list = ["'max_epoch': 2500,"]
new_list = ["'max_epoch': 1,"]
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "src/config.py"))
old_list = ["max_epoch: 2500"]
new_list = ["max_epoch: 1"]
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "default_config.yaml"))
dataset_path = os.path.join(utils.data_root, "widerface/mindrecord_train/data.mindrecord")
device_id = int(os.environ.get("DEVICE_ID", "0"))
model_train_command = "cd {}/scripts;sh run_standalone_train.sh Ascend {} {}"\