From 17111f72f2802c4945f0645967c45925c5f21d16 Mon Sep 17 00:00:00 2001
From: markuskunej <markus.kunej@huawei.com>
Date: Tue, 6 Jul 2021 16:40:41 +0000
Subject: [PATCH] added gpu support for centerface

---
 model_zoo/official/cv/centerface/README.md    | 473 ++++++++++++------
 .../cv/centerface/dependency/evaluate/eval.py |   2 +-
 .../official/cv/centerface/scripts/test.sh    |  61 ++-
 .../cv/centerface/scripts/test_and_eval.sh    |  62 ++-
 .../cv/centerface/scripts/test_distribute.sh  |  70 +--
 .../scripts/train_distribute_gpu.sh           | 132 +++++
 .../scripts/train_standalone_gpu.sh           | 146 ++++++
 .../official/cv/centerface/src/centerface.py  |  57 ++-
 model_zoo/official/cv/centerface/test.py      |   9 +-
 model_zoo/official/cv/centerface/train.py     |  37 +-
 10 files changed, 775 insertions(+), 274 deletions(-)
 create mode 100644 model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh
 create mode 100644 model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh

diff --git a/model_zoo/official/cv/centerface/README.md b/model_zoo/official/cv/centerface/README.md
index bffb5673b1f..45983af34ef 100644
--- a/model_zoo/official/cv/centerface/README.md
+++ b/model_zoo/official/cv/centerface/README.md
@@ -84,8 +84,8 @@ other datasets need to use the same format as WiderFace.
 
 # [Environment Requirements](#contents)
 
-- Hardware（Ascend）
-    - Prepare hardware environment with Ascend processor.
+- Hardware（Ascend/GPU）
+    - Prepare hardware environment with Ascend or GPU processor.
 - Framework
     - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below：
@@ -105,7 +105,7 @@ step1: prepare pretrained model: train a mobilenet_v2 model by mindspore or use
 #        The key/cell/module name must as follow, otherwise you need to modify "name_map" function:
 #            --mindspore: as the same as mobilenet_v2_key.ckpt
 #            --pytorch: same as official pytorch model(e.g., official mobilenet_v2-b0353104.pth)
-python convert_weight_centerface.py --ckpt_fn=./mobilenet_v2_key.ckpt --pt_fn=./mobilenet_v2-b0353104.pth --out_ckpt_fn=./mobilenet_v2.ckpt
+python convert_weight_mobilenetv2.py --ckpt_fn=./mobilenet_v2_key.ckpt --pt_fn=./mobilenet_v2-b0353104.pth --out_ckpt_fn=./mobilenet_v2.ckpt
 ```
 
 step2: prepare dataset  
@@ -116,7 +116,7 @@ step2: prepare dataset
 
 &emsp;3)download training annotations from [annotations](https://pan.baidu.com/s/1j_2wggZ3bvCuOAfZvjWqTg).  password: **f9hh**
 
-step3: prepare user rank_table
+step3 (ASCEND ONLY): prepare user rank_table
 
 ```python
 # user can use your own rank table file
@@ -137,13 +137,25 @@ ls ./dataset/centerface/annotations/train.json # annot_path
 ls ./dataset/centerface/images/train/images # img_dir
 ```
 
-```python
-# enter script dir, train CenterFace
-sh train_distribute.sh
-# after training
-mkdir ./model
-cp device0/outputs/*/*.ckpt ./model # cp model to [MODEL_PATH]
-```
+- Train on Ascend
+
+    ```python
+    # enter script dir, train CenterFace
+    sh train_distribute.sh
+    # after training
+    mkdir ./model
+    cp device0/output/*/*.ckpt ./model # cp model to [MODEL_PATH]
+    ```
+
+- Train on GPU
+
+    ```python
+    # enter script dir, train CenterFace
+    sh train_distribute_gpu.sh
+    # after training
+    mkdir ./model
+    cp train_distribute_gpu/output/*/*.ckpt ./model # cp model to [MODEL_PATH]
+    ```
 
 step5: test
 
@@ -163,10 +175,19 @@ ls ./dataset/images/val/images/ # data path
 ls ./dataset/centerface/ground_truth/val.mat # annot_path
 ```
 
-```python
-# test CenterFace
-sh test_distribute.sh
-```
+- Test on Ascend
+
+    ```python
+    # test CenterFace
+    sh test_distribute.sh
+    ```
+
+- Test on GPU
+
+    ```bash
+    # test CenterFace
+    bash test_distribute.sh GPU
+    ```
 
 step6: eval
 
@@ -304,7 +325,9 @@ sh eval_all.sh [ground_truth_path]
         │   ├──test_distribute.sh        // testing a range of models
         │   ├──test_and_eval.sh          // test then evaluate a single model
         │   ├──train_standalone.sh       // train in ascend with single npu
+        │   ├──train_standalone_gpu.sh   // train on GPU with a single device
         │   ├──train_distribute.sh       // train in ascend with multi npu
+        │   ├──train_distribute_gpu.sh   // train on GPU with multiple devices
         ├── src
         │   ├──__init__.py
         │   ├──centerface.py             // centerface networks, training entry
@@ -320,7 +343,7 @@ sh eval_all.sh [ground_truth_path]
         |      ├──config.py              // Processing configuration parameters
         |      ├──device_adapter.py      // Get cloud ID
         |      ├──local_adapter.py       // Get local ID
-        |     └ ──moxing_adapter.py      // Parameter processing
+        |      └──moxing_adapter.py      // Parameter processing
         └── dependency                   // third party codes: MIT License
             ├──extd                      // training dependency: data augmentation
             │   ├──utils
@@ -371,6 +394,7 @@ sh eval_all.sh [ground_truth_path]
     --data_dir: data dir
     --annot_path: annotations path
     --img_dir: img dir in data_dir
+    --device_target: device on which the code will run. Options are "Ascend" or "GPU". (default: Ascend)
     ```
 
 2. centerface unique configs: in config.py; not recommend user to change
@@ -395,6 +419,7 @@ sh eval_all.sh [ground_truth_path]
     # detail can be found in "test.py"
     # if ckpt is specified not need below 4 parameter
     --device_num: training device number
+    --device_target: device on which the code will run. Options are "Ascend" or "GPU". (default: Ascend)
     --steps_per_epoch: steps for each epoch
     --start: start loop number, used to calculate first epoch number
     --end: end loop number, used to calculate last epoch number
@@ -414,82 +439,152 @@ Major parameters eval.py as follows:
 
 ### Training
 
-'task_set' is important for multi-npu train to get higher speed
---task_set: 0, not task_set; 1 task_set;
---task_set_core: task_set core number, most time = cpu number/nproc_per_node
+- Running on Ascend
 
-step1: user need train a mobilenet_v2 model by mindspore or use the script below:
+    'task_set' is important for multi-npu train to get higher speed
+    --task_set: 0, not task_set; 1 task_set;
+    --task_set_core: task_set core number, most time = cpu number/nproc_per_node
 
-```python
-python torch_to_ms_centerface.py --ckpt_fn=./mobilenet_v2_key.ckpt --pt_fn=./mobilenet_v2-b0353104.pth --out_ckpt_fn=./mobilenet_v2.ckpt
-```
+    step1: the user needs to train a mobilenet_v2 model with MindSpore or use the script below:
 
-step2: prepare user rank_table
+    ```python
+    python convert_weight_mobilenetv2.py --ckpt_fn=./mobilenet_v2_key.ckpt --pt_fn=./mobilenet_v2-b0353104.pth --out_ckpt_fn=./mobilenet_v2.ckpt
+    ```
 
-```python
-# user can use your own rank table file
-# or use the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate rank table file
-# e.g., python hccl_tools.py --device_num "[0,8)"
-python hccl_tools.py --device_num "[0,8)"
-```
+    step2: prepare user rank_table
 
-step3: train
+    ```python
+    # user can use your own rank table file
+    # or use the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate rank table file
+    # e.g., python hccl_tools.py --device_num "[0,8)"
+    python hccl_tools.py --device_num "[0,8)"
+    ```
 
-- Single device
+    step3: train
 
-```python
-# enter script dir, train CenterFace
-cd scripts
-# you need to change the parameter in train_standalone.sh
-# or use symbolic link as quick start
-# or use the command as follow:
-#   USE_DEVICE_ID: your device
-#   PRETRAINED_BACKBONE: your pretrained model path
-#   DATASET: dataset path
-#   ANNOTATIONS: annotation path
-#   images: img_dir in dataset path
-sh train_standalone.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
-# after training
-cp device0/outputs/*/*.ckpt [MODEL_PATH]
-```
+    - Single device
 
-- multi-device (recommended)
+    ```python
+    # enter script dir, train CenterFace
+    cd scripts
+    # you need to change the parameter in train_standalone.sh
+    # or use symbolic link as quick start
+    # or use the command as follow:
+    #   USE_DEVICE_ID: your device
+    #   PRETRAINED_BACKBONE: your pretrained model path
+    #   DATASET: dataset path
+    #   ANNOTATIONS: annotation path
+    #   images: img_dir in dataset path
+    sh train_standalone.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
+    # after training
+    cp device0/output/*/*.ckpt [MODEL_PATH]
+    ```
 
-```python
-# enter script dir, train CenterFace
-cd scripts;
-# you need to change the parameter in train_distribute.sh
-# or use symbolic link as quick start
-# or use the command as follow, most are the same as train_standalone.sh, the different is RANK_TABLE
-#   RANK_TABLE: for multi-device only, from generate_rank_table.py or user writing
-sh train_distribute.sh [RANK_TABLE] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
-# after training
-cp device0/outputs/*/*.ckpt [MODEL_PATH]
-```
+    - Multi-device (recommended)
 
-After training with 8 device, the loss value will be achieved as follows:
+    ```python
+    # enter script dir, train CenterFace
+    cd scripts;
+    # you need to change the parameter in train_distribute.sh
+    # or use symbolic link as quick start
+    # or use the command as follow, most are the same as train_standalone.sh, the different is RANK_TABLE
+    #   RANK_TABLE: for multi-device only, from generate_rank_table.py or user writing
+    sh train_distribute.sh [RANK_TABLE] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
+    # after training
+    cp device0/output/*/*.ckpt [MODEL_PATH]
+    ```
 
-```python
-# grep "loss is " device0/xxx.log
-# epoch: 1 step: 1, loss is greater than 500 and less than 5000
-2020-09-24 19:00:53,550:INFO:epoch:1, iter:0, average_loss:loss:1148.415649, loss:1148.4156494140625, overflow:False, loss_scale:1024.0
-[WARNING] DEBUG(51499,python):2020-09-24-19:00:53.590.008 [mindspore/ccsrc/debug/dump_proto.cc:218] SetValueToProto] Unsupported type UInt
-2020-09-24 19:00:53,784:INFO:epoch:1, iter:1, average_loss:loss:798.286713, loss:448.15777587890625, overflow:False, loss_scale:1024.0
-...
-2020-09-24 19:01:58,095:INFO:epoch:2, iter:197, average_loss:loss:1.942609, loss:1.5492267608642578, overflow:False, loss_scale:1024.0
-2020-09-24 19:01:58,501:INFO:epoch[2], loss:1.942609, 477.97 imgs/sec, lr:0.004000000189989805
-2020-09-24 19:01:58,502:INFO:==========end epoch===============
-2020-09-24 19:02:00,780:INFO:epoch:3, iter:0, average_loss:loss:2.107658, loss:2.1076583862304688, overflow:False, loss_scale:1024.0
-...
-# epoch: 140 average loss is greater than 0.3 and less than 1.5:
-2020-09-24 20:19:16,255:INFO:epoch:140, iter:196, average_loss:loss:0.906300, loss:1.1071504354476929, overflow:False, loss_scale:1024.0
-2020-09-24 20:19:16,347:INFO:epoch:140, iter:197, average_loss:loss:0.904684, loss:0.586264967918396, overflow:False, loss_scale:1024.0
-2020-09-24 20:19:16,747:INFO:epoch[140], loss:0.904684, 480.10 imgs/sec, lr:3.9999998989515007e-05
-2020-09-24 20:19:16,748:INFO:==========end epoch===============
-2020-09-24 20:19:16,748:INFO:==========end training===============
-```
+    After training with 8 devices, the loss values should look like the following:
 
-The model checkpoint will be saved in the scripts/device0/output/xxx/xxx.ckpt
+    ```python
+    # grep "loss:" device0/xxx.log
+    #
+    # epoch: 1 step: 1, loss is greater than 500 and less than 5000
+    2020-09-24 19:00:53,550:INFO:epoch:1, iter:0, average_loss:loss:1148.415649, loss:1148.4156494140625, overflow:False, loss_scale:1024.0
+    [WARNING] DEBUG(51499,python):2020-09-24-19:00:53.590.008 [mindspore/ccsrc/debug/dump_proto.cc:218] SetValueToProto] Unsupported type UInt
+    2020-09-24 19:00:53,784:INFO:epoch:1, iter:1, average_loss:loss:798.286713, loss:448.15777587890625, overflow:False, loss_scale:1024.0
+    ...
+    2020-09-24 19:01:58,095:INFO:epoch:2, iter:197, average_loss:loss:1.942609, loss:1.5492267608642578, overflow:False, loss_scale:1024.0
+    2020-09-24 19:01:58,501:INFO:epoch[2], loss:1.942609, 477.97 imgs/sec, lr:0.004000000189989805
+    2020-09-24 19:01:58,502:INFO:==========end epoch===============
+    2020-09-24 19:02:00,780:INFO:epoch:3, iter:0, average_loss:loss:2.107658, loss:2.1076583862304688, overflow:False, loss_scale:1024.0
+    ...
+    # epoch: 140 average loss is greater than 0.3 and less than 1.5:
+    2020-09-24 20:19:16,255:INFO:epoch:140, iter:196, average_loss:loss:0.906300, loss:1.1071504354476929, overflow:False, loss_scale:1024.0
+    2020-09-24 20:19:16,347:INFO:epoch:140, iter:197, average_loss:loss:0.904684, loss:0.586264967918396, overflow:False, loss_scale:1024.0
+    2020-09-24 20:19:16,747:INFO:epoch[140], loss:0.904684, 480.10 imgs/sec, lr:3.9999998989515007e-05
+    2020-09-24 20:19:16,748:INFO:==========end epoch===============
+    2020-09-24 20:19:16,748:INFO:==========end training===============
+    ```
+
+    The model checkpoint will be saved in scripts/device0/output/xxx/xxx.ckpt
+
+- Running on GPU
+
+    'task_set' is important for multi-device training to get higher speed
+    --task_set: 0, not task_set; 1 task_set;
+    --task_set_core: task_set core number, most time = cpu number/nproc_per_node
+
+    step1: the user needs to train a mobilenet_v2 model with MindSpore or use the script below:
+
+    ```python
+    python convert_weight_mobilenetv2.py --ckpt_fn=./mobilenet_v2_key.ckpt --pt_fn=./mobilenet_v2-b0353104.pth --out_ckpt_fn=./mobilenet_v2.ckpt
+    ```
+
+    step2: train
+
+    - Single device
+
+    ```python
+    # enter script dir, train CenterFace
+    cd scripts
+    # you need to change the parameter in train_standalone_gpu.sh
+    # or use symbolic link as quick start
+    # or use the command as follow:
+    #   USE_DEVICE_ID: your device
+    #   PRETRAINED_BACKBONE: your pretrained model path
+    #   DATASET: dataset path
+    #   ANNOTATIONS: annotation path
+    #   images: img_dir in dataset path
+    sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
+    # after training
+    cp train_standalone_gpu/output/*/*.ckpt [MODEL_PATH]
+    ```
+
+    - Multi-device (recommended)
+
+    ```python
+    # enter script dir, train CenterFace
+    cd scripts;
+    # you need to change the parameter in train_distribute_gpu.sh
+    # or use symbolic link as quick start
+    # or use the command as follow, most are the same as train_standalone_gpu.sh, the difference is DEVICE_NUM
+    #   DEVICE_NUM: for multi-device only, number of devices
+    sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]
+    # after training
+    cp train_distribute_gpu/output/*/*.ckpt [MODEL_PATH]
+    ```
+
+    After training with 8 devices, the loss values should look like the following:
+
+    ```python
+    # grep "loss:" train_distribute_gpu/xxx.log
+    #
+    # epoch: 1 step: 1, loss is greater than 500 and less than 5000
+    2021-07-06 16:00:45,375:INFO:epoch:1, iter:0, avg_loss:loss:1271.834595, loss:1271.8345947265625, overflow:False, loss_scale:1024.0
+    [WARNING] ME(50115:139631687231296,_GeneratorWorkerMp-42):2021-07-06-16:00:45.499.845 [mindspore/dataset/engine/queue.py:99] Using shared memory queue, but rowsize is larger than allocated memory max_rowsize 6291456 current rowwize 9550848
+    2021-07-06 16:00:45,600:INFO:epoch:1, iter:1, avg_loss:loss:1017.134613, loss:762.4346313476562, overflow:False, loss_scale:1024.0
+    ...
+    2021-07-06 16:01:42,710:INFO:epoch:2, iter:197, avg_loss:loss:1.906899, loss:1.6912976503372192, overflow:False, loss_scale:1024.0
+    2021-07-06 16:01:42,869:INFO:epoch[2], loss:1.906899, 442.33 imgs/sec, lr:0.004000000189989805
+    2021-07-06 16:01:42,985:INFO:epoch:3, iter:0, avg_loss:loss:1.804715, loss:1.804714560508728, overflow:False, loss_scale:1024.0
+    ...
+    # epoch: 140 average loss is greater than 0.3 and less than 1.5:
+    2021-07-06 17:02:39,750:INFO:epoch:140, iter:196, avg_loss:loss:0.870886, loss:0.7947260141372681, overflow:False, loss_scale:1024.0
+    2021-07-06 17:02:39,869:INFO:epoch:140, iter:197, avg_loss:loss:0.872917, loss:1.2730457782745361, overflow:False, loss_scale:1024.0
+    2021-07-06 17:02:40,005:INFO:epoch[140], loss:0.872917, 529.03 imgs/sec, lr:3.9999998989515007e-05
+    2021-07-06 17:02:41,273:INFO:==========end training===============
+    ```
 
 ## [Testing Process](#contents)
 
@@ -511,27 +606,29 @@ mkdir [SAVE_PATH]
     # you need to change the parameter in test.sh
     # or use symbolic link as quick start
     # or use the command as follow:
+    #   DEVICE_TARGET: device on which the code will run. Either Ascend or GPU (default: Ascend)
     #   MODEL_PATH: ckpt path saved during training
     #   DATASET: img dir
     #   GROUND_TRUTH_MAT: ground_truth file, mat type
     #   SAVE_PATH: save_path for evaluate
     #   DEVICE_ID: use device id
     #   CKPT: test model name
-    sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]
+    sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]
     ```
 
 2. test many out ckpt for user to choose the best one
 
-```python
-# you need to change the parameter in test.sh
-# or use symbolic link as quick start
-# or use the command as follow, most are the same as test.sh, the different are:
-#   DEVICE_NUM: training device number
-#   STEPS_PER_EPOCH: steps for each epoch
-#   START: start loop number, used to calculate first epoch number
-#   END: end loop number, used to calculate last epoch number
-sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]
-```
+    ```python
+    # you need to change the parameter in test_distribute.sh
+    # or use symbolic link as quick start
+    # or use the command as follow, most are the same as test.sh, the different are:
+    #   DEVICE_TARGET: device on which the code will run. Either Ascend or GPU (default: Ascend)
+    #   DEVICE_NUM: training device number
+    #   STEPS_PER_EPOCH: steps for each epoch
+    #   START: start loop number, used to calculate first epoch number
+    #   END: end loop number, used to calculate last epoch number
+    sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]
+    ```
 
 After testing, you can find many txt file save the box information and scores,
 open it you can see:
@@ -572,57 +669,107 @@ cd ../../../scripts;
 
 3. test+eval
 
-```python
-# you need to change the parameter in test_and_eval.sh
-# or use symbolic link as quick start, default eval the ckpt saved in ./scripts/output/centerface/999
-# or use the command as follow, most are the same as test.sh, the different are:
-#   GROUND_TRUTH_PATH: ground truth path
-sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [CKPT] [GROUND_TRUTH_PATH]
-```
+    ```python
+    # you need to change the parameter in test_and_eval.sh
+    # or use symbolic link as quick start, default eval the ckpt saved in ./scripts/output/centerface/999
+    # or use the command as follow, most are the same as test.sh, the different are:
+    #   GROUND_TRUTH_PATH: ground truth path
+    sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [CKPT] [GROUND_TRUTH_PATH]
+    ```
 
-you can see the MAP below by eval.sh
+- Running on Ascend
 
-```log
-(ci3.7) [root@bms-aiserver scripts]# ./eval.sh ./ground_truth_path
-start eval
-==================== Results = ==================== ./scripts/output/centerface/999
-Easy   Val AP: 0.923914407045363
-Medium Val AP: 0.9166100571371586
-Hard   Val AP: 0.7810750535799462
-=================================================
-end eval
-```
+    you can see the MAP below by eval.sh
 
-you can see the MAP below by eval_all.sh
+    ```log
+    (ci3.7) [root@bms-aiserver scripts]# ./eval.sh
+    start eval
+    ==================== Results = ==================== ./scripts/output/centerface/999
+    Easy   Val AP: 0.923914407045363
+    Medium Val AP: 0.9166100571371586
+    Hard   Val AP: 0.7810750535799462
+    =================================================
+    end eval
+    ```
 
-```log
-(ci3.7) [root@bms-aiserver scripts]# ./eval_all.sh ./ground_truth_path
-==================== Results = ==================== ./scripts/output/centerface/89
-Easy   Val AP: 0.8884892849068273
-Medium Val AP: 0.8928813452811216
-Hard   Val AP: 0.7721131614294564
-=================================================
-==================== Results = ==================== ./scripts/output/centerface/90
-Easy   Val AP: 0.8836073914165545
-Medium Val AP: 0.8875938506473486
-Hard   Val AP: 0.775956751740446
-...
-==================== Results = ==================== ./scripts/output/centerface/125
-Easy   Val AP: 0.923914407045363
-Medium Val AP: 0.9166100571371586
-Hard   Val AP: 0.7810750535799462
-=================================================
-==================== Results = ==================== ./scripts/output/centerface/126
-Easy   Val AP: 0.9218741197149122
-Medium Val AP: 0.9151860193570651
-Hard   Val AP: 0.7825645670331809
-...
-==================== Results = ==================== ./scripts/output/centerface/140
-Easy   Val AP: 0.9250715236965638
-Medium Val AP: 0.9170429723233877
-Hard   Val AP: 0.7822182013830674
-=================================================
-```
+    you can see the MAP below by eval_all.sh
+
+    ```log
+    (ci3.7) [root@bms-aiserver scripts]# ./eval_all.sh
+    ==================== Results = ==================== ./scripts/output/centerface/89
+    Easy   Val AP: 0.8884892849068273
+    Medium Val AP: 0.8928813452811216
+    Hard   Val AP: 0.7721131614294564
+    =================================================
+    ==================== Results = ==================== ./scripts/output/centerface/90
+    Easy   Val AP: 0.8836073914165545
+    Medium Val AP: 0.8875938506473486
+    Hard   Val AP: 0.775956751740446
+    ...
+    ==================== Results = ==================== ./scripts/output/centerface/125
+    Easy   Val AP: 0.923914407045363
+    Medium Val AP: 0.9166100571371586
+    Hard   Val AP: 0.7810750535799462
+    =================================================
+    ==================== Results = ==================== ./scripts/output/centerface/126
+    Easy   Val AP: 0.9218741197149122
+    Medium Val AP: 0.9151860193570651
+    Hard   Val AP: 0.7825645670331809
+    ...
+    ==================== Results = ==================== ./scripts/output/centerface/140
+    Easy   Val AP: 0.9250715236965638
+    Medium Val AP: 0.9170429723233877
+    Hard   Val AP: 0.7822182013830674
+    =================================================
+    ```
+
+- Running on GPU
+
+    you can see the MAP below from eval.sh
+
+    ```log
+    (markus) rescue@distrubuteddata13: ./scripts$ bash eval.sh
+    start eval
+    ==================== Results = ==================== ./scripts/output/centerface/140
+    Easy   Val AP: 0.9240708943779239
+    Medium Val AP: 0.9193106635436091
+    Hard   Val AP: 0.7777030480280428
+    =================================================
+    end eval
+    ```
+
+    you can see the MAP below from eval_all.sh
+
+    ```log
+    (markus) rescue@distrubuteddata13: ./scripts$ bash eval_all.sh
+    ==================== Results = ==================== ./scripts/output/centerface/89
+    Easy   Val AP: 0.9138417914429035
+    Medium Val AP: 0.9052437122819539
+    Hard   Val AP: 0.7705692348147004
+    =================================================
+    ==================== Results = ==================== ./scripts/output/centerface/90
+    Easy   Val AP: 0.8820974959531916
+    Medium Val AP: 0.8902186098138436
+    Hard   Val AP: 0.7655257898032033
+    =================================================
+    ...
+    ==================== Results = ==================== ./scripts/output/centerface/125
+    Easy   Val AP: 0.9240525949727452
+    Medium Val AP: 0.9180645371016661
+    Hard   Val AP: 0.782047346778918
+    =================================================
+    ==================== Results = ==================== ./scripts/output/centerface/126
+    Easy   Val AP: 0.9199560196120761
+    Medium Val AP: 0.9157462777329638
+    Hard   Val AP: 0.7814679399942209
+    =================================================
+    ...
+    ==================== Results = ==================== ./scripts/output/centerface/140
+    Easy   Val AP: 0.9240708943779239
+    Medium Val AP: 0.9193106635436091
+    Hard   Val AP: 0.7777030480280428
+    =================================================
+    ```
 
 ## [Inference process](#contents)
 
@@ -678,36 +825,36 @@ Hard   Val AP: 0.776737419299741
 
 CenterFace on 13K images(The annotation and data format must be the same as widerFace)
 
-| Parameters                 | CenterFace                                                  |
-| -------------------------- | ----------------------------------------------------------- |
-| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8             |
-| uploaded Date              | 10/29/2020 (month/day/year)                                 |
-| MindSpore Version          | 1.0.0                                                 |
-| Dataset                    | 13K images                                                  |
-| Training Parameters        | epoch=140, steps=198 * epoch, batch_size = 8, lr=0.004      |
-| Optimizer                  | Adam                                                        |
-| Loss Function              | Focal Loss, L1 Loss, Smooth L1 Loss                         |
-| outputs                    | heatmaps                                                    |
-| Loss                       | 0.3-1.5, average loss for last epoch is in 0.8-1.0          |
-| Speed                      | 1p 65 img/s, 8p 475 img/s                                   |
-| Total time                 | train(8p) 1.1h, test 50min, eval 5-10min                    |
-| Checkpoint for Fine tuning | 22M (.ckpt file)                                            |
-| Scripts                    | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/centerface> |
+| Parameters                 | Ascend                                                      | GPU                                      |
+| -------------------------- | ----------------------------------------------------------- | -----------------------------------------|
+| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8             | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 |
+| uploaded Date              | 10/29/2020 (month/day/year)                                 | 7/9/2021 (month/day/year) |
+| MindSpore Version          | 1.0.0                                                 | 1.3.0 |
+| Dataset                    | 13K images                                                  | 13K images |
+| Training Parameters        | epoch=140, steps=198 * epoch, batch_size = 8, lr=0.004      | epoch=140, steps=198 * epoch, batch_size = 8, lr=0.004 |
+| Optimizer                  | Adam                                                        | Adam |
+| Loss Function              | Focal Loss, L1 Loss, Smooth L1 Loss                         | Focal Loss, L1 Loss, Smooth L1 Loss  |
+| outputs                    | heatmaps                                                    | heatmaps |
+| Loss                       | 0.3-1.5, average loss for last epoch is in 0.8-1.0          | iter loss for last epoch 0.3-3.3, average loss for last epoch is in 0.75-1.05 |
+| Speed                      | 1p 65 img/s, 8p 475 img/s                                   | 1gpu 80 img/s, 8gpu 480 img/s |
+| Total time                 | train(8p) 1.1h, test 50min, eval 5-10min                    | train(8gpu) 1.0h, test 35 min, eval 5-10min |
+| Checkpoint for Fine tuning | 22M (.ckpt file)                                            | 23M (.ckpt file) |
+| Scripts                    | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/centerface> | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/centerface> |
 
 ### Inference Performance
 
 CenterFace on 3.2K images(The annotation and data format must be the same as widerFace)
 
-| Parameters                 | CenterFace                                                  |
-| -------------------------- | ----------------------------------------------------------- |
-| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8             |
-| uploaded Date              | 10/29/2020 (month/day/year)                                 |
-| MindSpore Version          | 1.0.0                                               |
-| Dataset                    | 3.2K images                                                 |
-| batch_size                 | 1                                                           |
-| outputs                    | box position and sorces, and probability                    |
-| Accuracy                   | Easy 92.2%  Medium 91.5% Hard 78.2% (+-0.5%)                |
-| Model for inference        | 22M (.ckpt file)                                            |
+| Parameters                 | Ascend                                                      | GPU                                        |
+| -------------------------- | ----------------------------------------------------------- | ------------------------------------------ |
+| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8             | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 |
+| uploaded Date              | 10/29/2020 (month/day/year)                                 | 7/9/2021 (month/day/year) |
+| MindSpore Version          | 1.0.0                                               | 1.3.0 |
+| Dataset                    | 3.2K images                                                 | 3.2K images |
+| batch_size                 | 1                                                           | 1 |
+| outputs                    | box position and scores, and probability                    | box position and scores, and probability |
+| Accuracy                   | Easy 92.2%  Medium 91.5% Hard 78.2% (+-0.5%)                | Easy 92.4%  Medium 91.9%  Hard 77.8% (+-0.5%) |
+| Model for inference        | 22M (.ckpt file)                                            | 23M (.ckpt file) |
 
 ### 310Inference Performance
 
diff --git a/model_zoo/official/cv/centerface/dependency/evaluate/eval.py b/model_zoo/official/cv/centerface/dependency/evaluate/eval.py
index b565cce4028..031aa1497b5 100644
--- a/model_zoo/official/cv/centerface/dependency/evaluate/eval.py
+++ b/model_zoo/official/cv/centerface/dependency/evaluate/eval.py
@@ -39,7 +39,7 @@ from bbox import bbox_overlaps
 def get_gt_boxes(gt_dir):
     """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)"""
 
-    gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) # you own ground_truth name
+    gt_mat = loadmat(os.path.join(gt_dir, 'val.mat')) # you own ground_truth name
     hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat'))
     medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat'))
     easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat'))
diff --git a/model_zoo/official/cv/centerface/scripts/test.sh b/model_zoo/official/cv/centerface/scripts/test.sh
index 4d623bd608f..ee719554631 100644
--- a/model_zoo/official/cv/centerface/scripts/test.sh
+++ b/model_zoo/official/cv/centerface/scripts/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,14 +14,15 @@
 # limitations under the License.
 # ============================================================================
 
-if [ $# -gt 6 ]
+if [ $# -gt 7 ]
 then
-    echo "Usage: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]"
-    echo "   or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]"
-    echo "   or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
-    echo "   or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
-    echo "   or: sh test.sh [MODEL_PATH] [DATASET]"
-    echo "   or: sh test.sh [MODEL_PATH]"
+    echo "Usage: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]"
+    echo "   or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]"
+    echo "   or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
+    echo "   or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
+    echo "   or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]"
+    echo "   or: sh test.sh [DEVICE_TARGET] [MODEL_PATH]"
+    echo "   or: sh test.sh [DEVICE_TARGET]"
     echo "   or: sh test.sh "
 exit 1
 fi
@@ -50,32 +51,43 @@ dataset_root=$root/dataset
 dataset_path=$dataset_root/centerface/images/val/images/
 ground_truth_mat=$dataset_root/centerface/ground_truth/val.mat
 save_path=$root/output/centerface/
+device_target="Ascend"
 device_id=0
-ckpt="0-125_24750.ckpt" # the model saved for epoch=125
+ckpt="0-140_221620.ckpt" # the model saved for epoch=140
 
-if [ $# == 1 ]
+if [ $# -ge 1 ]
 then
-    model_path=$(get_real_path $1)
-    if [ ! -f $model_path ]
+    device_target="$1"
+    if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ]
+    then
+        echo "error: device_target=$device_target is not a valid option (Ascend or GPU)"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 2 ]
+then
+    model_path=$(get_real_path $2)
+    if [ ! -d $model_path ]
     then
-        echo "error: model_path=$model_path is not a file"
+        echo "error: model_path=$model_path is not a dir"
     exit 1
     fi
 fi
 
-if [ $# == 2 ]
+if [ $# -ge 3 ]
 then
-    dataset_path=$(get_real_path $2)
-    if [ ! -f $dataset_path ]
+    dataset_path=$(get_real_path $3)
+    if [ ! -d $dataset_path ]
     then
-        echo "error: dataset_path=$dataset_path is not a file"
+        echo "error: dataset_path=$dataset_path is not a dir"
     exit 1
     fi
 fi
 
-if [ $# == 3 ]
+if [ $# -ge 4 ]
 then
-    ground_truth_mat=$(get_real_path $3)
+    ground_truth_mat=$(get_real_path $4)
     if [ ! -f $ground_truth_mat ]
     then
         echo "error: ground_truth_mat=$ground_truth_mat is not a file"
@@ -83,24 +95,24 @@ then
     fi
 fi
 
-if [ $# == 4 ]
+if [ $# -ge 5 ]
 then
-    save_path=$(get_real_path $4)
-    if [ ! -f $save_path ]
+    save_path=$(get_real_path $5)
+    if [ ! -d $save_path ]
     then
-        echo "error: save_path=$save_path is not a file"
+        echo "error: save_path=$save_path is not a dir"
     exit 1
     fi
 fi
 
-if [ $# == 5 ]
+if [ $# -ge 6 ]
 then
-    device_id=$5
+    device_id=$6
 fi
 
-if [ $# == 6 ]
+if [ $# == 7 ]
 then
-    ckpt=$6
+    ckpt=$7
 fi
 
 echo $model_path
@@ -126,6 +138,7 @@ python ${dirname_path}/${SCRIPT_NAME} \
     --ground_truth_mat=$ground_truth_mat \
     --save_dir=$save_path \
     --rank=$device_id \
+    --device_target=$device_target \
     --ckpt_name=$ckpt > test.log  2>&1 &
 
 echo 'running'
diff --git a/model_zoo/official/cv/centerface/scripts/test_and_eval.sh b/model_zoo/official/cv/centerface/scripts/test_and_eval.sh
index 6a6e1ea4f34..e52e0a59fae 100644
--- a/model_zoo/official/cv/centerface/scripts/test_and_eval.sh
+++ b/model_zoo/official/cv/centerface/scripts/test_and_eval.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,15 +14,16 @@
 # limitations under the License.
 # ============================================================================
 
-if [ $# -gt 6 ]
+if [ $# -gt 8 ]
 then
-    echo "Usage: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] [GROUND_TRUTH_PATH]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH] [DATASET]"
-    echo "   or: sh test_and_eval.sh [MODEL_PATH]"
+    echo "Usage: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] [GROUND_TRUTH_PATH]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH]"
+    echo "   or: sh test_and_eval.sh [DEVICE_TARGET]"
     echo "   or: sh test_and_eval.sh "
 exit 1
 fi
@@ -51,14 +52,24 @@ dataset_root=$root/dataset
 dataset_path=$dataset_root/centerface/images/val/images/
 ground_truth_mat=$dataset_root/centerface/ground_truth/val.mat
 save_path=$root/output/centerface/999
+device_target="Ascend"
 device_id=0
-ckpt="0-125_24750.ckpt" # the model saved for epoch=125
+ckpt="0-140_221620.ckpt" # the model saved for epoch=140
 ground_truth_path=$root/dataset/centerface/ground_truth
 
 if [ $# -ge 1 ]
 then
-    model_path=$(get_real_path $1)
-#    if [ ! -f $model_path ]
+    device_target="$1"
+    if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ]
+    then
+        echo "error: device_target=$device_target is not a valid option (Ascend or GPU)"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 2 ]
+then
+    model_path=$(get_real_path $2)
     if [ ! -d $model_path ]
     then
         echo "error: model_path=$model_path is not a dir"
@@ -66,9 +77,9 @@ then
     fi
 fi
 
-if [ $# -ge 2 ]
+if [ $# -ge 3 ]
 then
-    dataset_path=$(get_real_path $2)
+    dataset_path=$(get_real_path $3)
     if [ ! -d $dataset_path ]
     then
         echo "error: dataset_path=$dataset_path is not a dir"
@@ -76,9 +87,9 @@ then
     fi
 fi
 
-if [ $# -ge 3 ]
+if [ $# -ge 4 ]
 then
-    ground_truth_mat=$(get_real_path $3)
+    ground_truth_mat=$(get_real_path $4)
     if [ ! -f $ground_truth_mat ]
     then
         echo "error: ground_truth_mat=$ground_truth_mat is not a file"
@@ -86,9 +97,9 @@ then
     fi
 fi
 
-if [ $# -ge 4 ]
+if [ $# -ge 5 ]
 then
-    save_path=$(get_real_path $4)
+    save_path=$(get_real_path $5)
     if [ ! -d $save_path ]
     then
         echo "error: save_path=$save_path is not a dir"
@@ -96,19 +107,19 @@ then
     fi
 fi
 
-if [ $# -ge 5 ]
-then
-    device_id=$5
-fi
-
 if [ $# -ge 6 ]
 then
-    ckpt=$6
+    device_id=$6
 fi
 
 if [ $# -ge 7 ]
 then
-    ground_truth_path=$(get_real_path $7)
+    ckpt=$7
+fi
+
+if [ $# == 8 ]
+then
+    ground_truth_path=$(get_real_path $8)
     if [ ! -f $ground_truth_path ]
     then
         echo "error: ground_truth_path=$ground_truth_path is not a file"
@@ -142,6 +153,7 @@ python ${dirname_path}/${SCRIPT_NAME} \
     --rank=$device_id \
     --ckpt_name=$ckpt \
     --eval=1 \
+    --device_target=$device_target \
     --ground_truth_path=$ground_truth_path > test.log  2>&1 &
 
 echo 'running'
diff --git a/model_zoo/official/cv/centerface/scripts/test_distribute.sh b/model_zoo/official/cv/centerface/scripts/test_distribute.sh
index 3cfc82934e4..d14c84df6c8 100644
--- a/model_zoo/official/cv/centerface/scripts/test_distribute.sh
+++ b/model_zoo/official/cv/centerface/scripts/test_distribute.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,18 +14,19 @@
 # limitations under the License.
 # ============================================================================
 
-if [ $# -gt 8 ]
+if [ $# -gt 9 ]
 then
-    echo "Usage: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH] [DATASET]"
-    echo "   or: sh test_distribute.sh [MODEL_PATH]"
+    echo "Usage: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH]"
+    echo "   or: sh test_distribute.sh [DEVICE_TARGET]"
     echo "   or: sh test_distribute.sh "
 exit 1
 fi
@@ -58,6 +59,7 @@ save_path=$root/output/centerface/
 # model/ckpt name is "0-" + str(ckpt_num) + "_" + str(198*ckpt_num) + ".ckpt";
 # ckpt_num is epoch number, can be calculated by device_num
 # detail can be found in "test.py"
+device_target="Ascend"
 device_num=8
 steps_per_epoch=198 #198 for 8P; 1583 for 1p
 start=11 # start epoch number = start * device_num + min(device_phy_id) + 1
@@ -65,8 +67,17 @@ end=18 # end epoch number = end * device_num + max(device_phy_id) + 1
 
 if [ $# -ge 1 ]
 then
-    model_path=$(get_real_path $1)
-#    if [ ! -f $model_path ]
+    device_target="$1"
+    if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ]
+    then
+        echo "error: device_target=$device_target is not a valid option (Ascend or GPU)"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 2 ]
+then
+    model_path=$(get_real_path $2)
     if [ ! -d $model_path ]
     then
         echo "error: model_path=$model_path is not a dir"
@@ -74,9 +85,9 @@ then
     fi
 fi
 
-if [ $# -ge 2 ]
+if [ $# -ge 3 ]
 then
-    dataset_path=$(get_real_path $2)
+    dataset_path=$(get_real_path $3)
     if [ ! -d $dataset_path ]
     then
         echo "error: dataset_path=$dataset_path is not a dir"
@@ -84,9 +95,9 @@ then
     fi
 fi
 
-if [ $# -ge 3 ]
+if [ $# -ge 4 ]
 then
-    ground_truth_mat=$(get_real_path $3)
+    ground_truth_mat=$(get_real_path $4)
     if [ ! -f $ground_truth_mat ]
     then
         echo "error: ground_truth_mat=$ground_truth_mat is not a file"
@@ -94,9 +105,9 @@ then
     fi
 fi
 
-if [ $# -ge 4 ]
+if [ $# -ge 5 ]
 then
-    save_path=$(get_real_path $4)
+    save_path=$(get_real_path $5)
     if [ ! -d $save_path ]
     then
         echo "error: save_path=$save_path is not a dir"
@@ -104,24 +115,24 @@ then
     fi
 fi
 
-if [ $# -ge 5 ]
-then
-    device_num=$5
-fi
-
 if [ $# -ge 6 ]
 then
-    steps_per_epoch=$6
+    device_num=$6
 fi
 
 if [ $# -ge 7 ]
 then
-    start=$7
+    steps_per_epoch=$7
 fi
 
-if [ $# == 8 ]
+if [ $# -ge 8 ]
 then
-    end=$8
+    start=$8
+fi
+
+if [ $# == 9 ]
+then
+    end=$9
 fi
 
 echo $model_path
@@ -150,6 +161,7 @@ do
         --save_dir=$save_path \
         --rank=$i \
         --device_num=$device_num \
+        --device_target=$device_target \
         --steps_per_epoch=$steps_per_epoch \
         --start=$start \
         --end=$end > test.log  2>&1 &
diff --git a/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh b/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh
new file mode 100644
index 00000000000..c8b626de9ee
--- /dev/null
+++ b/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 0 ] && [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] && [ $# != 4 ] && [ $# != 5 ]
+then
+    echo "Usage: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]"
+    echo "   or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS]"
+    echo "   or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET]"
+    echo "   or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE]"
+    echo "   or: sh train_distribute_gpu.sh [DEVICE_NUM]"
+    echo "   or: sh train_distribute_gpu.sh "
+exit 1
+fi
+
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+current_exec_path=$(pwd)
+echo ${current_exec_path}
+
+dirname_path=$(dirname "$(pwd)")
+echo ${dirname_path}
+
+rm -rf ${current_exec_path}/train_distribute_gpu
+SCRIPT_NAME='train.py'
+
+ulimit -c unlimited
+
+root=${current_exec_path} # your script path
+pretrained_backbone=${dirname_path}/mobilenet_v2.ckpt # or mobilenet_v2-b0353104.ckpt
+dataset_path=$root/dataset/centerface
+annot_path=$dataset_path/annotations/train.json
+img_dir=$dataset_path/images/train/images
+num_devices=8
+
+if [ $# -ge 1 ] # $1: DEVICE_NUM (positional args are cumulative, so use -ge)
+then
+    num_devices=$1
+fi
+
+if [ $# -ge 2 ] # $2: PRETRAINED_BACKBONE (checkpoint file)
+then
+    pretrained_backbone=$(get_real_path "$2")
+    if [ ! -f "$pretrained_backbone" ]
+    then
+        echo "error: pretrained_backbone=$pretrained_backbone is not a file"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 3 ] # $3: DATASET (directory)
+then
+    dataset_path=$(get_real_path "$3")
+    if [ ! -d "$dataset_path" ]
+    then
+        echo "error: dataset_path=$dataset_path is not a directory"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 4 ] # $4: ANNOTATIONS (json file)
+then
+    annot_path=$(get_real_path "$4")
+    if [ ! -f "$annot_path" ]
+    then
+        echo "error: annot_path=$annot_path is not a file"
+    exit 1
+    fi
+fi
+
+if [ $# -ge 5 ] # $5: IMAGES (directory)
+then
+    img_dir=$(get_real_path "$5")
+    if [ ! -d "$img_dir" ]
+    then
+        echo "error: img_dir=$img_dir is not a directory"
+    exit 1
+    fi
+fi
+
+echo $pretrained_backbone
+echo $dataset_path
+echo $annot_path
+echo $img_dir
+
+export PYTHONPATH=${dirname_path}:$PYTHONPATH
+export RANK_SIZE=$num_devices
+export DEVICE_ID=0
+
+echo "start training on $RANK_SIZE devices"
+
+mkdir ${current_exec_path}/train_distribute_gpu
+cd ${current_exec_path}/train_distribute_gpu || exit
+
+mpirun -n $RANK_SIZE \
+    python ${dirname_path}/${SCRIPT_NAME} \
+    --lr=4e-3 \
+    --per_batch_size=8 \
+    --is_distributed=1 \
+    --t_max=140 \
+    --max_epoch=140 \
+    --warmup_epochs=0 \
+    --lr_scheduler=multistep \
+    --lr_epochs=90,120 \
+    --weight_decay=0.0000 \
+    --loss_scale=1024 \
+    --pretrained_backbone=$pretrained_backbone \
+    --data_dir=$dataset_path \
+    --annot_path=$annot_path \
+    --img_dir=$img_dir \
+    --device_target="GPU" > train.log  2>&1 &
+
+
+echo 'running'
diff --git a/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh b/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh
new file mode 100644
index 00000000000..6a187d66936
--- /dev/null
+++ b/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 0 ] && [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] && [ $# != 4 ] && [ $# != 5 ]
+then
+    echo "Usage: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]"
+    echo "   or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS]"
+    echo "   or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET]"
+    echo "   or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE]"
+    echo "   or: sh train_standalone_gpu.sh [USE_DEVICE_ID]"
+    echo "   or: sh train_standalone_gpu.sh "
+exit 1
+fi
+
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+current_exec_path=$(pwd)
+echo "current_exec_path: "   ${current_exec_path}
+
+dirname_path=$(dirname "$(pwd)")
+echo "dirname_path: "   ${dirname_path}
+
+SCRIPT_NAME='train.py'
+
+ulimit -c unlimited
+
+root=${current_exec_path} # your script path
+pretrained_backbone=${dirname_path}/mobilenet_v2.ckpt # or mobilenet_v2-b0353104.ckpt
+dataset_path=$root/dataset/centerface
+annot_path=$dataset_path/annotations/train.json
+img_dir=$dataset_path/images/train/images
+use_device_id=0
+
+if [ $# == 1 ]
+then
+    use_device_id=$1
+fi
+
+if [ $# == 2 ]
+then
+    use_device_id=$1
+    pretrained_backbone=$(get_real_path $2)
+fi
+
+if [ $# == 3 ]
+then
+    use_device_id=$1
+    pretrained_backbone=$(get_real_path $2)
+    dataset_path=$(get_real_path $3)
+fi
+
+if [ $# == 4 ]
+then
+    use_device_id=$1
+    pretrained_backbone=$(get_real_path $2)
+    dataset_path=$(get_real_path $3)
+    annot_path=$(get_real_path $4)
+fi
+
+if [ $# == 5 ]
+then
+    use_device_id=$1
+    pretrained_backbone=$(get_real_path $2)
+    dataset_path=$(get_real_path $3)
+    annot_path=$(get_real_path $4)
+    img_dir=$(get_real_path $5)
+fi
+
+echo "use_device_id: "   $use_device_id
+echo "pretrained_backbone: "   $pretrained_backbone
+echo "dataset_path: "   $dataset_path
+echo "annot_path: "   $annot_path
+echo "img_dir: "   $img_dir
+
+if [ ! -f $pretrained_backbone ]
+then
+    echo "error: pretrained_backbone=$pretrained_backbone is not a file"
+exit 1
+fi
+
+if [ ! -d $dataset_path ]
+then
+    echo "error: dataset_path=$dataset_path is not a directory"
+exit 1
+fi
+
+if [ ! -f $annot_path ]
+then
+    echo "error: annot_path=$annot_path is not a file"
+exit 1
+fi
+
+if [ ! -d $img_dir ]
+then
+    echo "error: img_dir=$img_dir is not a directory"
+exit 1
+fi
+
+export PYTHONPATH=${dirname_path}:$PYTHONPATH
+export RANK_SIZE=1
+
+echo 'start training'
+echo 'start rank '$use_device_id
+rm -rf ${current_exec_path}/train_standalone_gpu
+mkdir ${current_exec_path}/train_standalone_gpu
+cd ${current_exec_path}/train_standalone_gpu || exit
+export RANK_ID=0
+dev=`expr $use_device_id + 0`
+export DEVICE_ID=$dev
+python ${dirname_path}/${SCRIPT_NAME} \
+    --lr=5e-4 \
+    --per_batch_size=8 \
+    --is_distributed=0 \
+    --t_max=140 \
+    --max_epoch=140 \
+    --warmup_epochs=0 \
+    --lr_scheduler=multistep \
+    --lr_epochs=90,120 \
+    --weight_decay=0.0000 \
+    --loss_scale=1024 \
+    --pretrained_backbone=$pretrained_backbone \
+    --data_dir=$dataset_path \
+    --annot_path=$annot_path \
+    --img_dir=$img_dir \
+    --device_target="GPU" > train.log  2>&1 &
+
+echo 'running'
diff --git a/model_zoo/official/cv/centerface/src/centerface.py b/model_zoo/official/cv/centerface/src/centerface.py
index a5b482f0edb..aae19169a39 100644
--- a/model_zoo/official/cv/centerface/src/centerface.py
+++ b/model_zoo/official/cv/centerface/src/centerface.py
@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -39,6 +39,13 @@ reciprocal = P.Reciprocal()
 def tensor_grad_scale(scale, grad):
     return grad * reciprocal(scale)
 
+_grad_overflow = C.MultitypeFuncGraph("_grad_overflow")  # mapped over every gradient via HyperMap on the GPU path
+grad_overflow = P.FloatStatus()  # presumably yields a non-zero status when the tensor holds NaN/Inf -- confirm against MindSpore docs
+
+@_grad_overflow.register("Tensor")
+def _tensor_grad_overflow(grad):  # Tensor overload: return the FloatStatus result for a single gradient
+    return grad_overflow(grad)
+
 def conv1x1(in_channels, out_channels, stride=1, padding=0, has_bias=False):
     return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, has_bias=has_bias,
                      padding=padding, pad_mode="pad")
@@ -240,9 +247,16 @@ class TrainingWrapper(nn.Cell):
             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
 
         self.hyper_map = C.HyperMap()
-        self.alloc_status = NPUAllocFloatStatus()
-        self.get_status = NPUGetFloatStatus()
-        self.clear_status = NPUClearFloatStatus()
+        if context.get_context("device_target") == "GPU":
+            self.gpu_target = True
+            self.float_status = P.FloatStatus()
+            self.addn = P.AddN()
+            self.reshape = P.Reshape()
+        else:
+            self.gpu_target = False
+            self.alloc_status = NPUAllocFloatStatus()
+            self.get_status = NPUGetFloatStatus()
+            self.clear_status = NPUClearFloatStatus()
         self.reduce_sum = ReduceSum(keep_dims=False)
         self.base = Tensor(1, mstype.float32)
         self.less_equal = LessEqual()
@@ -257,12 +271,15 @@ class TrainingWrapper(nn.Cell):
         weights = self.weights
         loss = self.network(x, hm, reg_mask, ind, wh, wight_mask, hm_offset, hps_mask, landmarks)
 
-        # init overflow buffer
-        init = self.alloc_status()
-        init = F.depend(init, loss)
-        # clear overflow buffer
-        clear_status = self.clear_status(init)
-        loss = F.depend(loss, clear_status)
+        init = False
+
+        if not self.gpu_target:
+            # init overflow buffer
+            init = self.alloc_status()
+            init = F.depend(init, loss)
+            # clear overflow buffer
+            clear_status = self.clear_status(init)
+            loss = F.depend(loss, clear_status)
 
         #sens = sens_input #P.Fill()(P.DType()(loss), P.Shape()(loss), sens_input) # user can contral loss scale by add a sens_input
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
@@ -272,12 +289,20 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
 
-        # get the overflow buffer
-        init = F.depend(init, grads)
-        get_status = self.get_status(init)
-        init = F.depend(init, get_status)
-        # sum overflow buffer elements, 0:not overflow , >0:overflow
-        flag_sum = self.reduce_sum(init, (0,))
+        if not self.gpu_target:
+            # get the overflow buffer
+            init = F.depend(init, grads)
+
+            get_status = self.get_status(init)
+            init = F.depend(init, get_status)
+            # sum overflow buffer elements, 0:not overflow , >0:overflow
+            flag_sum = self.reduce_sum(init, (0,))
+        else:
+            flag_sum = self.hyper_map(F.partial(_grad_overflow), grads)
+            flag_sum = self.addn(flag_sum)
+            # convert flag_sum to scalar
+            flag_sum = self.reshape(flag_sum, (()))
+
         if self.is_distributed:
             # sum overflow flag over devices
             flag_reduce = self.allreduce(flag_sum)
diff --git a/model_zoo/official/cv/centerface/test.py b/model_zoo/official/cv/centerface/test.py
index 47654739e2d..b5635e79cf8 100644
--- a/model_zoo/official/cv/centerface/test.py
+++ b/model_zoo/official/cv/centerface/test.py
@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,8 +34,11 @@ from dependency.centernet.src.lib.detectors.base_detector import CenterFaceDetec
 from dependency.evaluate.eval import evaluation
 
 dev_id = get_device_id()
-context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=False,
-                    device_target="Ascend", save_graphs=False, device_id=dev_id)
+context.set_context(mode=context.GRAPH_MODE,
+                    device_target=config.device_target, save_graphs=False, device_id=dev_id)
+
+if config.device_target == "Ascend":
+    context.set_context(enable_auto_mixed_precision=False)
 
 def modelarts_process():
     config.data_dir = config.data_path
diff --git a/model_zoo/official/cv/centerface/train.py b/model_zoo/official/cv/centerface/train.py
index 74650ce842f..57a7d05603f 100644
--- a/model_zoo/official/cv/centerface/train.py
+++ b/model_zoo/official/cv/centerface/train.py
@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-21 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -52,14 +52,18 @@ from src.model_utils.device_adapter import get_device_id
 
 set_seed(1)
 dev_id = get_device_id()
-context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=False,
-                    device_target="Ascend", save_graphs=False, device_id=dev_id, reserve_class_name_in_scope=False)
+context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,
+                    save_graphs=False, device_id=dev_id, reserve_class_name_in_scope=False)
+
+if config.device_target == "Ascend":
+    context.set_context(enable_auto_mixed_precision=False)
 
 if config.lr_scheduler == 'cosine_annealing' and config.max_epoch > config.t_max:
     config.t_max = config.max_epoch
 
 config.lr_epochs = list(map(int, config.lr_epochs.split(',')))
 
+
 def convert_training_shape(args_):
     """
     Convert training shape
@@ -81,10 +85,12 @@ class InternalCallbackParam(dict):
 def modelarts_pre_process():
     config.ckpt_path = os.path.join(config.output_path, config.ckpt_path)
 
+
 @moxing_wrapper(pre_process=modelarts_pre_process)
 def train_centerface():
     pass
 
+
 if __name__ == "__main__":
     train_centerface()
     print('\ntrain.py config:\n', config)
@@ -103,7 +109,8 @@ if __name__ == "__main__":
         config.rank_save_ckpt_flag = 1
 
     # logger
-    config.outputs_dir = os.path.join(config.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
+    config.outputs_dir = os.path.join(
+        config.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
 
     if config.need_profiler:
         profiler = Profiler(output_path=config.outputs_dir)
@@ -120,14 +127,16 @@ if __name__ == "__main__":
 
     # Notice: parameter_broadcast should be supported, but current version has bugs, thus been disabled.
     # To make sure the init weight on all npu is the same, we need to set a static seed in default_recurisive_init when weight initialization
-    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
+    context.set_auto_parallel_context(
+        parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
     network = CenterfaceMobilev2()
     # init, to avoid overflow, some std of weight should be enough small
     default_recurisive_init(network)
 
     if config.pretrained_backbone:
         network = load_backbone(network, config.pretrained_backbone, config)
-        print('load pre-trained backbone {} into network'.format(config.pretrained_backbone))
+        print(
+            'load pre-trained backbone {} into network'.format(config.pretrained_backbone))
     else:
         print('Not load pre-trained backbone, please be careful')
 
@@ -155,9 +164,10 @@ if __name__ == "__main__":
         config.multi_scale = [convert_training_shape(config)]
 
     # data loader
-    data_loader, config.steps_per_epoch = GetDataLoader(per_batch_size=config.per_batch_size, \
-        max_epoch=config.max_epoch, rank=config.rank, group_size=config.group_size, \
-        config=config, split='train')
+    data_loader, config.steps_per_epoch = GetDataLoader(per_batch_size=config.per_batch_size,
+                                                        max_epoch=config.max_epoch, rank=config.rank,
+                                                        group_size=config.group_size,
+                                                        config=config, split='train')
     config.steps_per_epoch = config.steps_per_epoch // config.max_epoch
     print('Finish loading dataset')
 
@@ -238,7 +248,7 @@ if __name__ == "__main__":
         run_context = RunContext(cb_params)
         ckpt_cb.begin(run_context)
 
-        print('config.steps_per_epoch = {} config.ckpt_interval ={}'.format(config.steps_per_epoch, \
+        print('config.steps_per_epoch = {} config.ckpt_interval ={}'.format(config.steps_per_epoch,
                                                                             config.ckpt_interval))
 
     t_end = time.time()
@@ -258,12 +268,13 @@ if __name__ == "__main__":
         hps_mask = Tensor(hps_mask)
         landmarks = Tensor(landmarks)
 
-        loss, overflow, scaling = network(images, hm, reg_mask, ind, wh, wight_mask, hm_offset, hps_mask, landmarks)
+        loss, overflow, scaling = network(
+            images, hm, reg_mask, ind, wh, wight_mask, hm_offset, hps_mask, landmarks)
         # Tensor to numpy
         overflow = np.all(overflow.asnumpy())
         loss = loss.asnumpy()
         loss_meter.update(loss)
-        print('epoch:{}, iter:{}, avg_loss:{}, loss:{}, overflow:{}, loss_scale:{}'.format( \
+        print('epoch:{}, iter:{}, avg_loss:{}, loss:{}, overflow:{}, loss_scale:{}'.format(
             epoch, i, loss_meter, loss, overflow, scaling.asnumpy()))
 
         if config.rank_save_ckpt_flag:
@@ -280,7 +291,7 @@ if __name__ == "__main__":
                 print(
                     'epoch[{}], {}, {:.2f} imgs/sec, lr:{}'
                     .format(epoch, loss_meter, fps, lr[i + (epoch-1)*config.steps_per_epoch])
-                    )
+                )
             t_end = time.time()
             loss_meter.reset()