add gpu scripts to tinydarknet

2021-07-19 15:08:38 +08:00 · 2021-07-19 15:08:38 +08:00 · d6a9848b23
parent 72b6382d5d
commit d6a9848b23
17 changed files with 616 additions and 88 deletions
--- a/model_zoo/official/cv/tinydarknet/README.md
+++ b/model_zoo/official/cv/tinydarknet/README.md
@ -60,8 +60,8 @@ Dataset used can refer to [paper](<https://ieeexplore.ieee.org/abstract/document

 # [Environment Requirements](#contents)

- Hardware（Ascend/CPU）
-    - Prepare hardware environment with Ascend/CPU processor.
+- Hardware（Ascend/CPU/GPU）
+    - Prepare hardware environment with Ascend, GPU or CPU processor.
 - Framework
    - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information,please check the resources below：
@ -93,6 +93,35 @@ After installing MindSpore via the official website, you can start training and

  <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools.>

+- running on GPU with gpu default parameters
+
+  ```python
+  # GPU standalone training example
+  python train.py  \
+  --config_path=./config/imagenet_config_gpu.yaml \
+  --dataset_name=imagenet --train_data_dir=../dataset/imagenet_original/train --device_target=GPU
+  OR
+  cd scripts
+  bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10 | imagenet]
+
+  # GPU distribute training example
+  export RANK_SIZE=8
+  mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout  \
+  python train.py  \
+  --config_path=./config/imagenet_config_gpu.yaml \
+  --dataset_name=imagenet \
+  --train_data_dir=../dataset/imagenet_original/train \
+  --device_target=GPU
+  OR
+  bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10 | imagenet]
+
+  # GPU evaluation example
+  python eval.py --device_target=GPU --val_data_dir=../dataset/imagenet_original/val --dataset_name=imagenet --config_path=./config/imagenet_config_gpu.yaml \
+  --checkpoint_path=$PATH2
+  OR
+  bash scripts/run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]
+  ```
+
 - Running on ModelArts

  If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows.
@ -155,12 +184,20 @@ For more details, please refer the specify script.
    ├── README.md                       // descriptions about Tiny-Darknet in English
    ├── README_CN.md                    // descriptions about Tiny-Darknet in Chinese
    ├── ascend310_infer                 // application for 310 inference
+    ├── config
+        ├── imagenet_config.yaml        // imagenet parameter configuration
+        ├── imagenet_config_gpu.yaml    // imagenet parameter configuration for GPU
+        ├── cifar10_config.yaml         // cifar10 parameter configuration
+        ├── cifar10_config_gpu.yaml     // cifar10 parameter configuration for GPU
    ├── scripts
        ├── run_standalone_train.sh     // shell script for single on Ascend
+        ├── run_standalone_train_gpu.sh // shell script for single on GPU
        ├── run_distribute_train.sh     // shell script for distributed on Ascend
+        ├── run_distribute_train_gpu.sh // shell script for distributed on GPU
        ├── run_train_cpu.sh            // shell script for distributed on CPU
        ├── run_eval.sh                 // shell script for evaluation on Ascend
        ├── run_eval_cpu.sh             // shell script for evaluation on CPU
+        ├── run_eval_gpu.sh             // shell script for evaluation on GPU
        ├── run_infer_310.sh            // shell script for inference on Ascend310
    ├── src
        ├── lr_scheduler                //learning rate scheduler
@ -179,8 +216,6 @@ For more details, please refer the specify script.
    ├── train.py                        // training script
    ├── eval.py                         //  evaluation script
    ├── export.py                       // export checkpoint file into air/onnx
-    ├── imagenet_config.yaml            // imagenet parameter configuration
-    ├── cifar10_config.yaml             // cifar10 parameter configuration
    ├── mindspore_hub_conf.py           // hub config
    ├── postprocess.py                  // postprocess script

@ -252,6 +287,29 @@ For more configuration details, please refer the script `imagenet_config.yaml`.
  The model checkpoint file will be saved in the current folder.
  <!-- The model checkpoint will be saved in the current directory.  -->

+- running on GPU：
+
+  ```python
+  cd scripts
+  bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]
+  ```
+
+  The command above will run in the background, you can view the results through the file train.log.
+
+  After training, you'll get some checkpoint files under the script folder by default. The loss value will be achieved as follows:
+  <!-- After training, you'll get some checkpoint files under the script folder by default. The loss value will be achieved as follows: -->
+
+  ```python
+  # grep "loss is " train.log
+  epoch: 498 step: 1251, loss is 2.7798953
+  Epoch time: 130690.544, per step time: 104.469
+  epoch: 499 step: 1251, loss is 2.9261637
+  Epoch time: 130511.081, per step time: 104.325
+  epoch: 500 step: 1251, loss is 2.69412
+  Epoch time: 127067.548, per step time: 101.573
+  ...
+  ```
+
 - running on CPU

  ```python
@ -279,6 +337,25 @@ For more configuration details, please refer the script `imagenet_config.yaml`.
  ...
  ```

+- running on GPU：
+
+  ```python
+  bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet]
+  ```
+
+  The above shell script will run distributed training in the background. You can view the results in the log files under the `distribute_train_gpu` directory (the script redirects its output to `distribute_train_gpu/log.txt`). The loss value will be achieved as follows:
+
+  ```python
+  # grep "result: " distribute_train_gpu/nohup.out
+  epoch: 498 step: 1251, loss is 2.7825122
+  epoch time: 200066.210 ms, per step time: 159.925 ms
+  epoch: 499 step: 1251, loss is 2.799798
+  epoch time: 199098.258 ms, per step time: 159.151 ms
+  epoch: 500 step: 1251, loss is 2.8718748
+  epoch time: 197784.661 ms, per step time: 158.101 ms
+  ...
+  ```
+
 ## [Evaluation Process](#contents)

 ### [Evaluation](#contents)
@ -307,6 +384,28 @@ For more configuration details, please refer the script `imagenet_config.yaml`.
  accuracy:  {'top_1_accuracy': 0.5871979166666667, 'top_5_accuracy': 0.8175280448717949}
  ```

+- evaluation on Imagenet dataset when running on GPU:
+
+  Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "/username/tinydarknet/train_tinydarknet.ckpt".
+
+  ```python
+  bash scripts/run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]
+  ```
+
+  The above python command will run in the background. You can view the results through the file "eval.log". The accuracy of the test dataset will be as follows:
+
+  ```python
+  # grep "accuracy: " eval.log
+  accuracy:  {'top_1_accuracy': 0.5896033653846153, 'top_5_accuracy': 0.8176482371794872}
+  ```
+
+  Note that for evaluation after distributed training, please set the checkpoint_path to be the last saved checkpoint file. The accuracy of the test dataset will be as follows:
+
+  ```python
+  # grep "accuracy: " eval.log
+  accuracy:  {'top_1_accuracy': 0.5896033653846153, 'top_5_accuracy': 0.8176482371794872}
+  ```
+
 - evaluation on cifar-10 dataset when running on CPU:

  Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "/username/tinydaeknet/train_tinydarknet.ckpt".
@ -389,34 +488,33 @@ Inference result is saved in current path, you can find result like this in acc.

 ### [Training Performance](#contents)

-| Parameters                 | Ascend                                                      |
-| -------------------------- | ----------------------------------------------------------- |
-| Model Version              | V1                                                |
-| Resource                   | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8               |
-| Uploaded Date              | 2020/12/22                                 |
-| MindSpore Version          | 1.1.0                                                       |
-| Dataset                    | 1200k images                                                |
-| Training Parameters        | epoch=500, steps=1251, batch_size=128, lr=0.1               |
-| Optimizer                  | Momentum                                                    |
-| Loss Function              | Softmax Cross Entropy                                       |
-| Speed                      | 8 pc: 104 ms/step                        |
-| Total Time                 | 8 pc: 17.8 hours                                             |
-| Parameters(M)             | 4.0M                                                        |
-| Scripts                    | [Tiny-Darknet Scripts](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) |
+| Parameters                 | Ascend                                                      | GPU                                                 |
+| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------|
+| Model Version              | V1                                                          | V1                                                  |
+| Resource                   | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8  | PCIE V100-32G                                       |
+| Uploaded Date              | 2020/12/22                                                  | 2021/07/15                                          |
+| MindSpore Version          | 1.1.0                                                       | 1.3.0                                               |
+| Dataset                    | 1200k images                                                | 1200k images                                        |
+| Training Parameters        | epoch=500, steps=1251, batch_size=128, lr=0.1               | epoch=500, steps=1251, batch_size=128, lr=0.005     |
+| Optimizer                  | Momentum                                                    | Momentum                                            |
+| Loss Function              | Softmax Cross Entropy                                       | Softmax Cross Entropy                               |
+| Speed                      | 8pc: 104 ms/step                                            | 8pc: 255 ms/step                                    |
+| Total Time                 | 8pc: 17.8 hours                                             | 8pc: 46.9 hours                                     |
+| Parameters(M)              | 4.0                                                         | 4.0                                                 |
+| Scripts                    | [Tiny-Darknet scripts](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) | [Tiny-Darknet scripts](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) |

 ### [Evaluation Performance](#contents)

-| Parameters          | Ascend                      |
-| ------------------- | --------------------------- |
-| Model Version       | V1                |
-| Resource            | Ascend 910; OS Euler2.8                   |
-| Uploaded Date       | 2020/12/22 |
-| MindSpore Version   | 1.1.0                       |
-| Dataset             | 200k images                |
-| batch_size          | 128                         |
-| Outputs             | probability                 |
-| Accuracy            | 8 pc Top-1: 58.7%; Top-5: 81.7%                 |
-| Model for inference             | 11.6M (.ckpt file)                 |
+| Parameters                 | Ascend                            | GPU                               |
+| ------------------- | ----------------------------------| ----------------------------------|
+| Model Version             | V1                                | V1                                |
+| Resource                |  Ascend 910; OS Euler2.8        | PCIE V100-32G                  |
+| Uploaded Date            | 2020/12/22                        | 2021/7/15                         |
+| MindSpore Version       | 1.1.0                             | 1.3.0                             |
+| Dataset              | 200k images                        | 200k images                        |
+| batch_size          | 128                               | 128                               |
+| Outputs                | probability                          | probability                          |
+| Accuracy              | 8pcs Top-1: 58.7%; Top-5: 81.7%    | 8pcs Top-1: 58.9%; Top-5: 81.7%    |
+| Model for inference            | 11.6M (.ckpt file)                 | 10.06M (.ckpt file)                |

 ### [Inference Performance](#contents)

--- a/model_zoo/official/cv/tinydarknet/README_CN.md
+++ b/model_zoo/official/cv/tinydarknet/README_CN.md
@ -68,8 +68,8 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的

 # [环境要求](#目录)

- 硬件（Ascend/CPU）
-    - 请准备具有Ascend/CPU处理器的硬件环境.
+- 硬件（Ascend/CPU/GPU）
+    - 请准备具有Ascend/GPU/CPU处理器的硬件环境.
 - 框架
    - [MindSpore](https://www.mindspore.cn/install)
 - 更多的信息请访问以下链接：
@ -101,6 +101,35 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的

  <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools.>

+- 在GPU上使用GPU默认参数运行
+
+  ```python
+  # GPU单卡训练示例
+  python train.py  \
+  --config_path=./config/imagenet_config_gpu.yaml \
+  --dataset_name=imagenet --train_data_dir=../dataset/imagenet_original/train --device_target=GPU
+  OR
+  cd scripts
+  bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10 | imagenet]
+
+  # GPU多卡训练示例
+  export RANK_SIZE=8
+  mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout  \
+  python train.py  \
+  --config_path=./config/imagenet_config_gpu.yaml \
+  --dataset_name=imagenet \
+  --train_data_dir=../dataset/imagenet_original/train \
+  --device_target=GPU
+  OR
+  bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10 | imagenet]
+
+  # GPU评估示例
+  python eval.py --device_target=GPU --val_data_dir=../dataset/imagenet_original/val --dataset_name=imagenet --config_path=./config/imagenet_config_gpu.yaml \
+  --checkpoint_path=$PATH2
+  OR
+  bash scripts/run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]
+  ```
+
 - 在ModelArts上运行
      如果你想在modelarts上运行，可以参考以下文档 [modelarts](https://support.huaweicloud.com/modelarts/)

@ -162,12 +191,20 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的
 ├── README.md                           // Tiny-Darknet英文说明
    ├── README_CN.md                    // Tiny-Darknet中文说明
    ├── ascend310_infer                 // 用于310推理
+    ├── config
+        ├── imagenet_config.yaml        // imagenet参数配置
+        ├── imagenet_config_gpu.yaml    // imagenet GPU参数配置
+        ├── cifar10_config.yaml         // cifar10参数配置
+        ├── cifar10_config_gpu.yaml     // cifar10 GPU参数配置
    ├── scripts
        ├── run_standalone_train.sh     // Ascend单卡训练shell脚本
+        ├── run_standalone_train_gpu.sh // GPU单卡训练shell脚本  
        ├── run_distribute_train.sh     // Ascend分布式训练shell脚本
+        ├── run_distribute_train_gpu.sh // GPU分布式训练shell脚本
        ├── run_train_cpu.sh            // CPU训练shell脚本
        ├── run_eval.sh                 // Ascend评估shell脚本
        ├── run_eval_cpu.sh             // CPU评估shell脚本
+        ├── run_eval_gpu.sh             // GPU评估shell脚本
        └── run_infer_310.sh            // Ascend310推理shell脚本
    ├── src
        ├── lr_scheduler                // 学习率策略
@ -186,8 +223,6 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的
    ├── train.py                        // 训练脚本
    ├── eval.py                         // 评估脚本
    ├── export.py                       // 导出checkpoint文件
-    ├── imagenet_config.yaml            // imagenet参数配置
-    ├── cifar10_config.yaml             // cifar10参数配置
    ├── mindspore_hub_conf.py           // hub配置文件
    └── postprocess.py                  // 310推理后处理脚本

@ -259,6 +294,29 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的
  模型checkpoint文件将会保存在当前文件夹下.
  <!-- The model checkpoint will be saved in the current directory.  -->

+- 在GPU资源上运行：
+
+  ```python
+  cd scripts
+  bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]
+  ```
+
+  上述的命令将运行在后台中，可以通过 `train_single_gpu/train.log` 文件查看运行结果.
+
+  训练完成后,默认情况下,可在script文件夹下得到一些checkpoint文件. 训练的损失值将以如下的形式展示:
+  <!-- After training, you'll get some checkpoint files under the script folder by default. The loss value will be achieved as follows: -->
+
+  ```python
+  # grep "loss is " train.log
+  epoch: 498 step: 1251, loss is 2.7798953
+  Epoch time: 130690.544, per step time: 104.469
+  epoch: 499 step: 1251, loss is 2.9261637
+  Epoch time: 130511.081, per step time: 104.325
+  epoch: 500 step: 1251, loss is 2.69412
+  Epoch time: 127067.548, per step time: 101.573
+  ...
+  ```
+
 - 在CPU资源上运行：

  ```python
@ -273,16 +331,35 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的
  bash scripts/run_distribute_train.sh [RANK_TABLE_FILE]
  ```

-  上述的脚本命令将在后台中进行分布式训练，可以通过`train_parallel[X]/log`文件查看运行结果. 训练的损失值将以如下的形式展示:
+  上述的脚本命令将在后台中进行分布式训练，可以通过`distribute_train/nohup.out`文件查看运行结果. 训练的损失值将以如下的形式展示:

  ```python
-  # grep "result: " train_parallel*/log
-  epoch: 498 step: 1251, loss is 2.7798953
-  Epoch time: 130690.544, per step time: 104.469
-  epoch: 499 step: 1251, loss is 2.9261637
-  Epoch time: 130511.081, per step time: 104.325
-  epoch: 500 step: 1251, loss is 2.69412
-  Epoch time: 127067.548, per step time: 101.573
+  # grep "result: " distribute_train/nohup.out
+  epoch: 498 step: 1251, loss is 2.7825122
+  epoch time: 200066.210 ms, per step time: 159.925 ms
+  epoch: 499 step: 1251, loss is 2.799798
+  epoch time: 199098.258 ms, per step time: 159.151 ms
+  epoch: 500 step: 1251, loss is 2.8718748
+  epoch time: 197784.661 ms, per step time: 158.101 ms
+  ...
+  ```
+
+- 在GPU资源上运行：
+
+  ```python
+  bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet]
+  ```
+
+  上述的脚本命令将在后台中进行分布式训练，可以通过`distribute_train_gpu/log.txt`文件查看运行结果. 训练的损失值将以如下的形式展示:
+
+  ```python
+  # grep "result: " distribute_train_gpu/nohup.out
+  epoch: 498 step: 1251, loss is 2.7825122
+  epoch time: 200066.210 ms, per step time: 159.925 ms
+  epoch: 499 step: 1251, loss is 2.799798
+  epoch time: 199098.258 ms, per step time: 159.151 ms
+  epoch: 500 step: 1251, loss is 2.8718748
+  epoch time: 197784.661 ms, per step time: 158.101 ms
  ...
  ```

@ -314,12 +391,34 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的
  accuracy:  {'top_1_accuracy': 0.5871979166666667, 'top_5_accuracy': 0.8175280448717949}
  ```

+- 在GPU资源上进行评估:
+
+  在运行如下命令前,请确认用于评估的checkpoint文件的路径.checkpoint文件须包含在tinydarknet文件夹内.请将checkpoint路径设置为相对于 eval.py文件 的路径,例如:"./ckpts/train_tinydarknet.ckpt"(ckpts 与 eval.py 同级).
+
+  ```python
+  bash scripts/run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]
+  ```
+
+  上述的python命令将运行在后台中，可以通过"eval.log"文件查看结果. 测试数据集的准确率将如下面所列:
+
+  ```python
+  # grep "accuracy: " eval.log
+  accuracy:  {'top_1_accuracy': 0.5896033653846153, 'top_5_accuracy': 0.8176482371794872}
+  ```
+
+  请注意在并行训练后,测试请将checkpoint_path设置为最后保存的checkpoint文件的路径,准确率将如下面所列:
+
+  ```python
+  # grep "accuracy: " eval.log
+  accuracy:  {'top_1_accuracy': 0.5896033653846153, 'top_5_accuracy': 0.8176482371794872}
+  ```
+
 - 在CPU资源上进行评估

  在运行如下命令前,请确认用于评估的checkpoint文件的路径.checkpoint文件须包含在tinydarknet文件夹内.请将checkpoint路径设置为相对于 eval.py文件 的路径,例如:"./ckpts/train_tinydarknet.ckpt"(ckpts 与 eval.py 同级).

  ```python
-  bash scripts/run_eval.sh [VAL_DATA_DIR] [imagenet|cifar10] [CHECKPOINT_PATH]
+  bash scripts/run_eval_cpu.sh [VAL_DATA_DIR] [imagenet|cifar10] [CHECKPOINT_PATH]
  ```

  可以通过"eval.log"文件查看结果. 测试数据集的准确率将如下面所列:
@ -395,34 +494,36 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [LABEL_PATH] [DVPP] [DEVICE_ID]

 ### [训练性能](#目录)

-| 参数                 | Ascend                                                      |
-| -------------------------- | ----------------------------------------------------------- |
-| 模型版本              | V1                                                |
-| 资源                   |  Ascend 910；CPU 2.60GHz，56cores；内存 314G；系统 Euler2.8               |
-| 上传日期              | 2020/12/22                                 |
-| MindSpore版本          | 1.1.0                                                       |
-| 数据集                    | 1200k张图片                                                |
-| 训练参数        | epoch=500, steps=1251, batch_size=128, lr=0.1               |
-| 优化器                  | Momentum                                                    |
-| 损失函数              | Softmax Cross Entropy                                       |
-| 速度                      | 8卡: 104 ms/step                        |
-| 总时间                 | 8卡: 17.8小时                                             |
-| 参数(M)             | 4.0                                                        |
-| 脚本                    | [Tiny-Darknet脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) |
+#### Tinydarknet on ImageNet 2012
+
+| 参数                       | Ascend                                                      | GPU                                                 |
+| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------|
+| 模型版本                   | V1                                                          | V1                                                  |
+| 资源                       | Ascend 910；CPU 2.60GHz，56cores；内存 314G；系统 Euler2.8  | PCIE V100-32G                                       |
+| 上传日期                   | 2020/12/22                                                  | 2021/07/15                                          |
+| MindSpore版本              | 1.1.0                                                       | 1.3.0                                               |
+| 数据集                     | 1200k张图片                                                 | 1200k张图片                                         |
+| 训练参数                   | epoch=500, steps=1251, batch_size=128, lr=0.1               | epoch=500, steps=1251, batch_size = 128, lr=0.005   |
+| 优化器                     | Momentum                                                    | Momentum                                            |
+| 损失函数                   | Softmax Cross Entropy                                       | Softmax Cross Entropy                               |
+| 速度                       | 8卡: 104 ms/step                                            | 8卡: 255 ms/step                                    |
+| 总时间                     | 8卡: 17.8小时                                               | 8卡: 46.9小时                                       |
+| 参数(M)                    | 4.0;                                                        | 4.0;                                              |
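+| 脚本                       | [Tiny-Darknet脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) | [Tiny-Darknet脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/tinydarknet) |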

 ### [评估性能](#目录)

-| 参数          | Ascend                      |
-| ------------------- | --------------------------- |
-| 模型版本       | V1                |
-| 资源            |  Ascend 910；系统 Euler2.8                 |
-| 上传日期       | 2020/12/22 |
-| MindSpore版本   | 1.1.0                       |
-| 数据集             | 200k张图片                |
-| batch_size          | 128                         |
-| 输出             | 分类概率                 |
-| 准确率            | 8卡 Top-1: 58.7%; Top-5: 81.7%                 |
-| 推理模型             | 11.6M (.ckpt文件)                 |
+| 参数                | Ascend                            | GPU                               |
+| ------------------- | ----------------------------------| ----------------------------------|
+| 模型版本            | V1                                | V1                                |
+| 资源                |  Ascend 910；系统 Euler2.8        | NV SMX2 V100-32G                  |
+| 上传日期            | 2020/12/22                        | 2021/7/15                         |
+| MindSpore版本       | 1.1.0                             | 1.3.0                             |
+| 数据集              | 200k张图片                        | 200k张图片                        |
+| batch_size          | 128                               | 128                               |
+| 输出                | 分类概率                          | 分类概率                          |
+| 准确率              | 8卡 Top-1: 58.7%; Top-5: 81.7%    | 8卡 Top-1: 58.9%; Top-5: 81.7%    |
+| 推理模型            | 11.6M (.ckpt文件)                 | 10.06M (.ckpt文件)                |

 ### [推理性能](#目录)

--- a/model_zoo/official/cv/tinydarknet/config/cifar10_config.yaml
+++ b/model_zoo/official/cv/tinydarknet/config/cifar10_config.yaml
--- a/model_zoo/official/cv/tinydarknet/config/cifar10_config_gpu.yaml
+++ b/model_zoo/official/cv/tinydarknet/config/cifar10_config_gpu.yaml
@ -0,0 +1,57 @@
+# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path"
+device_target: "GPU"
+enable_profiling: False
+
+modelarts_dataset_unzip_name: ''
+# ==============================================================================
+#train-eval-export related
+dataset_name : cifar10
+ckpt_save_dir: checkpoints
+pre_trained: False
+device_id: 0
+num_classes: 10
+lr_init: 0.1
+batch_size: 32
+epoch_size: 120
+momentum: 0.9
+weight_decay: 0.0001
+image_height: 227
+image_width: 227
+train_data_dir: './data/cifar10_train/'
+val_data_dir: './data/cifar10_val/'
+keep_checkpoint_max: 1
+checkpoint_path: './scripts/train_parallel4/ckpt_4/train_tinydarknet_imagenet-300_1251.ckpt'
+onnx_filename: 'tinydarknet.onnx'
+air_filename: 'tinydarknet.air'
+# optimizer and lr related
+lr_scheduler: 'exponential'
+lr_epochs: [70, 140, 210, 280]
+lr_gamma: 0.1
+eta_min: 0.0
+T_max: 150
+warmup_epochs: 0
+# loss related
+is_dynamic_loss_scale: False
+loss_scale: 1024
+label_smooth_factor: 0.1
+use_label_smooth: True
+
+---
+
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Url for modelarts"
+train_url: "Url for modelarts"
+data_path: "The location of the input data."
+output_path: "The location of the output file."
+device_target: "Running platform, choose from Ascend, GPU or CPU, and default is Ascend."
+enable_profiling: 'Whether enable profiling while training, default: False'
--- a/model_zoo/official/cv/tinydarknet/config/imagenet_config.yaml
+++ b/model_zoo/official/cv/tinydarknet/config/imagenet_config.yaml
--- a/model_zoo/official/cv/tinydarknet/config/imagenet_config_gpu.yaml
+++ b/model_zoo/official/cv/tinydarknet/config/imagenet_config_gpu.yaml
@ -0,0 +1,61 @@
+# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path"
+device_target: "GPU"
+enable_profiling: False
+
+modelarts_dataset_unzip_name: ''
+# ==============================================================================
+#train-eval-export related
+dataset_name: imagenet
+ckpt_save_dir: checkpoints
+pre_trained: False
+device_id: 0
+num_classes: 1000
+lr_init: 0.1
+batch_size: 128
+epoch_size: 500
+momentum: 0.9
+weight_decay: 0.0001
+image_height: 224
+image_width: 224
+train_data_dir: './dataset/imagenet_original/train/'
+val_data_dir: './dataset/imagenet_original/val/'
+keep_checkpoint_max: 1
+checkpoint_path: './scripts/train_parallel4/ckpt_4/train_tinydarknet_imagenet-300_1251.ckpt'
+file_name: 'tinydarknet'
+file_format: 'MINDIR'
+# optimizer and lr related
+lr_scheduler: 'exponential'
+lr_epochs: [70, 140, 210, 280]
+lr_gamma: 0.3
+eta_min: 0.0
+T_max: 150
+warmup_epochs: 0
+# loss related
+is_dynamic_loss_scale: False
+loss_scale: 1024
+label_smooth_factor: 0.1
+use_label_smooth: True
+#310infer postprocess
+result_path: ''
+label_file: ''
+
+---
+
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Url for modelarts"
+train_url: "Url for modelarts"
+data_path: "The location of the input data."
+output_path: "The location of the output file."
+device_target: "Running platform, choose from Ascend, GPU or CPU, and default is Ascend."
+enable_profiling: 'Whether enable profiling while training, default: False'
+file_format: '["MINDIR", "AIR"]'
--- a/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train.sh
@ -57,7 +57,7 @@ do
    mkdir ./train_parallel$i
    cp -r ../src ./train_parallel$i
    cp ../train.py ./train_parallel$i
-    cp ../*.yaml ./train_parallel$i
+    cp -r ../config ./train_parallel$i
    echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type"
    cd ./train_parallel$i || exit
    env > env.log
--- a/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh
@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Launch distributed (mpirun) GPU training of Tiny-DarkNet in the background.
+#
+# Arguments:
+#   $1  RANK_SIZE       number of processes handed to `mpirun -n`
+#   $2  TRAIN_DATA_DIR  training data directory (absolute, or relative to $PWD)
+#   $3  dataset         cifar10 | imagenet
+#
+# NOTE: the *.py files, config/ and src/ are copied from the current working
+# directory, so start the script from the directory that holds them.
+if [ $# != 3 ]; then
+  # The script relies on bash-only substring expansion, so advertise `bash`.
+  echo "Usage: bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet]"
+  exit 1
+fi
+
+# Print $1 as an absolute path: an absolute input is echoed unchanged, a
+# relative one is resolved against $PWD (`realpath -m` accepts missing parts).
+get_real_path() {
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    # Quoted so that paths containing spaces are not word-split.
+    realpath -m "$PWD/$1"
+  fi
+}
+
+# Validate the dataset once, before anything on disk is touched.
+dataset_type="$3"
+if [ "$dataset_type" != "cifar10" ] && [ "$dataset_type" != "imagenet" ]; then
+  echo "error: the selected dataset is neither cifar10 nor imagenet"
+  exit 1
+fi
+
+export RANK_SIZE=$1
+# Directory containing this script. No "./" prefix is prepended, so the
+# lookup also works when the script is invoked through an absolute path.
+PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
+TRAIN_DATA_DIR=$(get_real_path "$2")
+
+if [ ! -d "$TRAIN_DATA_DIR" ]; then
+  echo "error: TRAIN_DATA_DIR=$TRAIN_DATA_DIR is not a directory"
+  exit 1
+fi
+
+# Start from a fresh working directory on every launch.
+rm -rf ./distribute_train_gpu
+mkdir ./distribute_train_gpu
+cp ./*.py ./distribute_train_gpu
+cp -r ./config ./distribute_train_gpu
+cp -r ./src ./distribute_train_gpu
+cd ./distribute_train_gpu || exit
+
+# dataset_type was validated above, so it maps directly onto the config name
+# (imagenet_config_gpu.yaml or cifar10_config_gpu.yaml).
+CONFIG_FILE="$PROJECT_DIR/../config/${dataset_type}_config_gpu.yaml"
+
+# Runs in the background; the launcher's stdout/stderr go to log.txt inside
+# distribute_train_gpu.
+mpirun --allow-run-as-root -n "$RANK_SIZE" --output-filename log_output --merge-stderr-to-stdout \
+nohup python train.py  \
+  --config_path="$CONFIG_FILE" \
+  --dataset_name="$dataset_type" \
+  --train_data_dir="$TRAIN_DATA_DIR" \
+  --device_target=GPU > log.txt 2>&1 &
+cd ..
--- a/model_zoo/official/cv/tinydarknet/scripts/run_eval.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_eval.sh
@ -22,7 +22,7 @@ rm -rf ./eval
 mkdir ./eval
 cp -r ../src ./eval
 cp ../eval.py ./eval
-cp ../*.yaml ./eval
+cp -r ../config ./eval
 cd ./eval || exit
 env >env.log
 python ./eval.py > ./eval.log 2>&1 &
--- a/model_zoo/official/cv/tinydarknet/scripts/run_eval_cpu.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_eval_cpu.sh
@ -43,9 +43,9 @@ fi

 BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")")
 if [ $2 == 'imagenet' ]; then
-  CONFIG_FILE="${BASE_PATH}/imagenet_config.yaml"
+  CONFIG_FILE="${BASE_PATH}/config/imagenet_config.yaml"
 elif [ $2 == 'cifar10' ]; then
-  CONFIG_FILE="${BASE_PATH}/cifar10_config.yaml"
+  CONFIG_FILE="${BASE_PATH}/config/cifar10_config.yaml"
 else
  echo "error: the selected dataset is neither cifar10 nor imagenet"
 exit 1
@ -55,7 +55,7 @@ rm -rf ./eval
 mkdir ./eval
 cp -r ./src ./eval
 cp ./eval.py ./eval
-cp ./*.yaml ./eval
+cp -r ./config ./eval
 env >env.log
 echo "start evaluation for device CPU"
 cd ./eval || exit
--- a/model_zoo/official/cv/tinydarknet/scripts/run_eval_gpu.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_eval_gpu.sh
@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Evaluate a TinyDarkNet checkpoint on GPU.
+#
+# Arguments (all three are required):
+#   $1  VAL_DATA_DIR     validation data directory (absolute, or relative to $PWD)
+#   $2  dataset name     "cifar10" or "imagenet"
+#   $3  checkpoint_path  checkpoint file to evaluate (absolute, or relative to $PWD)
+#
+# The sources are copied into ./eval (recreated on every run) and eval.py is
+# started there in the background with its output redirected to ./eval/eval.log.
+
+# $2 and $3 are both used unconditionally below, so fewer than three
+# arguments can never work.
+if [ $# != 3 ]
+then
+  echo "Usage: bash scripts/run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]"
+  exit 1
+fi
+
+# Print an absolute form of $1: unchanged when already absolute, otherwise
+# resolved against the current working directory.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+PATH1=$(get_real_path "$1")
+if [ ! -d "$PATH1" ]
+then
+  echo "error: VAL_DATA_DIR=$PATH1 is not a directory"
+  exit 1
+fi
+
+PATH2=$(get_real_path "$3")
+if [ ! -f "$PATH2" ]
+then
+  echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+  exit 1
+fi
+
+# Project root: the parent of the directory that contains this script.
+BASE_PATH=$(dirname "$(dirname "$(readlink -f "$0")")")
+case "$2" in
+  imagenet) CONFIG_FILE="${BASE_PATH}/config/imagenet_config_gpu.yaml" ;;
+  cifar10) CONFIG_FILE="${BASE_PATH}/config/cifar10_config_gpu.yaml" ;;
+  *)
+    echo "error: the selected dataset is neither cifar10 nor imagenet"
+    exit 1
+    ;;
+esac
+
+# Copy sources from the project root rather than from $PWD so the script
+# behaves the same wherever it is launched from.
+rm -rf ./eval
+mkdir ./eval
+cp -r "${BASE_PATH}/src" ./eval
+cp "${BASE_PATH}/eval.py" ./eval
+cp -r "${BASE_PATH}/config" ./eval
+env >env.log
+echo "start evaluation for device GPU"
+cd ./eval || exit
+python ./eval.py --device_target=GPU --val_data_dir="$PATH1" --dataset_name="$2" --config_path="$CONFIG_FILE" \
+--checkpoint_path="$PATH2" > ./eval.log 2>&1 &
+cd ..
--- a/model_zoo/official/cv/tinydarknet/scripts/run_standalone_train.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_standalone_train.sh
@ -55,7 +55,7 @@ rm -rf ./train_single
 mkdir ./train_single
 cp -r ../src ./train_single
 cp ../train.py ./train_single
-cp ../*.yaml ./train_single
+cp -r ../config ./train_single
 echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type"
 cd ./train_single || exit
 python ./train.py --dataset_name=$dataset_type --train_data_dir=$train_data_dir> ./train.log 2>&1 &
--- a/model_zoo/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh
@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Train TinyDarkNet on a single GPU.
+#
+# Arguments:
+#   $1  DEVICE_ID       non-negative integer, exported as DEVICE_ID
+#   $2  TRAIN_DATA_DIR  existing training data directory
+#   $3  dataset name    optional: "cifar10" or "imagenet" (default: imagenet)
+#
+# The sources are copied into ./train_single_gpu (recreated on every run) and
+# train.py is started there in the background with its output redirected to
+# ./train_single_gpu/train.log.
+
+echo "$1 $2 $3"
+
+if [ $# != 2 ] && [ $# != 3 ]
+then
+    echo "Usage: bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]"
+    exit 1
+fi
+
+# Accept only decimal digits; a pattern match avoids relying on the exit
+# status of `expr`, which is also non-zero when the arithmetic result is 0.
+case "$1" in
+    ''|*[!0-9]*)
+        echo "error:DEVICE_ID=$1 is not a integer"
+        exit 1
+        ;;
+esac
+
+if [ ! -d "$2" ]
+then
+    echo "error:TRAIN_DATA_DIR=$2 is not a folder"
+    exit 1
+fi
+# Make the path absolute: training runs from ./train_single_gpu, where a
+# path relative to the caller's directory would no longer resolve.
+train_data_dir=$(cd "$2" && pwd) || exit 1
+
+# Directory holding this script; works for relative and absolute invocations.
+PROJECT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
+
+# The dataset defaults to imagenet when the third argument is omitted.
+dataset_type='imagenet'
+if [ $# == 3 ]
+then
+    dataset_type=$3
+fi
+
+# Select the config from the effective dataset name (not from the raw $3,
+# which is empty in the two-argument form).
+case "$dataset_type" in
+    imagenet) CONFIG_FILE="$PROJECT_DIR/../config/imagenet_config_gpu.yaml" ;;
+    cifar10) CONFIG_FILE="$PROJECT_DIR/../config/cifar10_config_gpu.yaml" ;;
+    *)
+        echo "error: the selected dataset is neither cifar10 nor imagenet"
+        exit 1
+        ;;
+esac
+
+export DEVICE_ID=$1
+export RANK_ID=0
+export DEVICE_NUM=1
+export RANK_SIZE=1
+
+# Copy sources from the project root (parent of this script's directory)
+# into a clean working directory.
+rm -rf ./train_single_gpu
+mkdir ./train_single_gpu
+cp -r "$PROJECT_DIR/../src" ./train_single_gpu
+cp "$PROJECT_DIR/../train.py" ./train_single_gpu
+cp -r "$PROJECT_DIR/../config" ./train_single_gpu
+echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type"
+cd ./train_single_gpu || exit
+python ./train.py --config_path="$CONFIG_FILE" \
+--dataset_name="$dataset_type" --train_data_dir="$train_data_dir" --device_target=GPU> ./train.log 2>&1 &
+
--- a/model_zoo/official/cv/tinydarknet/scripts/run_train_cpu.sh
+++ b/model_zoo/official/cv/tinydarknet/scripts/run_train_cpu.sh
@ -49,7 +49,7 @@ rm -rf ./train_cpu
 mkdir ./train_cpu
 cp ./train.py ./train_cpu
 cp -r ./src ./train_cpu
-cp ./*.yaml ./train_cpu
+cp -r ./config ./train_cpu
 echo "start training for device CPU"
 cd ./train_cpu || exit
 env > env.log
--- a/model_zoo/official/cv/tinydarknet/src/dataset.py
+++ b/model_zoo/official/cv/tinydarknet/src/dataset.py
@ -40,14 +40,10 @@ def create_dataset_cifar(dataset_path,
    Returns:
        dataset
    """
-    if target == "Ascend":
-        device_num, rank_id = _get_rank_info()
-    elif target == "CPU":
+    if target == "CPU":
        device_num = 1
    else:
-        init()
-        rank_id = get_rank()
-        device_num = get_group_size()
+        device_num, rank_id = _get_rank_info()

    if device_num == 1:
        data_set = ds.Cifar10Dataset(dataset_path,
@ -165,7 +161,8 @@ def _get_rank_info():
    rank_size = int(os.environ.get("RANK_SIZE", 1))

    if rank_size > 1:
-        from mindspore.communication.management import get_rank, get_group_size
+        from mindspore.communication.management import init, get_rank, get_group_size
+        init()
        rank_size = get_group_size()
        rank_id = get_rank()
    else:
--- a/model_zoo/official/cv/tinydarknet/src/model_utils/config.py
+++ b/model_zoo/official/cv/tinydarknet/src/model_utils/config.py
@ -117,7 +117,7 @@ def get_config():
    """
    parser = argparse.ArgumentParser(description="default name", add_help=False)
    current_dir = os.path.dirname(os.path.abspath(__file__))
-    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../{}".format(_config)),
+    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../config/{}".format(_config)),
                        help="Config file path")
    path_args, _ = parser.parse_known_args()
    default, helper, choices = parse_yaml(path_args.config_path)
--- a/model_zoo/official/cv/tinydarknet/train.py
+++ b/model_zoo/official/cv/tinydarknet/train.py
@ -21,7 +21,7 @@ import time

 from mindspore import Tensor
 from mindspore import context
-from mindspore.communication.management import init
+from mindspore.communication.management import init, get_rank
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager
@ -36,7 +36,7 @@ from src.tinydarknet import TinyDarkNet
 from src.CrossEntropySmooth import CrossEntropySmooth
 from src.model_utils.config import config
 from src.model_utils.moxing_adapter import moxing_wrapper
-from src.model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
+from src.model_utils.device_adapter import get_device_id, get_device_num

 set_seed(1)

@ -132,11 +132,11 @@ def run_train():
    else:
        context.set_context(device_id=get_device_id())
        if device_num > 1:
+            init()
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                              gradients_mean=True)
-            init()
-            rank = get_rank_id()
+            rank = get_rank()

    if config.dataset_name == "imagenet":
        dataset = create_dataset_imagenet(config.train_data_dir, 1)
@ -204,10 +204,12 @@ def run_train():

    if device_target == "CPU":
        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, loss_scale_manager=loss_scale_manager)
-    else:
+    elif device_target == "Ascend":
        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
                      amp_level="O3", loss_scale_manager=loss_scale_manager)
-
+    elif device_target == "GPU":
+        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
+                      amp_level="O2", loss_scale_manager=loss_scale_manager)
    config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 50, keep_checkpoint_max=config.keep_checkpoint_max)
    time_cb = TimeMonitor(data_size=batch_num)
    ckpt_save_dir = os.path.join(config.ckpt_save_dir, str(rank))