From 44833d7886222c3e94f8a2a5e550a794d9d605ab Mon Sep 17 00:00:00 2001 From: ZeyangGao Date: Fri, 30 Jul 2021 10:29:09 +0800 Subject: [PATCH] add gpu scripts to resnet18 --- model_zoo/official/cv/resnet/README.md | 86 ++++++++++--------- model_zoo/official/cv/resnet/README_CN.md | 85 +++++++++--------- .../resnet101_imagenet2012_config.yaml | 0 .../{ => config}/resnet18_cifar10_config.yaml | 0 .../config/resnet18_cifar10_config_gpu.yaml | 80 +++++++++++++++++ .../resnet18_imagenet2012_config.yaml | 0 .../resnet18_imagenet2012_config_gpu.yaml | 82 ++++++++++++++++++ .../resnet34_imagenet2012_config.yaml | 0 .../{ => config}/resnet50_cifar10_config.yaml | 0 .../resnet50_imagenet2012_Acc_config.yaml | 0 ...net50_imagenet2012_Ascend_Thor_config.yaml | 0 ...resnet50_imagenet2012_GPU_Thor_config.yaml | 0 .../resnet50_imagenet2012_config.yaml | 0 .../{ => config}/resnet_benchmark_GPU.yaml | 0 .../se-resnet50_imagenet2012_config.yaml | 0 .../cv/resnet/scripts/run_distribute_train.sh | 4 +- .../scripts/run_distribute_train_gpu.sh | 4 +- .../official/cv/resnet/scripts/run_eval.sh | 4 +- .../cv/resnet/scripts/run_eval_gpu.sh | 4 +- .../official/cv/resnet/scripts/run_infer.sh | 4 +- .../cv/resnet/scripts/run_infer_310.sh | 2 +- .../scripts/run_parameter_server_train.sh | 8 +- .../scripts/run_parameter_server_train_gpu.sh | 8 +- .../cv/resnet/scripts/run_standalone_train.sh | 4 +- .../scripts/run_standalone_train_gpu.sh | 4 +- .../cv/resnet/src/model_utils/config.py | 4 +- .../resnet50/test_resnet50_cifar10.py | 4 +- 27 files changed, 276 insertions(+), 111 deletions(-) rename model_zoo/official/cv/resnet/{ => config}/resnet101_imagenet2012_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet18_cifar10_config.yaml (100%) create mode 100644 model_zoo/official/cv/resnet/config/resnet18_cifar10_config_gpu.yaml rename model_zoo/official/cv/resnet/{ => config}/resnet18_imagenet2012_config.yaml (100%) create mode 100644 
model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config_gpu.yaml rename model_zoo/official/cv/resnet/{ => config}/resnet34_imagenet2012_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet50_cifar10_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet50_imagenet2012_Acc_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet50_imagenet2012_Ascend_Thor_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet50_imagenet2012_GPU_Thor_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet50_imagenet2012_config.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/resnet_benchmark_GPU.yaml (100%) rename model_zoo/official/cv/resnet/{ => config}/se-resnet50_imagenet2012_config.yaml (100%) diff --git a/model_zoo/official/cv/resnet/README.md b/model_zoo/official/cv/resnet/README.md index 2a2271a6bfb..c5c17bdb8a1 100644 --- a/model_zoo/official/cv/resnet/README.md +++ b/model_zoo/official/cv/resnet/README.md @@ -202,6 +202,18 @@ If you want to run in modelarts, please check the official documentation of [mod . 
└──resnet ├── README.md + ├── config + ├── resnet18_cifar10_config.yaml # parameter configuration + ├── resnet18_cifar10_config_gpu.yaml # parameter configuration + ├── resnet18_imagenet2012_config.yaml # parameter configuration + ├── resnet18_imagenet2012_config_gpu.yaml # parameter configuration + ├── resnet34_imagenet2012_config.yaml # parameter configuration + ├── resnet50_cifar10_config.yaml # parameter configuration + ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration + ├── resnet50_imagenet2012_config.yaml # parameter configuration + ├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration + ├── resnet101_imagenet2012_config.yaml # parameter configuration + └── se-resnet50_imagenet2012_config.yaml # parameter configuration ├── scripts ├── run_distribute_train.sh # launch ascend distributed training(8 pcs) ├── run_parameter_server_train.sh # launch ascend parameter server training(8 pcs) @@ -226,16 +238,6 @@ If you want to run in modelarts, please check the official documentation of [mod ├──device_adapter.py # device adapter ├──local_adapter.py # local adapter ├──moxing_adapter.py # moxing adapter - ├── resnet18_cifar10_config.yaml # parameter configuration - ├── resnet18_imagenet2012_config.yaml # parameter configuration - ├── resnet34_imagenet2012_config.yaml # parameter configuration - ├── resnet50_cifar10_config.yaml # parameter configuration - ├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration - ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration - ├── resnet50_imagenet2012_config.yaml # parameter configuration - ├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration - ├── resnet101_imagenet2012_config.yaml # parameter configuration - ├── se-resnet50_imagenet2012_config.yaml # parameter configuration ├── export.py # export model for inference ├── mindspore_hub_conf.py # mindspore hub interface ├── eval.py # eval net @@ -713,42 +715,42 @@ Total data: 50000, top1 
accuracy: 0.76844, top5 accuracy: 0.93522. #### ResNet18 on CIFAR-10 -| Parameters | Ascend 910 | -| -------------------------- | -------------------------------------- | -| Model Version | ResNet18 | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | -| uploaded Date | 02/25/2021 (month/day/year) | -| MindSpore Version | 1.1.1 | -| Dataset | CIFAR-10 | -| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | -| Optimizer | Momentum | -| Loss Function | Softmax Cross Entropy | -| outputs | probability | -| Loss | 0.0002519517 | -| Speed | 13 ms/step(8pcs) | -| Total time | 4 mins | -| Parameters (M) | 11.2 | -| Checkpoint for Fine tuning | 86M (.ckpt file) | +| Parameters | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| Model Version | ResNet18 | ResNet18 | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G | +| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) | +| MindSpore Version | 1.1.1 | 1.3.0 | +| Dataset | CIFAR-10 | CIFAR-10 | +| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 | +| Optimizer | Momentum | Momentum | +| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | +| outputs | probability | probability | +| Loss | 0.0002519517 | 0.0015517382 | +| Speed | 13 ms/step(8pcs) | 29 ms/step(8pcs) | +| Total time | 4 mins | 11 mins | +| Parameters (M) | 11.2 | 11.2 | +| Checkpoint for Fine tuning | 86M (.ckpt file) | 85.4M (.ckpt file) | | Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ResNet18 on ImageNet2012 -| Parameters | Ascend 910 | -| -------------------------- | -------------------------------------- | -| Model Version | ResNet18 | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | -| uploaded Date | 
02/25/2021 (month/day/year) ; | -| MindSpore Version | 1.1.1 | -| Dataset | ImageNet2012 | -| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | -| Optimizer | Momentum | -| Loss Function | Softmax Cross Entropy | -| outputs | probability | -| Loss | 2.15702 | -| Speed | 110ms/step(8pcs) (may need to set_numa_enbale in dataset.py) | -| Total time | 110 mins | -| Parameters (M) | 11.7 | -| Checkpoint for Fine tuning | 90M (.ckpt file) | +| Parameters | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| Model Version | ResNet18 | ResNet18 | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G | +| uploaded Date | 02/25/2021 (month/day/year) ; | 07/23/2021 (month/day/year) | +| MindSpore Version | 1.1.1 | 1.3.0 | +| Dataset | ImageNet2012 | ImageNet2012 | +| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 | +| Optimizer | Momentum | Momentum | +| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | +| outputs | probability | probability | +| Loss | 2.15702 | 2.168664 | +| Speed | 110ms/step(8pcs) (may need to set_numa_enbale in dataset.py) | 107 ms/step(8pcs) | +| Total time | 110 mins | 130 mins | +| Parameters (M) | 11.7 | 11.7 | +| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) | | Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ResNet50 on CIFAR-10 diff --git a/model_zoo/official/cv/resnet/README_CN.md b/model_zoo/official/cv/resnet/README_CN.md index 18c39d777e6..51c9965f8f2 100755 --- a/model_zoo/official/cv/resnet/README_CN.md +++ b/model_zoo/official/cv/resnet/README_CN.md @@ -188,6 +188,18 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] . 
└──resnet ├── README.md + ├── config + ├── resnet18_cifar10_config.yaml # 参数配置 + ├── resnet18_cifar10_config_gpu.yaml # 参数配置 + ├── resnet18_imagenet2012_config.yaml # 参数配置 + ├── resnet18_imagenet2012_config_gpu.yaml # 参数配置 + ├── resnet34_imagenet2012_config.yaml # 参数配置 + ├── resnet50_cifar10_config.yaml # 参数配置 + ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # 参数配置 + ├── resnet50_imagenet2012_config.yaml # 参数配置 + ├── resnet50_imagenet2012_GPU_Thor_config.yaml # 参数配置 + ├── resnet101_imagenet2012_config.yaml # 参数配置 + └── se-resnet50_imagenet2012_config.yaml # 参数配置 ├── scripts ├── run_distribute_train.sh # 启动Ascend分布式训练(8卡) ├── run_parameter_server_train.sh # 启动Ascend参数服务器训练(8卡) @@ -209,17 +221,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] ├── device_adapter.py # 设备配置 ├── local_adapter.py # 本地设备配置 └── moxing_adapter.py # modelarts设备配置 - ├── resnet18_cifar10_config.yaml # 参数配置 - ├── resnet18_imagenet2012_config.yaml # 参数配置 - ├── resnet34_imagenet2012_config.yaml # 参数配置 - ├── resnet50_cifar10_config.yaml # 参数配置 - ├── resnet50_imagenet2012_Acc_config.yaml # 参数配置 - ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # 参数配置 - ├── resnet50_imagenet2012_config.yaml # 参数配置 - ├── resnet50_imagenet2012_GPU_Thor_config.yaml # 参数配置 - ├── resnet101_imagenet2012_config.yaml # 参数配置 - ├── se-resnet50_imagenet2012_config.yaml # 参数配置 - ├── eval.py # 评估网络 ├── eval.py # 评估网络 └── train.py # 训练网络 ``` @@ -674,42 +675,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522. 
#### CIFAR-10上的ResNet18 -| 参数 | Ascend 910 | -| -------------------------- | -------------------------------------- | -| 模型版本 | ResNet18 | -| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | -| 上传日期 | 2021-02-25 | -| MindSpore版本 | 1.1.1 | -| 数据集 | CIFAR-10 | -| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 | -| 优化器 | Momentum | -| 损失函数 | Softmax交叉熵 | -| 输出 | 概率 | -| 损失 | 0.0002519517 | -| 速度 | 13毫秒/步(8卡) | -| 总时长 | 4分钟 | -| 参数(M) | 11.2 | +| 参数 | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| 模型版本 | ResNet18 | ResNet18 | +| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G | +| 上传日期 | 2021-02-25 | 2021-07-23 | +| MindSpore版本 | 1.1.1 | 1.3.0 | +| 数据集 | CIFAR-10 | CIFAR-10 | +| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 | +| 优化器 | Momentum | Momentum| +| 损失函数 | Softmax交叉熵 | Softmax交叉熵 | +| 输出 | 概率 | 概率 | +| 损失 | 0.0002519517 | 0.0015517382 | +| 速度 | 13毫秒/步(8卡) | 29毫秒/步(8卡) | +| 总时长 | 4分钟 | 11分钟 | +| 参数(M) | 11.2 | 11.2 | | 微调检查点 | 86(.ckpt文件) | | 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ImageNet2012上的ResNet18 -| 参数 | Ascend 910 | -| -------------------------- | -------------------------------------- | -| 模型版本 | ResNet18 | -| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | -| 上传日期 | 2020-04-01 ; | -| MindSpore版本 | 1.1.1 | -| 数据集 | ImageNet2012 | -| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 | -| 优化器 | Momentum | -| 损失函数 | Softmax交叉熵 | -| 输出 | 概率 | -| 损失 | 2.15702 | -| 速度 | 110毫秒/步(8卡) (可能需要在datasetpy中增加set_numa_enbale绑核操作) | -| 总时长 | 110分钟 | -| 参数(M) | 11.7 | -| 微调检查点| 90M(.ckpt文件) | +| 参数 | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| 模型版本 | ResNet18 | ResNet18 | +| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE 
V100-32G | +| 上传日期 | 2020-04-01 ; | 2021-07-23 | +| MindSpore版本 | 1.1.1 | 1.3.0 | +| 数据集 | ImageNet2012 | ImageNet2012 | +| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 | +| 优化器 | Momentum | Momentum| +| 损失函数 | Softmax交叉熵 | Softmax交叉熵 | +| 输出 | 概率 | 概率 | +| 损失 | 2.15702 | 2.168664 | +| 速度 | 110毫秒/步(8卡) (可能需要在datasetpy中增加set_numa_enbale绑核操作) | 107毫秒/步(8卡) | +| 总时长 | 110分钟 | 130分钟 | +| 参数(M) | 11.7 | 11.7 | +| 微调检查点| 90M(.ckpt文件) | 90M(.ckpt文件) | | 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### CIFAR-10上的ResNet50 diff --git a/model_zoo/official/cv/resnet/resnet101_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/config/resnet101_imagenet2012_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet101_imagenet2012_config.yaml rename to model_zoo/official/cv/resnet/config/resnet101_imagenet2012_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet18_cifar10_config.yaml b/model_zoo/official/cv/resnet/config/resnet18_cifar10_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet18_cifar10_config.yaml rename to model_zoo/official/cv/resnet/config/resnet18_cifar10_config.yaml diff --git a/model_zoo/official/cv/resnet/config/resnet18_cifar10_config_gpu.yaml b/model_zoo/official/cv/resnet/config/resnet18_cifar10_config_gpu.yaml new file mode 100644 index 00000000000..073b380a6f7 --- /dev/null +++ b/model_zoo/official/cv/resnet/config/resnet18_cifar10_config_gpu.yaml @@ -0,0 +1,80 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "GPU" +checkpoint_path: "./checkpoint/" 
+checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 10 +batch_size: 32 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 5 +lr_decay_mode: "poly" +lr_init: 0.01 +lr_end: 0.00001 +lr_max: 0.1 +lars_epsilon: 0.0 +lars_coefficient: 0.001 + +net_name: "resnet18" +dataset: "cifar10" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +conv_init: "XavierUniform" +dense_init: "TruncatedNormal" + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet18" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet18_cifar10" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
diff --git a/model_zoo/official/cv/resnet/resnet18_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet18_imagenet2012_config.yaml rename to model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config.yaml diff --git a/model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config_gpu.yaml b/model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config_gpu.yaml new file mode 100644 index 00000000000..33232aacbc5 --- /dev/null +++ b/model_zoo/official/cv/resnet/config/resnet18_imagenet2012_config_gpu.yaml @@ -0,0 +1,82 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "GPU" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 256 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 0 +lr_decay_mode: "linear" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_max: 0.8 +lr_end: 0.0 +lars_epsilon: 0.0 +lars_coefficient: 0.001 + +net_name: "resnet18" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +conv_init: "XavierUniform" +dense_init: "TruncatedNormal" + +# Export 
options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet18" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet18_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." diff --git a/model_zoo/official/cv/resnet/resnet34_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/config/resnet34_imagenet2012_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet34_imagenet2012_config.yaml rename to model_zoo/official/cv/resnet/config/resnet34_imagenet2012_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet50_cifar10_config.yaml b/model_zoo/official/cv/resnet/config/resnet50_cifar10_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet50_cifar10_config.yaml rename to model_zoo/official/cv/resnet/config/resnet50_cifar10_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_Acc_config.yaml b/model_zoo/official/cv/resnet/config/resnet50_imagenet2012_Acc_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet50_imagenet2012_Acc_config.yaml rename to model_zoo/official/cv/resnet/config/resnet50_imagenet2012_Acc_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_Ascend_Thor_config.yaml 
b/model_zoo/official/cv/resnet/config/resnet50_imagenet2012_Ascend_Thor_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet50_imagenet2012_Ascend_Thor_config.yaml rename to model_zoo/official/cv/resnet/config/resnet50_imagenet2012_Ascend_Thor_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_GPU_Thor_config.yaml b/model_zoo/official/cv/resnet/config/resnet50_imagenet2012_GPU_Thor_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet50_imagenet2012_GPU_Thor_config.yaml rename to model_zoo/official/cv/resnet/config/resnet50_imagenet2012_GPU_Thor_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/config/resnet50_imagenet2012_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet50_imagenet2012_config.yaml rename to model_zoo/official/cv/resnet/config/resnet50_imagenet2012_config.yaml diff --git a/model_zoo/official/cv/resnet/resnet_benchmark_GPU.yaml b/model_zoo/official/cv/resnet/config/resnet_benchmark_GPU.yaml similarity index 100% rename from model_zoo/official/cv/resnet/resnet_benchmark_GPU.yaml rename to model_zoo/official/cv/resnet/config/resnet_benchmark_GPU.yaml diff --git a/model_zoo/official/cv/resnet/se-resnet50_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/config/se-resnet50_imagenet2012_config.yaml similarity index 100% rename from model_zoo/official/cv/resnet/se-resnet50_imagenet2012_config.yaml rename to model_zoo/official/cv/resnet/config/se-resnet50_imagenet2012_config.yaml diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh index 6967dae9a80..c5f3903be96 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh @@ -35,7 +35,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) 
-CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ $# == 4 ] then @@ -101,7 +101,7 @@ do mkdir ./train_parallel$i cp ../*.py ./train_parallel$i cp *.sh ./train_parallel$i - cp -r ../*.yaml ./train_parallel$i + cp -r ../config/*.yaml ./train_parallel$i cp -r ../src ./train_parallel$i cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh index b44116f9923..39dacf98653 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then @@ -80,7 +80,7 @@ rm -rf ./train_parallel mkdir ./train_parallel cp ../*.py ./train_parallel cp *.sh ./train_parallel -cp -r ../*.yaml ./train_parallel +cp -r ../config/*.yaml ./train_parallel cp -r ../src ./train_parallel cd ./train_parallel || exit diff --git a/model_zoo/official/cv/resnet/scripts/run_eval.sh b/model_zoo/official/cv/resnet/scripts/run_eval.sh index 85c75682c3b..97a7ba85c71 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! 
-d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../*.yaml ./eval +cp -r ../config/*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh index ed93cb09c08..97114b7a456 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! -d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../*.yaml ./eval +cp -r ../config/*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_infer.sh b/model_zoo/official/cv/resnet/scripts/run_infer.sh index 34ae0fadadc..b73e956c18a 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! 
-d $PATH1 ] @@ -56,7 +56,7 @@ then rm -rf ./infer fi mkdir ./infer -cp ../*.yaml ./infer +cp ../config/*.yaml ./infer cp ../*.py ./infer cp *.sh ./infer cp -r ../src ./infer diff --git a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh index d49002a575b..79ff34bb8d3 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh @@ -87,7 +87,7 @@ function preprocess_data() fi mkdir preprocess_Result BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")") - CONFIG_FILE="${BASE_PATH}/$1" + CONFIG_FILE="${BASE_PATH}/config/$1" python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log } diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh index e3dd2d6372a..0cd85f336cd 100644 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ $# == 4 ] then @@ -71,7 +71,7 @@ export DEVICE_ID=0 export RANK_ID=0 rm -rf ./sched mkdir ./sched -cp ../*.yaml ./sched +cp ../config/*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -97,7 +97,7 @@ do export RANK_ID=$i rm -rf ./server_$i mkdir ./server_$i - cp ../*.yaml ./server_$i + cp ../config/*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -125,7 +125,7 @@ do export RANK_ID=$i rm -rf ./worker_$i mkdir ./worker_$i - cp ../*.yaml ./worker_$i + cp ../config/*.yaml ./worker_$i cp ../*.py ./worker_$i cp *.sh ./worker_$i cp -r ../src ./worker_$i diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh 
b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh index ba83f209644..38eac825e35 100755 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh @@ -29,7 +29,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081 export MS_ROLE=MS_SCHED rm -rf ./sched mkdir ./sched -cp ../*.yaml ./sched +cp ../config/*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++)); do rm -rf ./server_$i mkdir ./server_$i - cp ../*.yaml ./server_$i + cp ../config/*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -110,7 +110,7 @@ done export MS_ROLE=MS_WORKER rm -rf ./worker mkdir ./worker -cp ../*.yaml ./worker +cp ../config/*.yaml ./worker cp ../*.py ./worker cp *.sh ./worker cp -r ../src ./worker diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh index 402e01a6869..a0381dbeafe 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -80,7 +80,7 @@ then rm -rf ./train fi mkdir ./train -cp ../*.yaml ./train +cp ../config/*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh index edb85580acb..581d5521911 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh @@ 
-34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then @@ -83,7 +83,7 @@ then rm -rf ./train fi mkdir ./train -cp ../*.yaml ./train +cp ../config/*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/src/model_utils/config.py b/model_zoo/official/cv/resnet/src/model_utils/config.py index d8f6518f1ad..19678722f34 100644 --- a/model_zoo/official/cv/resnet/src/model_utils/config.py +++ b/model_zoo/official/cv/resnet/src/model_utils/config.py @@ -21,7 +21,7 @@ import argparse from pprint import pprint, pformat import yaml -_config_path = "./resnet50_cifar10_config.yaml" +_config_path = "./config/resnet50_cifar10_config.yaml" class Config: """ @@ -118,7 +118,7 @@ def get_config(): parser = argparse.ArgumentParser(description="default name", add_help=False) current_dir = os.path.dirname(os.path.abspath(__file__)) parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \ - "../resnet50_cifar10_config.yaml"), help="Config file path") + "../config/resnet50_cifar10_config.yaml"), help="Config file path") path_args, _ = parser.parse_known_args() default, helper, choices = parse_yaml(path_args.config_path) pprint(default) diff --git a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py index f3527397549..fcc3be0fd08 100644 --- a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py +++ b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py @@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend(): new_list = ["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") 
exec_network_shell = "cd resnet/scripts; bash run_distribute_train.sh {} {} {}"\ .format(utils.rank_table_path, dataset_path, config_path) os.system(exec_network_shell) @@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu(): new_list = ["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") os.system("nvidia-smi") exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \ .format(dataset_path, config_path)