forked from mindspore-Ecosystem/mindspore
add gpu scripts to resnet18
This commit is contained in:
parent
564306c232
commit
44833d7886
|
@ -202,6 +202,18 @@ If you want to run in modelarts, please check the official documentation of [mod
|
|||
.
|
||||
└──resnet
|
||||
├── README.md
|
||||
├── config
|
||||
├── resnet18_cifar10_config.yaml # parameter configuration
|
||||
├── resnet18_cifar10_config_gpu.yaml # parameter configuration
|
||||
├── resnet18_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet18_imagenet2012_config_gpu.yaml # parameter configuration
|
||||
├── resnet34_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet50_cifar10_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_Ascend_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_GPU_config.yaml # parameter configuration
|
||||
├── resnet101_imagenet2012_config.yaml # parameter configuration
|
||||
└── se-resnet50_imagenet2012_config.yaml # parameter configuration
|
||||
├── scripts
|
||||
├── run_distribute_train.sh # launch ascend distributed training(8 pcs)
|
||||
├── run_parameter_server_train.sh # launch ascend parameter server training(8 pcs)
|
||||
|
@ -226,16 +238,6 @@ If you want to run in modelarts, please check the official documentation of [mod
|
|||
├──device_adapter.py # device adapter
|
||||
├──local_adapter.py # local adapter
|
||||
├──moxing_adapter.py # moxing adapter
|
||||
├── resnet18_cifar10_config.yaml # parameter configuration
|
||||
├── resnet18_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet34_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet50_cifar10_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_config.yaml # parameter configuration
|
||||
├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration
|
||||
├── resnet101_imagenet2012_config.yaml # parameter configuration
|
||||
├── se-resnet50_imagenet2012_config.yaml # parameter configuration
|
||||
├── export.py # export model for inference
|
||||
├── mindspore_hub_conf.py # mindspore hub interface
|
||||
├── eval.py # eval net
|
||||
|
@ -713,42 +715,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
|
|||
|
||||
#### ResNet18 on CIFAR-10
|
||||
|
||||
| Parameters | Ascend 910 |
|
||||
| -------------------------- | -------------------------------------- |
|
||||
| Model Version | ResNet18 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
|
||||
| uploaded Date | 02/25/2021 (month/day/year) |
|
||||
| MindSpore Version | 1.1.1 |
|
||||
| Dataset | CIFAR-10 |
|
||||
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 |
|
||||
| Optimizer | Momentum |
|
||||
| Loss Function | Softmax Cross Entropy |
|
||||
| outputs | probability |
|
||||
| Loss | 0.0002519517 |
|
||||
| Speed | 13 ms/step(8pcs) |
|
||||
| Total time | 4 mins |
|
||||
| Parameters (M) | 11.2 |
|
||||
| Checkpoint for Fine tuning | 86M (.ckpt file) |
|
||||
| Parameters | Ascend 910 | GPU |
|
||||
| -------------------------- | -------------------------------------- | -------------------------------------- |
|
||||
| Model Version | ResNet18 | ResNet18 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
|
||||
| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
|
||||
| MindSpore Version | 1.1.1 | 1.3.0 |
|
||||
| Dataset | CIFAR-10 | CIFAR-10 |
|
||||
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
|
||||
| Optimizer | Momentum | Momentum |
|
||||
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
|
||||
| outputs | probability | probability |
|
||||
| Loss | 0.0002519517 | 0.0015517382 |
|
||||
| Speed | 13 ms/step(8pcs) | 29 ms/step(8pcs) |
|
||||
| Total time | 4 mins | 11 mins |
|
||||
| Parameters (M) | 11.2 | 11.2 |
|
||||
| Checkpoint for Fine tuning | 86M (.ckpt file) | 85.4M (.ckpt file) |
|
||||
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
|
||||
|
||||
#### ResNet18 on ImageNet2012
|
||||
|
||||
| Parameters | Ascend 910 |
|
||||
| -------------------------- | -------------------------------------- |
|
||||
| Model Version | ResNet18 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
|
||||
| uploaded Date | 02/25/2021 (month/day/year) ; |
|
||||
| MindSpore Version | 1.1.1 |
|
||||
| Dataset | ImageNet2012 |
|
||||
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 |
|
||||
| Optimizer | Momentum |
|
||||
| Loss Function | Softmax Cross Entropy |
|
||||
| outputs | probability |
|
||||
| Loss | 2.15702 |
|
||||
| Speed | 110ms/step(8pcs) (may need to set_numa_enbale in dataset.py) |
|
||||
| Total time | 110 mins |
|
||||
| Parameters (M) | 11.7 |
|
||||
| Checkpoint for Fine tuning | 90M (.ckpt file) |
|
||||
| Parameters | Ascend 910 | GPU |
|
||||
| -------------------------- | -------------------------------------- | -------------------------------------- |
|
||||
| Model Version | ResNet18 | ResNet18 |
|
||||
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
|
||||
| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
|
||||
| MindSpore Version | 1.1.1 | 1.3.0 |
|
||||
| Dataset | ImageNet2012 | ImageNet2012 |
|
||||
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
|
||||
| Optimizer | Momentum | Momentum |
|
||||
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
|
||||
| outputs | probability | probability |
|
||||
| Loss | 2.15702 | 2.168664 |
|
||||
| Speed | 110ms/step(8pcs) (may need to set_numa_enable in dataset.py) | 107 ms/step(8pcs) |
|
||||
| Total time | 110 mins | 130 mins |
|
||||
| Parameters (M) | 11.7 | 11.7 |
|
||||
| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) |
|
||||
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
|
||||
|
||||
#### ResNet50 on CIFAR-10
|
||||
|
|
|
@ -188,6 +188,18 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
|
|||
.
|
||||
└──resnet
|
||||
├── README.md
|
||||
├── config
|
||||
├── resnet18_cifar10_config.yaml # 参数配置
|
||||
├── resnet18_cifar10_config_gpu.yaml # 参数配置
|
||||
├── resnet18_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet18_imagenet2012_config_gpu.yaml # 参数配置
|
||||
├── resnet34_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet50_cifar10_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_Ascend_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_GPU_config.yaml # 参数配置
|
||||
├── resnet101_imagenet2012_config.yaml # 参数配置
|
||||
├── se-resnet50_imagenet2012_config.yaml # 参数配置
|
||||
├── scripts
|
||||
├── run_distribute_train.sh # 启动Ascend分布式训练(8卡)
|
||||
├── run_parameter_server_train.sh # 启动Ascend参数服务器训练(8卡)
|
||||
|
@ -209,17 +221,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
|
|||
├── device_adapter.py # 设备配置
|
||||
├── local_adapter.py # 本地设备配置
|
||||
└── moxing_adapter.py # modelarts设备配置
|
||||
├── resnet18_cifar10_config.yaml # 参数配置
|
||||
├── resnet18_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet34_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet50_cifar10_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_Acc_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_Ascend_Thor_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_config.yaml # 参数配置
|
||||
├── resnet50_imagenet2012_GPU_Thor_config.yaml # 参数配置
|
||||
├── resnet101_imagenet2012_config.yaml # 参数配置
|
||||
├── se-resnet50_imagenet2012_config.yaml # 参数配置
|
||||
├── eval.py # 评估网络
|
||||
├── eval.py # 评估网络
|
||||
└── train.py # 训练网络
|
||||
```
|
||||
|
@ -674,42 +675,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
|
|||
|
||||
#### CIFAR-10上的ResNet18
|
||||
|
||||
| 参数 | Ascend 910 |
|
||||
| -------------------------- | -------------------------------------- |
|
||||
| 模型版本 | ResNet18 |
|
||||
| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 |
|
||||
| 上传日期 | 2021-02-25 |
|
||||
| MindSpore版本 | 1.1.1 |
|
||||
| 数据集 | CIFAR-10 |
|
||||
| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 |
|
||||
| 优化器 | Momentum |
|
||||
| 损失函数 | Softmax交叉熵 |
|
||||
| 输出 | 概率 |
|
||||
| 损失 | 0.0002519517 |
|
||||
| 速度 | 13毫秒/步(8卡) |
|
||||
| 总时长 | 4分钟 |
|
||||
| 参数(M) | 11.2 |
|
||||
| 参数 | Ascend 910 | GPU |
|
||||
| -------------------------- | -------------------------------------- | -------------------------------------- |
|
||||
| 模型版本 | ResNet18 | ResNet18 |
|
||||
| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G |
|
||||
| 上传日期 | 2021-02-25 | 2021-07-23 |
|
||||
| MindSpore版本 | 1.1.1 | 1.3.0 |
|
||||
| 数据集 | CIFAR-10 | CIFAR-10 |
|
||||
| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
|
||||
| 优化器 | Momentum | Momentum|
|
||||
| 损失函数 | Softmax交叉熵 | Softmax交叉熵 |
|
||||
| 输出 | 概率 | 概率 |
|
||||
| 损失 | 0.0002519517 | 0.0015517382 |
|
||||
| 速度 | 13毫秒/步(8卡) | 29毫秒/步(8卡) |
|
||||
| 总时长 | 4分钟 | 11分钟 |
|
||||
| 参数(M) | 11.2 | 11.2 |
|
||||
| 微调检查点 | 86M(.ckpt文件) | 85.4M(.ckpt文件) |
|
||||
| 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
|
||||
|
||||
#### ImageNet2012上的ResNet18
|
||||
|
||||
| 参数 | Ascend 910 |
|
||||
| -------------------------- | -------------------------------------- |
|
||||
| 模型版本 | ResNet18 |
|
||||
| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 |
|
||||
| 上传日期 | 2020-04-01 ; |
|
||||
| MindSpore版本 | 1.1.1 |
|
||||
| 数据集 | ImageNet2012 |
|
||||
| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 |
|
||||
| 优化器 | Momentum |
|
||||
| 损失函数 | Softmax交叉熵 |
|
||||
| 输出 | 概率 |
|
||||
| 损失 | 2.15702 |
|
||||
| 速度 | 110毫秒/步(8卡) (可能需要在datasetpy中增加set_numa_enbale绑核操作) |
|
||||
| 总时长 | 110分钟 |
|
||||
| 参数(M) | 11.7 |
|
||||
| 微调检查点| 90M(.ckpt文件) |
|
||||
| 参数 | Ascend 910 | GPU |
|
||||
| -------------------------- | -------------------------------------- | -------------------------------------- |
|
||||
| 模型版本 | ResNet18 | ResNet18 |
|
||||
| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G |
|
||||
| 上传日期 | 2020-04-01 | 2021-07-23 |
|
||||
| MindSpore版本 | 1.1.1 | 1.3.0 |
|
||||
| 数据集 | ImageNet2012 | ImageNet2012 |
|
||||
| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
|
||||
| 优化器 | Momentum | Momentum|
|
||||
| 损失函数 | Softmax交叉熵 | Softmax交叉熵 |
|
||||
| 输出 | 概率 | 概率 |
|
||||
| 损失 | 2.15702 | 2.168664 |
|
||||
| 速度 | 110毫秒/步(8卡) (可能需要在dataset.py中增加set_numa_enable绑核操作) | 107毫秒/步(8卡) |
|
||||
| 总时长 | 110分钟 | 130分钟 |
|
||||
| 参数(M) | 11.7 | 11.7 |
|
||||
| 微调检查点| 90M(.ckpt文件) | 90M(.ckpt文件) |
|
||||
| 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
|
||||
|
||||
#### CIFAR-10上的ResNet50
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
|
||||
enable_modelarts: False
|
||||
# Url for modelarts
|
||||
data_url: ""
|
||||
train_url: ""
|
||||
checkpoint_url: ""
|
||||
# Path for local
|
||||
run_distribute: False
|
||||
enable_profiling: False
|
||||
data_path: "/cache/data"
|
||||
output_path: "/cache/train"
|
||||
load_path: "/cache/checkpoint_path/"
|
||||
device_target: "GPU"
|
||||
checkpoint_path: "./checkpoint/"
|
||||
checkpoint_file_path: ""
|
||||
|
||||
# ==============================================================================
|
||||
# Training options
|
||||
optimizer: "Momentum"
|
||||
infer_label: ""
|
||||
class_num: 10
|
||||
batch_size: 32
|
||||
loss_scale: 1024
|
||||
momentum: 0.9
|
||||
weight_decay: 0.0001
|
||||
epoch_size: 90
|
||||
pretrain_epoch_size: 0
|
||||
save_checkpoint: True
|
||||
save_checkpoint_epochs: 5
|
||||
keep_checkpoint_max: 10
|
||||
warmup_epochs: 5
|
||||
lr_decay_mode: "poly"
|
||||
lr_init: 0.01
|
||||
lr_end: 0.00001
|
||||
lr_max: 0.1
|
||||
lars_epsilon: 0.0
|
||||
lars_coefficient: 0.001
|
||||
|
||||
net_name: "resnet18"
|
||||
dataset: "cifar10"
|
||||
device_num: 1
|
||||
pre_trained: ""
|
||||
run_eval: False
|
||||
eval_dataset_path: ""
|
||||
parameter_server: False
|
||||
filter_weight: False
|
||||
save_best_ckpt: True
|
||||
eval_start_epoch: 40
|
||||
eval_interval: 1
|
||||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
|
||||
# Export options
|
||||
device_id: 0
|
||||
width: 224
|
||||
height: 224
|
||||
file_name: "resnet18"
|
||||
file_format: "AIR"
|
||||
ckpt_file: ""
|
||||
network_dataset: "resnet18_cifar10"
|
||||
|
||||
---
|
||||
# Help description for each configuration
|
||||
enable_modelarts: "Whether training on modelarts, default: False"
|
||||
data_url: "Dataset url for obs"
|
||||
checkpoint_url: "The location of checkpoint for obs"
|
||||
data_path: "Dataset path for local"
|
||||
output_path: "Training output path for local"
|
||||
load_path: "The location of checkpoint for obs"
|
||||
device_target: "Target device type, available: [Ascend, GPU, CPU]"
|
||||
enable_profiling: "Whether enable profiling while training, default: False"
|
||||
class_num: "Class for dataset"
|
||||
batch_size: "Batch size for training and evaluation"
|
||||
epoch_size: "Total training epochs."
|
||||
checkpoint_path: "The location of the checkpoint file."
|
||||
checkpoint_file_path: "The location of the checkpoint file."
|
|
@ -0,0 +1,82 @@
|
|||
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
|
||||
enable_modelarts: False
|
||||
# Url for modelarts
|
||||
data_url: ""
|
||||
train_url: ""
|
||||
checkpoint_url: ""
|
||||
# Path for local
|
||||
run_distribute: False
|
||||
enable_profiling: False
|
||||
data_path: "/cache/data"
|
||||
output_path: "/cache/train"
|
||||
load_path: "/cache/checkpoint_path/"
|
||||
device_target: "GPU"
|
||||
checkpoint_path: "./checkpoint/"
|
||||
checkpoint_file_path: ""
|
||||
|
||||
# ==============================================================================
|
||||
# Training options
|
||||
optimizer: "Momentum"
|
||||
infer_label: ""
|
||||
class_num: 1001
|
||||
batch_size: 256
|
||||
loss_scale: 1024
|
||||
momentum: 0.9
|
||||
weight_decay: 0.0001
|
||||
epoch_size: 90
|
||||
pretrain_epoch_size: 0
|
||||
save_checkpoint: True
|
||||
save_checkpoint_epochs: 5
|
||||
keep_checkpoint_max: 10
|
||||
warmup_epochs: 0
|
||||
lr_decay_mode: "linear"
|
||||
use_label_smooth: True
|
||||
label_smooth_factor: 0.1
|
||||
lr_init: 0
|
||||
lr_max: 0.8
|
||||
lr_end: 0.0
|
||||
lars_epsilon: 0.0
|
||||
lars_coefficient: 0.001
|
||||
|
||||
net_name: "resnet18"
|
||||
dataset: "imagenet2012"
|
||||
device_num: 1
|
||||
pre_trained: ""
|
||||
run_eval: False
|
||||
eval_dataset_path: ""
|
||||
parameter_server: False
|
||||
filter_weight: False
|
||||
save_best_ckpt: True
|
||||
eval_start_epoch: 40
|
||||
eval_interval: 1
|
||||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
|
||||
# Export options
|
||||
device_id: 0
|
||||
width: 224
|
||||
height: 224
|
||||
file_name: "resnet18"
|
||||
file_format: "AIR"
|
||||
ckpt_file: ""
|
||||
network_dataset: "resnet18_imagenet2012"
|
||||
|
||||
---
|
||||
# Help description for each configuration
|
||||
enable_modelarts: "Whether training on modelarts, default: False"
|
||||
data_url: "Dataset url for obs"
|
||||
checkpoint_url: "The location of checkpoint for obs"
|
||||
data_path: "Dataset path for local"
|
||||
output_path: "Training output path for local"
|
||||
load_path: "The location of checkpoint for obs"
|
||||
device_target: "Target device type, available: [Ascend, GPU, CPU]"
|
||||
enable_profiling: "Whether enable profiling while training, default: False"
|
||||
class_num: "Class for dataset"
|
||||
batch_size: "Batch size for training and evaluation"
|
||||
epoch_size: "Total training epochs."
|
||||
checkpoint_path: "The location of the checkpoint file."
|
||||
checkpoint_file_path: "The location of the checkpoint file."
|
|
@ -35,7 +35,7 @@ get_real_path(){
|
|||
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
CONFIG_FILE=$3
|
||||
CONFIG_FILE=$(get_real_path $3)
|
||||
|
||||
if [ $# == 4 ]
|
||||
then
|
||||
|
@ -101,7 +101,7 @@ do
|
|||
mkdir ./train_parallel$i
|
||||
cp ../*.py ./train_parallel$i
|
||||
cp *.sh ./train_parallel$i
|
||||
cp -r ../*.yaml ./train_parallel$i
|
||||
cp -r ../config/*.yaml ./train_parallel$i
|
||||
cp -r ../src ./train_parallel$i
|
||||
cd ./train_parallel$i || exit
|
||||
echo "start training for rank $RANK_ID, device $DEVICE_ID"
|
||||
|
|
|
@ -34,7 +34,7 @@ get_real_path(){
|
|||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
CONFIG_FILE=$2
|
||||
CONFIG_FILE=$(get_real_path $2)
|
||||
|
||||
if [ $# == 3 ]
|
||||
then
|
||||
|
@ -80,7 +80,7 @@ rm -rf ./train_parallel
|
|||
mkdir ./train_parallel
|
||||
cp ../*.py ./train_parallel
|
||||
cp *.sh ./train_parallel
|
||||
cp -r ../*.yaml ./train_parallel
|
||||
cp -r ../config/*.yaml ./train_parallel
|
||||
cp -r ../src ./train_parallel
|
||||
cd ./train_parallel || exit
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ get_real_path(){
|
|||
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
CONFIG_FILE=$3
|
||||
CONFIG_FILE=$(get_real_path $3)
|
||||
|
||||
|
||||
if [ ! -d $PATH1 ]
|
||||
|
@ -58,7 +58,7 @@ fi
|
|||
mkdir ./eval
|
||||
cp ../*.py ./eval
|
||||
cp *.sh ./eval
|
||||
cp -r ../*.yaml ./eval
|
||||
cp -r ../config/*.yaml ./eval
|
||||
cp -r ../src ./eval
|
||||
cd ./eval || exit
|
||||
env > env.log
|
||||
|
|
|
@ -30,7 +30,7 @@ get_real_path(){
|
|||
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
CONFIG_FILE=$3
|
||||
CONFIG_FILE=$(get_real_path $3)
|
||||
|
||||
|
||||
if [ ! -d $PATH1 ]
|
||||
|
@ -58,7 +58,7 @@ fi
|
|||
mkdir ./eval
|
||||
cp ../*.py ./eval
|
||||
cp *.sh ./eval
|
||||
cp -r ../*.yaml ./eval
|
||||
cp -r ../config/*.yaml ./eval
|
||||
cp -r ../src ./eval
|
||||
cd ./eval || exit
|
||||
env > env.log
|
||||
|
|
|
@ -30,7 +30,7 @@ get_real_path(){
|
|||
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
CONFIG_FILE=$3
|
||||
CONFIG_FILE=$(get_real_path $3)
|
||||
|
||||
|
||||
if [ ! -d $PATH1 ]
|
||||
|
@ -56,7 +56,7 @@ then
|
|||
rm -rf ./infer
|
||||
fi
|
||||
mkdir ./infer
|
||||
cp ../*.yaml ./infer
|
||||
cp ../config/*.yaml ./infer
|
||||
cp ../*.py ./infer
|
||||
cp *.sh ./infer
|
||||
cp -r ../src ./infer
|
||||
|
|
|
@ -87,7 +87,7 @@ function preprocess_data()
|
|||
fi
|
||||
mkdir preprocess_Result
|
||||
BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")")
|
||||
CONFIG_FILE="${BASE_PATH}/$1"
|
||||
CONFIG_FILE="${BASE_PATH}/config/$1"
|
||||
|
||||
python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ get_real_path(){
|
|||
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
CONFIG_FILE=$3
|
||||
CONFIG_FILE=$(get_real_path $3)
|
||||
|
||||
if [ $# == 4 ]
|
||||
then
|
||||
|
@ -71,7 +71,7 @@ export DEVICE_ID=0
|
|||
export RANK_ID=0
|
||||
rm -rf ./sched
|
||||
mkdir ./sched
|
||||
cp ../*.yaml ./sched
|
||||
cp ../config/*.yaml ./sched
|
||||
cp ../*.py ./sched
|
||||
cp *.sh ./sched
|
||||
cp -r ../src ./sched
|
||||
|
@ -97,7 +97,7 @@ do
|
|||
export RANK_ID=$i
|
||||
rm -rf ./server_$i
|
||||
mkdir ./server_$i
|
||||
cp ../*.yaml ./server_$i
|
||||
cp ../config/*.yaml ./server_$i
|
||||
cp ../*.py ./server_$i
|
||||
cp *.sh ./server_$i
|
||||
cp -r ../src ./server_$i
|
||||
|
@ -125,7 +125,7 @@ do
|
|||
export RANK_ID=$i
|
||||
rm -rf ./worker_$i
|
||||
mkdir ./worker_$i
|
||||
cp ../*.yaml ./worker_$i
|
||||
cp ../config/*.yaml ./worker_$i
|
||||
cp ../*.py ./worker_$i
|
||||
cp *.sh ./worker_$i
|
||||
cp -r ../src ./worker_$i
|
||||
|
|
|
@ -29,7 +29,7 @@ get_real_path(){
|
|||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
CONFIG_FILE=$2
|
||||
CONFIG_FILE=$(get_real_path $2)
|
||||
if [ $# == 3 ]
|
||||
then
|
||||
PATH2=$(get_real_path $3)
|
||||
|
@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081
|
|||
export MS_ROLE=MS_SCHED
|
||||
rm -rf ./sched
|
||||
mkdir ./sched
|
||||
cp ../*.yaml ./sched
|
||||
cp ../config/*.yaml ./sched
|
||||
cp ../*.py ./sched
|
||||
cp *.sh ./sched
|
||||
cp -r ../src ./sched
|
||||
|
@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++));
|
|||
do
|
||||
rm -rf ./server_$i
|
||||
mkdir ./server_$i
|
||||
cp ../*.yaml ./server_$i
|
||||
cp ../config/*.yaml ./server_$i
|
||||
cp ../*.py ./server_$i
|
||||
cp *.sh ./server_$i
|
||||
cp -r ../src ./server_$i
|
||||
|
@ -110,7 +110,7 @@ done
|
|||
export MS_ROLE=MS_WORKER
|
||||
rm -rf ./worker
|
||||
mkdir ./worker
|
||||
cp ../*.yaml ./worker
|
||||
cp ../config/*.yaml ./worker
|
||||
cp ../*.py ./worker
|
||||
cp *.sh ./worker
|
||||
cp -r ../src ./worker
|
||||
|
|
|
@ -34,7 +34,7 @@ get_real_path(){
|
|||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
CONFIG_FILE=$2
|
||||
CONFIG_FILE=$(get_real_path $2)
|
||||
if [ $# == 3 ]
|
||||
then
|
||||
PATH2=$(get_real_path $3)
|
||||
|
@ -80,7 +80,7 @@ then
|
|||
rm -rf ./train
|
||||
fi
|
||||
mkdir ./train
|
||||
cp ../*.yaml ./train
|
||||
cp ../config/*.yaml ./train
|
||||
cp ../*.py ./train
|
||||
cp *.sh ./train
|
||||
cp -r ../src ./train
|
||||
|
|
|
@ -34,7 +34,7 @@ get_real_path(){
|
|||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
CONFIG_FILE=$2
|
||||
CONFIG_FILE=$(get_real_path $2)
|
||||
|
||||
if [ $# == 3 ]
|
||||
then
|
||||
|
@ -83,7 +83,7 @@ then
|
|||
rm -rf ./train
|
||||
fi
|
||||
mkdir ./train
|
||||
cp ../*.yaml ./train
|
||||
cp ../config/*.yaml ./train
|
||||
cp ../*.py ./train
|
||||
cp *.sh ./train
|
||||
cp -r ../src ./train
|
||||
|
|
|
@ -21,7 +21,7 @@ import argparse
|
|||
from pprint import pprint, pformat
|
||||
import yaml
|
||||
|
||||
_config_path = "./resnet50_cifar10_config.yaml"
|
||||
_config_path = "./config/resnet50_cifar10_config.yaml"
|
||||
|
||||
class Config:
|
||||
"""
|
||||
|
@ -118,7 +118,7 @@ def get_config():
|
|||
parser = argparse.ArgumentParser(description="default name", add_help=False)
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \
|
||||
"../resnet50_cifar10_config.yaml"), help="Config file path")
|
||||
"../config/resnet50_cifar10_config.yaml"), help="Config file path")
|
||||
path_args, _ = parser.parse_known_args()
|
||||
default, helper, choices = parse_yaml(path_args.config_path)
|
||||
pprint(default)
|
||||
|
|
|
@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend():
|
|||
new_list = ["total_epochs=10", "10"]
|
||||
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
|
||||
dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
|
||||
config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
|
||||
config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
|
||||
exec_network_shell = "cd resnet/scripts; bash run_distribute_train.sh {} {} {}"\
|
||||
.format(utils.rank_table_path, dataset_path, config_path)
|
||||
os.system(exec_network_shell)
|
||||
|
@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu():
|
|||
new_list = ["total_epochs=10", "10"]
|
||||
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
|
||||
dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
|
||||
config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
|
||||
config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
|
||||
os.system("nvidia-smi")
|
||||
exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \
|
||||
.format(dataset_path, config_path)
|
||||
|
|
Loading…
Reference in New Issue