add gpu scripts to resnet18

ZeyangGao 2021-07-30 10:29:09 +08:00
parent 564306c232
commit 44833d7886
27 changed files with 276 additions and 111 deletions

View File

@@ -202,6 +202,18 @@ If you want to run in modelarts, please check the official documentation of [mod
.
└──resnet
  ├── README.md
  ├── config
    ├── resnet18_cifar10_config.yaml              # parameter configuration
    ├── resnet18_cifar10_config_gpu.yaml          # parameter configuration
    ├── resnet18_imagenet2012_config.yaml         # parameter configuration
    ├── resnet18_imagenet2012_config_gpu.yaml     # parameter configuration
    ├── resnet34_imagenet2012_config.yaml         # parameter configuration
    ├── resnet50_cifar10_config.yaml              # parameter configuration
    ├── resnet50_imagenet2012_Ascend_config.yaml  # parameter configuration
    ├── resnet50_imagenet2012_config.yaml         # parameter configuration
    ├── resnet50_imagenet2012_GPU_config.yaml     # parameter configuration
    ├── resnet101_imagenet2012_config.yaml        # parameter configuration
    └── se-resnet50_imagenet2012_config.yaml      # parameter configuration
  ├── scripts
    ├── run_distribute_train.sh                   # launch Ascend distributed training (8 pcs)
    ├── run_parameter_server_train.sh             # launch Ascend parameter server training (8 pcs)
@@ -226,16 +238,6 @@ If you want to run in modelarts, please check the official documentation of [mod
├── device_adapter.py                           # device adapter
├── local_adapter.py                            # local adapter
├── moxing_adapter.py                           # moxing adapter
├── resnet18_cifar10_config.yaml # parameter configuration
├── resnet18_imagenet2012_config.yaml # parameter configuration
├── resnet34_imagenet2012_config.yaml # parameter configuration
├── resnet50_cifar10_config.yaml # parameter configuration
├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration
├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration
├── resnet50_imagenet2012_config.yaml # parameter configuration
├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration
├── resnet101_imagenet2012_config.yaml # parameter configuration
├── se-resnet50_imagenet2012_config.yaml # parameter configuration
├── export.py # export model for inference
├── mindspore_hub_conf.py # mindspore hub interface
├── eval.py # eval net
@@ -713,42 +715,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
#### ResNet18 on CIFAR-10
| Parameters | Ascend 910 |
| -------------------------- | -------------------------------------- |
| Model Version | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
| uploaded Date | 02/25/2021 (month/day/year) |
| MindSpore Version | 1.1.1 |
| Dataset | CIFAR-10 |
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 |
| Optimizer | Momentum |
| Loss Function | Softmax Cross Entropy |
| outputs | probability |
| Loss | 0.0002519517 |
| Speed | 13 ms/step (8 pcs) |
| Total time | 4 mins |
| Parameters (M) | 11.2 |
| Checkpoint for Fine tuning | 86M (.ckpt file) |
| Parameters | Ascend 910 | GPU |
| -------------------------- | -------------------------------------- | -------------------------------------- |
| Model Version | ResNet18 | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
| MindSpore Version | 1.1.1 | 1.3.0 |
| Dataset | CIFAR-10 | CIFAR-10 |
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
| Optimizer | Momentum | Momentum |
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
| outputs | probability | probability |
| Loss | 0.0002519517 | 0.0015517382 |
| Speed | 13 ms/step (8 pcs) | 29 ms/step (8 pcs) |
| Total time | 4 mins | 11 mins |
| Parameters (M) | 11.2 | 11.2 |
| Checkpoint for Fine tuning | 86M (.ckpt file) | 85.4M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
#### ResNet18 on ImageNet2012
| Parameters | Ascend 910 |
| -------------------------- | -------------------------------------- |
| Model Version | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
| uploaded Date | 02/25/2021 (month/day/year) |
| MindSpore Version | 1.1.1 |
| Dataset | ImageNet2012 |
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 |
| Optimizer | Momentum |
| Loss Function | Softmax Cross Entropy |
| outputs | probability |
| Loss | 2.15702 |
| Speed | 110 ms/step (8 pcs) (may need to set set_numa_enable in dataset.py) |
| Total time | 110 mins |
| Parameters (M) | 11.7 |
| Checkpoint for Fine tuning | 90M (.ckpt file) |
| Parameters | Ascend 910 | GPU |
| -------------------------- | -------------------------------------- | -------------------------------------- |
| Model Version | ResNet18 | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
| MindSpore Version | 1.1.1 | 1.3.0 |
| Dataset | ImageNet2012 | ImageNet2012 |
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
| Optimizer | Momentum | Momentum |
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
| outputs | probability | probability |
| Loss | 2.15702 | 2.168664 |
| Speed | 110 ms/step (8 pcs) (may need to set set_numa_enable in dataset.py) | 107 ms/step (8 pcs) |
| Total time | 110 mins | 130 mins |
| Parameters (M) | 11.7 | 11.7 |
| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
#### ResNet50 on CIFAR-10

View File

@@ -188,6 +188,18 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
.
└──resnet
  ├── README.md
  ├── config
    ├── resnet18_cifar10_config.yaml              # parameter configuration
    ├── resnet18_cifar10_config_gpu.yaml          # parameter configuration
    ├── resnet18_imagenet2012_config.yaml         # parameter configuration
    ├── resnet18_imagenet2012_config_gpu.yaml     # parameter configuration
    ├── resnet34_imagenet2012_config.yaml         # parameter configuration
    ├── resnet50_cifar10_config.yaml              # parameter configuration
    ├── resnet50_imagenet2012_Ascend_config.yaml  # parameter configuration
    ├── resnet50_imagenet2012_config.yaml         # parameter configuration
    ├── resnet50_imagenet2012_GPU_config.yaml     # parameter configuration
    ├── resnet101_imagenet2012_config.yaml        # parameter configuration
    ├── se-resnet50_imagenet2012_config.yaml      # parameter configuration
  ├── scripts
    ├── run_distribute_train.sh                   # launch Ascend distributed training (8 pcs)
    ├── run_parameter_server_train.sh             # launch Ascend parameter server training (8 pcs)
@@ -209,17 +221,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
├── device_adapter.py                             # device adapter
├── local_adapter.py                              # local adapter
└── moxing_adapter.py                             # moxing adapter
├── resnet18_cifar10_config.yaml                  # parameter configuration
├── resnet18_imagenet2012_config.yaml             # parameter configuration
├── resnet34_imagenet2012_config.yaml             # parameter configuration
├── resnet50_cifar10_config.yaml                  # parameter configuration
├── resnet50_imagenet2012_Acc_config.yaml         # parameter configuration
├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration
├── resnet50_imagenet2012_config.yaml             # parameter configuration
├── resnet50_imagenet2012_GPU_Thor_config.yaml    # parameter configuration
├── resnet101_imagenet2012_config.yaml            # parameter configuration
├── se-resnet50_imagenet2012_config.yaml          # parameter configuration
├── eval.py                                       # eval net
└── train.py                                      # train net
```
@@ -674,42 +675,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
#### ResNet18 on CIFAR-10
| Parameters | Ascend 910 |
| -------------------------- | -------------------------------------- |
| Model Version | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 |
| uploaded Date | 2021-02-25 |
| MindSpore Version | 1.1.1 |
| Dataset | CIFAR-10 |
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 |
| Optimizer | Momentum |
| Loss Function | Softmax Cross Entropy |
| outputs | probability |
| Loss | 0.0002519517 |
| Speed | 13 ms/step (8 pcs) |
| Total time | 4 mins |
| Parameters (M) | 11.2 |
| Parameters | Ascend 910 | GPU |
| -------------------------- | -------------------------------------- | -------------------------------------- |
| Model Version | ResNet18 | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
| uploaded Date | 2021-02-25 | 2021-07-23 |
| MindSpore Version | 1.1.1 | 1.3.0 |
| Dataset | CIFAR-10 | CIFAR-10 |
| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
| Optimizer | Momentum | Momentum |
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
| outputs | probability | probability |
| Loss | 0.0002519517 | 0.0015517382 |
| Speed | 13 ms/step (8 pcs) | 29 ms/step (8 pcs) |
| Total time | 4 mins | 11 mins |
| Parameters (M) | 11.2 | 11.2 |
| Checkpoint for Fine tuning | 86M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
#### ResNet18 on ImageNet2012
| Parameters | Ascend 910 |
| -------------------------- | -------------------------------------- |
| Model Version | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 |
| uploaded Date | 2020-04-01 |
| MindSpore Version | 1.1.1 |
| Dataset | ImageNet2012 |
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 |
| Optimizer | Momentum |
| Loss Function | Softmax Cross Entropy |
| outputs | probability |
| Loss | 2.15702 |
| Speed | 110 ms/step (8 pcs) (may need to set set_numa_enable in dataset.py) |
| Total time | 110 mins |
| Parameters (M) | 11.7 |
| Checkpoint for Fine tuning | 90M (.ckpt file) |
| Parameters | Ascend 910 | GPU |
| -------------------------- | -------------------------------------- | -------------------------------------- |
| Model Version | ResNet18 | ResNet18 |
| Resource | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
| uploaded Date | 2020-04-01 | 2021-07-23 |
| MindSpore Version | 1.1.1 | 1.3.0 |
| Dataset | ImageNet2012 | ImageNet2012 |
| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
| Optimizer | Momentum | Momentum |
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
| outputs | probability | probability |
| Loss | 2.15702 | 2.168664 |
| Speed | 110 ms/step (8 pcs) (may need to set set_numa_enable in dataset.py) | 107 ms/step (8 pcs) |
| Total time | 110 mins | 130 mins |
| Parameters (M) | 11.7 | 11.7 |
| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
#### ResNet50 on CIFAR-10

View File

@@ -0,0 +1,80 @@
# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# Url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# Path for local
run_distribute: False
enable_profiling: False
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path/"
device_target: "GPU"
checkpoint_path: "./checkpoint/"
checkpoint_file_path: ""
# ==============================================================================
# Training options
optimizer: "Momentum"
infer_label: ""
class_num: 10
batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001
epoch_size: 90
pretrain_epoch_size: 0
save_checkpoint: True
save_checkpoint_epochs: 5
keep_checkpoint_max: 10
warmup_epochs: 5
lr_decay_mode: "poly"
lr_init: 0.01
lr_end: 0.00001
lr_max: 0.1
lars_epsilon: 0.0
lars_coefficient: 0.001
net_name: "resnet18"
dataset: "cifar10"
device_num: 1
pre_trained: ""
run_eval: False
eval_dataset_path: ""
parameter_server: False
filter_weight: False
save_best_ckpt: True
eval_start_epoch: 40
eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
# Export options
device_id: 0
width: 224
height: 224
file_name: "resnet18"
file_format: "AIR"
ckpt_file: ""
network_dataset: "resnet18_cifar10"
---
# Help description for each configuration
enable_modelarts: "Whether training on modelarts, default: False"
data_url: "Dataset url for obs"
checkpoint_url: "The location of checkpoint for obs"
data_path: "Dataset path for local"
output_path: "Training output path for local"
load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether to enable profiling while training, default: False"
num_classes: "Number of classes in the dataset"
batch_size: "Batch size for training and evaluation"
epoch_size: "Total training epochs."
checkpoint_path: "The location of the checkpoint file."
checkpoint_file_path: "The location of the checkpoint file."
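
The new GPU config is consumed like the existing ones: its path is passed as the CONFIG_PATH argument of the launch scripts, which this commit updates to resolve it through get_real_path. A minimal launch sketch using the two-argument form that the CI test at the bottom of this diff also uses; the dataset path is a placeholder:

```bash
# Distributed GPU training with the new CIFAR-10 config.
# Run from the scripts directory; the dataset path is a placeholder.
cd resnet/scripts
bash run_distribute_train_gpu.sh /path/to/cifar-10-batches-bin \
    ../config/resnet18_cifar10_config_gpu.yaml
```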

View File

@@ -0,0 +1,82 @@
# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
# Url for modelarts
data_url: ""
train_url: ""
checkpoint_url: ""
# Path for local
run_distribute: False
enable_profiling: False
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path/"
device_target: "GPU"
checkpoint_path: "./checkpoint/"
checkpoint_file_path: ""
# ==============================================================================
# Training options
optimizer: "Momentum"
infer_label: ""
class_num: 1001
batch_size: 256
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001
epoch_size: 90
pretrain_epoch_size: 0
save_checkpoint: True
save_checkpoint_epochs: 5
keep_checkpoint_max: 10
warmup_epochs: 0
lr_decay_mode: "linear"
use_label_smooth: True
label_smooth_factor: 0.1
lr_init: 0
lr_max: 0.8
lr_end: 0.0
lars_epsilon: 0.0
lars_coefficient: 0.001
net_name: "resnet18"
dataset: "imagenet2012"
device_num: 1
pre_trained: ""
run_eval: False
eval_dataset_path: ""
parameter_server: False
filter_weight: False
save_best_ckpt: True
eval_start_epoch: 40
eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
# Export options
device_id: 0
width: 224
height: 224
file_name: "resnet18"
file_format: "AIR"
ckpt_file: ""
network_dataset: "resnet18_imagenet2012"
---
# Help description for each configuration
enable_modelarts: "Whether training on modelarts, default: False"
data_url: "Dataset url for obs"
checkpoint_url: "The location of checkpoint for obs"
data_path: "Dataset path for local"
output_path: "Training output path for local"
load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether to enable profiling while training, default: False"
num_classes: "Number of classes in the dataset"
batch_size: "Batch size for training and evaluation"
epoch_size: "Total training epochs."
checkpoint_path: "The location of the checkpoint file."
checkpoint_file_path: "The location of the checkpoint file."
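
The trailing export options feed export.py through the same config machinery. A hedged export sketch, assuming get_config exposes YAML keys such as ckpt_file and file_format as command-line flags (as src/model_utils/config.py suggests); the checkpoint path is a placeholder:

```bash
# Export a trained checkpoint for inference with this config.
# --ckpt_file is a placeholder path; --file_format overrides the YAML default.
python export.py --config_path=config/resnet18_imagenet2012_config_gpu.yaml \
    --ckpt_file=/path/to/resnet18_imagenet2012.ckpt --file_format=MINDIR
```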

View File

@@ -35,7 +35,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
CONFIG_FILE=$3
CONFIG_FILE=$(get_real_path $3)
if [ $# == 4 ]
then
@@ -101,7 +101,7 @@ do
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r ../*.yaml ./train_parallel$i
cp -r ../config/*.yaml ./train_parallel$i
cp -r ../src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"

View File

@@ -34,7 +34,7 @@ get_real_path(){
}
PATH1=$(get_real_path $1)
CONFIG_FILE=$2
CONFIG_FILE=$(get_real_path $2)
if [ $# == 3 ]
then
@@ -80,7 +80,7 @@ rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../*.yaml ./train_parallel
cp -r ../config/*.yaml ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit

View File

@@ -30,7 +30,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
CONFIG_FILE=$3
CONFIG_FILE=$(get_real_path $3)
if [ ! -d $PATH1 ]
@@ -58,7 +58,7 @@ fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../*.yaml ./eval
cp -r ../config/*.yaml ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log

View File

@@ -30,7 +30,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
CONFIG_FILE=$3
CONFIG_FILE=$(get_real_path $3)
if [ ! -d $PATH1 ]
@@ -58,7 +58,7 @@ fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../*.yaml ./eval
cp -r ../config/*.yaml ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log

View File

@@ -30,7 +30,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
CONFIG_FILE=$3
CONFIG_FILE=$(get_real_path $3)
if [ ! -d $PATH1 ]
@@ -56,7 +56,7 @@ then
rm -rf ./infer
fi
mkdir ./infer
cp ../*.yaml ./infer
cp ../config/*.yaml ./infer
cp ../*.py ./infer
cp *.sh ./infer
cp -r ../src ./infer

View File

@@ -87,7 +87,7 @@ function preprocess_data()
fi
mkdir preprocess_Result
BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")")
CONFIG_FILE="${BASE_PATH}/$1"
CONFIG_FILE="${BASE_PATH}/config/$1"
python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log
}

View File

@@ -30,7 +30,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
CONFIG_FILE=$3
CONFIG_FILE=$(get_real_path $3)
if [ $# == 4 ]
then
@@ -71,7 +71,7 @@ export DEVICE_ID=0
export RANK_ID=0
rm -rf ./sched
mkdir ./sched
cp ../*.yaml ./sched
cp ../config/*.yaml ./sched
cp ../*.py ./sched
cp *.sh ./sched
cp -r ../src ./sched
@@ -97,7 +97,7 @@ do
export RANK_ID=$i
rm -rf ./server_$i
mkdir ./server_$i
cp ../*.yaml ./server_$i
cp ../config/*.yaml ./server_$i
cp ../*.py ./server_$i
cp *.sh ./server_$i
cp -r ../src ./server_$i
@@ -125,7 +125,7 @@ do
export RANK_ID=$i
rm -rf ./worker_$i
mkdir ./worker_$i
cp ../*.yaml ./worker_$i
cp ../config/*.yaml ./worker_$i
cp ../*.py ./worker_$i
cp *.sh ./worker_$i
cp -r ../src ./worker_$i

View File

@@ -29,7 +29,7 @@ get_real_path(){
}
PATH1=$(get_real_path $1)
CONFIG_FILE=$2
CONFIG_FILE=$(get_real_path $2)
if [ $# == 3 ]
then
PATH2=$(get_real_path $3)
@@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081
export MS_ROLE=MS_SCHED
rm -rf ./sched
mkdir ./sched
cp ../*.yaml ./sched
cp ../config/*.yaml ./sched
cp ../*.py ./sched
cp *.sh ./sched
cp -r ../src ./sched
@@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++));
do
rm -rf ./server_$i
mkdir ./server_$i
cp ../*.yaml ./server_$i
cp ../config/*.yaml ./server_$i
cp ../*.py ./server_$i
cp *.sh ./server_$i
cp -r ../src ./server_$i
@@ -110,7 +110,7 @@ done
export MS_ROLE=MS_WORKER
rm -rf ./worker
mkdir ./worker
cp ../*.yaml ./worker
cp ../config/*.yaml ./worker
cp ../*.py ./worker
cp *.sh ./worker
cp -r ../src ./worker
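
The scheduler, server, and worker roles above are coordinated entirely through environment variables (MS_ROLE, MS_SCHED_HOST, MS_SCHED_PORT, MS_SERVER_NUM) that the script exports before each launch. A hedged invocation sketch; the script name follows the repo's *_gpu.sh naming convention and is an assumption, as are the paths:

```bash
# Parameter-server training; the first argument is the dataset path,
# the second the config path, matching the get_real_path calls above.
cd resnet/scripts
bash run_parameter_server_train_gpu.sh /path/to/cifar-10-batches-bin \
    ../config/resnet18_cifar10_config_gpu.yaml
```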

View File

@@ -34,7 +34,7 @@ get_real_path(){
}
PATH1=$(get_real_path $1)
CONFIG_FILE=$2
CONFIG_FILE=$(get_real_path $2)
if [ $# == 3 ]
then
PATH2=$(get_real_path $3)
@@ -80,7 +80,7 @@ then
rm -rf ./train
fi
mkdir ./train
cp ../*.yaml ./train
cp ../config/*.yaml ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train

View File

@@ -34,7 +34,7 @@ get_real_path(){
}
PATH1=$(get_real_path $1)
CONFIG_FILE=$2
CONFIG_FILE=$(get_real_path $2)
if [ $# == 3 ]
then
@@ -83,7 +83,7 @@ then
rm -rf ./train
fi
mkdir ./train
cp ../*.yaml ./train
cp ../config/*.yaml ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train

View File

@@ -21,7 +21,7 @@ import argparse
from pprint import pprint, pformat
import yaml
_config_path = "./resnet50_cifar10_config.yaml"
_config_path = "./config/resnet50_cifar10_config.yaml"
class Config:
"""
@@ -118,7 +118,7 @@ def get_config():
parser = argparse.ArgumentParser(description="default name", add_help=False)
current_dir = os.path.dirname(os.path.abspath(__file__))
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \
"../resnet50_cifar10_config.yaml"), help="Config file path")
"../config/resnet50_cifar10_config.yaml"), help="Config file path")
path_args, _ = parser.parse_known_args()
default, helper, choices = parse_yaml(path_args.config_path)
pprint(default)
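
With the default now pointing under config/, callers that relied on the old flat layout must pass --config_path explicitly. A minimal sketch, assuming train.py forwards YAML keys such as data_path as flags via get_config; paths are placeholders:

```bash
# Train against a config in its new location under config/.
python train.py --config_path=config/resnet18_cifar10_config_gpu.yaml \
    --data_path=/path/to/cifar-10-batches-bin
```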

View File

@@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend():
new_list = ["total_epochs=10", "10"]
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
exec_network_shell = "cd resnet/scripts; bash run_distribute_train.sh {} {} {}"\
.format(utils.rank_table_path, dataset_path, config_path)
os.system(exec_network_shell)
@@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu():
new_list = ["total_epochs=10", "10"]
utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
os.system("nvidia-smi")
exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \
.format(dataset_path, config_path)