forked from mindspore-Ecosystem/mindspore
!5601 fix shufflenet_scripts
Merge pull request !5601 from panfengfeng/fix_shufflenet_scripts
This commit is contained in:
commit
0f344f33e2
|
@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/)
|
|||
+-- Readme.md # descriptions about ShuffleNetV2
|
||||
+-- scripts
|
||||
¦ +--run_distribute_train_for_gpu.sh # shell script for distributed training
|
||||
¦ +--run_eval_for_multi_gpu.sh # shell script for evaluation
|
||||
¦ +--run_eval_for_gpu.sh # shell script for evaluation
|
||||
¦ +--run_standalone_train_for_gpu.sh # shell script for standalone training
|
||||
+-- src
|
||||
¦ +--config.py # parameter configuration
|
||||
|
@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/)
|
|||
|
||||
You can start training using python or shell scripts. The usage of the shell scripts is as follows:
|
||||
|
||||
- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR]
|
||||
- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR]
|
||||
- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
|
||||
- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH]
|
||||
|
||||
### Launch
|
||||
|
||||
```
|
||||
# training example
|
||||
python:
|
||||
GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 &
|
||||
GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 &
|
||||
|
||||
shell:
|
||||
GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/
|
||||
GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
|
||||
```
|
||||
|
||||
### Result
|
||||
|
||||
Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log will be redirected to `./train/train.log`.
|
||||
Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and training log will be redirected to `./train/train.log`.
|
||||
|
||||
## [Eval process](#contents)
|
||||
|
||||
|
@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a
|
|||
|
||||
You can start evaluation using python or shell scripts. The usage of the shell scripts is as follows:
|
||||
|
||||
- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH]
|
||||
- GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
|
||||
|
||||
### Launch
|
||||
|
||||
```
|
||||
# infer example
|
||||
python:
|
||||
GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 &
|
||||
GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' > eval.log 2>&1 &
|
||||
|
||||
shell:
|
||||
GPU: sh run_eval_for_multi_gpu.sh 0 250
|
||||
GPU: cd scripts && sh run_eval_for_gpu.sh ~/imagenet/val/ 'checkpoint_file'
|
||||
```
|
||||
|
||||
> Checkpoints are produced during the training process.
|
||||
|
||||
### Result
|
||||
|
||||
Inference result will be stored in the example path, you can find result in `val.log`.
|
||||
Inference result will be stored in the example path, you can find result in `eval.log`.
|
||||
|
|
|
@ -31,7 +31,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)')
|
||||
parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
|
||||
parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
|
||||
parser.add_argument('--epoch', type=str, default='')
|
||||
args_opt = parser.parse_args()
|
||||
|
||||
if args_opt.platform == 'Ascend':
|
||||
|
@ -43,7 +42,7 @@ if __name__ == '__main__':
|
|||
ckpt = load_checkpoint(args_opt.checkpoint)
|
||||
load_param_into_net(net, ckpt)
|
||||
net.set_train(False)
|
||||
dataset = create_dataset(args_opt.dataset_path, cfg, False)
|
||||
dataset = create_dataset(args_opt.dataset_path, False, 0, 1)
|
||||
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False,
|
||||
smooth_factor=0.1, num_classes=cfg.num_classes)
|
||||
eval_metrics = {'Loss': nn.Loss(),
|
||||
|
|
|
@ -13,5 +13,45 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
DATA_DIR=$1
|
||||
mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
|
||||
if [ $# -lt 3 ]
|
||||
then
|
||||
echo "Usage: \
|
||||
sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \
|
||||
"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Reject a DEVICE_NUM outside the supported range 1-8. The two bounds must be
# OR-ed: a value can never be both below 1 and above 8, so an AND test would
# never trigger and invalid values would slip through to mpirun.
if [ "$1" -lt 1 ] || [ "$1" -gt 8 ]
then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
    exit 1
fi
|
||||
|
||||
# check dataset file
|
||||
if [ ! -d $3 ]
|
||||
then
|
||||
echo "error: DATASET_PATH=$3 is not a directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export DEVICE_NUM=$1
|
||||
export RANK_SIZE=$1
|
||||
|
||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
||||
if [ -d "../train" ];
|
||||
then
|
||||
rm -rf ../train
|
||||
fi
|
||||
mkdir ../train
|
||||
cd ../train || exit
|
||||
|
||||
export CUDA_VISIBLE_DEVICES="$2"
|
||||
|
||||
if [ $1 -gt 1 ]
|
||||
then
|
||||
mpirun -n $1 --allow-run-as-root \
|
||||
python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 &
|
||||
else
|
||||
python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 &
|
||||
fi
|
||||
|
|
|
@ -13,6 +13,35 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
DEVICE_ID=$1
|
||||
EPOCH=$2
|
||||
CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 &
|
||||
if [ $# != 2 ]
|
||||
then
|
||||
echo "GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check dataset file
|
||||
if [ ! -d $1 ]
|
||||
then
|
||||
echo "error: DATASET_PATH=$1 is not a directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check checkpoint file
|
||||
if [ ! -f $2 ]
|
||||
then
|
||||
echo "error: CHECKPOINT_PATH=$2 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
||||
export DEVICE_ID=0
|
||||
|
||||
if [ -d "../eval" ];
|
||||
then
|
||||
rm -rf ../eval
|
||||
fi
|
||||
mkdir ../eval
|
||||
cd ../eval || exit
|
||||
|
||||
python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 &
|
|
@ -13,6 +13,28 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
DEVICE_ID=$1
|
||||
DATA_DIR=$2
|
||||
CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
|
||||
if [ $# -lt 1 ]
|
||||
then
|
||||
echo "Usage: \
|
||||
sh run_standalone_train_for_gpu.sh [DATASET_PATH] \
|
||||
"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check dataset file
|
||||
if [ ! -d $1 ]
|
||||
then
|
||||
echo "error: DATASET_PATH=$1 is not a directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
||||
if [ -d "../train" ];
|
||||
then
|
||||
rm -rf ../train
|
||||
fi
|
||||
mkdir ../train
|
||||
cd ../train || exit
|
||||
|
||||
python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 &
|
||||
|
|
|
@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
|
|||
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
|
||||
# apply batch operations
|
||||
ds = ds.batch(cfg.batch_size, drop_remainder=True)
|
||||
# apply dataset repeat operation
|
||||
ds = ds.repeat(repeat_num)
|
||||
|
||||
return ds
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# ============================================================================
|
||||
"""train_imagenet."""
|
||||
import argparse
|
||||
import ast
|
||||
import os
|
||||
import random
|
||||
import numpy as np
|
||||
|
@ -23,7 +24,7 @@ from network import ShuffleNetV2
|
|||
import mindspore.nn as nn
|
||||
from mindspore import context
|
||||
from mindspore import dataset as de
|
||||
from mindspore import ParallelMode
|
||||
from mindspore.context import ParallelMode
|
||||
from mindspore import Tensor
|
||||
from mindspore.communication.management import init, get_rank, get_group_size
|
||||
from mindspore.nn.optim.momentum import Momentum
|
||||
|
@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed)
|
|||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='image classification training')
|
||||
parser.add_argument('--dataset_path', type=str, default='/home/data/imagenet_jpeg/train/', help='Dataset path')
|
||||
parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
|
||||
parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
|
||||
parser.add_argument('--is_distributed', action='store_true', default=False,
|
||||
help='distributed training')
|
||||
parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training')
|
||||
parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
|
||||
parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter')
|
||||
args_opt = parser.parse_args()
|
||||
|
|
Loading…
Reference in New Issue