forked from mindspore-Ecosystem/mindspore
update resnet101 scripts
This commit is contained in:
parent
8a0b3e234d
commit
5714e07968
|
@ -52,7 +52,7 @@ class DatasetHelper:
|
|||
sink_size (int): Control the amount of data each sink.
|
||||
If sink_size=-1, sink the complete dataset each epoch.
|
||||
If sink_size>0, sink sink_size data each epoch. Default: -1.
|
||||
epoch_num (int): Control the number of epoch data to send.
|
||||
epoch_num (int): Control the number of epoch data to send. Default: 1.
|
||||
|
||||
Examples:
|
||||
>>> dataset_helper = DatasetHelper(dataset)
|
||||
|
|
|
@ -44,6 +44,9 @@ ImageNet2012
|
|||
├── run_distribute_train.sh # launch distributed training(8 pcs)
|
||||
├── run_eval.sh # launch evaluation
|
||||
└── run_standalone_train.sh # launch standalone training(1 pcs)
|
||||
├── run_distribute_train_gpu.sh # launch gpu distributed training(8 pcs)
|
||||
├── run_eval_gpu.sh # launch gpu evaluation
|
||||
└── run_standalone_train_gpu.sh # launch gpu standalone training(1 pcs)
|
||||
├── src
|
||||
├── config.py # parameter configuration
|
||||
├── dataset.py # data preprocessing
|
||||
|
@ -241,11 +244,11 @@ result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199
|
|||
### Running on GPU
|
||||
```
|
||||
# distributed training example
|
||||
mpirun -n 8 python train.py --net=resnet50 --dataset=cifar10 --dataset_path=~/cifar-10-batches-bin --device_target="GPU" --run_distribute=True
|
||||
sh run_distribute_train_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
|
||||
|
||||
# standalone training example
|
||||
python train.py --net=resnet50 --dataset=cifar10 --dataset_path=~/cifar-10-batches-bin --device_target="GPU"
|
||||
sh run_standalone_train_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
|
||||
|
||||
# infer example
|
||||
python eval.py --net=resnet50 --dataset=cifar10 --dataset_path=~/cifar10-10-verify-bin --device_target="GPU" --checkpoint_path=resnet-90_195.ckpt
|
||||
sh run_eval_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH]
|
||||
```
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# != 4 ]
|
||||
then
|
||||
echo "Usage: sh run_eval_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $1 != "resnet50" ] && [ $1 != "resnet101" ]
|
||||
then
|
||||
echo "error: the selected net is neither resnet50 nor resnet101"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $2 != "cifar10" ] && [ $2 != "imagenet2012" ]
|
||||
then
|
||||
echo "error: the selected dataset is neither cifar10 nor imagenet2012"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $1 == "resnet101" ] && [ $2 == "cifar10" ]
|
||||
then
|
||||
echo "error: evaluating resnet101 with cifar10 dataset is unsupported now!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
get_real_path(){
|
||||
if [ "${1:0:1}" == "/" ]; then
|
||||
echo "$1"
|
||||
else
|
||||
echo "$(realpath -m $PWD/$1)"
|
||||
fi
|
||||
}
|
||||
|
||||
PATH1=$(get_real_path $3)
|
||||
PATH2=$(get_real_path $4)
|
||||
|
||||
|
||||
if [ ! -d $PATH1 ]
|
||||
then
|
||||
echo "error: DATASET_PATH=$PATH1 is not a directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $PATH2 ]
|
||||
then
|
||||
echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ulimit -u unlimited
|
||||
export DEVICE_NUM=1
|
||||
export DEVICE_ID=0
|
||||
export RANK_SIZE=$DEVICE_NUM
|
||||
export RANK_ID=0
|
||||
|
||||
if [ -d "eval" ];
|
||||
then
|
||||
rm -rf ./eval
|
||||
fi
|
||||
mkdir ./eval
|
||||
cp ../*.py ./eval
|
||||
cp *.sh ./eval
|
||||
cp -r ../src ./eval
|
||||
cd ./eval || exit
|
||||
env > env.log
|
||||
echo "start evaluation for device $DEVICE_ID"
|
||||
python eval.py --net=$1 --dataset=$2 --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log &
|
||||
cd ..
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
if [ $# != 3 ] && [ $# != 4 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
|
@ -157,13 +157,18 @@ if __name__ == '__main__':
|
|||
else:
|
||||
loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
|
||||
num_classes=config.class_num)
|
||||
## fp32 training
|
||||
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay)
|
||||
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
|
||||
# # Mixed precision
|
||||
# loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
|
||||
# opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay, config.loss_scale)
|
||||
# model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}, amp_level="O2")
|
||||
|
||||
if args_opt.net == "resnet101":
|
||||
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay,
|
||||
config.loss_scale)
|
||||
loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
|
||||
# Mixed precision
|
||||
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
|
||||
amp_level="O2", keep_batchnorm_fp32=True)
|
||||
else:
|
||||
## fp32 training
|
||||
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay)
|
||||
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
|
||||
|
||||
# define callbacks
|
||||
time_cb = TimeMonitor(data_size=step_size)
|
||||
|
|
Loading…
Reference in New Issue