diff --git a/example/mobilenetv2_quant/Readme.md b/example/mobilenetv2_quant/Readme.md
index 5d8f55c394e..351fc80d761 100644
--- a/example/mobilenetv2_quant/Readme.md
+++ b/example/mobilenetv2_quant/Readme.md
@@ -67,7 +67,7 @@ Dataset used: imagenet
 ```
 # training example
-  Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
+  Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
 ```

 ### Result
@@ -104,156 +104,6 @@ Inference result will be stored in the example path, you can find result like th
 result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
 ```
-# Model description
-
-## Performance
-
-### Training Performance
-
-| Parameters          | MobilenetV2                                    | MobilenetV2 Quant                              |
-| ------------------- | ---------------------------------------------- | ---------------------------------------------- |
-| Resource            | Ascend 910; cpu 2.60GHz, 56 cores; memory 314G | Ascend 910; cpu 2.60GHz, 56 cores; memory 314G |
-| uploaded Date       | 05/06/2020                                     | 06/12/2020                                     |
-| MindSpore Version   | 0.3.0                                          | 0.3.0                                          |
-| Dataset             | ImageNet                                       | ImageNet                                       |
-| Training Parameters | src/config.py                                  | src/config.py                                  |
-| Optimizer           | Momentum                                       | Momentum                                       |
-| Loss Function       | CrossEntropyWithLabelSmooth                    | CrossEntropyWithLabelSmooth                    |
-| Loss                | 200 epoch: 1.913                               | 50 epoch: 1.912                                |
-| Train Accuracy      | ACC1[77.09%] ACC5[92.57%]                      | ACC1[77.09%] ACC5[92.57%]                      |
-| Eval Accuracy       | ACC1[77.09%] ACC5[92.57%]                      | ACC1[77.09%] ACC5[92.57%]                      |
-| Total time          | 48h                                            | 12h                                            |
-| Checkpoint          | /mobilenetv2.ckpt                              |                                                |
-
-#### Inference Performance
-
-| Parameters          | Ascend 910     | Ascend 310 | Nvidia V100 |
-| ------------------- | -------------- | ---------- | ----------- |
-| uploaded Date       | 06/12/2020     |            |             |
-| MindSpore Version   | 0.3.0          |            |             |
-| Dataset             | ImageNet, 1.2W |            |             |
-| batch_size          |                |            |             |
-| outputs             |                |            |             |
-| Accuracy            |                |            |             |
-| Speed               |                |            |             |
-| Total time          |                |            |             |
-| Model for inference |                |            |             |
-
 # ModelZoo Homepage [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
\ No newline at end of file
diff --git a/example/mobilenetv2_quant/scripts/run_infer.sh b/example/mobilenetv2_quant/scripts/run_infer.sh
index 907b823a475..2182eaeb40d 100644
--- a/example/mobilenetv2_quant/scripts/run_infer.sh
+++ b/example/mobilenetv2_quant/scripts/run_infer.sh
@@ -35,20 +35,19 @@ fi
 # set environment
 BASEPATH=$(cd "`dirname $0`" || exit; pwd)
-export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 export DEVICE_ID=0
 export RANK_ID=0
 export RANK_SIZE=1
-if [ -d "eval" ];
+if [ -d "../eval" ];
 then
     rm -rf ../eval
 fi
 mkdir ../eval
 cd ../eval || exit
-# luanch
+# launch
 python ${BASEPATH}/../eval.py \
     --platform=$1 \
     --dataset_path=$2 \
     --checkpoint_path=$3 \
-    &> ../infer.log &  # dataset val folder path
+    &> infer.log &  # dataset val folder path
diff --git a/example/mobilenetv2_quant/scripts/run_train.sh b/example/mobilenetv2_quant/scripts/run_train.sh
index c18d03e4186..26946ad88bd 100644
--- a/example/mobilenetv2_quant/scripts/run_train.sh
+++ b/example/mobilenetv2_quant/scripts/run_train.sh
@@ -30,7 +30,7 @@ run_ascend()
     BASEPATH=$(cd "`dirname $0`" || exit; pwd)
     export PYTHONPATH=${BASEPATH}:$PYTHONPATH
-    if [ -d "train" ];
+    if [ -d "../train" ];
     then
         rm -rf ../train
     fi
@@ -43,39 +43,7 @@ run_ascend()
           --training_script=${BASEPATH}/../train.py \
           --dataset_path=$5 \
          --pre_trained=$6 \
-          --platform=$1 &> ../train.log &  # dataset train folder
-}
-
-run_gpu()
-{
-    if [ $2 -lt 1 ] && [ $2 -gt 8 ]
-    then
-        echo "error: DEVICE_NUM=$2 is not in (1-8)"
-        exit 1
-    fi
-
-    if [ ! -d $4 ]
-    then
-        echo "error: DATASET_PATH=$4 is not a directory"
-        exit 1
-    fi
-
-    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
-    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
-    if [ -d "train" ];
-    then
-        rm -rf ../train
-    fi
-    mkdir ../train
-    cd ../train || exit
-
-    export CUDA_VISIBLE_DEVICES="$3"
-    mpirun -n $2 --allow-run-as-root \
-    python ${BASEPATH}/../train.py \
-    --dataset_path=$4 \
-    --platform=$1 \
-    --pre_trained=$5 \
-    &> ../train.log &  # dataset train folder
+          --platform=$1 &> train.log &  # dataset train folder
 }

 if [ $# -gt 6 ] || [ $# -lt 4 ]
diff --git a/example/mobilenetv2_quant/src/config.py b/example/mobilenetv2_quant/src/config.py
index 8cbab844d4f..61d02b24b1b 100644
--- a/example/mobilenetv2_quant/src/config.py
+++ b/example/mobilenetv2_quant/src/config.py
@@ -35,21 +35,3 @@ config_ascend = ed({
     "keep_checkpoint_max": 200,
     "save_checkpoint_path": "./checkpoint",
 })
-
-config_gpu = ed({
-    "num_classes": 1000,
-    "image_height": 224,
-    "image_width": 224,
-    "batch_size": 64,
-    "epoch_size": 200,
-    "warmup_epochs": 4,
-    "lr": 0.5,
-    "momentum": 0.9,
-    "weight_decay": 4e-5,
-    "label_smooth": 0.1,
-    "loss_scale": 1024,
-    "save_checkpoint": True,
-    "save_checkpoint_epochs": 1,
-    "keep_checkpoint_max": 200,
-    "save_checkpoint_path": "./checkpoint",
-})
diff --git a/example/mobilenetv2_quant/src/dataset.py b/example/mobilenetv2_quant/src/dataset.py
index e4d757ec0a3..a933c505b98 100644
--- a/example/mobilenetv2_quant/src/dataset.py
+++ b/example/mobilenetv2_quant/src/dataset.py
@@ -41,17 +41,10 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
             if rank_size == 1:
                 ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
             else:
-                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False,
                                              num_shards=rank_size, shard_id=rank_id)
         else:
             ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
-    elif platform == "GPU":
-        if do_train:
-            from mindspore.communication.management import get_rank, get_group_size
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
-                                         num_shards=get_group_size(), shard_id=get_rank())
-        else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
     else:
         raise ValueError("Unsupport platform.")
diff --git a/example/mobilenetv2_quant/src/launch.py b/example/mobilenetv2_quant/src/launch.py
index 52b0b0b3a87..08477a363a1 100644
--- a/example/mobilenetv2_quant/src/launch.py
+++ b/example/mobilenetv2_quant/src/launch.py
@@ -18,6 +18,7 @@ import sys
 import json
 import subprocess
 import shutil
+import platform
 from argparse import ArgumentParser
@@ -80,7 +81,8 @@ def main():
         device_ips[device_id] = device_ip
         print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
     hccn_table = {}
-    hccn_table['board_id'] = '0x0020'
+    arch = platform.processor()
+    hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
     hccn_table['chip_info'] = '910'
     hccn_table['deploy_mode'] = 'lab'
     hccn_table['group_count'] = '1'
diff --git a/example/mobilenetv2_quant/train.py b/example/mobilenetv2_quant/train.py
index 0491bdb2513..a3d54af26f2 100644
--- a/example/mobilenetv2_quant/train.py
+++ b/example/mobilenetv2_quant/train.py
@@ -21,7 +21,6 @@ import numpy as np
 from mindspore import context
 from mindspore import Tensor
 from mindspore import nn
-from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.loss.loss import _Loss
@@ -57,9 +56,6 @@ if args_opt.platform == "Ascend":
     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                         device_id=device_id, save_graphs=False)
-elif args_opt.platform == "GPU":
-    context.set_context(mode=context.GRAPH_MODE,
-                        device_target="GPU", save_graphs=False)
 else:
     raise ValueError("Unsupport platform.")
@@ -191,7 +187,6 @@ if __name__ == '__main__':
         if run_distribute:
             context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                               parameter_broadcast=True, mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
             init()
         epoch_size = config_ascend.epoch_size
diff --git a/example/resnet50_quant/scripts/run_infer.sh b/example/resnet50_quant/scripts/run_infer.sh
index b60112920be..7ffdd803a7c 100644
--- a/example/resnet50_quant/scripts/run_infer.sh
+++ b/example/resnet50_quant/scripts/run_infer.sh
@@ -15,8 +15,7 @@
 # ============================================================================
 if [ $# != 3 ]
 then
-    echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH] \
-          GPU: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
+    echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
     exit 1
 fi
diff --git a/example/resnet50_quant/scripts/run_train.sh b/example/resnet50_quant/scripts/run_train.sh
index 5d905677488..3013e6ce5b9 100644
--- a/example/resnet50_quant/scripts/run_train.sh
+++ b/example/resnet50_quant/scripts/run_train.sh
@@ -46,51 +46,16 @@ run_ascend()
           --device_target=$1 &> train.log &  # dataset train folder
 }

-run_gpu()
-{
-    if [ $2 -lt 1 ] && [ $2 -gt 8 ]
-    then
-        echo "error: DEVICE_NUM=$2 is not in (1-8)"
-        exit 1
-    fi
-
-    if [ ! -d $4 ]
-    then
-        echo "error: DATASET_PATH=$4 is not a directory"
-        exit 1
-    fi
-
-    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
-    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
-    if [ -d "../train" ];
-    then
-        rm -rf ../train
-    fi
-    mkdir ../train
-    cd ../train || exit
-
-    export CUDA_VISIBLE_DEVICES="$3"
-    mpirun -n $2 --allow-run-as-root \
-    python ${BASEPATH}/../train.py \
-    --dataset_path=$4 \
-    --platform=$1 \
-    --pre_trained=$5 \
-    &> train.log &  # dataset train folder
-}
-
 if [ $# -gt 6 ] || [ $# -lt 4 ]
 then
     echo "Usage:\n \
           Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
-          GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
          "
 exit 1
 fi

 if [ $1 = "Ascend" ] ; then
     run_ascend "$@"
-elif [ $1 = "GPU" ] ; then
-    run_gpu "$@"
 else
     echo "not support platform"
 fi;
diff --git a/example/resnet50_quant/train.py b/example/resnet50_quant/train.py
index b49160e968a..5a103af2b6a 100755
--- a/example/resnet50_quant/train.py
+++ b/example/resnet50_quant/train.py
@@ -23,7 +23,7 @@ from mindspore.train.model import Model, ParallelMode
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint
-from mindspore.communication.management import init, get_rank, get_group_size
+from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from models.resnet_quant import resnet50_quant
@@ -57,13 +57,8 @@ if __name__ == '__main__':
                                           mirror_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
         ckpt_save_dir = config.save_checkpoint_path
-    elif target == "GPU":
-        context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
-        init("nccl")
-        context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          mirror_mean=True)
-        ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
-
+    else:
+        raise ValueError("Unsupport platform.")
     epoch_size = config.epoch_size
     net = resnet50_quant(class_num=config.class_num)
     net.set_train(True)
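
For reference, a minimal standalone sketch of the architecture-keyed board_id lookup that the launch.py hunk above introduces. The helper name and the explicit error fallback are illustrative additions, not part of the patch; like the patch, it assumes the host reports its architecture as 'aarch64' or 'x86_64'.

    # board_id_sketch.py -- hypothetical helper mirroring the launch.py change
    import platform

    # Mapping taken from the patch; any other architecture is treated as unsupported here.
    BOARD_IDS = {'aarch64': '0x002f', 'x86_64': '0x0000'}

    def select_board_id():
        """Return the hccn_table board_id string for the current host CPU."""
        arch = platform.processor()
        if arch not in BOARD_IDS:
            raise RuntimeError('unsupported host architecture: {!r}'.format(arch))
        return BOARD_IDS[arch]

    if __name__ == '__main__':
        print(select_board_id())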