forked from mindspore-Ecosystem/mindspore
remove unused code in quant train
This commit is contained in:
parent
e21a0aad69
commit
52a90f2587
|
@@ -67,7 +67,7 @@ Dataset used: imagenet
|
||||||
|
|
||||||
```
|
```
|
||||||
# training example
|
# training example
|
||||||
Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
|
Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
|
||||||
```
|
```
|
||||||
|
|
||||||
### Result
|
### Result
|
||||||
|
@@ -104,156 +104,6 @@ Inference result will be stored in the example path, you can find result like th
|
||||||
result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
|
result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
|
||||||
```
|
```
|
||||||
|
|
||||||
# Model description
|
|
||||||
|
|
||||||
## Performance
|
|
||||||
|
|
||||||
### Training Performance
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Parameters</th>
|
|
||||||
<th>MobilenetV2</th>
|
|
||||||
<th>MobilenetV2 Quant</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Resource</td>
|
|
||||||
<td>Ascend 910 <br />
|
|
||||||
cpu:2.60GHz 56cores <br />
|
|
||||||
memory:314G</td>
|
|
||||||
<td>Ascend 910 <br />
|
|
||||||
cpu:2.60GHz 56cores <br />
|
|
||||||
memory:314G</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>uploaded Date</td>
|
|
||||||
<td>05/06/2020</td>
|
|
||||||
<td>06/12/2020</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>MindSpore Version</td>
|
|
||||||
<td>0.3.0</td>
|
|
||||||
<td>0.3.0</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Dataset</td>
|
|
||||||
<td>ImageNet</td>
|
|
||||||
<td>ImageNet</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Training Parameters</td>
|
|
||||||
<td>src/config.py</td>
|
|
||||||
<td>src/config.py</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Optimizer</td>
|
|
||||||
<td>Momentum</td>
|
|
||||||
<td>Momentum</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Loss Function</td>
|
|
||||||
<td>CrossEntropyWithLabelSmooth</td>
|
|
||||||
<td>CrossEntropyWithLabelSmooth</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Loss</td>
|
|
||||||
<td>200 epoch:1.913</td>
|
|
||||||
<td>50 epoch:1.912</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Train Accuracy</td>
|
|
||||||
<td>ACC1[77.09%] ACC5[92.57%]</td>
|
|
||||||
<td>ACC1[77.09%] ACC5[92.57%]</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Eval Accuracy</td>
|
|
||||||
<td>ACC1[77.09%] ACC5[92.57%]</td>
|
|
||||||
<td>ACC1[77.09%] ACC5[92.57%]</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Total time</td>
|
|
||||||
<td>48h</td>
|
|
||||||
<td>12h</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Checkpoint</td>
|
|
||||||
<td>/</td>
|
|
||||||
<td>mobilenetv2.ckpt</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
#### Inference Performance
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Parameters</th>
|
|
||||||
<th>Ascend 910</th>
|
|
||||||
<th>Ascend 310</th>
|
|
||||||
<th>Nvidia V100</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>uploaded Date</td>
|
|
||||||
<td>06/12/2020</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>MindSpore Version</td>
|
|
||||||
<td>0.3.0</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Dataset</td>
|
|
||||||
<td>ImageNet, 1.2W</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>batch_size</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>outputs</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Accuracy</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Speed</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Total time</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Model for inference</td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
<td></td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
# ModelZoo Homepage
|
# ModelZoo Homepage
|
||||||
[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
|
[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
|
|
@@ -35,20 +35,19 @@ fi
|
||||||
|
|
||||||
# set environment
|
# set environment
|
||||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
||||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
|
||||||
export DEVICE_ID=0
|
export DEVICE_ID=0
|
||||||
export RANK_ID=0
|
export RANK_ID=0
|
||||||
export RANK_SIZE=1
|
export RANK_SIZE=1
|
||||||
if [ -d "eval" ];
|
if [ -d "../eval" ];
|
||||||
then
|
then
|
||||||
rm -rf ../eval
|
rm -rf ../eval
|
||||||
fi
|
fi
|
||||||
mkdir ../eval
|
mkdir ../eval
|
||||||
cd ../eval || exit
|
cd ../eval || exit
|
||||||
|
|
||||||
# luanch
|
# launch
|
||||||
python ${BASEPATH}/../eval.py \
|
python ${BASEPATH}/../eval.py \
|
||||||
--platform=$1 \
|
--platform=$1 \
|
||||||
--dataset_path=$2 \
|
--dataset_path=$2 \
|
||||||
--checkpoint_path=$3 \
|
--checkpoint_path=$3 \
|
||||||
&> ../infer.log & # dataset val folder path
|
&> infer.log & # dataset val folder path
|
||||||
|
|
|
@@ -30,7 +30,7 @@ run_ascend()
|
||||||
|
|
||||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
||||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
||||||
if [ -d "train" ];
|
if [ -d "../train" ];
|
||||||
then
|
then
|
||||||
rm -rf ../train
|
rm -rf ../train
|
||||||
fi
|
fi
|
||||||
|
@@ -43,39 +43,7 @@ run_ascend()
|
||||||
--training_script=${BASEPATH}/../train.py \
|
--training_script=${BASEPATH}/../train.py \
|
||||||
--dataset_path=$5 \
|
--dataset_path=$5 \
|
||||||
--pre_trained=$6 \
|
--pre_trained=$6 \
|
||||||
--platform=$1 &> ../train.log & # dataset train folder
|
--platform=$1 &> train.log & # dataset train folder
|
||||||
}
|
|
||||||
|
|
||||||
run_gpu()
|
|
||||||
{
|
|
||||||
if [ $2 -lt 1 ] && [ $2 -gt 8 ]
|
|
||||||
then
|
|
||||||
echo "error: DEVICE_NUM=$2 is not in (1-8)"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -d $4 ]
|
|
||||||
then
|
|
||||||
echo "error: DATASET_PATH=$4 is not a directory"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
|
||||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
|
||||||
if [ -d "train" ];
|
|
||||||
then
|
|
||||||
rm -rf ../train
|
|
||||||
fi
|
|
||||||
mkdir ../train
|
|
||||||
cd ../train || exit
|
|
||||||
|
|
||||||
export CUDA_VISIBLE_DEVICES="$3"
|
|
||||||
mpirun -n $2 --allow-run-as-root \
|
|
||||||
python ${BASEPATH}/../train.py \
|
|
||||||
--dataset_path=$4 \
|
|
||||||
--platform=$1 \
|
|
||||||
--pre_trained=$5 \
|
|
||||||
&> ../train.log & # dataset train folder
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ $# -gt 6 ] || [ $# -lt 4 ]
|
if [ $# -gt 6 ] || [ $# -lt 4 ]
|
||||||
|
|
|
@@ -35,21 +35,3 @@ config_ascend = ed({
|
||||||
"keep_checkpoint_max": 200,
|
"keep_checkpoint_max": 200,
|
||||||
"save_checkpoint_path": "./checkpoint",
|
"save_checkpoint_path": "./checkpoint",
|
||||||
})
|
})
|
||||||
|
|
||||||
config_gpu = ed({
|
|
||||||
"num_classes": 1000,
|
|
||||||
"image_height": 224,
|
|
||||||
"image_width": 224,
|
|
||||||
"batch_size": 64,
|
|
||||||
"epoch_size": 200,
|
|
||||||
"warmup_epochs": 4,
|
|
||||||
"lr": 0.5,
|
|
||||||
"momentum": 0.9,
|
|
||||||
"weight_decay": 4e-5,
|
|
||||||
"label_smooth": 0.1,
|
|
||||||
"loss_scale": 1024,
|
|
||||||
"save_checkpoint": True,
|
|
||||||
"save_checkpoint_epochs": 1,
|
|
||||||
"keep_checkpoint_max": 200,
|
|
||||||
"save_checkpoint_path": "./checkpoint",
|
|
||||||
})
|
|
||||||
|
|
|
@@ -41,17 +41,10 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
|
||||||
if rank_size == 1:
|
if rank_size == 1:
|
||||||
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
|
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
|
||||||
else:
|
else:
|
||||||
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
|
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False,
|
||||||
num_shards=rank_size, shard_id=rank_id)
|
num_shards=rank_size, shard_id=rank_id)
|
||||||
else:
|
else:
|
||||||
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
|
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
|
||||||
elif platform == "GPU":
|
|
||||||
if do_train:
|
|
||||||
from mindspore.communication.management import get_rank, get_group_size
|
|
||||||
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
|
|
||||||
num_shards=get_group_size(), shard_id=get_rank())
|
|
||||||
else:
|
|
||||||
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unsupport platform.")
|
raise ValueError("Unsupport platform.")
|
||||||
|
|
||||||
|
|
|
@@ -18,6 +18,7 @@ import sys
|
||||||
import json
|
import json
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
import shutil
|
||||||
|
import platform
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
|
||||||
|
@@ -80,7 +81,8 @@ def main():
|
||||||
device_ips[device_id] = device_ip
|
device_ips[device_id] = device_ip
|
||||||
print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
|
print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
|
||||||
hccn_table = {}
|
hccn_table = {}
|
||||||
hccn_table['board_id'] = '0x0020'
|
arch = platform.processor()
|
||||||
|
hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
|
||||||
hccn_table['chip_info'] = '910'
|
hccn_table['chip_info'] = '910'
|
||||||
hccn_table['deploy_mode'] = 'lab'
|
hccn_table['deploy_mode'] = 'lab'
|
||||||
hccn_table['group_count'] = '1'
|
hccn_table['group_count'] = '1'
|
||||||
|
|
|
@@ -21,7 +21,6 @@ import numpy as np
|
||||||
from mindspore import context
|
from mindspore import context
|
||||||
from mindspore import Tensor
|
from mindspore import Tensor
|
||||||
from mindspore import nn
|
from mindspore import nn
|
||||||
from mindspore.parallel._auto_parallel_context import auto_parallel_context
|
|
||||||
from mindspore.nn.optim.momentum import Momentum
|
from mindspore.nn.optim.momentum import Momentum
|
||||||
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
|
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
|
||||||
from mindspore.nn.loss.loss import _Loss
|
from mindspore.nn.loss.loss import _Loss
|
||||||
|
@@ -57,9 +56,6 @@ if args_opt.platform == "Ascend":
|
||||||
context.set_context(mode=context.GRAPH_MODE,
|
context.set_context(mode=context.GRAPH_MODE,
|
||||||
device_target="Ascend",
|
device_target="Ascend",
|
||||||
device_id=device_id, save_graphs=False)
|
device_id=device_id, save_graphs=False)
|
||||||
elif args_opt.platform == "GPU":
|
|
||||||
context.set_context(mode=context.GRAPH_MODE,
|
|
||||||
device_target="GPU", save_graphs=False)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unsupport platform.")
|
raise ValueError("Unsupport platform.")
|
||||||
|
|
||||||
|
@@ -191,7 +187,6 @@ if __name__ == '__main__':
|
||||||
if run_distribute:
|
if run_distribute:
|
||||||
context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
|
context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
|
||||||
parameter_broadcast=True, mirror_mean=True)
|
parameter_broadcast=True, mirror_mean=True)
|
||||||
auto_parallel_context().set_all_reduce_fusion_split_indices([140])
|
|
||||||
init()
|
init()
|
||||||
|
|
||||||
epoch_size = config_ascend.epoch_size
|
epoch_size = config_ascend.epoch_size
|
||||||
|
|
|
@@ -15,8 +15,7 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
if [ $# != 3 ]
|
if [ $# != 3 ]
|
||||||
then
|
then
|
||||||
echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH] \
|
echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
|
||||||
GPU: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@@ -46,51 +46,16 @@ run_ascend()
|
||||||
--device_target=$1 &> train.log & # dataset train folder
|
--device_target=$1 &> train.log & # dataset train folder
|
||||||
}
|
}
|
||||||
|
|
||||||
run_gpu()
|
|
||||||
{
|
|
||||||
if [ $2 -lt 1 ] && [ $2 -gt 8 ]
|
|
||||||
then
|
|
||||||
echo "error: DEVICE_NUM=$2 is not in (1-8)"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -d $4 ]
|
|
||||||
then
|
|
||||||
echo "error: DATASET_PATH=$4 is not a directory"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
|
|
||||||
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
|
|
||||||
if [ -d "../train" ];
|
|
||||||
then
|
|
||||||
rm -rf ../train
|
|
||||||
fi
|
|
||||||
mkdir ../train
|
|
||||||
cd ../train || exit
|
|
||||||
|
|
||||||
export CUDA_VISIBLE_DEVICES="$3"
|
|
||||||
mpirun -n $2 --allow-run-as-root \
|
|
||||||
python ${BASEPATH}/../train.py \
|
|
||||||
--dataset_path=$4 \
|
|
||||||
--platform=$1 \
|
|
||||||
--pre_trained=$5 \
|
|
||||||
&> train.log & # dataset train folder
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ $# -gt 6 ] || [ $# -lt 4 ]
|
if [ $# -gt 6 ] || [ $# -lt 4 ]
|
||||||
then
|
then
|
||||||
echo "Usage:\n \
|
echo "Usage:\n \
|
||||||
Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
|
Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
|
||||||
GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
|
|
||||||
"
|
"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $1 = "Ascend" ] ; then
|
if [ $1 = "Ascend" ] ; then
|
||||||
run_ascend "$@"
|
run_ascend "$@"
|
||||||
elif [ $1 = "GPU" ] ; then
|
|
||||||
run_gpu "$@"
|
|
||||||
else
|
else
|
||||||
echo "not support platform"
|
echo "not support platform"
|
||||||
fi;
|
fi;
|
||||||
|
|
|
@@ -23,7 +23,7 @@ from mindspore.train.model import Model, ParallelMode
|
||||||
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
|
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
|
||||||
from mindspore.train.loss_scale_manager import FixedLossScaleManager
|
from mindspore.train.loss_scale_manager import FixedLossScaleManager
|
||||||
from mindspore.train.serialization import load_checkpoint
|
from mindspore.train.serialization import load_checkpoint
|
||||||
from mindspore.communication.management import init, get_rank, get_group_size
|
from mindspore.communication.management import init
|
||||||
import mindspore.nn as nn
|
import mindspore.nn as nn
|
||||||
import mindspore.common.initializer as weight_init
|
import mindspore.common.initializer as weight_init
|
||||||
from models.resnet_quant import resnet50_quant
|
from models.resnet_quant import resnet50_quant
|
||||||
|
@@ -57,13 +57,8 @@ if __name__ == '__main__':
|
||||||
mirror_mean=True)
|
mirror_mean=True)
|
||||||
auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
|
auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
|
||||||
ckpt_save_dir = config.save_checkpoint_path
|
ckpt_save_dir = config.save_checkpoint_path
|
||||||
elif target == "GPU":
|
else:
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
|
raise ValueError("Unsupport platform.")
|
||||||
init("nccl")
|
|
||||||
context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
|
|
||||||
mirror_mean=True)
|
|
||||||
ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
|
|
||||||
|
|
||||||
epoch_size = config.epoch_size
|
epoch_size = config.epoch_size
|
||||||
net = resnet50_quant(class_num=config.class_num)
|
net = resnet50_quant(class_num=config.class_num)
|
||||||
net.set_train(True)
|
net.set_train(True)
|
||||||
|
|
Loading…
Reference in New Issue