!9367 remove redundant parameters of resnext50 and inceptionv3

From: @zhouyaqiang0
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2020-12-03 17:22:02 +08:00 committed by Gitee
commit 921d344594
8 changed files with 33 additions and 52 deletions

View File

@ -90,8 +90,6 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
```python
Major parameters in train.py and config.py are:
'random_seed' # fix random seed
'rank' # local rank of distributed
'group_size' # world size of distributed
'work_nums' # number of workers to read the data
'decay_method' # learning rate scheduler mode
"loss_scale" # loss scale

View File

@ -20,8 +20,6 @@ from easydict import EasyDict as edict
config_gpu = edict({
'random_seed': 1,
'rank': 0,
'group_size': 1,
'work_nums': 8,
'decay_method': 'cosine',
"loss_scale": 1,
@ -47,8 +45,6 @@ config_gpu = edict({
config_ascend = edict({
'random_seed': 1,
'rank': 0,
'group_size': 1,
'work_nums': 8,
'decay_method': 'cosine',
"loss_scale": 1024,

View File

@ -4,9 +4,9 @@
- [Model Architecture](#model-architecture)
- [Dataset](#dataset)
- [Features](#features)
- [Mixed Precision](#mixed-precision)
- [Mixed Precision](#mixed-precision)
- [Environment Requirements](#environment-requirements)
- [Quick Start](#quick-start)
- [Quick Start](#quick-start)
- [Script Description](#script-description)
- [Script and Sample Code](#script-and-sample-code)
- [Script Parameters](#script-parameters)
@ -32,35 +32,33 @@ The overall network architecture of ResNeXt is show below:
[Link](https://arxiv.org/abs/1611.05431)
# [Dataset](#contents)
Dataset used: [imagenet](http://www.image-net.org/)
- Dataset size: ~125G, 1.2W color images in 1000 classes
- Train: 120G, 1.2W images
- Test: 5G, 50000 images
- Data format: RGB images.
- Note: Data will be processed in src/dataset.py
- Train: 120G, 1.2W images
- Test: 5G, 50000 images
- Data format: RGB images
- Note: Data will be processed in src/dataset.py
# [Features](#contents)
## [Mixed Precision](#contents)
The [mixed precision](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware.
For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users can check the reduced-precision operators by enabling the INFO log and then searching for "reduce precision".
# [Environment Requirements](#contents)
- Hardware (Ascend/GPU)
- Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
- Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
- Framework
- [MindSpore](https://www.mindspore.cn/install/en)
- [MindSpore](https://www.mindspore.cn/install/en)
- For more information, please check the resources below
- [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
- [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
- [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
- [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
# [Script Description](#contents)
@ -68,9 +66,9 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
```python
.
└─resnext50
└─resnext50
├─README.md
├─scripts
├─scripts
├─run_standalone_train.sh # launch standalone training for ascend(1p)
├─run_distribute_train.sh # launch distributed training for ascend(8p)
├─run_standalone_train_for_gpu.sh # launch standalone training for gpu(1p)
@ -100,14 +98,14 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
├──train.py # train net
├──export.py # export mindir script
├──mindspore_hub_conf.py # mindspore hub interface
```
## [Script Parameters](#contents)
Parameters for both training and evaluating can be set in config.py.
```
```config
"image_height": '224,224' # image size
"num_classes": 1000, # dataset class number
"per_batch_size": 128, # batch size of input tensor
@ -118,7 +116,6 @@ Parameters for both training and evaluating can be set in config.py.
"eta_min": 0, # eta_min in cosine_annealing scheduler
"T_max": 150, # T-max in cosine_annealing scheduler
"max_epoch": 150, # max epoch num to train the model
"backbone": 'resnext50', # backbone metwork
"warmup_epochs" : 1, # warmup epoch
"weight_decay": 0.0001, # weight decay
"momentum": 0.9, # momentum
@ -135,18 +132,18 @@ Parameters for both training and evaluating can be set in config.py.
## [Training Process](#contents)
#### Usage
### Usage
You can start training by python script:
```
```script
python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed 0
```
or shell script:
```
Ascend:
```script
Ascend:
# distribute training example(8p)
sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
# standalone training
@ -180,16 +177,17 @@ You can find checkpoint file together with result in log.
You can start evaluation by python script:
```
```script
python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext.ckpt
```
or shell script:
```
```script
# Evaluation
sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
```
PLATFORM is Ascend or GPU, default is Ascend.
#### Launch
@ -202,8 +200,8 @@ sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext50_100.ckp
#### Result
Evaluation results will be stored in the scripts path. Under this path, you can find results like the following in the log.
```
```log
acc=78.16%(TOP1)
acc=93.88%(TOP5)
```
@ -212,7 +210,7 @@ acc=93.88%(TOP5)
Change the export mode and export file in `src/config.py`, and run `export.py`.
```
```script
python export.py --platform PLATFORM --pretrained CKPT_PATH
```
@ -242,17 +240,16 @@ python export.py --platform PLATFORM --pretrained CKPT_PATH
| -------------------------- | ----------------------------- | ------------------------- | -------------------- |
| Resource | Ascend 910 | NV SMX2 V100-32G | Ascend 310 |
| uploaded Date | 06/30/2020 | 07/23/2020 | 07/23/2020 |
| MindSpore Version | 0.5.0 | 0.6.0 | 0.6.0 |
| MindSpore Version | 0.5.0 | 0.6.0 | 0.6.0 |
| Dataset | ImageNet, 1.2W | ImageNet, 1.2W | ImageNet, 1.2W |
| batch_size | 1 | 1 | 1 |
| outputs | probability | probability | probability |
| Accuracy | acc=78.16%(TOP1) | acc=78.05%(TOP1) | |
# [Description of Random Situation](#contents)
In dataset.py, we set the seed inside the "create_dataset" function. We also use a random seed in train.py.
# [ModelZoo Homepage](#contents)
Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).

View File

@ -74,7 +74,6 @@ def parse_args(cloud_args=None):
args = merge_args(args, cloud_args)
args.image_size = config.image_size
args.num_classes = config.num_classes
args.backbone = config.backbone
args.rank = config.rank
args.group_size = config.group_size
@ -201,9 +200,7 @@ def test(cloud_args=None):
max_epoch=1, rank=args.rank, group_size=args.group_size,
mode='eval')
eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
if network is None:
raise NotImplementedError('not implement {}'.format(args.backbone))
network = get_network(num_classes=args.num_classes, platform=args.platform)
load_pretrain_model(model, network, args)

View File

@ -33,7 +33,6 @@ def parse_args():
args, _ = parser.parse_known_args()
args.image_size = config.image_size
args.num_classes = config.num_classes
args.backbone = config.backbone
args.image_size = list(map(int, config.image_size.split(',')))
args.image_height = args.image_size[0]
@ -46,7 +45,7 @@ if __name__ == '__main__':
args_export = parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args_export.platform)
net = get_network(args_export.backbone, num_classes=args_export.num_classes, platform=args_export.platform)
net = get_network(num_classes=args_export.num_classes, platform=args_export.platform)
param_dict = load_checkpoint(args_export.pretrained)
load_param_into_net(net, param_dict)

View File

@ -26,7 +26,6 @@ config = ed({
"eta_min": 0,
"T_max": 150,
"max_epoch": 150,
"backbone": 'resnext50',
"warmup_epochs": 1,
"weight_decay": 0.0001,

View File

@ -94,7 +94,5 @@ class Resnet(ImageClassificationNetwork):
def get_network(backbone_name, **kwargs):
if backbone_name in ['resnext50']:
return Resnet(backbone_name, **kwargs)
return None
def get_network(**kwargs):
return Resnet('resnext50', **kwargs)

View File

@ -131,7 +131,6 @@ def parse_args(cloud_args=None):
args.eta_min = config.eta_min
args.T_max = config.T_max
args.max_epoch = config.max_epoch
args.backbone = config.backbone
args.warmup_epochs = config.warmup_epochs
args.weight_decay = config.weight_decay
args.momentum = config.momentum
@ -213,9 +212,7 @@ def train(cloud_args=None):
# network
args.logger.important_info('start create network')
# get network and init
network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
if network is None:
raise NotImplementedError('not implement {}'.format(args.backbone))
network = get_network(num_classes=args.num_classes, platform=args.platform)
load_pretrain_model(args.pretrained, network, args)