forked from mindspore-Ecosystem/mindspore
!9367 remove redundant parameters of resnext50 and inceptionv3
From: @zhouyaqiang0

commit 921d344594
@@ -90,8 +90,6 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
 ```python
 Major parameters in train.py and config.py are:
 'random_seed'                # fix random seed
-'rank'                       # local rank of distributed
-'group_size'                 # world size of distributed
 'work_nums'                  # number of workers to read the data
 'decay_method'               # learning rate scheduler mode
 "loss_scale"                 # loss scale
@@ -20,8 +20,6 @@ from easydict import EasyDict as edict
 config_gpu = edict({
     'random_seed': 1,
-    'rank': 0,
-    'group_size': 1,
     'work_nums': 8,
     'decay_method': 'cosine',
     "loss_scale": 1,
@@ -47,8 +45,6 @@ config_gpu = edict({
 config_ascend = edict({
     'random_seed': 1,
-    'rank': 0,
-    'group_size': 1,
     'work_nums': 8,
     'decay_method': 'cosine',
     "loss_scale": 1024,
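The two dicts above are plain `easydict` containers. As a hedged illustration of how such a config is consumed (the trimmed-down dict below only mirrors entries visible in this diff; the access pattern is standard `easydict` behavior, not code quoted from the repo):

```python
from easydict import EasyDict as edict

# Stand-in for config_gpu, keeping only entries shown in this diff.
config_gpu = edict({
    'random_seed': 1,
    'work_nums': 8,
    'decay_method': 'cosine',
    'loss_scale': 1,
})

# edict exposes keys as attributes, which is how the training scripts read them.
assert config_gpu.work_nums == config_gpu['work_nums'] == 8
```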
@@ -4,9 +4,9 @@
 - [Model Architecture](#model-architecture)
 - [Dataset](#dataset)
 - [Features](#features)
     - [Mixed Precision](#mixed-precision)
 - [Environment Requirements](#environment-requirements)
 - [Quick Start](#quick-start)
 - [Script Description](#script-description)
     - [Script and Sample Code](#script-and-sample-code)
     - [Script Parameters](#script-parameters)
@@ -32,35 +32,33 @@ The overall network architecture of ResNeXt is show below:
 [Link](https://arxiv.org/abs/1611.05431)

 # [Dataset](#contents)

 Dataset used: [imagenet](http://www.image-net.org/)

 - Dataset size: ~125G, 1.2W colorful images in 1000 classes
     - Train: 120G, 1.2W images
     - Test: 5G, 50000 images
-    - Data format: RGB images.
+    - Data format: RGB images
     - Note: Data will be processed in src/dataset.py

 # [Features](#contents)

 ## [Mixed Precision](#contents)

 The [mixed precision](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware.

 For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching ‘reduce precision’.
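As a concrete sketch of the mixed-precision setup described above, the snippet below wires a network into MindSpore's `Model` wrapper with an `amp_level` and a fixed loss scale. The `nn.Dense` stand-in, the learning rate, and the `O2` level are assumptions for illustration, not values taken from this repo:

```python
import mindspore.nn as nn
from mindspore.train.model import Model
from mindspore.train.loss_scale_manager import FixedLossScaleManager

net = nn.Dense(16, 10)  # stand-in; the repo would pass the ResNeXt50 built by get_network()
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(net.trainable_params(), learning_rate=0.05, momentum=0.9)

# amp_level="O2" runs most operators in FP16 while keeping a FP32 master copy
# of the weights; the fixed loss scale (cf. "loss_scale": 1024 in config_ascend)
# compensates for FP16's reduced dynamic range.
model = Model(net, loss_fn=loss, optimizer=opt, amp_level="O2",
              loss_scale_manager=FixedLossScaleManager(1024, drop_overflow_update=False))
```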
 # [Environment Requirements](#contents)

 - Hardware(Ascend/GPU)
-    - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
+    - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
 - Framework
     - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below:
     - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
     - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)

 # [Script description](#contents)
@@ -68,9 +66,9 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
 ```python
 .
 └─resnext50
   ├─README.md
   ├─scripts
     ├─run_standalone_train.sh         # launch standalone training for ascend(1p)
     ├─run_distribute_train.sh         # launch distributed training for ascend(8p)
     ├─run_standalone_train_for_gpu.sh # launch standalone training for gpu(1p)
@@ -100,14 +98,14 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
     ├──train.py                       # train net
     ├──export.py                      # export mindir script
     ├──mindspore_hub_conf.py          # mindspore hub interface
 ```

 ## [Script Parameters](#contents)

 Parameters for both training and evaluating can be set in config.py.

-```
+```config
 "image_size": '224,224'               # image size (height,width)
 "num_classes": 1000,                  # dataset class number
 "per_batch_size": 128,                # batch size of input tensor
@@ -118,7 +116,6 @@ Parameters for both training and evaluating can be set in config.py.
 "eta_min": 0,                         # eta_min in cosine_annealing scheduler
 "T_max": 150,                         # T-max in cosine_annealing scheduler
 "max_epoch": 150,                     # max epoch num to train the model
-"backbone": 'resnext50',              # backbone network
 "warmup_epochs": 1,                   # warmup epoch
 "weight_decay": 0.0001,               # weight decay
 "momentum": 0.9,                      # momentum
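The `eta_min`/`T_max` entries parameterize a standard cosine-annealing schedule. A hedged sketch of the formula they control (the repo's own LR helper may differ in warmup handling; the 0.05 base rate below is illustrative):

```python
import math

def cosine_annealing_lr(base_lr, eta_min, t_max, epoch):
    """lr(t) = eta_min + (base_lr - eta_min) * (1 + cos(pi * t / T_max)) / 2"""
    return eta_min + (base_lr - eta_min) * (1 + math.cos(math.pi * epoch / t_max)) / 2

# With eta_min=0 and T_max=150 as in config.py, the rate decays from base_lr
# at epoch 0 to 0 at epoch 150; at the midpoint it is exactly half of base_lr.
print(cosine_annealing_lr(0.05, 0, 150, 75))  # 0.025
```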
@@ -135,18 +132,18 @@ Parameters for both training and evaluating can be set in config.py.
 ## [Training Process](#contents)

-#### Usage
+### Usage

 You can start training by python script:

-```
+```script
 python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed 0
 ```

 or shell script:

-```
+```script
 Ascend:
 # distribute training example(8p)
 sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
 # standalone training
@@ -180,16 +177,17 @@ You can find checkpoint file together with result in log.

 You can start evaluation by python script:

-```
+```script
 python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext.ckpt
 ```

 or shell script:

-```
+```script
 # Evaluation
 sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
 ```

 PLATFORM is Ascend or GPU, default is Ascend.

 #### Launch
@@ -202,8 +200,8 @@ sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext50_100.ckp
 #### Result

 Evaluation result will be stored in the scripts path. Under this, you can find result like the following in log.

-```
+```log
 acc=78.16%(TOP1)
 acc=93.88%(TOP5)
 ```
@@ -212,7 +210,7 @@ acc=93.88%(TOP5)

 Change the export mode and export file in `src/config.py`, and run `export.py`.

-```
+```script
 python export.py --platform PLATFORM --pretrained CKPT_PATH
 ```
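For reference, a minimal sketch of what an export script like this typically does with MindSpore's serialization API; the `src.image_classification` import path, the checkpoint name, and the 224x224 input shape are assumptions inferred from this diff rather than quoted from export.py:

```python
import numpy as np
from mindspore import Tensor, context
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from src.image_classification import get_network  # module path assumed from this diff

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

net = get_network(num_classes=1000, platform="Ascend")
load_param_into_net(net, load_checkpoint("resnext50.ckpt"))

# Trace the graph with a dummy NCHW input matching image_size '224,224'.
dummy = Tensor(np.zeros([1, 3, 224, 224], np.float32))
export(net, dummy, file_name="resnext50", file_format="MINDIR")
```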
@@ -242,17 +240,16 @@ python export.py --platform PLATFORM --pretrained CKPT_PATH
 | -------------------------- | ----------------- | ---------------- | -------------- |
 | Resource                   | Ascend 910        | NV SMX2 V100-32G | Ascend 310     |
 | uploaded Date              | 06/30/2020        | 07/23/2020       | 07/23/2020     |
-| MindSpore Version          | 0.5.0             | 0.6.0            | 0.6.0          |
 | MindSpore Version          | 0.5.0             | 0.6.0            | 0.6.0          |
 | Dataset                    | ImageNet, 1.2W    | ImageNet, 1.2W   | ImageNet, 1.2W |
 | batch_size                 | 1                 | 1                | 1              |
 | outputs                    | probability       | probability      | probability    |
 | Accuracy                   | acc=78.16%(TOP1)  | acc=78.05%(TOP1) |                |

 # [Description of Random Situation](#contents)

 In dataset.py, we set the seed inside the "create_dataset" function. We also use random seed in train.py.
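As a hedged illustration of that seeding pattern (the helper name and the exact set of RNGs pinned here are assumptions, not code quoted from dataset.py or train.py):

```python
import random
import numpy as np
import mindspore.dataset as de

def set_random_seed(seed):
    """Pin the Python, NumPy and MindSpore dataset RNGs for reproducible runs."""
    random.seed(seed)
    np.random.seed(seed)
    de.config.set_seed(seed)  # controls shuffle order inside create_dataset

set_random_seed(1)  # matches 'random_seed': 1 in config.py
```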
 # [ModelZoo Homepage](#contents)

 Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
@@ -74,7 +74,6 @@ def parse_args(cloud_args=None):
     args = merge_args(args, cloud_args)
     args.image_size = config.image_size
     args.num_classes = config.num_classes
-    args.backbone = config.backbone
     args.rank = config.rank
     args.group_size = config.group_size
@@ -201,9 +200,7 @@ def test(cloud_args=None):
                                      max_epoch=1, rank=args.rank, group_size=args.group_size,
                                      mode='eval')
     eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1)
-    network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
-    if network is None:
-        raise NotImplementedError('not implement {}'.format(args.backbone))
+    network = get_network(num_classes=args.num_classes, platform=args.platform)

     load_pretrain_model(model, network, args)
@@ -33,7 +33,6 @@ def parse_args():
     args, _ = parser.parse_known_args()
     args.image_size = config.image_size
     args.num_classes = config.num_classes
-    args.backbone = config.backbone

     args.image_size = list(map(int, config.image_size.split(',')))
     args.image_height = args.image_size[0]
@@ -46,7 +45,7 @@ if __name__ == '__main__':
     args_export = parse_args()
     context.set_context(mode=context.GRAPH_MODE, device_target=args_export.platform)

-    net = get_network(args_export.backbone, num_classes=args_export.num_classes, platform=args_export.platform)
+    net = get_network(num_classes=args_export.num_classes, platform=args_export.platform)

     param_dict = load_checkpoint(args_export.pretrained)
     load_param_into_net(net, param_dict)
@@ -26,7 +26,6 @@ config = ed({
     "eta_min": 0,
     "T_max": 150,
     "max_epoch": 150,
-    "backbone": 'resnext50',
     "warmup_epochs": 1,

     "weight_decay": 0.0001,
@@ -94,7 +94,5 @@ class Resnet(ImageClassificationNetwork):


-def get_network(backbone_name, **kwargs):
-    if backbone_name in ['resnext50']:
-        return Resnet(backbone_name, **kwargs)
-    return None
+def get_network(**kwargs):
+    return Resnet('resnext50', **kwargs)
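With the factory hard-wired to ResNeXt50, callers drop the backbone argument and the `None` check, as the surrounding train.py and eval.py hunks show. A minimal sketch of the new call site (the module path and argument values are illustrative assumptions):

```python
from src.image_classification import get_network  # module path assumed from this diff

# The simplified factory always returns a ResNeXt50, so callers no longer
# pass a backbone name or guard against a None return value.
network = get_network(num_classes=1000, platform='Ascend')
```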
@@ -131,7 +131,6 @@ def parse_args(cloud_args=None):
     args.eta_min = config.eta_min
     args.T_max = config.T_max
     args.max_epoch = config.max_epoch
-    args.backbone = config.backbone
     args.warmup_epochs = config.warmup_epochs
     args.weight_decay = config.weight_decay
     args.momentum = config.momentum
@@ -213,9 +212,7 @@ def train(cloud_args=None):
     # network
     args.logger.important_info('start create network')
     # get network and init
-    network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform)
-    if network is None:
-        raise NotImplementedError('not implement {}'.format(args.backbone))
+    network = get_network(num_classes=args.num_classes, platform=args.platform)

     load_pretrain_model(args.pretrained, network, args)