!22634 update readme

Merge pull request !22634 from hjxcoder/sr_ea_branch5
i-robot 2021-09-06 04:11:21 +00:00 committed by Gitee
commit 12be525ef1
10 changed files with 153 additions and 399 deletions

View File

@ -109,6 +109,33 @@ The benchmark datasets can be downloaded as follows:
[DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar).
After downloading the corresponding dataset to the target location, you can configure and use the dataset separately for training and testing.
Dataset configuration parameters in esr_ea/esr_ea.yml:
```yaml
nas:
    dataset:
        type: DIV2K
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr    # Directory where the HR images are located
            root_LR: /cache/datasets/DIV2K/div2k_train/lr    # Directory where the LR images are located
            upscale: 2          # Up scale
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true       # shuffle
            batch_size: 16      # batch size
            fixed_size: true
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            fixed_size: true
            crop: 64
```
## Requirements
### Hardware (Ascend)
@ -135,12 +162,12 @@ esr_ea
├── image
│ ├── esr_arch.png # the illustration of esr_ea network
│ └── esr_block.png # the illustration of the esr block
├── readme.md # Readme
├── README.md # Readme
├── scripts
│ ├── run_distributed.sh # pre-training script for all tasks
│ ├── run_standalone.sh # shell script for standalone train on ascend
│ ├── run_distributed.sh # shell script for distributed train on ascend
└── src
├── esr_ea.yml # options/hyper-parameters of esr_ea
└── esr_ea_distributed.yml # options/hyper-parameters of esr_ea
└── esr_ea.yml # options/hyper-parameters of esr_ea
```
@ -152,14 +179,29 @@ esr_ea
### For training
- Standalone Ascend Training:
```bash
python3 train.py
sh scripts/run_standalone.sh
```
- Distributed Ascend Training:
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
```
For distributed training, an HCCL configuration file in JSON format needs to be created in advance.
Please follow the instructions in the link below:
<https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools>.
`$RANK_TABLE_FILE` is required when you are running a distributed task on Ascend.
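The exact rank table depends on your environment. As a rough illustration only (the field layout below follows the single-server format produced by hccl_tools, while the host IP, device IPs, and output file name are placeholders), such a file could be generated with a short script:
```python
# Rough illustration only: field names follow the single-server rank table produced
# by hccl_tools; the host IP, device IPs, and file name here are placeholders.
import json

rank_table = {
    "version": "1.0",
    "server_count": "1",
    "server_list": [
        {
            "server_id": "10.0.0.1",  # host IP of the Ascend server (placeholder)
            "device": [
                {"device_id": str(i), "device_ip": f"192.168.100.{i + 1}", "rank_id": str(i)}
                for i in range(8)     # one entry per NPU used for training
            ],
            "host_nic_ip": "reserve",
        }
    ],
    "status": "completed",
}

with open("rank_table_8pcs.json", "w") as f:
    json.dump(rank_table, f, indent=4)
```
Pass the path of the generated JSON file as `[RANK_TABLE_FILE]` to `scripts/run_distributed.sh`.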
> Alternatively, you can run the following script for all tasks.
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
python3 train.py
```
## Evaluation

View File

@ -1 +1 @@
noah-vega
noah-vega==1.6.1

View File

@ -33,5 +33,5 @@ fi
RANK_TABLE_FILE=$(realpath $1)
export RANK_TABLE_FILE
python3 -m vega.tools.run_pipeline ../src/esr_ea_distributed.yml -b m -d NPU \
python3 -m vega.tools.run_pipeline ../src/esr_ea.yml -b m -d NPU \
> train.log 2>&1 &

View File

@ -1,182 +0,0 @@
general:
    backend: mindspore
    parallel_search: True
    parallel_fully_train: True

pipeline: [nas, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

nas:
    pipe_step:
        type: SearchPipeStep
    dataset:
        type: DIV2K
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
            crop: 64
            hflip: true
            vflip: true
            rot90: true
            shuffle: true
            batch_size: 16
            fixed_size: true
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            fixed_size: true
            crop: 64
    search_space:
        type: SearchSpace
        modules: ['esrbody']
        esrbody:
            type: ESRN
            block_type: [S,G,C]
            conv_num: [4,6,8]
            growth_rate: [8,16,24,32]
            type_prob: [1,1,1]
            conv_prob: [1,1,1]
            growth_prob: [1,1,1,1]
            G0: 32
            scale: 2
    search_algorithm:
        type: ESRSearch
        codec: ESRCodec
        policy:
            num_generation: 20
            num_individual: 8
            num_elitism: 4
            mutation_rate: 0.05
        range:
            node_num: 20
            min_active: 16
            max_params: 325000
            min_params: 315000
    trainer:
        type: Trainer
        callbacks: ESRTrainerCallback
        epochs: 500
        optimizer:
            type: Adam
            params:
                lr: 0.0001  # 0.001 for mindspore
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [100,200]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
                max_rgb: 255
        scale: 2
        cuda: True
        seed: 10

fully_train:
    pipe_step:
        type: TrainPipeStep
        models_folder: "{local_base_path}/output/nas/"
    dataset:
        ref: nas.dataset
    trainer:
        type: Trainer
        callbacks: ESRTrainerCallback
        node_num: 20
        epochs: 15000
        optimizer:
            type: Adam
            params:
                lr: 0.0001
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [8000,12000,13500,14500]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
                max_rgb: 255
        scale: 2
        seed: 10
        range:
            node_num: 20
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR

benchmark_DIV2K:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        type: DIV2K
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
                params:
                    scale: 2
                    max_rgb: 255

benchmark_Set5:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set5
        test:
            root_HR: /cache/datasets/DIV2K/Set5/hr
            root_LR: /cache/datasets/DIV2K/Set5/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_Set14:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set14
        test:
            root_HR: /cache/datasets/DIV2K/Set14/hr
            root_LR: /cache/datasets/DIV2K/Set14/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_BSDS100:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: BSDS100
        test:
            root_HR: /cache/datasets/DIV2K/BSDS100/hr
            root_LR: /cache/datasets/DIV2K/BSDS100/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

View File

@ -1,7 +1,7 @@
general:
backend: mindspore
parallel_search: False
parallel_fully_train: False
parallel_search: True
parallel_fully_train: True
pipeline: [nas, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

View File

@ -3,7 +3,8 @@
- [Contents](#contents)
- [Algorithm Introduction](#algorithm-introduction)
- [Algorithm Principle](#algorithm-principle)
- [Search Space and Search Policy](#search-space-and-search-policy)
- [Search Space and Search Policy](#search-space-and-search-policy)
- [Configuring](#configuring)
- [Dataset](#dataset)
- [Requirements](#requirements)
- [Hardware (Ascend)](#hardware-ascend)
@ -44,7 +45,7 @@ CCRN-NAS is a network architecture dedicated to lightweight networks. The CCRN-N
Pipeline provides a sample for CCRN-NAS architecture search. It searches for the combination of the three modules to optimize the network architecture.
## Search Space and Search Policy
### Search Space and Search Policy
The search space of the modified SRResNet includes the number of blocks and the number of channels. We provide two search methods: random search (RS) and brute force (BF). In both methods, users define the range of the block number and the channel number for each convolution layer. RS randomly generates models from these ranges until the number of models reaches max_count, whereas BF trains all candidate models in the ranges.
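As a minimal sketch (not VEGA's implementation; the block and channel ranges below are made up for illustration), the two search modes can be pictured as follows:
```python
# Minimal sketch of the two search modes, with made-up block/channel ranges.
import itertools
import random

block_range = list(range(10, 21))   # candidate block numbers (assumed range)
channel_range = [16, 32, 48, 64]    # candidate channel numbers (assumed range)

def random_search(max_count):
    """RS: sample (blocks, channels) pairs at random until max_count models are collected."""
    return [(random.choice(block_range), random.choice(channel_range)) for _ in range(max_count)]

def brute_force():
    """BF: enumerate every (blocks, channels) combination; all of them are trained."""
    return list(itertools.product(block_range, channel_range))

print(len(random_search(max_count=32)), "models sampled by RS")
print(len(brute_force()), "models enumerated by BF")
```
In practice these ranges and max_count come from the configuration file described in the Configuring section below.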
@ -56,12 +57,85 @@ The search space of CCRN-NAS is a combination of three types of blocks:
Change the kernel size of a random residual block from 2 to 3 or from 3 to 2.
Add a residual block at a random position, with its kernel size randomly chosen from 2 and 3.
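A minimal sketch of these two mutation operators, assuming the architecture is encoded simply as a list of per-block kernel sizes (this toy encoding is an assumption for illustration, not the actual codec):
```python
# Toy encoding: a list of kernel sizes, one entry per residual block.
import random

def mutate_kernel(blocks):
    """Pick a random residual block and switch its kernel size between 2 and 3."""
    mutated = list(blocks)
    i = random.randrange(len(mutated))
    mutated[i] = 3 if mutated[i] == 2 else 2
    return mutated

def insert_block(blocks):
    """Insert a new residual block at a random position with kernel size 2 or 3."""
    mutated = list(blocks)
    mutated.insert(random.randrange(len(mutated) + 1), random.choice([2, 3]))
    return mutated

genotype = [3, 3, 2, 3]   # current residual blocks (toy example)
print(mutate_kernel(genotype))
print(insert_block(genotype))
```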
### Configuring
For details, see the configuration file sr_ea/sr_ea.yml in the sample code.
```yaml
pipeline: [random, mutate]

random:
    pipe_step:
        type: SearchPipeStep
    search_space:                       # Set the network structure search parameters.
        type: SearchSpace
        modules: ['custom']
        custom:
            type: MtMSR
            in_channel: 3               # number of input channels
            out_channel: 3              # number of output channels
            upscale: 2                  # upscaling factor
            rgb_mean: [0.4040, 0.4371, 0.4488]  # mean of the RGB values
            candidates: [res2, res3]
            block_range: [10, 80]       # the range of the block number
            cib_range: [3, 4]           # the range of the CIB number
    search_algorithm:
        type: SRRandom
        codec: SRCodec
        policy:
            num_sample: 1000

mutate:
    search_space:
        ref: random.search_space
    search_algorithm:
        type: SRMutate
        codec: SRCodec
        policy:
            num_sample: 1000            # the number of samples
            num_mutate: 3               # the number of generations of the genetic algorithm
```
## Dataset
The benchmark datasets can be downloaded as follows:
[DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar).
After downloading the corresponding dataset to the target location, you can configure and use the dataset separately for training and testing.
Dataset configuration parameters in sr_ea/sr_ea.yml:
```yaml
random:
    dataset:
        type: DIV2K
        common:
            value_div: 255.0
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr    # Directory where the HR images are located
            root_LR: /cache/datasets/DIV2K/div2k_train/lr    # Directory where the LR images are located
            upscale: 2          # Up scale
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true       # shuffle
            num_workers: 2      # Number of read threads
            batch_size: 16      # batch size
            pin_memory: false   # Pin memory
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            crop: 64
            pin_memory: false
```
## Requirements
### Hardware (Ascend)
@ -87,12 +161,12 @@ sr_ea
├── train.py # pre-training entry
├── image
│ └── sr_ea_SRResNet.png # the illustration of sr_ea network
├── readme.md # Readme
├── README.md # Readme
├── scripts
│ ├── run_distributed.sh # pre-training script for all tasks
│ ├── run_standalone.sh # shell script for standalone train on ascend
│ ├── run_distributed.sh # shell script for distributed train on ascend
└── src
├── sr_ea.yml # options/hyper-parameters of sr_ea
└── sr_ea_distributed.yml # options/hyper-parameters of sr_ea
└── sr_ea.yml # options/hyper-parameters of sr_ea
```
@ -104,14 +178,29 @@ sr_ea
### For training
- Standalone Ascend Training:
```bash
python3 train.py
sh scripts/run_standalone.sh
```
- Distributed Ascend Training:
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
```
For distributed training, an HCCL configuration file in JSON format needs to be created in advance.
Please follow the instructions in the link below:
<https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools>.
`$RANK_TABLE_FILE` is required when you are running a distributed task on Ascend.
> Alternatively, you can run the following script for all tasks.
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
python3 train.py
```
## Evaluation

View File

@ -1 +1 @@
noah-vega
noah-vega==1.6.1

View File

@ -33,5 +33,5 @@ fi
RANK_TABLE_FILE=$(realpath $1)
export RANK_TABLE_FILE
python3 -m vega.tools.run_pipeline ../src/sr_ea_distributed.yml -b m -d NPU \
python3 -m vega.tools.run_pipeline ../src/sr_ea.yml -b m -d NPU \
> train.log 2>&1 &

View File

@ -1,5 +1,7 @@
general:
backend: mindspore
parallel_search: True
parallel_fully_train: True
pipeline: [random, mutate, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

View File

@ -1,197 +0,0 @@
general:
    backend: mindspore
    parallel_search: True
    parallel_fully_train: True

pipeline: [random, mutate, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

random:
    pipe_step:
        type: SearchPipeStep
    dataset:
        type: DIV2K
        common:
            value_div: 255.0
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true
            num_workers: 2
            batch_size: 16
            pin_memory: false
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            crop: 64
            pin_memory: false
    search_space:
        type: SearchSpace
        modules: ['custom']
        custom:
            type: MtMSR
            in_channel: 3
            out_channel: 3
            upscale: 2
            rgb_mean: [0.4040, 0.4371, 0.4488]
            candidates: [res2, res3]
            block_range: [10, 80]
            cib_range: [3, 4]
    search_algorithm:
        type: SRRandom
        codec: SRCodec
        policy:
            num_sample: 1000
    trainer:
        type: Trainer
        epochs: 400
        optimizer:
            type: Adam
            params:
                lr: 0.0004
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [100, 200]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
        calc_params_each_epoch: True
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
            load_pkl: False

mutate:
    pipe_step:
        type: SearchPipeStep
    dataset:
        ref: random.dataset
    search_space:
        type: SearchSpace
        ref: random.search_space
    search_algorithm:
        type: SRMutate
        codec: SRCodec
        policy:
            num_mutate: 3
            num_sample: 1000
    trainer:
        ref: random.trainer
        epochs: 100
        save_model_desc: True

fully_train:
    pipe_step:
        type: TrainPipeStep
        models_folder: "{local_base_path}/output/mutate/"
    dataset:
        ref: random.dataset
        train:
            batch_size: 50
    search_space:
        ref: random.search_space
    trainer:
        type: Trainer
        seed: 0
        epochs: 20000
        optimizer:
            type: Adam
            params:
                lr: 0.0002
        lr_scheduler:
            type: StepLR
            params:
                step_size: 4000
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2

benchmark_DIV2K:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        type: DIV2K
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
                params:
                    scale: 2

benchmark_Set5:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set5
        test:
            root_HR: /cache/datasets/DIV2K/Set5/hr
            root_LR: /cache/datasets/DIV2K/Set5/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_Set14:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set14
        test:
            root_HR: /cache/datasets/DIV2K/Set14/hr
            root_LR: /cache/datasets/DIV2K/Set14/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_BSDS100:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: BSDS100
        test:
            root_HR: /cache/datasets/DIV2K/BSDS100/hr
            root_LR: /cache/datasets/DIV2K/BSDS100/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator