!22634 update readme

Merge pull request !22634 from hjxcoder/sr_ea_branch5
i-robot 2021-09-06 04:11:21 +00:00 committed by Gitee
commit 12be525ef1
10 changed files with 153 additions and 399 deletions

View File

@ -109,6 +109,33 @@ The benchmark datasets can be downloaded as follows:
[DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar).
After downloading the corresponding dataset to the target location, you can configure and use the dataset separately for training and testing.
Dataset configuration parameters in esr_ea/esr_ea.yml:
```yaml
nas:
    dataset:
        type: DIV2K
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr    # Directory where the HR images are located
            root_LR: /cache/datasets/DIV2K/div2k_train/lr    # Directory where the LR images are located
            upscale: 2          # Up scale
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true       # shuffle
            batch_size: 16      # batch size
            fixed_size: true
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            fixed_size: true
            crop: 64
```
## Requirements
### Hardware (Ascend)
@ -135,12 +162,12 @@ esr_ea
├── image
│ ├── esr_arch.png # the illustration of esr_ea network
│ └── esr_block.png # the illustration of the esr block
├── readme.md # Readme
├── README.md # Readme
├── scripts
│ ├── run_distributed.sh # pre-training script for all tasks
│ ├── run_standalone.sh # shell script for standalone train on ascend
│ ├── run_distributed.sh # shell script for distributed train on ascend
└── src
├── esr_ea.yml # options/hyper-parameters of esr_ea
└── esr_ea_distributed.yml # options/hyper-parameters of esr_ea
└── esr_ea.yml # options/hyper-parameters of esr_ea
```
@ -152,14 +179,29 @@ esr_ea
### For training
- Standalone Ascend Training:
```bash
python3 train.py
sh scripts/run_standalone.sh
```
- Distributed Ascend Training:
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
```
For distributed training, an HCCL configuration file in JSON format needs to be created in advance.
Please follow the instructions in the link below:
<https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools>.
`$RANK_TABLE_FILE` is required when you are running a distributed task on Ascend.
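The exact rank table depends on your environment. As a rough illustration only (the field layout below follows the single-server format produced by hccl_tools, while the host IP, device IPs, and output file name are placeholders), such a file could be generated with a short script:
```python
# Rough illustration only: field names follow the single-server rank table produced
# by hccl_tools; the host IP, device IPs, and file name here are placeholders.
import json

rank_table = {
    "version": "1.0",
    "server_count": "1",
    "server_list": [
        {
            "server_id": "10.0.0.1",  # host IP of the Ascend server (placeholder)
            "device": [
                {"device_id": str(i), "device_ip": f"192.168.100.{i + 1}", "rank_id": str(i)}
                for i in range(8)     # one entry per NPU used for training
            ],
            "host_nic_ip": "reserve",
        }
    ],
    "status": "completed",
}

with open("rank_table_8pcs.json", "w") as f:
    json.dump(rank_table, f, indent=4)
```
Pass the path of the generated JSON file as `[RANK_TABLE_FILE]` to `scripts/run_distributed.sh`.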
> Alternatively, you can run the following script for all tasks.
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
python3 train.py
```
## Evaluation

View File

@ -1 +1 @@
noah-vega
noah-vega==1.6.1

View File

@ -33,5 +33,5 @@ fi
RANK_TABLE_FILE=$(realpath $1)
export RANK_TABLE_FILE
python3 -m vega.tools.run_pipeline ../src/esr_ea_distributed.yml -b m -d NPU \
python3 -m vega.tools.run_pipeline ../src/esr_ea.yml -b m -d NPU \
> train.log 2>&1 &

View File

@ -1,182 +0,0 @@
general:
    backend: mindspore
    parallel_search: True
    parallel_fully_train: True

pipeline: [nas, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

nas:
    pipe_step:
        type: SearchPipeStep
    dataset:
        type: DIV2K
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
            crop: 64
            hflip: true
            vflip: true
            rot90: true
            shuffle: true
            batch_size: 16
            fixed_size: true
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            fixed_size: true
            crop: 64
    search_space:
        type: SearchSpace
        modules: ['esrbody']
        esrbody:
            type: ESRN
            block_type: [S,G,C]
            conv_num: [4,6,8]
            growth_rate: [8,16,24,32]
            type_prob: [1,1,1]
            conv_prob: [1,1,1]
            growth_prob: [1,1,1,1]
            G0: 32
            scale: 2
    search_algorithm:
        type: ESRSearch
        codec: ESRCodec
        policy:
            num_generation: 20
            num_individual: 8
            num_elitism: 4
            mutation_rate: 0.05
        range:
            node_num: 20
            min_active: 16
            max_params: 325000
            min_params: 315000
    trainer:
        type: Trainer
        callbacks: ESRTrainerCallback
        epochs: 500
        optimizer:
            type: Adam
            params:
                lr: 0.0001  # 0.001 for mindspore
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [100,200]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
                max_rgb: 255
        scale: 2
        cuda: True
        seed: 10

fully_train:
    pipe_step:
        type: TrainPipeStep
        models_folder: "{local_base_path}/output/nas/"
    dataset:
        ref: nas.dataset
    trainer:
        type: Trainer
        callbacks: ESRTrainerCallback
        node_num: 20
        epochs: 15000
        optimizer:
            type: Adam
            params:
                lr: 0.0001
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [8000,12000,13500,14500]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
                max_rgb: 255
        scale: 2
        seed: 10
        range:
            node_num: 20
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR

benchmark_DIV2K:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        type: DIV2K
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
                params:
                    scale: 2
                    max_rgb: 255

benchmark_Set5:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set5
        test:
            root_HR: /cache/datasets/DIV2K/Set5/hr
            root_LR: /cache/datasets/DIV2K/Set5/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_Set14:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set14
        test:
            root_HR: /cache/datasets/DIV2K/Set14/hr
            root_LR: /cache/datasets/DIV2K/Set14/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_BSDS100:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: BSDS100
        test:
            root_HR: /cache/datasets/DIV2K/BSDS100/hr
            root_LR: /cache/datasets/DIV2K/BSDS100/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

View File

@ -1,7 +1,7 @@
general:
backend: mindspore
parallel_search: False
parallel_fully_train: False
parallel_search: True
parallel_fully_train: True
pipeline: [nas, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

View File

@ -3,7 +3,8 @@
- [Contents](#contents)
- [Algorithm Introduction](#algorithm-introduction)
- [Algorithm Principle](#algorithm-principle)
- [Search Space and Search Policy](#search-space-and-search-policy)
- [Search Space and Search Policy](#search-space-and-search-policy)
- [Configuring](#configuring)
- [Dataset](#dataset)
- [Requirements](#requirements)
- [Hardware (Ascend)](#hardware-ascend)
@ -44,7 +45,7 @@ CCRN-NAS is a network architecture dedicated to lightweight networks. The CCRN-N
Pipeline provides a sample for CCRN-NAS architecture search. It searches for the combination of the three modules to optimize the network architecture.
## Search Space and Search Policy
### Search Space and Search Policy
The search space of the modified SRResNet includes the number of blocks and the number of channels. We provide two search methods: random search (RS) and brute force (BF). In both methods, users define the range of the block number and the channel number for each convolution layer. RS randomly generates models from these ranges until the number of models reaches max_count, whereas BF trains all candidate models in the ranges.
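As a minimal sketch (not VEGA's implementation; the block and channel ranges below are made up for illustration), the two search modes can be pictured as follows:
```python
# Minimal sketch of the two search modes, with made-up block/channel ranges.
import itertools
import random

block_range = list(range(10, 21))   # candidate block numbers (assumed range)
channel_range = [16, 32, 48, 64]    # candidate channel numbers (assumed range)

def random_search(max_count):
    """RS: sample (blocks, channels) pairs at random until max_count models are collected."""
    return [(random.choice(block_range), random.choice(channel_range)) for _ in range(max_count)]

def brute_force():
    """BF: enumerate every (blocks, channels) combination; all of them are trained."""
    return list(itertools.product(block_range, channel_range))

print(len(random_search(max_count=32)), "models sampled by RS")
print(len(brute_force()), "models enumerated by BF")
```
In practice these ranges and max_count come from the configuration file described in the Configuring section below.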
@ -56,12 +57,85 @@ The search space of CCRN-NAS is a combination of three types of blocks:
Change the kernel size of a random residual block from 2 to 3 or from 3 to 2.
Add a residual block at a random position, with its kernel size randomly chosen from 2 and 3.
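A minimal sketch of these two mutation operators, assuming the architecture is encoded simply as a list of per-block kernel sizes (this toy encoding is an assumption for illustration, not the actual codec):
```python
# Toy encoding: a list of kernel sizes, one entry per residual block.
import random

def mutate_kernel(blocks):
    """Pick a random residual block and switch its kernel size between 2 and 3."""
    mutated = list(blocks)
    i = random.randrange(len(mutated))
    mutated[i] = 3 if mutated[i] == 2 else 2
    return mutated

def insert_block(blocks):
    """Insert a new residual block at a random position with kernel size 2 or 3."""
    mutated = list(blocks)
    mutated.insert(random.randrange(len(mutated) + 1), random.choice([2, 3]))
    return mutated

genotype = [3, 3, 2, 3]   # current residual blocks (toy example)
print(mutate_kernel(genotype))
print(insert_block(genotype))
```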
### Configuring
For details, see the configuration file sr_ea/sr_ea.yml in the sample code.
```yaml
pipeline: [random, mutate]

random:
    pipe_step:
        type: SearchPipeStep
    search_space:                       # Set the network structure search parameters.
        type: SearchSpace
        modules: ['custom']
        custom:
            type: MtMSR
            in_channel: 3               # number of input channels
            out_channel: 3              # number of output channels
            upscale: 2                  # upscaling factor
            rgb_mean: [0.4040, 0.4371, 0.4488]  # mean of the RGB values
            candidates: [res2, res3]
            block_range: [10, 80]       # the range of the block number
            cib_range: [3, 4]           # the range of the CIB number
    search_algorithm:
        type: SRRandom
        codec: SRCodec
        policy:
            num_sample: 1000

mutate:
    search_space:
        ref: random.search_space
    search_algorithm:
        type: SRMutate
        codec: SRCodec
        policy:
            num_sample: 1000            # the number of samples
            num_mutate: 3               # the number of generations of the genetic algorithm
```
## Dataset
The benchmark datasets can be downloaded as follows:
[DIV2K](https://cv.snu.ac.kr/research/EDSR/DIV2K.tar).
After downloading the corresponding dataset to the target location, you can configure and use the dataset separately for training and testing.
Dataset configuration parameters in sr_ea/sr_ea.yml:
```yaml
random:
    dataset:
        type: DIV2K
        common:
            value_div: 255.0
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr    # Directory where the HR images are located
            root_LR: /cache/datasets/DIV2K/div2k_train/lr    # Directory where the LR images are located
            upscale: 2          # Up scale
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true       # shuffle
            num_workers: 2      # Number of read threads
            batch_size: 16      # batch size
            pin_memory: false   # Pin memory
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            crop: 64
            pin_memory: false
```
## Requirements
### Hardware (Ascend)
@ -87,12 +161,12 @@ sr_ea
├── train.py # pre-training entry
├── image
│ └── sr_ea_SRResNet.png # the illustration of sr_ea network
├── readme.md # Readme
├── README.md # Readme
├── scripts
│ ├── run_distributed.sh # pre-training script for all tasks
│ ├── run_standalone.sh # shell script for standalone train on ascend
│ ├── run_distributed.sh # shell script for distributed train on ascend
└── src
├── sr_ea.yml # options/hyper-parameters of sr_ea
└── sr_ea_distributed.yml # options/hyper-parameters of sr_ea
└── sr_ea.yml # options/hyper-parameters of sr_ea
```
@ -104,14 +178,29 @@ sr_ea
### For training
- Standalone Ascend Training:
```bash
python3 train.py
sh scripts/run_standalone.sh
```
- Distributed Ascend Training:
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
```
For distributed training, an HCCL configuration file in JSON format needs to be created in advance.
Please follow the instructions in the link below:
<https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools>.
`$RANK_TABLE_FILE` is required when you are running a distributed task on Ascend.
> Alternatively, you can run the following script for all tasks.
```bash
sh scripts/run_distributed.sh [RANK_TABLE_FILE]
python3 train.py
```
## Evaluation

View File

@ -1 +1 @@
noah-vega
noah-vega==1.6.1

View File

@ -33,5 +33,5 @@ fi
RANK_TABLE_FILE=$(realpath $1)
export RANK_TABLE_FILE
python3 -m vega.tools.run_pipeline ../src/sr_ea_distributed.yml -b m -d NPU \
python3 -m vega.tools.run_pipeline ../src/sr_ea.yml -b m -d NPU \
> train.log 2>&1 &

View File

@ -1,5 +1,7 @@
general:
backend: mindspore
parallel_search: True
parallel_fully_train: True
pipeline: [random, mutate, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

View File

@ -1,197 +0,0 @@
general:
    backend: mindspore
    parallel_search: True
    parallel_fully_train: True

pipeline: [random, mutate, fully_train, benchmark_DIV2K, benchmark_Set5, benchmark_Set14, benchmark_BSDS100]

random:
    pipe_step:
        type: SearchPipeStep
    dataset:
        type: DIV2K
        common:
            value_div: 255.0
        train:
            root_HR: /cache/datasets/DIV2K/div2k_train/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
            crop: 64            # crop size of lr image
            hflip: true         # flip image horizontally
            vflip: true         # flip image vertically
            rot90: true         # flip image diagonally
            shuffle: true
            num_workers: 2
            batch_size: 16
            pin_memory: false
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_valid/lr
            upscale: 2
            crop: 64
            pin_memory: false
    search_space:
        type: SearchSpace
        modules: ['custom']
        custom:
            type: MtMSR
            in_channel: 3
            out_channel: 3
            upscale: 2
            rgb_mean: [0.4040, 0.4371, 0.4488]
            candidates: [res2, res3]
            block_range: [10, 80]
            cib_range: [3, 4]
    search_algorithm:
        type: SRRandom
        codec: SRCodec
        policy:
            num_sample: 1000
    trainer:
        type: Trainer
        epochs: 400
        optimizer:
            type: Adam
            params:
                lr: 0.0004
        lr_scheduler:
            type: MultiStepLR
            params:
                milestones: [100, 200]
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2
        calc_params_each_epoch: True
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
            load_pkl: False

mutate:
    pipe_step:
        type: SearchPipeStep
    dataset:
        ref: random.dataset
    search_space:
        type: SearchSpace
        ref: random.search_space
    search_algorithm:
        type: SRMutate
        codec: SRCodec
        policy:
            num_mutate: 3
            num_sample: 1000
    trainer:
        ref: random.trainer
        epochs: 100
        save_model_desc: True

fully_train:
    pipe_step:
        type: TrainPipeStep
        models_folder: "{local_base_path}/output/mutate/"
    dataset:
        ref: random.dataset
        train:
            batch_size: 50
    search_space:
        ref: random.search_space
    trainer:
        type: Trainer
        seed: 0
        epochs: 20000
        optimizer:
            type: Adam
            params:
                lr: 0.0002
        lr_scheduler:
            type: StepLR
            params:
                step_size: 4000
                gamma: 0.5
        loss:
            type: L1Loss
        metric:
            type: PSNR
            params:
                scale: 2

benchmark_DIV2K:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        type: DIV2K
        test:
            root_HR: /cache/datasets/DIV2K/div2k_valid/hr
            root_LR: /cache/datasets/DIV2K/div2k_train/lr
            upscale: 2
    evaluator:
        type: Evaluator
        host_evaluator:
            type: HostEvaluator
            metric:
                type: PSNR
                params:
                    scale: 2

benchmark_Set5:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set5
        test:
            root_HR: /cache/datasets/DIV2K/Set5/hr
            root_LR: /cache/datasets/DIV2K/Set5/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_Set14:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: Set14
        test:
            root_HR: /cache/datasets/DIV2K/Set14/hr
            root_LR: /cache/datasets/DIV2K/Set14/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator

benchmark_BSDS100:
    pipe_step:
        type: BenchmarkPipeStep
        models_folder: "{local_base_path}/output/fully_train/"
    dataset:
        ref: benchmark_DIV2K.dataset
        type: BSDS100
        test:
            root_HR: /cache/datasets/DIV2K/BSDS100/hr
            root_LR: /cache/datasets/DIV2K/BSDS100/lr
    evaluator:
        ref: benchmark_DIV2K.evaluator