!15983 DS-CNN implementation on GPU

From: @charlie__chen
Reviewed-by: @limingqi107,@cristoval
Signed-off-by: @cristoval
mindspore-ci-bot 2021-05-10 17:18:17 +08:00 committed by Gitee
commit f10c015be4
8 changed files with 158 additions and 47 deletions

View File: README.md

@@ -94,6 +94,20 @@ First set the config for data, train, eval in src/config.py
python eval.py --model_dir your_models_folder_path
```
- running on GPU
```bash
# run training example
python train.py --amp_level 'O3' --device_target='GPU'
# run evaluation example
# if you want to evaluate a specific model, set model_dir to the ckpt path:
python eval.py --device_id 0 --model_dir your_ckpt_path --device_target 'GPU'
# if you want to evaluate all the models you saved, set model_dir to the folder where they are saved.
python eval.py --device_id 0 --model_dir your_models_folder_path --device_target 'GPU'
```
# [Script Description](#contents)
## [Script and Sample Code](#contents)
@@ -105,6 +119,8 @@ First set the config for data, train, eval in src/config.py
│ ├──run_download_process_data.sh // shell script for download dataset and prepare feature and label
│ ├──run_train_ascend.sh // shell script for train on ascend
│ ├──run_eval_ascend.sh // shell script for evaluation on ascend
│ ├──run_train_gpu.sh // shell script for train on gpu
│ ├──run_eval_gpu.sh // shell script for evaluation on gpu
├── src
│ ├──callback.py // callbacks
│ ├──config.py // parameter configuration of data, train and eval
@@ -173,6 +189,8 @@ Parameters for both training and evaluation can be set in config.py.
'log_interval': 100 # logging interval
'ckpt_path': 'train_outputs' # the location where checkpoint and log will be saved
'ckpt_interval': 100 # save a checkpoint every ckpt_interval steps
'device_target': 'Ascend' # device target used to train or evaluate the dataset.
'amp_level': 'O3' # amp level for mixed-precision training
```
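The last two entries are the ones this commit introduces. Below is a minimal sketch, not the repo's code, of where they end up: `device_target` feeds `context.set_context` and `amp_level` feeds `Model`, as in the train.py diff further down; the `nn.Dense` network and hyper-parameter values are illustrative stand-ins.

```python
import mindspore.nn as nn
from mindspore import context, Model

# device_target selects the backend, as train.py now does from the config.
context.set_context(mode=context.GRAPH_MODE, device_target='GPU', device_id=0)

net = nn.Dense(10, 12)  # stand-in for the DS-CNN network
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.98)

# amp_level='O3' runs the network in float16; keep_batchnorm_fp32=False
# mirrors the Model(...) call this commit adds to train.py.
model = Model(net, loss_fn=loss, optimizer=opt,
              amp_level='O3', keep_batchnorm_fp32=False)
```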
- config for DS-CNN and evaluation parameters of Speech commands dataset version 1
@@ -184,6 +202,7 @@ Parameters for both training and evaluation can be set in config.py.
# Words to use (others will be added to an unknown label)
'sample_rate': 16000 # Expected sample rate of the wavs
'device_id': 1000 # device ID used to train or evaluate the dataset.
'device_target': 'Ascend' # device target used to train or evaluate the dataset.
'clip_duration_ms': 10 # Expected duration in milliseconds of the wavs
'window_size_ms': 40.0 # How long each spectrogram timeslice is
'window_stride_ms': 20.0 # How far to move in time between spectrogram timeslices
@@ -227,6 +246,15 @@ Parameters for both training and evaluation can be set in config.py.
Best epoch:41 acc:93.73%
```
- running on GPU
for shell script:
```bash
# sh scripts/run_train_gpu.sh [device_num] [cuda_visible_devices] [amp_level]
sh scripts/run_train_gpu.sh 1 0 'O3'
```
The checkpoints and logs will be saved in the train_outputs folder.
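For reference, a sketch of how those checkpoints are produced, assuming it mirrors `callback_func` in `src/callback.py` (diffed below); the interval, directory, and prefix values are hard-coded here for illustration:

```python
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor

# One checkpoint every 'ckpt_interval' steps, written under 'ckpt_path'.
ckpt_config = CheckpointConfig(save_checkpoint_steps=100, keep_checkpoint_max=30)
ckpt_cb = ModelCheckpoint(config=ckpt_config, directory='train_outputs',
                          prefix='epoch0')
# TimeMonitor is the per-step timing callback this commit adds.
callbacks = [ckpt_cb, TimeMonitor(100)]
# then: model.train(epochs, train_dataset, callbacks=callbacks)
```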
## [Evaluation Process](#contents)
@@ -255,6 +283,17 @@ Parameters for both training and evaluation can be set in config.py.
python eval.py --device_id 0 --model_dir train_outputs/*
```
- evaluation on Speech commands dataset version 1 when running on GPU
for shell script:
```bash
# sh scripts/run_eval_gpu.sh [device_id] [model_dir]
sh scripts/run_eval_gpu.sh 0 train_outputs/*/*.ckpt
# or
sh scripts/run_eval_gpu.sh 0 train_outputs/*/
```
You can view the results on the screen or from the logs in the eval_outputs folder. The accuracy on the test dataset will be as follows:
```python
@@ -268,39 +307,39 @@ Parameters for both training and evaluation can be set in config.py.
### Train Performance
| Parameters | Ascend |
| -------------------------- | ------------------------------------------------------------ |
| Model Version | DS-CNN |
| Resource | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8 |
| uploaded Date | 09/27/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | Speech commands dataset version 1 |
| Training Parameters | epoch=80, batch_size = 100, lr=0.1 |
| Optimizer | Momentum |
| Loss Function | Softmax Cross Entropy |
| outputs | probability |
| Loss | 0.0019 |
| Speed | 2s/epoch |
| Total time | 4 mins |
| Parameters (K) | 500K |
| Checkpoint for Fine tuning | 3.3M (.ckpt file) |
| Parameters | Ascend | GPU |
| -------------------------- | ------------------------------------------------------------ | -------------------------------------------------|
| Model Version | DS-CNN | DS-CNN |
| Resource | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8 | NV SXM2 V100-32G |
| uploaded Date | 09/27/2020 (month/day/year) | 05/05/2021 (month/day/year) |
| MindSpore Version | 1.0.0 | 1.2.0 |
| Dataset | Speech commands dataset version 1 | Speech commands dataset version 1 |
| Training Parameters | epoch=80, batch_size = 100, lr=0.1 | epoch=80, batch_size = 100, lr=0.1 |
| Optimizer | Momentum | Momentum |
| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
| outputs | probability | probability |
| Loss | 0.0019 | 0.003304138 |
| Speed | 2s/epoch | 3s/epoch |
| Total time | 4 mins | 6 mins |
| Parameters (K) | 500K | 500K |
| Checkpoint for Fine tuning | 3.3M (.ckpt file) | 3.3M (.ckpt file) |
| Script | [Link]() | [Link]() |
### Inference Performance
| Parameters | Ascend |
| ------------------- | --------------------------- |
| Model Version | DS-CNN |
| Resource | Ascend 910; OS Euler2.8 |
| Uploaded Date | 09/27/2020 |
| MindSpore Version | 1.0.0 |
| Dataset | Speech commands dataset version 1 |
| Training Parameters | src/config.py |
| outputs | probability |
| Accuracy | 93.96% |
| Total time | 3min |
| Params (K) | 500K |
| Checkpoint for Fine tuning (M) | 3.3M |
| Parameters | Ascend | GPU |
| ------------------- | --------------------------- | -------------------------|
| Model Version | DS-CNN | DS-CNN |
| Resource | Ascend 910; OS Euler2.8 | NV SXM2 V100-32G |
| Uploaded Date | 09/27/2020 | 05/05/2021 |
| MindSpore Version | 1.0.0 | 1.2.0 |
| Dataset | Speech commands dataset version 1 | Speech commands dataset version 1 |
| Training Parameters | src/config.py | src/config.py |
| outputs | probability | probability |
| Accuracy | 93.96% | 93.97% |
| Total time | 3min | 2min20s |
| Params (K) | 500K | 500K |
| Checkpoint for Fine tuning (M) | 3.3M | 3.3M |
# [Description of Random Situation](#contents)

View File: eval.py

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -74,8 +74,9 @@ def val(args, model, test_de):
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--device_id', type=int, default=1, help='which device the model will be trained on')
parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU', 'CPU'])
args, model_settings = eval_config(parser)
context.set_context(mode=context.GRAPH_MODE, device_target="Davinci", device_id=args.device_id)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
# Logger
args.outputs_dir = os.path.join(args.log_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

View File: scripts/run_eval_gpu.sh

@@ -0,0 +1,17 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
python eval.py --device_id $1 --model_dir $2 --device_target 'GPU' > eval.log 2>&1 &

View File: scripts/run_train_gpu.sh

@@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
export DEVICE_NUM=$1
export RANK_SIZE=$1
export CUDA_VISIBLE_DEVICES="$2"
if [ $1 -gt 1 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
python train.py --is_distributed --amp_level $3 --device_target="GPU" > train.log 2>&1 &
else
python train.py --amp_level $3 --device_target='GPU' > train.log 2>&1 &
fi
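The three positional arguments are the device count (exported as both `DEVICE_NUM` and `RANK_SIZE`), the list assigned to `CUDA_VISIBLE_DEVICES`, and the amp level; a count above 1 takes the `mpirun` branch, so a hypothetical 8-GPU run would be `sh scripts/run_train_gpu.sh 8 0,1,2,3,4,5,6,7 'O3'`.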

View File: src/callback.py

@@ -16,6 +16,7 @@
import time
from mindspore.train.callback import ModelCheckpoint
from mindspore.train.callback import TimeMonitor
from mindspore.train.callback import CheckpointConfig, Callback
@@ -85,4 +86,5 @@ def callback_func(args, cb, prefix):
ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval, keep_checkpoint_max=ckpt_max_num)
ckpt_cb = ModelCheckpoint(config=ckpt_config, directory=args.outputs_dir, prefix=prefix)
callbacks.append(ckpt_cb)
callbacks.append(TimeMonitor(args.per_batch_size))
return callbacks

View File: src/dataset.py

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -35,12 +35,13 @@ class NpyDataset():
return data.astype(np.float32), label.astype(np.int32)
def audio_dataset(data_dir, data_type, h, w, batch_size):
def audio_dataset(data_dir, data_type, h, w, batch_size, device_num=1, rank=0):
if 'testing' in data_dir:
shuffle = False
else:
shuffle = True
dataset = NpyDataset(data_dir, data_type, h, w)
de_dataset = de.GeneratorDataset(dataset, ["feats", "labels"], shuffle=shuffle)
de_dataset = de_dataset.batch(batch_size, drop_remainder=False)
de_dataset = de.GeneratorDataset(dataset, ["feats", "labels"], shuffle=shuffle,
num_shards=device_num, shard_id=rank)
de_dataset = de_dataset.batch(batch_size, drop_remainder=True)
return de_dataset
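The new `num_shards`/`shard_id` arguments give each rank a disjoint 1/`device_num` slice of the data, and `drop_remainder=True` keeps the step count identical on every rank. A self-contained sketch of the same pattern, with a random `NpySource` standing in for the real feature files:

```python
import numpy as np
import mindspore.dataset as de

class NpySource:
    """Illustrative stand-in for NpyDataset: random MFCC-like features."""
    def __init__(self, n=1000, h=49, w=10):
        self.feats = np.random.rand(n, 1, h, w).astype(np.float32)
        self.labels = np.random.randint(0, 12, n).astype(np.int32)
    def __getitem__(self, index):
        return self.feats[index], self.labels[index]
    def __len__(self):
        return len(self.feats)

device_num, rank = 2, 0  # from get_group_size() / get_rank() when distributed
ds = de.GeneratorDataset(NpySource(), ["feats", "labels"], shuffle=True,
                         num_shards=device_num, shard_id=rank)
ds = ds.batch(100, drop_remainder=True)  # equal batch counts on every rank
print(ds.get_dataset_size())  # 1000 samples / 2 shards / 100 per batch = 5
```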

View File: src/download_process_data.py

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -30,8 +30,8 @@ import soundfile as sf
from python_speech_features import mfcc
from tqdm import tqdm
from src.config import train_config, prepare_model_settings
from src.utils import prepare_words_list
from config import train_config, prepare_model_settings
from utils import prepare_words_list
FLAGS = None
MAX_NUM_WAVS_PER_CLASS = 2 ** 27 - 1 # ~134M

View File: train.py

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,9 +20,11 @@ import argparse
import numpy as np
from mindspore import context
from mindspore import Tensor, Model
from mindspore.train.model import ParallelMode
from mindspore.nn.optim import Momentum
from mindspore.common import dtype as mstype
from mindspore.train.serialization import load_checkpoint
from mindspore.communication.management import init, get_rank, get_group_size
from src.config import train_config
from src.log import get_logger
@@ -47,6 +49,12 @@ def val(args, model, val_dataset):
img_tot = 0
top1_correct = 0
top5_correct = 0
if args.amp_level == 'O0':
origin_mstype = mstype.float32
else:
origin_mstype = mstype.float16
model.predict_network.to_float(mstype.float32)
for data, gt_classes in val_dataloader:
output = model.predict(Tensor(data, mstype.float32))
output = output.asnumpy()
@@ -58,6 +66,7 @@ def val(args, model, val_dataset):
top5_correct += get_top5_acc(top5_output, gt_classes)
img_tot += output.shape[0]
model.predict_network.to_float(origin_mstype)
results = [[top1_correct], [top5_correct], [img_tot]]
results = np.array(results)
@@ -74,27 +83,44 @@ def val(args, model, val_dataset):
.format(top1_correct, top5_correct, img_tot, acc1, acc5))
def trainval(args, model, train_dataset, val_dataset, cb):
def trainval(args, model, train_dataset, val_dataset, cb, rank):
callbacks = callback_func(args, cb, 'epoch{}'.format(args.epoch_cnt))
model.train(args.val_interval, train_dataset, callbacks=callbacks, dataset_sink_mode=args.dataset_sink_mode)
val(args, model, val_dataset)
if rank == 0:
val(args, model, val_dataset)
def train():
'''Train.'''
parser = argparse.ArgumentParser()
parser.add_argument('--device_id', type=int, default=1, help='which device the model will be trained on')
parser.add_argument('--is_distributed', action='store_true', default=False, help='distributed training')
parser.add_argument('--device_id', type=int, default=0, help='which device the model will be trained on')
parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU', 'CPU'])
parser.add_argument('--amp_level', type=str, default='O0', choices=['O3', 'O2', 'O0'])
args, model_settings = train_config(parser)
context.set_context(mode=context.GRAPH_MODE, device_id=args.device_id, enable_auto_mixed_precision=True)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_auto_mixed_precision=True)
args.rank_save_ckpt_flag = 1
# init distributed
if args.is_distributed:
if os.getenv('DEVICE_ID', "not_set").isdigit():
context.set_context(device_id=int(os.getenv('DEVICE_ID')))
init()
rank = get_rank()
group_size = get_group_size()
parallel_mode = ParallelMode.DATA_PARALLEL
context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=group_size, gradients_mean=True)
else:
rank = 0
group_size = 1
context.set_context(device_id=args.device_id)
# Logger
args.outputs_dir = os.path.join(args.ckpt_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
args.logger = get_logger(args.outputs_dir)
# Dataloader: train, val
train_dataset = audio_dataset(args.feat_dir, 'training', model_settings['spectrogram_length'],
model_settings['dct_coefficient_count'], args.per_batch_size)
model_settings['dct_coefficient_count'], args.per_batch_size, group_size, rank)
args.steps_per_epoch = train_dataset.get_dataset_size()
val_dataset = audio_dataset(args.feat_dir, 'validation', model_settings['spectrogram_length'],
model_settings['dct_coefficient_count'], args.per_batch_size)
@@ -131,7 +157,7 @@ def train():
momentum=args.momentum,
weight_decay=args.weight_decay)
model = Model(network, loss_fn=criterion, optimizer=opt, amp_level='O0')
model = Model(network, loss_fn=criterion, optimizer=opt, amp_level=args.amp_level, keep_batchnorm_fp32=False)
# Training
args.epoch_cnt = 0
@@ -139,10 +165,10 @@ def train():
args.best_acc = 0
progress_cb = ProgressMonitor(args)
while args.epoch_cnt + args.val_interval < args.max_epoch:
trainval(args, model, train_dataset, val_dataset, progress_cb)
trainval(args, model, train_dataset, val_dataset, progress_cb, rank)
rest_ep = args.max_epoch - args.epoch_cnt
if rest_ep > 0:
trainval(args, model, train_dataset, val_dataset, progress_cb)
trainval(args, model, train_dataset, val_dataset, progress_cb, rank)
args.logger.info('Best epoch:{} acc:{:.2f}%'.format(args.best_epoch, args.best_acc))
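A side note on the `val()` change above: when training runs in float16 (any `amp_level` other than 'O0'), evaluation temporarily casts the predict network to float32 and restores the original dtype afterwards. A minimal sketch of that trick, with a `Dense` stand-in for the real network and illustrative hyper-parameters:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Model
from mindspore.common import dtype as mstype

net = nn.Dense(10, 12)  # stand-in for the DS-CNN network
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.98)
model = Model(net, loss_fn=loss, optimizer=opt,
              amp_level='O3', keep_batchnorm_fp32=False)

origin_mstype = mstype.float16  # would be float32 if amp_level were 'O0'
model.predict_network.to_float(mstype.float32)   # evaluate in full precision
out = model.predict(Tensor(np.random.rand(4, 10), mstype.float32)).asnumpy()
model.predict_network.to_float(origin_mstype)    # restore the training dtype
print(out.shape)  # (4, 12) class scores
```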