From 1c959b3489e98c95eed49b9a30e44602044d6acd Mon Sep 17 00:00:00 2001
From: Rescue
Date: Tue, 13 Jul 2021 19:29:57 +0000
Subject: [PATCH] Added GPU support for SE-Net

Updated README, addressed review comments, fixed pylint warnings,
fixed the standalone run script, and fixed checkpoint saving.
---
 model_zoo/research/cv/SE-Net/README.md              | 95 +++++++++++++------
 .../scripts/run_distribute_train_gpu.sh             | 43 +++++++++
 .../cv/SE-Net/scripts/run_eval_gpu.sh               | 45 +++++++++
 .../scripts/run_standalone_train_gpu.sh             | 46 +++++++++
 .../cv/SE-Net/src/CrossEntropySmooth.py             |  4 +-
 model_zoo/research/cv/SE-Net/src/config.py          |  2 +-
 model_zoo/research/cv/SE-Net/src/resnet.py          |  2 +-
 model_zoo/research/cv/SE-Net/train.py               | 23 +++--
 8 files changed, 220 insertions(+), 40 deletions(-)
 create mode 100644 model_zoo/research/cv/SE-Net/scripts/run_distribute_train_gpu.sh
 create mode 100644 model_zoo/research/cv/SE-Net/scripts/run_eval_gpu.sh
 create mode 100644 model_zoo/research/cv/SE-Net/scripts/run_standalone_train_gpu.sh

diff --git a/model_zoo/research/cv/SE-Net/README.md b/model_zoo/research/cv/SE-Net/README.md
index ef4e5447eb7..bbe5e145a5e 100644
--- a/model_zoo/research/cv/SE-Net/README.md
+++ b/model_zoo/research/cv/SE-Net/README.md
@@ -58,8 +58,8 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
 
 # [Environment Requirements](#contents)
 
-- Hardware(Ascend)
-  - Prepare hardware environment with Ascend processor.
+- Hardware(Ascend/GPU)
+  - Prepare hardware environment with Ascend or GPU processor.
 - Framework
   - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below:
@@ -88,6 +88,22 @@ export DEVICE_ID=0
 python eval.py --net=se-resnet50 --dataset=imagenet2012 --checkpoint_path=[CHECKPOINT_PATH] --dataset_path=[DATASET_PATH]
 ```
 
+- Running on GPU
+
+```bash
+# distributed training
+Usage:
+bash run_distribute_train_gpu.sh se-resnet50 imagenet2012 [DATASET_PATH]
+
+# standalone training
+Usage:
+bash run_standalone_train_gpu.sh se-resnet50 imagenet2012 [DATASET_PATH]
+
+# run evaluation example
+Usage:
+bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
+```
+
 # [Script Description](#contents)
 
 ## [Script and Sample Code](#contents)
@@ -99,8 +115,11 @@ python eval.py --net=se-resnet50 --dataset=imagenet2012 --checkpoint_path=[CHECK
   ├── ascend310_infer                    # application for 310 inference
   ├── scripts
     ├── run_distribute_train.sh          # launch ascend distributed training(8 pcs)
+    ├── run_distribute_train_gpu.sh      # launch gpu distributed training(8 pcs)
     ├── run_eval.sh                      # launch ascend evaluation
+    ├── run_eval_gpu.sh                  # launch gpu evaluation
    ├── run_standalone_train.sh           # launch ascend standalone training(1 pcs)
+   ├── run_standalone_train_gpu.sh       # launch gpu standalone training(1 pcs)
    └─ run_infer_310.sh                   # shell script for 310inference on ascend
   ├── src
     ├── config.py                        # parameter configuration
@@ -159,6 +178,18 @@ export DEVICE_ID=0
 bash run_standalone_train.sh se-resnet50 imagenet2012 /data/imagenet/train/
 ```
 
+#### Running on GPU
+
+```bash
+# distributed training
+Usage:
+bash run_distribute_train_gpu.sh se-resnet50 imagenet2012 [DATASET_PATH]
+
+# standalone training
+Usage:
+bash run_standalone_train_gpu.sh se-resnet50 imagenet2012 [DATASET_PATH]
+```
+
 For distributed training, a hccl configuration file with JSON format needs to be created in advance.
 
 Please follow the instructions in the link [hccn_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
@@ -195,6 +226,12 @@ export DEVICE_ID=0
 bash run_eval.sh /imagenet/val/ /path/to/resnet-90_625.ckpt
 ```
 
+#### Running on GPU
+
+```bash
+bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
+```
+
 ### Result
 
 - Evaluating SE-ResNet50 with ImageNet2012 dataset
@@ -243,38 +280,38 @@ result: {'top_5_accuracy': 93.86%, 'top_1_accuracy': 77.80%}
 
 #### SE-ResNet50 on ImageNet2012
 
-| Parameters                 | Ascend 910
-| -------------------------- | ------------------------------------------------------------------------ |
-| Model Version              | SE-ResNet50                                                 |
-| Resource                   | CentOs 8.2, Ascend 910，CPU 2.60GHz 192cores，Memory 755G   |
-| uploaded Date              | 03/19/2021 (month/day/year)                                 |
-| MindSpore Version          | 0.7.0-alpha                                                 |
-| Dataset                    | ImageNet2012                                                |
-| Training Parameters        | epoch=90, steps per epoch=5004, batch_size = 256            |
-| Optimizer                  | Momentum                                                    |
-| Loss Function              | Softmax Cross Entropy                                       |
-| outputs                    | probability                                                 |
-| Loss                       | 1.5931969                                                   |
-| Speed                      | # ms/step(8pcs)                                             |
-| Total time                 | # mins                                                      |
-| Parameters (M)             | 285M                                                        |
-| Checkpoint for Fine tuning | # M (.ckpt file)                                            |
-| Scripts                    |                                                             |
+| Parameters                 | Ascend                                                      | GPU                                                         |
+| -------------------------- | ----------------------------------------------------------- | ----------------------------------------------------------- |
+| Model Version              | SE-ResNet50                                                 | SE-ResNet50                                                 |
+| Resource                   | CentOS 8.2, Ascend 910, CPU 2.60GHz 192 cores, Memory 755G  | V100-PCIE 32G                                               |
+| Uploaded Date              | 03/19/2021 (month/day/year)                                 | 07/14/2021 (month/day/year)                                 |
+| MindSpore Version          | 0.7.0-alpha                                                 | 1.3.0                                                       |
+| Dataset                    | ImageNet2012                                                | ImageNet2012                                                |
+| Training Parameters        | epoch=90, steps per epoch=5004, batch_size = 256            | epoch=90, steps per epoch=5004, batch_size = 256            |
+| Optimizer                  | Momentum                                                    | Momentum                                                    |
+| Loss Function              | Softmax Cross Entropy                                       | Softmax Cross Entropy                                       |
+| outputs                    | probability                                                 | probability                                                 |
+| Loss                       | 1.5931969                                                   | 1.6664593                                                   |
+| Speed                      | # ms/step(8pcs)                                             | 8pcs: 1016.9 ms/step                                        |
+| Total time                 | # mins                                                      | 8pcs: 15.9 hours                                            |
+| Parameters (M)             | 285M                                                        | 285M                                                        |
+| Checkpoint for Fine tuning | # M (.ckpt file)                                            | # M (.ckpt file)                                            |
+| Scripts                    |                                                             |                                                             |
 
 ### Inference Performance
 
 #### SE-ResNet50 on ImageNet2012
 
-| Parameters          | Ascend                      |
-| ------------------- | --------------------------- |
-| Model Version       | SE-ResNet50                 |
-| Resource            | Ascend 910                  |
-| Uploaded Date       | 03/19/2021 (month/day/year) |
-| MindSpore Version   | 0.7.0-alpha                 |
-| Dataset             | ImageNet2012                |
-| batch_size          | 256                         |
-| Accuracy            | 77.74%                      |
-| Model for inference | # (.air file)               |
+| Parameters          | Ascend                      | GPU                         |
+| ------------------- | --------------------------- | --------------------------- |
+| Model Version       | SE-ResNet50                 | SE-ResNet50                 |
+| Resource            | Ascend 910                  | V100-PCIE 32G               |
+| Uploaded Date       | 03/19/2021 (month/day/year) | 07/14/2021 (month/day/year) |
+| MindSpore Version   | 0.7.0-alpha                 | 1.3.0                       |
+| Dataset             | ImageNet2012                | ImageNet2012                |
+| batch_size          | 256                         | 256                         |
+| Accuracy            | 77.74%                      | 77.66%                      |
+| Model for inference | # (.air file)               | -                           |
 
 ### 310Inference Performance
 
diff --git a/model_zoo/research/cv/SE-Net/scripts/run_distribute_train_gpu.sh b/model_zoo/research/cv/SE-Net/scripts/run_distribute_train_gpu.sh
new file mode 100644
index 00000000000..e629f992d77
--- /dev/null
+++ b/model_zoo/research/cv/SE-Net/scripts/run_distribute_train_gpu.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Usage: bash run_distribute_train_gpu.sh [NET] [DATASET] [DATASET_PATH]
+
+
+ulimit -u unlimited
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export NET=$1
+export DATASET=$2
+export DATASET_PATH=$3
+
+rm -rf ./train_parallel
+mkdir ./train_parallel
+cp ../*.py ./train_parallel
+cp *.sh ./train_parallel
+cp -r ../src ./train_parallel
+cd ./train_parallel || exit
+
+echo "start distributed training with $DEVICE_NUM GPUs."
+
+mpirun --allow-run-as-root -n $DEVICE_NUM \
+    python train.py \
+    --device_target="GPU" \
+    --net=$NET \
+    --dataset=$DATASET \
+    --run_distribute=True \
+    --device_num=$DEVICE_NUM \
+    --dataset_path=$DATASET_PATH > log 2>&1 &
diff --git a/model_zoo/research/cv/SE-Net/scripts/run_eval_gpu.sh b/model_zoo/research/cv/SE-Net/scripts/run_eval_gpu.sh
new file mode 100644
index 00000000000..1120c4d9ebb
--- /dev/null
+++ b/model_zoo/research/cv/SE-Net/scripts/run_eval_gpu.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]
+then
+    echo "Usage: bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+export DATA_PATH=$1
+export CKPT_PATH=$2
+
+if [ -d "eval" ];
+then
+    rm -rf ./eval
+fi
+mkdir ./eval
+cp ../*.py ./eval
+cp *.sh ./eval
+cp -r ../src ./eval
+cd ./eval || exit
+env > env.log
+echo "start evaluation for device $DEVICE_ID"
+
+python eval.py --device_target="GPU" --dataset_path=$DATA_PATH --checkpoint_path=$CKPT_PATH > log 2>&1 &
+
+cd ..
diff --git a/model_zoo/research/cv/SE-Net/scripts/run_standalone_train_gpu.sh b/model_zoo/research/cv/SE-Net/scripts/run_standalone_train_gpu.sh
new file mode 100644
index 00000000000..bf078df7039
--- /dev/null
+++ b/model_zoo/research/cv/SE-Net/scripts/run_standalone_train_gpu.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 3 ]
+then
+    echo "Usage: bash run_standalone_train_gpu.sh [NET] [DATASET_NAME] [DATASET_PATH]"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_ID=0
+export RANK_SIZE=1
+export NET=$1
+export DATASET=$2
+export DATASET_PATH=$3
+
+
+if [ -d "train" ];
+then
+    rm -rf ./train
+fi
+
+mkdir ./train
+cp ../*.py ./train
+cp *.sh ./train
+cp -r ../src ./train
+cd ./train || exit
+echo "start training for GPU device $DEVICE_ID"
+env > env.log
+python train.py --device_target="GPU" --net=$NET --dataset=$DATASET --dataset_path=$DATASET_PATH > log 2>&1 &
+cd ..
diff --git a/model_zoo/research/cv/SE-Net/src/CrossEntropySmooth.py b/model_zoo/research/cv/SE-Net/src/CrossEntropySmooth.py
index 6d63b666946..8400b5413bf 100644
--- a/model_zoo/research/cv/SE-Net/src/CrossEntropySmooth.py
+++ b/model_zoo/research/cv/SE-Net/src/CrossEntropySmooth.py
@@ -16,12 +16,12 @@
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
-from mindspore.nn.loss.loss import _Loss
+from mindspore.nn.loss.loss import LossBase
 from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 
 
-class CrossEntropySmooth(_Loss):
+class CrossEntropySmooth(LossBase):
     """CrossEntropy"""
     def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
         super(CrossEntropySmooth, self).__init__()
diff --git a/model_zoo/research/cv/SE-Net/src/config.py b/model_zoo/research/cv/SE-Net/src/config.py
index 28434a643b0..4817b51b219 100644
--- a/model_zoo/research/cv/SE-Net/src/config.py
+++ b/model_zoo/research/cv/SE-Net/src/config.py
@@ -27,7 +27,7 @@ config2 = ed({
     "pretrain_epoch_size": 0,
     "save_checkpoint": True,
     "save_checkpoint_epochs": 5,
-    "keep_checkpoint_max": 10,
+    "keep_checkpoint_max": 90,
     "save_checkpoint_path": "./",
     "warmup_epochs": 0,
     "lr_decay_mode": "linear",
diff --git a/model_zoo/research/cv/SE-Net/src/resnet.py b/model_zoo/research/cv/SE-Net/src/resnet.py
index bf14d2d5e49..7d7e80d6f42 100644
--- a/model_zoo/research/cv/SE-Net/src/resnet.py
+++ b/model_zoo/research/cv/SE-Net/src/resnet.py
@@ -116,7 +116,7 @@ class Se_ResidualBlock(nn.Cell):
         if self.down_sample:
             self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                         _bn(out_channel)])#use_se=self.use_se
-        self.add = P.TensorAdd()
+        self.add = P.Add()
         self.se = SELayer(out_channel, reduction)
 
     def construct(self, x):
diff --git a/model_zoo/research/cv/SE-Net/train.py b/model_zoo/research/cv/SE-Net/train.py
index 1b1613e9d39..eaaca3d4a65 100644
--- a/model_zoo/research/cv/SE-Net/train.py
+++ b/model_zoo/research/cv/SE-Net/train.py
@@ -24,7 +24,7 @@ from mindspore.context import ParallelMode
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init
+from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.common import set_seed
 from mindspore.parallel import set_algo_parameters
 import mindspore.nn as nn
@@ -67,12 +67,19 @@ if __name__ == '__main__':
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                           gradients_mean=True)
         set_algo_parameters(elementwise_op_strategy_follow=True)
-        if args_opt.net == "se-resnet50":
-            context.set_auto_parallel_context(all_reduce_fusion_config=[85, 160])
-        else:
-            context.set_auto_parallel_context(all_reduce_fusion_config=[180, 313])
         init()
-
+    elif target == "GPU":
+        init('nccl')
+        context.reset_auto_parallel_context()
+        rank = get_rank()
+        device_num = get_group_size()
+        context.set_auto_parallel_context(device_num=device_num,
+                                          parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          gradients_mean=True)
+        if args_opt.net == "se-resnet50":
+            context.set_auto_parallel_context(all_reduce_fusion_config=[85, 160])
+        else:
+            context.set_auto_parallel_context(all_reduce_fusion_config=[180, 313])
     # create dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, repeat_num=1,
@@ -120,7 +127,7 @@ if __name__ == '__main__':
                     {'order_params': net.trainable_params()}]
     opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale)
     # define loss, model
-    if target == "Ascend":
+    if target in ["Ascend", "GPU"]:
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
@@ -137,6 +144,8 @@ if __name__ == '__main__':
     if config.save_checkpoint:
         config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                      keep_checkpoint_max=config.keep_checkpoint_max)
+        if target == "GPU" and args_opt.run_distribute:
+            ckpt_save_dir = os.path.join(config.save_checkpoint_path, "ckpt_" + str(rank) + "/")
         ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
         cb += [ckpt_cb]
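
For reviewers who want to exercise the change end to end, here is a minimal usage sketch built only from the scripts and settings added above. The dataset paths are placeholders, the patch file name is illustrative, and the checkpoint file name `resnet-90_5004.ckpt` is an assumption derived from the `prefix="resnet"`, 90 epochs, and 5004 steps per epoch configured in this patch.

```bash
# Apply the patch and enter the scripts directory (patch file name is illustrative).
git am 0001-Added-GPU-support-for-SE-Net.patch
cd model_zoo/research/cv/SE-Net/scripts

# 8-GPU distributed training: mpirun spawns 8 train.py processes; the log is
# written to ./train_parallel/log and, per the train.py change above, each
# rank saves its checkpoints to ./train_parallel/ckpt_<rank>/.
bash run_distribute_train_gpu.sh se-resnet50 imagenet2012 /path/to/imagenet/train/

# Single-GPU training writes its log to ./train/log instead.
bash run_standalone_train_gpu.sh se-resnet50 imagenet2012 /path/to/imagenet/train/

# Evaluation; the checkpoint name is an assumed example (prefix "resnet",
# epoch 90, 5004 steps per epoch).
bash run_eval_gpu.sh /path/to/imagenet/val/ ./train_parallel/ckpt_0/resnet-90_5004.ckpt
```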