!5499 Support manual conversion of ResNet to a quantized network

Merge pull request !5499 from chenfei_mindspore/r0.7
2020-09-01 14:31:50 +08:00 · 2020-09-01 14:31:50 +08:00 · 26aba3b74c
parent 5120720bac 298393b66b
commit 26aba3b74c
6 changed files with 338 additions and 8 deletions
--- a/mindspore/train/quant/quant_utils.py
+++ b/mindspore/train/quant/quant_utils.py
@ -252,13 +252,14 @@ def without_fold_batchnorm(weight, cell_quant):
    return weight, bias


-def load_nonquant_param_into_quant_net(quant_model, params_dict):
+def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
    """
    load fp32 model parameters to quantization model.

    Args:
-        quant_model: quantization model
-        params_dict: f32 param
+        quant_model: quantization model.
+        params_dict: f32 param.
+        quant_new_params: parameters that exist in the quantized network but not in the non-quantized network.

    Returns:
        None
@ -277,6 +278,8 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict):
    for name, param in quant_model.parameters_and_names():
        key_name = name.split(".")[-1]
        if key_name not in iterable_dict.keys():
+            if quant_new_params is not None and key_name in quant_new_params:
+                continue
            raise ValueError(f"Can't find match parameter in ckpt,param name = {name}")
        value_param = next(iterable_dict[key_name], None)
        if value_param is not None:
--- a/model_zoo/official/cv/mobilenetv2_quant/Readme.md
+++ b/model_zoo/official/cv/mobilenetv2_quant/Readme.md
@ -91,7 +91,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil

 You can start training using python or shell scripts. The usage of shell scripts as follows:

- Ascend: sh run_train_quant.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE] [DATASET_PATH] [CKPT_PATH]
+- Ascend: sh run_train_quant.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]

 ### Launch

--- a/model_zoo/official/cv/resnet50_quant/eval.py
+++ b/model_zoo/official/cv/resnet50_quant/eval.py
@ -20,7 +20,8 @@ import argparse
 from src.config import config_quant
 from src.dataset import create_dataset
 from src.crossentropy import CrossEntropy
-from models.resnet_quant import resnet50_quant
+#from models.resnet_quant import resnet50_quant #auto construct quantative network of resnet50
+from models.resnet_quant_manual import resnet50_quant #manually construct quantative network of resnet50

 from mindspore import context
 from mindspore.train.model import Model
--- a/model_zoo/official/cv/resnet50_quant/models/resnet_quant.py
+++ b/model_zoo/official/cv/resnet50_quant/models/resnet_quant.py
@ -209,7 +209,7 @@ class ResNet(nn.Cell):
        return out


-def resnet50_quant(class_num=10001):
+def resnet50_quant(class_num=10):
    """
    Get ResNet50 neural network.

--- a/model_zoo/official/cv/resnet50_quant/models/resnet_quant_manual.py
+++ b/model_zoo/official/cv/resnet50_quant/models/resnet_quant_manual.py
@ -0,0 +1,325 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ResNet."""
+import numpy as np
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore import Tensor
+from mindspore.nn import FakeQuantWithMinMax, Conv2dBnFoldQuant as Conv2dBatchNormQuant
+
+_ema_decay = 0.999
+_symmetric = True
+_fake = True
+_per_channel = True
+
+
+def _weight_variable(shape, factor=0.01):
+    init_value = np.random.randn(*shape).astype(np.float32) * factor
+    return Tensor(init_value)
+
+
+def _conv3x3(in_channel, out_channel, stride=1):
+    weight_shape = (out_channel, in_channel, 3, 3)
+    weight = _weight_variable(weight_shape)
+    return nn.Conv2d(in_channel, out_channel,
+                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+
+
+def _conv1x1(in_channel, out_channel, stride=1):
+    weight_shape = (out_channel, in_channel, 1, 1)
+    weight = _weight_variable(weight_shape)
+    return nn.Conv2d(in_channel, out_channel,
+                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+
+
+def _conv7x7(in_channel, out_channel, stride=1):
+    weight_shape = (out_channel, in_channel, 7, 7)
+    weight = _weight_variable(weight_shape)
+    return nn.Conv2d(in_channel, out_channel,
+                     kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+
+
+def _bn(channel):
+    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
+                          gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
+
+
+def _bn_last(channel):
+    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
+                          gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)
+
+
+def _fc(in_channel, out_channel):
+    weight_shape = (out_channel, in_channel)
+    weight = _weight_variable(weight_shape)
+    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
+
+
+class ConvBNReLU(nn.Cell):
+    """
+    Convolution/Depthwise fused with Batchnorm and ReLU block definition.
+
+    Args:
+        in_planes (int): Input channel.
+        out_planes (int): Output channel.
+        kernel_size (int): Input kernel size.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+        groups (int): Channel group. Use 1 for a regular convolution and the input channel count for depthwise. Default: 1.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
+    """
+
+    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+        super(ConvBNReLU, self).__init__()
+        padding = (kernel_size - 1) // 2
+        conv = Conv2dBatchNormQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
+                                    group=groups, fake=_fake, per_channel=_per_channel, symmetric=_symmetric)
+        layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()]
+        self.features = nn.SequentialCell(layers)
+
+    def construct(self, x):
+        output = self.features(x)
+        return output
+
+
+class ResidualBlock(nn.Cell):
+    """
+    ResNet V1 residual block definition.
+
+    Args:
+        in_channel (int): Input channel.
+        out_channel (int): Output channel.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ResidualBlock(3, 256, stride=2)
+    """
+    expansion = 4
+
+    def __init__(self,
+                 in_channel,
+                 out_channel,
+                 stride=1):
+        super(ResidualBlock, self).__init__()
+
+        channel = out_channel // self.expansion
+        self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1)
+        self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride)
+        self.conv3 = nn.SequentialCell([Conv2dBatchNormQuant(channel, out_channel, fake=_fake, per_channel=_per_channel,
+                                                             symmetric=_symmetric,
+                                                             kernel_size=1, stride=1, pad_mode='same', padding=0),
+                                        FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay, symmetric=False)
+                                        ]) if _fake else Conv2dBatchNormQuant(channel, out_channel, fake=_fake,
+                                                                              per_channel=_per_channel,
+                                                                              symmetric=_symmetric,
+                                                                              kernel_size=1, stride=1,
+                                                                              pad_mode='same', padding=0)
+
+        self.down_sample = False
+
+        if stride != 1 or in_channel != out_channel:
+            self.down_sample = True
+        self.down_sample_layer = None
+
+        if self.down_sample:
+            self.down_sample_layer = nn.SequentialCell([Conv2dBatchNormQuant(in_channel, out_channel,
+                                                                             per_channel=_per_channel,
+                                                                             symmetric=_symmetric,
+                                                                             kernel_size=1, stride=stride,
+                                                                             pad_mode='same', padding=0),
+                                                        FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay,
+                                                                            symmetric=False)
+                                                        ]) if _fake else Conv2dBatchNormQuant(in_channel, out_channel,
+                                                                                              fake=_fake,
+                                                                                              per_channel=_per_channel,
+                                                                                              symmetric=_symmetric,
+                                                                                              kernel_size=1,
+                                                                                              stride=stride,
+                                                                                              pad_mode='same',
+                                                                                              padding=0)
+        self.add = nn.TensorAddQuant()
+        self.relu = P.ReLU()
+
+    def construct(self, x):
+        identity = x
+        out = self.conv1(x)
+        out = self.conv2(out)
+        out = self.conv3(out)
+
+        if self.down_sample:
+            identity = self.down_sample_layer(identity)
+
+        out = self.add(out, identity)
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Cell):
+    """
+    ResNet architecture.
+
+    Args:
+        block (Cell): Block for network.
+        layer_nums (list): Number of blocks in each layer.
+        in_channels (list): Input channel in each layer.
+        out_channels (list): Output channel in each layer.
+        strides (list):  Stride size in each layer.
+        num_classes (int): The number of classes that the training images belong to.
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ResNet(ResidualBlock,
+        >>>        [3, 4, 6, 3],
+        >>>        [64, 256, 512, 1024],
+        >>>        [256, 512, 1024, 2048],
+        >>>        [1, 2, 2, 2],
+        >>>        10)
+    """
+
+    def __init__(self,
+                 block,
+                 layer_nums,
+                 in_channels,
+                 out_channels,
+                 strides,
+                 num_classes):
+        super(ResNet, self).__init__()
+
+        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
+            raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
+
+        self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block,
+                                       layer_nums[0],
+                                       in_channel=in_channels[0],
+                                       out_channel=out_channels[0],
+                                       stride=strides[0])
+        self.layer2 = self._make_layer(block,
+                                       layer_nums[1],
+                                       in_channel=in_channels[1],
+                                       out_channel=out_channels[1],
+                                       stride=strides[1])
+        self.layer3 = self._make_layer(block,
+                                       layer_nums[2],
+                                       in_channel=in_channels[2],
+                                       out_channel=out_channels[2],
+                                       stride=strides[2])
+        self.layer4 = self._make_layer(block,
+                                       layer_nums[3],
+                                       in_channel=in_channels[3],
+                                       out_channel=out_channels[3],
+                                       stride=strides[3])
+
+        self.mean = P.ReduceMean(keep_dims=True)
+        self.flatten = nn.Flatten()
+        self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, per_channel=_per_channel,
+                                       symmetric=_symmetric)
+        self.output_fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)
+
+    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
+        """
+        Make stage network of ResNet.
+
+        Args:
+            block (Cell): Resnet block.
+            layer_num (int): Layer number.
+            in_channel (int): Input channel.
+            out_channel (int): Output channel.
+            stride (int): Stride size for the first convolutional layer.
+
+        Returns:
+            SequentialCell, the output layer.
+
+        Examples:
+            >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
+        """
+        layers = []
+
+        resnet_block = block(in_channel, out_channel, stride=stride)
+        layers.append(resnet_block)
+
+        for _ in range(1, layer_num):
+            resnet_block = block(out_channel, out_channel, stride=1)
+            layers.append(resnet_block)
+
+        return nn.SequentialCell(layers)
+
+    def construct(self, x):
+        x = self.conv1(x)
+        c1 = self.maxpool(x)
+
+        c2 = self.layer1(c1)
+        c3 = self.layer2(c2)
+        c4 = self.layer3(c3)
+        c5 = self.layer4(c4)
+
+        out = self.mean(c5, (2, 3))
+        out = self.flatten(out)
+        out = self.end_point(out)
+        out = self.output_fake(out)
+        return out
+
+
+def resnet50_quant(class_num=10):
+    """
+    Get ResNet50 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet50 neural network.
+
+    Examples:
+        >>> net = resnet50_quant(10)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 4, 6, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
+
+
+def resnet101_quant(class_num=1001):
+    """
+    Get ResNet101 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet101 neural network.
+
+    Examples:
+        >>> net = resnet101_quant(1001)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 4, 23, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
--- a/model_zoo/official/cv/resnet50_quant/train.py
+++ b/model_zoo/official/cv/resnet50_quant/train.py
@ -31,7 +31,8 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init

-from models.resnet_quant import resnet50_quant
+#from models.resnet_quant import resnet50_quant #auto construct quantative network of resnet50
+from models.resnet_quant_manual import resnet50_quant #manually construct quantative network of resnet50
 from src.dataset import create_dataset
 from src.lr_generator import get_lr
 from src.config import config_quant
@ -85,7 +86,7 @@ if __name__ == '__main__':
    # weight init and load checkpoint file
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
-        load_nonquant_param_into_quant_net(net, param_dict)
+        load_nonquant_param_into_quant_net(net, param_dict, ['step'])
        epoch_size = config.epoch_size - config.pretrained_epoch_size
    else:
        for _, cell in net.cells_and_names():