test thor r1.6

cmy_melody 2022-02-15 11:29:33 +08:00
parent 96b030a1c0
commit b0a0fb68a6
6 changed files with 453 additions and 185 deletions

View File

@ -35,5 +35,8 @@ config = ed({
"label_smooth_factor": 0.1,
"frequency": 834,
"eval_interval": 1,
"eval_batch_size": 32
"eval_batch_size": 32,
"train_image_size": 224,
"eval_image_size": 224,
"device_target": "Ascend"
})

View File

@ -12,72 +12,172 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""create train or eval dataset."""
import os
import mindspore.common.dtype as mstype
"""
create train or eval dataset.
"""
import multiprocessing
import mindspore as ms
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.communication.management import init, get_rank, get_group_size
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
def create_dataset1(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
"""
create a train or eval dataset.
create a train or eval cifar10 dataset for resnet50
Args:
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
batch_size(int): the batch size of dataset. Default: 32
train_image_size(int): the image size used for training. Default: 224
eval_image_size(int): the image size used for evaluation. Default: 224
target(str): the device target. Default: Ascend
distribute(bool): whether the dataset is sharded for distributed training. Default: False
enable_cache(bool): whether tensor caching service is used for eval. Default: False
cache_session_id(int): if enable_cache is set, a cache session_id needs to be provided. Default: None
Returns:
dataset
"""
device_num, rank_id = _get_rank_info(distribute)
ds.config.set_prefetch_size(64)
if device_num == 1:
data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
else:
data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
num_shards=device_num, shard_id=rank_id)
# define map operations
trans = []
if do_train:
trans += [
ds.vision.c_transforms.RandomCrop((32, 32), (4, 4, 4, 4)),
ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
]
trans += [
ds.vision.c_transforms.Resize((train_image_size, train_image_size)),
ds.vision.c_transforms.Rescale(1.0 / 255.0, 0.0),
ds.vision.c_transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
ds.vision.c_transforms.HWC2CHW()
]
type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
data_set = data_set.map(operations=type_cast_op, input_columns="label",
num_parallel_workers=get_num_parallel_workers(8))
# only enable cache for eval
if do_train:
enable_cache = False
if enable_cache:
if not cache_session_id:
raise ValueError("A cache session_id must be provided to use cache.")
eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
data_set = data_set.map(operations=trans, input_columns="image",
num_parallel_workers=get_num_parallel_workers(8), cache=eval_cache)
else:
data_set = data_set.map(operations=trans, input_columns="image",
num_parallel_workers=get_num_parallel_workers(8))
# apply batch operations
data_set = data_set.batch(batch_size, drop_remainder=True)
return data_set
def create_dataset2(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
"""
create a train or eval imagenet2012 dataset for resnet50
Args:
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
batch_size(int): the batch size of dataset. Default: 32
train_image_size(int): the image size used for training. Default: 224
eval_image_size(int): the image size used for evaluation. Default: 224
target(str): the device target. Default: Ascend
distribute(bool): whether the dataset is sharded for distributed training. Default: False
enable_cache(bool): whether tensor caching service is used for eval. Default: False
cache_session_id(int): if enable_cache is set, a cache session_id needs to be provided. Default: None
Returns:
dataset
"""
device_num, rank_id = _get_rank_info(distribute)
device_num = int(os.getenv("RANK_SIZE"))
rank_id = int(os.getenv("RANK_ID"))
if do_train:
if device_num == 1:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=16, shuffle=True)
else:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
num_shards=device_num, shard_id=rank_id)
ds.config.set_prefetch_size(64)
if device_num == 1:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
else:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=False,
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
num_shards=device_num, shard_id=rank_id)
image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
# define map operations
if do_train:
trans = [
C.Decode(),
C.Resize((256, 256)),
C.CenterCrop(image_size),
C.Normalize(mean=mean, std=std),
C.HWC2CHW()
ds.vision.c_transforms.RandomCropDecodeResize(train_image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
]
else:
trans = [
C.Decode(),
C.Resize((256, 256)),
C.CenterCrop(image_size),
C.Normalize(mean=mean, std=std),
C.HWC2CHW()
ds.vision.c_transforms.Decode(),
ds.vision.c_transforms.Resize(256),
ds.vision.c_transforms.CenterCrop(eval_image_size)
]
trans_norm = [ds.vision.c_transforms.Normalize(mean=mean, std=std), ds.vision.c_transforms.HWC2CHW()]
type_cast_op = C2.TypeCast(mstype.int32)
type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=24)
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=get_num_parallel_workers(12))
data_set = data_set.map(operations=trans_norm, input_columns="image",
num_parallel_workers=get_num_parallel_workers(12))
# only enable cache for eval
if do_train:
enable_cache = False
if enable_cache:
if not cache_session_id:
raise ValueError("A cache session_id must be provided to use cache.")
eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
data_set = data_set.map(operations=type_cast_op, input_columns="label",
num_parallel_workers=get_num_parallel_workers(12),
cache=eval_cache)
else:
data_set = data_set.map(operations=type_cast_op, input_columns="label",
num_parallel_workers=get_num_parallel_workers(12))
# apply batch operations
data_set = data_set.batch(batch_size, drop_remainder=True)
# apply dataset repeat operation
data_set = data_set.repeat(repeat_num)
return data_set
def _get_rank_info(distribute):
"""
get rank size and rank id
"""
if distribute:
init()
rank_id = get_rank()
device_num = get_group_size()
else:
rank_id = 0
device_num = 1
return device_num, rank_id
def get_num_parallel_workers(num_parallel_workers):
"""
Get num_parallel_workers used in dataset operations.
If num_parallel_workers > the real CPU cores number, set num_parallel_workers = the real CPU cores number.
"""
cores = multiprocessing.cpu_count()
if isinstance(num_parallel_workers, int):
if cores < num_parallel_workers:
print("The num_parallel_workers {} is set too large, now set it {}".format(num_parallel_workers, cores))
num_parallel_workers = cores
else:
print("The num_parallel_workers {} is invalid, now set it {}".format(num_parallel_workers, min(cores, 8)))
num_parallel_workers = min(cores, 8)
return num_parallel_workers

View File

@ -574,7 +574,7 @@ class Model:
>>> model.train(2, dataset)
"""
repeat_count = train_dataset.get_repeat_count()
if epoch != repeat_count and dataset_sink_mode is True:
if epoch != repeat_count and dataset_sink_mode:
logger.warning(f"The epoch_size {epoch} is not the same with dataset repeat_count {repeat_count}")
dataset_sink_mode = Validator.check_bool(dataset_sink_mode)
_device_number_check(self._parallel_mode, self._device_number)

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -13,61 +13,149 @@
# limitations under the License.
# ============================================================================
"""ResNet."""
import math
import numpy as np
from scipy.stats import truncnorm
import mindspore.nn as nn
import mindspore.ops as ops
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.tensor import Tensor
from src.model_utils.config import config
def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size):
def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size):
fan_in = in_channel * kernel_size * kernel_size
scale = 1.0
scale /= max(1., fan_in)
stddev = (scale ** 0.5) / .87962566103423978
if config.net_name == "resnet152":
stddev = (scale ** 0.5)
mu, sigma = 0, stddev
weight = truncnorm(-2, 2, loc=mu, scale=sigma).rvs(out_channel * in_channel * kernel_size * kernel_size)
weight = np.reshape(weight, (out_channel, in_channel, kernel_size, kernel_size))
return Tensor(weight, dtype=mstype.float32)
def _weight_variable(shape, factor=0.01):
init_value = np.random.randn(*shape).astype(np.float32) * factor
return Tensor(init_value)
def _conv3x3(in_channel, out_channel, stride=1, use_se=False):
def calculate_gain(nonlinearity, param=None):
"""calculate_gain"""
linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
res = 0
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
res = 1
elif nonlinearity == 'tanh':
res = 5.0 / 3
elif nonlinearity == 'relu':
res = math.sqrt(2.0)
elif nonlinearity == 'leaky_relu':
if param is None:
neg_slope = 0.01
elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
neg_slope = param
else:
raise ValueError("neg_slope {} not a valid number".format(param))
res = math.sqrt(2.0 / (1 + neg_slope ** 2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
return res
def _calculate_fan_in_and_fan_out(tensor):
"""_calculate_fan_in_and_fan_out"""
dimensions = len(tensor)
if dimensions < 2:
raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
if dimensions == 2: # Linear
fan_in = tensor[1]
fan_out = tensor[0]
else:
num_input_fmaps = tensor[1]
num_output_fmaps = tensor[0]
receptive_field_size = 1
if dimensions > 2:
receptive_field_size = tensor[2] * tensor[3]
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Unsupported mode {}, please use one of {}".format(mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
return np.random.normal(0, std, size=inputs_shape).astype(np.float32)
def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'):
fan = _calculate_correct_fan(inputs_shape, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32)
def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False):
if use_se:
weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
else:
weight_shape = (out_channel, in_channel, 3, 3)
weight = _weight_variable(weight_shape)
return nn.Conv2d(in_channel, out_channel,
kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
if config.net_name == "resnet152":
weight = _weight_variable(weight_shape)
if res_base:
return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
padding=1, pad_mode='pad', weight_init=weight)
return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
padding=0, pad_mode='same', weight_init=weight)
def _conv1x1(in_channel, out_channel, stride=1, use_se=False):
def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False):
if use_se:
weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
else:
weight_shape = (out_channel, in_channel, 1, 1)
weight = _weight_variable(weight_shape)
return nn.Conv2d(in_channel, out_channel,
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
if config.net_name == "resnet152":
weight = _weight_variable(weight_shape)
if res_base:
return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
padding=0, pad_mode='pad', weight_init=weight)
return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
padding=0, pad_mode='same', weight_init=weight)
def _conv7x7(in_channel, out_channel, stride=1, use_se=False):
def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False):
if use_se:
weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
else:
weight_shape = (out_channel, in_channel, 7, 7)
weight = _weight_variable(weight_shape)
weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
if config.net_name == "resnet152":
weight = _weight_variable(weight_shape)
if res_base:
return nn.Conv2d(in_channel, out_channel,
kernel_size=7, stride=stride, padding=3, pad_mode='pad', weight_init=weight)
return nn.Conv2d(in_channel, out_channel,
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
def _bn(channel):
def _bn(channel, res_base=False):
if res_base:
return nn.BatchNorm2d(channel, eps=1e-5, momentum=0.1,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
@ -79,11 +167,13 @@ def _bn_last(channel):
def _fc(in_channel, out_channel, use_se=False):
if use_se:
weight = np.random.normal(loc=0, scale=0.01, size=out_channel*in_channel)
weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel)
weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=mstype.float32)
else:
weight_shape = (out_channel, in_channel)
weight = _weight_variable(weight_shape)
weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
if config.net_name == "resnet152":
weight = _weight_variable(weight_shape)
return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
@ -95,8 +185,8 @@ class ResidualBlock(nn.Cell):
in_channel (int): Input channel.
out_channel (int): Output channel.
stride (int): Stride size for the first convolutional layer. Default: 1.
use_se (bool): enable SE-ResNet50 net. Default: False.
se_block(bool): use se block in SE-ResNet50 net. Default: False.
use_se (bool): Enable SE-ResNet50 net. Default: False.
se_block(bool): Use se block in SE-ResNet50 net. Default: False.
Returns:
Tensor, output tensor.
@ -126,13 +216,15 @@ class ResidualBlock(nn.Cell):
self.bn2 = _bn(channel)
self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se)
self.bn3 = _bn_last(out_channel)
self.bn3 = _bn(out_channel)
if config.optimizer == "Thor" or config.net_name == "resnet152":
self.bn3 = _bn_last(out_channel)
if self.se_block:
self.se_global_pool = P.ReduceMean(keep_dims=False)
self.se_dense_0 = _fc(out_channel, int(out_channel/4), use_se=self.use_se)
self.se_dense_1 = _fc(int(out_channel/4), out_channel, use_se=self.use_se)
self.se_global_pool = ops.ReduceMean(keep_dims=False)
self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se)
self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se)
self.se_sigmoid = nn.Sigmoid()
self.se_mul = P.Mul()
self.se_mul = ops.Mul()
self.relu = nn.ReLU()
self.down_sample = False
@ -153,7 +245,6 @@ class ResidualBlock(nn.Cell):
else:
self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
use_se=self.use_se), _bn(out_channel)])
self.add = P.Add()
def construct(self, x):
identity = x
@ -176,13 +267,76 @@ class ResidualBlock(nn.Cell):
out = self.relu(out)
out = self.se_dense_1(out)
out = self.se_sigmoid(out)
out = F.reshape(out, F.shape(out) + (1, 1))
out = ops.reshape(out, ops.shape(out) + (1, 1))
out = self.se_mul(out, out_se)
if self.down_sample:
identity = self.down_sample_layer(identity)
out = self.add(out, identity)
out = out + identity
out = self.relu(out)
return out
class ResidualBlockBase(nn.Cell):
"""
ResNet V1 residual block definition.
Args:
in_channel (int): Input channel.
out_channel (int): Output channel.
stride (int): Stride size for the first convolutional layer. Default: 1.
use_se (bool): Enable SE-ResNet50 net. Default: False.
se_block(bool): Use se block in SE-ResNet50 net. Default: False.
res_base (bool): Enable parameter setting of resnet18. Default: True.
Returns:
Tensor, output tensor.
Examples:
>>> ResidualBlockBase(3, 256, stride=2)
"""
def __init__(self,
in_channel,
out_channel,
stride=1,
use_se=False,
se_block=False,
res_base=True):
super(ResidualBlockBase, self).__init__()
self.res_base = res_base
self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base)
self.bn1d = _bn(out_channel)
self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base)
self.bn2d = _bn(out_channel)
self.relu = nn.ReLU()
self.down_sample = False
if stride != 1 or in_channel != out_channel:
self.down_sample = True
self.down_sample_layer = None
if self.down_sample:
self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
use_se=use_se, res_base=self.res_base),
_bn(out_channel, res_base)])
def construct(self, x):
identity = x
out = self.conv1(x)
out = self.bn1d(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2d(out)
if self.down_sample:
identity = self.down_sample_layer(identity)
out = out + identity
out = self.relu(out)
return out
@ -199,8 +353,10 @@ class ResNet(nn.Cell):
out_channels (list): Output channel in each layer.
strides (list): Stride size in each layer.
num_classes (int): The number of classes that the training images are belonging to.
use_se (bool): enable SE-ResNet50 net. Default: False.
se_block(bool): use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
use_se (bool): Enable SE-ResNet50 net. Default: False.
se_block(bool): Use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
res_base (bool): Enable parameter setting of resnet18. Default: False.
Returns:
Tensor, output tensor.
@ -220,27 +376,26 @@ class ResNet(nn.Cell):
out_channels,
strides,
num_classes,
use_se=False):
use_se=False,
res_base=False):
super(ResNet, self).__init__()
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
self.use_se = use_se
self.res_base = res_base
self.se_block = False
if self.use_se:
self.se_block = True
if self.use_se:
self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se)
self.bn1_0 = _bn(32)
self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se)
self.bn1_1 = _bn(32)
self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se)
self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base)
self.bn1 = _bn(64, self.res_base)
self.relu = ops.ReLU()
if self.res_base:
self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)))
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
else:
self.conv1 = _conv7x7(3, 64, stride=2)
self.bn1 = _bn(64)
self.relu = P.ReLU()
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
self.layer1 = self._make_layer(block,
layer_nums[0],
in_channel=in_channels[0],
@ -268,7 +423,7 @@ class ResNet(nn.Cell):
use_se=self.use_se,
se_block=self.se_block)
self.mean = P.ReduceMean(keep_dims=True)
self.mean = ops.ReduceMean(keep_dims=True)
self.flatten = nn.Flatten()
self.end_point = _fc(out_channels[3], num_classes, use_se=self.use_se)
@ -282,7 +437,7 @@ class ResNet(nn.Cell):
in_channel (int): Input channel.
out_channel (int): Output channel.
stride (int): Stride size for the first convolutional layer.
se_block(bool): use se block in SE-ResNet50 net. Default: False.
se_block(bool): Use se block in SE-ResNet50 net. Default: False.
Returns:
SequentialCell, the output layer.
@ -318,6 +473,8 @@ class ResNet(nn.Cell):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
if self.res_base:
x = self.pad(x)
c1 = self.maxpool(x)
c2 = self.layer1(c1)
@ -332,6 +489,50 @@ class ResNet(nn.Cell):
return out
def resnet18(class_num=10):
"""
Get ResNet18 neural network.
Args:
class_num (int): Class number.
Returns:
Cell, cell instance of ResNet18 neural network.
Examples:
>>> net = resnet18(10)
"""
return ResNet(ResidualBlockBase,
[2, 2, 2, 2],
[64, 64, 128, 256],
[64, 128, 256, 512],
[1, 2, 2, 2],
class_num,
res_base=True)
def resnet34(class_num=10):
"""
Get ResNet34 neural network.
Args:
class_num (int): Class number.
Returns:
Cell, cell instance of ResNet34 neural network.
Examples:
>>> net = resnet34(10)
"""
return ResNet(ResidualBlockBase,
[3, 4, 6, 3],
[64, 64, 128, 256],
[64, 128, 256, 512],
[1, 2, 2, 2],
class_num,
res_base=True)
def resnet50(class_num=10):
"""
Get ResNet50 neural network.
@ -352,6 +553,7 @@ def resnet50(class_num=10):
[1, 2, 2, 2],
class_num)
def se_resnet50(class_num=1001):
"""
Get SE-ResNet50 neural network.
@ -373,6 +575,7 @@ def se_resnet50(class_num=1001):
class_num,
use_se=True)
def resnet101(class_num=1001):
"""
Get ResNet101 neural network.
@ -392,3 +595,24 @@ def resnet101(class_num=1001):
[256, 512, 1024, 2048],
[1, 2, 2, 2],
class_num)
def resnet152(class_num=1001):
"""
Get ResNet152 neural network.
Args:
class_num (int): Class number.
Returns:
Cell, cell instance of ResNet152 neural network.
Examples:
>>> net = resnet152(1001)
"""
return ResNet(ResidualBlock,
[3, 8, 36, 3],
[64, 256, 512, 1024],
[256, 512, 1024, 2048],
[1, 2, 2, 2],
class_num)

View File

@ -37,7 +37,6 @@ Embedding = 3
LayerNorm = 4
BatchNorm = 5
_momentum_opt = C.MultitypeFuncGraph("momentum_opt")
op_add = P.AddN()
@ -59,6 +58,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment):
success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
return success
C0 = 16
@ -122,11 +122,13 @@ def find_net_layertype_recur(net, layertype_map):
else:
find_net_layertype_recur(subcell, layertype_map)
def get_net_layertype_mask(net):
layertype_map = []
find_net_layertype_recur(net, layertype_map)
return layertype_map
def get_layer_counter(layer_type, layer_counter, params, idx):
"""get layer counter"""
if layer_type in [Conv, FC, LayerNorm, BatchNorm]:
@ -247,7 +249,6 @@ class THOR_Ascend(Optimizer):
self.grad_reducer_A = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=6)
self.grad_reducer_G = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=8)
def _process_matrix_init_and_weight_idx_map(self, net):
"""process matrix init shape, and get weight idx map"""
layer_type_map = get_net_layertype_mask(net)

View File

@ -26,18 +26,17 @@ from mindspore.common.tensor import Tensor
from mindspore.communication.management import init
from mindspore.context import ParallelMode
from mindspore.train.callback import Callback
from mindspore.train.model import Model
from mindspore.train.train_thor import ConvertModelUtils
from mindspore.train.loss_scale_manager import FixedLossScaleManager
from mindspore.nn.optim import thor
import mindspore.dataset as ds
from tests.st.networks.models.resnet50.src.dataset import create_dataset
from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset as create_dataset_thor
from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset2 as create_dataset_thor
from tests.st.networks.models.resnet50.src.resnet import resnet50
MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
@ -89,11 +88,12 @@ class LossGet(Callback):
self._per_print_times = per_print_times
self._loss = 0.0
self.data_size = data_size
self._epoch = 0
def step_end(self, run_context):
cb_params = run_context.original_args()
loss = cb_params.net_outputs
self._epoch = cb_params.cur_epoch_num
if isinstance(loss, (tuple, list)):
if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray):
loss = loss[0]
@ -106,8 +106,11 @@ class LossGet(Callback):
if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)):
raise ValueError("epoch: {} step: {}. Invalid loss, terminating training."
.format(cb_params.cur_epoch_num, cur_step_in_epoch))
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
self._loss = loss
print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num,
cur_step_in_epoch, loss), flush=True)
def epoch_begin(self, run_context):
self.epoch_time = time.time()
@ -122,6 +125,9 @@ class LossGet(Callback):
def get_per_step_time(self):
return self._per_step_mseconds
def get_epoch(self):
return self._epoch
def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
os.system("mkdir " + str(device_id))
@ -137,7 +143,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
init()
# network
net = resnet50_thor(thor_config.class_num)
net = resnet50(thor_config.class_num)
if not thor_config.label_smooth:
thor_config.label_smooth_factor = 0.0
@ -148,14 +154,10 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
# train dataset
dataset = create_dataset_thor(dataset_path=dataset_path, do_train=True,
repeat_num=1, batch_size=thor_config.batch_size)
batch_size=thor_config.batch_size, train_image_size=thor_config.train_image_size,
eval_image_size=thor_config.eval_image_size, target="Ascend",
distribute=True)
step_size = dataset.get_dataset_size()
eval_interval = thor_config.eval_interval
# evaluation dataset
eval_dataset = create_dataset(dataset_path=eval_path, do_train=False,
repeat_num=1, batch_size=thor_config.eval_batch_size)
# loss scale
loss_scale = FixedLossScaleManager(thor_config.loss_scale, drop_overflow_update=False)
@ -171,90 +173,30 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
# evaluation network
dist_eval_network = ClassifyCorrectCell(net)
# model
model = THOR_Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, amp_level="O2",
keep_batchnorm_fp32=False,
metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)},
eval_network=dist_eval_network, frequency=thor_config.frequency)
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)},
amp_level="O2", keep_batchnorm_fp32=False,
eval_network=dist_eval_network)
# model init
print("init_start", device_id)
model.init(dataset, eval_dataset)
print("init_stop", device_id)
model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt,
loss_scale_manager=loss_scale, metrics={'acc'},
amp_level="O2", keep_batchnorm_fp32=False)
# callbacks
loss_cb = LossGet(1, step_size)
# train and eval
acc = 0.0
time_cost = 0.0
print("run_start", device_id)
for epoch_idx in range(0, int(epoch_size / eval_interval)):
model.train(eval_interval, dataset, callbacks=loss_cb)
eval_start = time.time()
output = model.eval(eval_dataset)
eval_cost = (time.time() - eval_start) * 1000
acc = float(output["acc"])
time_cost = loss_cb.get_per_step_time()
loss = loss_cb.get_loss()
print("the {} epoch's resnet result:\n "
"device{}, training loss {}, acc {}, "
"training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms".format(
epoch_idx, device_id, loss, acc, time_cost, eval_cost, time_cost * step_size + eval_cost))
q.put({'acc': acc, 'cost': time_cost})
def test_resnet_thor_imagenet_8p_0():
"""
Feature: Resnet50 thor network
Description: Train and evaluate resnet50 thor network on imagenet dataset
Expectation: accuracy > 0.28, time cost < 25.
"""
context.set_context(enable_graph_kernel=False, enable_sparse=False)
context.reset_auto_parallel_context()
context.reset_ps_context()
q = Queue()
# resnet50_thor
device_num = 8
epoch_size = 1
enable_hccl = True
process = []
for i in range(device_num):
device_id = i
process.append(Process(target=train_process_thor,
args=(q, device_id, epoch_size, device_num, enable_hccl)))
cpu_count = os.cpu_count()
each_cpu_count = cpu_count // device_num
for i in range(device_num):
process[i].start()
if each_cpu_count > 1:
cpu_start = each_cpu_count * i
cpu_end = each_cpu_count * (i + 1)
process_cpu = [x for x in range(cpu_start, cpu_end)]
pid = process[i].pid
os.sched_setaffinity(pid, set(process_cpu))
print("Waiting for all subprocesses done...")
for i in range(device_num):
process[i].join()
# THOR
thor_acc = 0.0
thor_cost = 0.0
for i in range(device_num):
output = q.get()
thor_acc += output['acc']
thor_cost += output['cost']
thor_acc = thor_acc / device_num
thor_cost = thor_cost / device_num
for i in range(0, device_num):
os.system("rm -rf " + str(i))
print("End training...")
assert thor_acc > 0.25
model.train(2, dataset, callbacks=loss_cb,
sink_size=dataset.get_dataset_size(), dataset_sink_mode=True)
time_cost = loss_cb.get_per_step_time()
loss = loss_cb.get_loss()
epoch_idx = loss_cb.get_epoch()
print("the {} epoch's resnet result:\n "
"device{}, training loss {}, "
"training per step cost {:.2f} ms, total_cost {:.2f} ms".format(epoch_idx, device_id,
loss, time_cost, time_cost * step_size))
q.put({'loss': loss, 'cost': time_cost})
@pytest.mark.level1
@ -275,7 +217,7 @@ def test_resnet_thor_imagenet_8p_1():
# resnet50_thor
device_num = 8
epoch_size = 1
epoch_size = 2
enable_hccl = True
process = []
for i in range(device_num):
@ -300,19 +242,17 @@ def test_resnet_thor_imagenet_8p_1():
process[i].join()
# THOR
thor_acc = 0.0
thor_loss = 0.0
thor_cost = 0.0
for i in range(device_num):
output = q.get()
thor_acc += output['acc']
thor_loss += output['loss']
thor_cost += output['cost']
thor_acc = thor_acc / device_num
thor_loss = thor_loss / device_num
thor_cost = thor_cost / device_num
for i in range(0, device_num):
os.system("rm -rf " + str(i))
print("End training...")
print('thor acc: ', thor_acc)
print('thor cost: ', thor_cost)
#assert thor_acc > 0.25
#assert thor_cost < 30
assert thor_loss < 7
assert thor_cost < 30