diff --git a/tests/st/networks/models/resnet50/src_thor/config.py b/tests/st/networks/models/resnet50/src_thor/config.py
index 17aaa8a0c07..60ff57468ce 100644
--- a/tests/st/networks/models/resnet50/src_thor/config.py
+++ b/tests/st/networks/models/resnet50/src_thor/config.py
@@ -35,5 +35,8 @@ config = ed({
     "label_smooth_factor": 0.1,
     "frequency": 834,
     "eval_interval": 1,
-    "eval_batch_size": 32
+    "eval_batch_size": 32,
+    "train_image_size": 224,
+    "eval_image_size": 224,
+    "device_target": "Ascend"
 })
diff --git a/tests/st/networks/models/resnet50/src_thor/dataset.py b/tests/st/networks/models/resnet50/src_thor/dataset.py
index 21cd223c0ad..0f7c3adbfc1 100644
--- a/tests/st/networks/models/resnet50/src_thor/dataset.py
+++ b/tests/st/networks/models/resnet50/src_thor/dataset.py
@@ -12,72 +12,170 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-
-"""create train or eval dataset."""
-
-import os
-import mindspore.common.dtype as mstype
+"""
+create train or eval dataset.
+"""
+import multiprocessing
+import mindspore as ms
 import mindspore.dataset as ds
-import mindspore.dataset.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
+from mindspore.communication.management import init, get_rank, get_group_size
 
 
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
+def create_dataset1(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
+                    target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
     """
-    create a train or eval dataset.
+    create a train or eval cifar10 dataset for resnet50
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        batch_size(int): the batch size of dataset. Default: 32
+        target(str): the device target. Default: Ascend
+        distribute(bool): data for distribute or not. Default: False
+        enable_cache(bool): whether tensor caching service is used for eval. Default: False
+        cache_session_id(int): If enable_cache, cache session_id needs to be provided. Default: None
+
+    Returns:
+        dataset
+    """
+    device_num, rank_id = _get_rank_info(distribute)
+    ds.config.set_prefetch_size(64)
+    if device_num == 1:
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
+    else:
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
+                                     num_shards=device_num, shard_id=rank_id)
+
+    # define map operations
+    trans = []
+    if do_train:
+        trans += [
+            ds.vision.c_transforms.RandomCrop((32, 32), (4, 4, 4, 4)),
+            ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
+        ]
+
+    trans += [
+        ds.vision.c_transforms.Resize((train_image_size, train_image_size)),
+        ds.vision.c_transforms.Rescale(1.0 / 255.0, 0.0),
+        ds.vision.c_transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
+        ds.vision.c_transforms.HWC2CHW()
+    ]
+
+    type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
+
+    data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                            num_parallel_workers=get_num_parallel_workers(8))
+    # only enable cache for eval
+    if do_train:
+        enable_cache = False
+    if enable_cache:
+        if not cache_session_id:
+            raise ValueError("A cache session_id must be provided to use cache.")
+        eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
+        data_set = data_set.map(operations=trans, input_columns="image",
+                                num_parallel_workers=get_num_parallel_workers(8), cache=eval_cache)
+    else:
+        data_set = data_set.map(operations=trans, input_columns="image",
+                                num_parallel_workers=get_num_parallel_workers(8))
+
+    # apply batch operations
+    data_set = data_set.batch(batch_size, drop_remainder=True)
+
+    return data_set
+
+
+def create_dataset2(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
+                    target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
+    """
+    create a train or eval imagenet2012 dataset for resnet50
     Args:
         dataset_path(string): the path of dataset.
         do_train(bool): whether dataset is used for train or eval.
-        repeat_num(int): the repeat times of dataset. Default: 1
         batch_size(int): the batch size of dataset. Default: 32
+        target(str): the device target. Default: Ascend
+        distribute(bool): data for distribute or not. Default: False
+        enable_cache(bool): whether tensor caching service is used for eval. Default: False
+        cache_session_id(int): If enable_cache, cache session_id needs to be provided. Default: None
 
     Returns:
         dataset
     """
+    device_num, rank_id = _get_rank_info(distribute)
-    device_num = int(os.getenv("RANK_SIZE"))
-    rank_id = int(os.getenv("RANK_ID"))
-    if do_train:
-        if device_num == 1:
-            data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=16, shuffle=True)
-        else:
-            data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
-                                             num_shards=device_num, shard_id=rank_id)
+    ds.config.set_prefetch_size(64)
+    if device_num == 1:
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
     else:
-        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=False,
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
                                          num_shards=device_num, shard_id=rank_id)
-    image_size = 224
     mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
 
     # define map operations
     if do_train:
         trans = [
-            C.Decode(),
-            C.Resize((256, 256)),
-            C.CenterCrop(image_size),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
+            ds.vision.c_transforms.RandomCropDecodeResize(train_image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
+            ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
         ]
     else:
         trans = [
-            C.Decode(),
-            C.Resize((256, 256)),
-            C.CenterCrop(image_size),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
+            ds.vision.c_transforms.Decode(),
+            ds.vision.c_transforms.Resize(256),
+            ds.vision.c_transforms.CenterCrop(eval_image_size)
         ]
+    trans_norm = [ds.vision.c_transforms.Normalize(mean=mean, std=std), ds.vision.c_transforms.HWC2CHW()]
 
-    type_cast_op = C2.TypeCast(mstype.int32)
+    type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
 
-    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=24)
-    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
+    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=get_num_parallel_workers(12))
+    data_set = data_set.map(operations=trans_norm, input_columns="image",
+                            num_parallel_workers=get_num_parallel_workers(12))
+    # only enable cache for eval
+    if do_train:
+        enable_cache = False
+    if enable_cache:
+        if not cache_session_id:
+            raise ValueError("A cache session_id must be provided to use cache.")
+        eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
+        data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                                num_parallel_workers=get_num_parallel_workers(12),
+                                cache=eval_cache)
+    else:
+        data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                                num_parallel_workers=get_num_parallel_workers(12))
 
     # apply batch operations
     data_set = data_set.batch(batch_size, drop_remainder=True)
 
-    # apply dataset repeat operation
-    data_set = data_set.repeat(repeat_num)
     return data_set
+
+
+def _get_rank_info(distribute):
+    """
+    get rank size and rank id
+    """
+    if distribute:
+        init()
+        rank_id = get_rank()
+        device_num = get_group_size()
+    else:
+        rank_id = 0
+        device_num = 1
+    return device_num, rank_id
+
+
+def get_num_parallel_workers(num_parallel_workers):
+    """
+    Get num_parallel_workers used in dataset operations.
+    If num_parallel_workers > the real CPU cores number, set num_parallel_workers = the real CPU cores number.
+ """ + cores = multiprocessing.cpu_count() + if isinstance(num_parallel_workers, int): + if cores < num_parallel_workers: + print("The num_parallel_workers {} is set too large, now set it {}".format(num_parallel_workers, cores)) + num_parallel_workers = cores + else: + print("The num_parallel_workers {} is invalid, now set it {}".format(num_parallel_workers, min(cores, 8))) + num_parallel_workers = min(cores, 8) + return num_parallel_workers diff --git a/tests/st/networks/models/resnet50/src_thor/model_thor.py b/tests/st/networks/models/resnet50/src_thor/model_thor.py index 4dacc818e0f..ce84b2f8c43 100644 --- a/tests/st/networks/models/resnet50/src_thor/model_thor.py +++ b/tests/st/networks/models/resnet50/src_thor/model_thor.py @@ -574,7 +574,7 @@ class Model: >>> model.train(2, dataset) """ repeat_count = train_dataset.get_repeat_count() - if epoch != repeat_count and dataset_sink_mode is True: + if epoch != repeat_count and dataset_sink_mode: logger.warning(f"The epoch_size {epoch} is not the same with dataset repeat_count {repeat_count}") dataset_sink_mode = Validator.check_bool(dataset_sink_mode) _device_number_check(self._parallel_mode, self._device_number) diff --git a/tests/st/networks/models/resnet50/src_thor/resnet.py b/tests/st/networks/models/resnet50/src_thor/resnet.py index 94f65f461e1..64bab0ef82b 100644 --- a/tests/st/networks/models/resnet50/src_thor/resnet.py +++ b/tests/st/networks/models/resnet50/src_thor/resnet.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,61 +13,149 @@ # limitations under the License. # ============================================================================ """ResNet.""" +import math import numpy as np from scipy.stats import truncnorm import mindspore.nn as nn +import mindspore.ops as ops import mindspore.common.dtype as mstype -from mindspore.ops import operations as P -from mindspore.ops import functional as F from mindspore.common.tensor import Tensor +from src.model_utils.config import config -def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): +def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): fan_in = in_channel * kernel_size * kernel_size scale = 1.0 scale /= max(1., fan_in) stddev = (scale ** 0.5) / .87962566103423978 + if config.net_name == "resnet152": + stddev = (scale ** 0.5) mu, sigma = 0, stddev weight = truncnorm(-2, 2, loc=mu, scale=sigma).rvs(out_channel * in_channel * kernel_size * kernel_size) weight = np.reshape(weight, (out_channel, in_channel, kernel_size, kernel_size)) return Tensor(weight, dtype=mstype.float32) + def _weight_variable(shape, factor=0.01): init_value = np.random.randn(*shape).astype(np.float32) * factor return Tensor(init_value) -def _conv3x3(in_channel, out_channel, stride=1, use_se=False): +def calculate_gain(nonlinearity, param=None): + """calculate_gain""" + linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d'] + res = 0 + if nonlinearity in linear_fns or nonlinearity == 'sigmoid': + res = 1 + elif nonlinearity == 'tanh': + res = 5.0 / 3 + elif nonlinearity == 'relu': + res = math.sqrt(2.0) + elif nonlinearity == 'leaky_relu': + if param is None: + neg_slope = 0.01 + elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float): + neg_slope = param + else: 
+            raise ValueError("neg_slope {} not a valid number".format(param))
+        res = math.sqrt(2.0 / (1 + neg_slope ** 2))
+    else:
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+    return res
+
+
+def _calculate_fan_in_and_fan_out(tensor):
+    """_calculate_fan_in_and_fan_out"""
+    dimensions = len(tensor)
+    if dimensions < 2:
+        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
+    if dimensions == 2:  # Linear
+        fan_in = tensor[1]
+        fan_out = tensor[0]
+    else:
+        num_input_fmaps = tensor[1]
+        num_output_fmaps = tensor[0]
+        receptive_field_size = 1
+        if dimensions > 2:
+            receptive_field_size = tensor[2] * tensor[3]
+        fan_in = num_input_fmaps * receptive_field_size
+        fan_out = num_output_fmaps * receptive_field_size
+    return fan_in, fan_out
+
+
+def _calculate_correct_fan(tensor, mode):
+    mode = mode.lower()
+    valid_modes = ['fan_in', 'fan_out']
+    if mode not in valid_modes:
+        raise ValueError("Unsupported mode {}, please use one of {}".format(mode, valid_modes))
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
+    return fan_in if mode == 'fan_in' else fan_out
+
+
+def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    fan = _calculate_correct_fan(inputs_shape, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    return np.random.normal(0, std, size=inputs_shape).astype(np.float32)
+
+
+def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'):
+    fan = _calculate_correct_fan(inputs_shape, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
+    return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32)
+
+
+def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
     else:
         weight_shape = (out_channel, in_channel, 3, 3)
-        weight = _weight_variable(weight_shape)
-    return nn.Conv2d(in_channel, out_channel,
-                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
+                         padding=1, pad_mode='pad', weight_init=weight)
+    return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
+                     padding=0, pad_mode='same', weight_init=weight)
 
 
-def _conv1x1(in_channel, out_channel, stride=1, use_se=False):
+def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
     else:
         weight_shape = (out_channel, in_channel, 1, 1)
-        weight = _weight_variable(weight_shape)
-    return nn.Conv2d(in_channel, out_channel,
-                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
+                         padding=0, pad_mode='pad', weight_init=weight)
+    return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
+                     padding=0, pad_mode='same', weight_init=weight)
 
 
-def _conv7x7(in_channel, out_channel, stride=1, use_se=False):
+def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
     else:
         weight_shape = (out_channel, in_channel, 7, 7)
-        weight = _weight_variable(weight_shape)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel,
+                         kernel_size=7, stride=stride, padding=3, pad_mode='pad', weight_init=weight)
     return nn.Conv2d(in_channel, out_channel,
                      kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
 
 
-def _bn(channel):
+def _bn(channel, res_base=False):
+    if res_base:
+        return nn.BatchNorm2d(channel, eps=1e-5, momentum=0.1,
+                              gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
     return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                           gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
 
@@ -79,11 +167,13 @@ def _bn_last(channel):
 
 def _fc(in_channel, out_channel, use_se=False):
     if use_se:
-        weight = np.random.normal(loc=0, scale=0.01, size=out_channel*in_channel)
+        weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel)
         weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=mstype.float32)
     else:
         weight_shape = (out_channel, in_channel)
-        weight = _weight_variable(weight_shape)
+        weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
     return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
 
 
@@ -95,8 +185,8 @@ class ResidualBlock(nn.Cell):
         in_channel (int): Input channel.
         out_channel (int): Output channel.
         stride (int): Stride size for the first convolutional layer. Default: 1.
-        use_se (bool): enable SE-ResNet50 net. Default: False.
-        se_block(bool): use se block in SE-ResNet50 net. Default: False.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net. Default: False.
 
     Returns:
         Tensor, output tensor.
@@ -126,13 +216,15 @@ class ResidualBlock(nn.Cell):
         self.bn2 = _bn(channel)
 
         self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se)
-        self.bn3 = _bn_last(out_channel)
+        self.bn3 = _bn(out_channel)
+        if config.optimizer == "Thor" or config.net_name == "resnet152":
+            self.bn3 = _bn_last(out_channel)
         if self.se_block:
-            self.se_global_pool = P.ReduceMean(keep_dims=False)
-            self.se_dense_0 = _fc(out_channel, int(out_channel/4), use_se=self.use_se)
-            self.se_dense_1 = _fc(int(out_channel/4), out_channel, use_se=self.use_se)
+            self.se_global_pool = ops.ReduceMean(keep_dims=False)
+            self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se)
+            self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se)
             self.se_sigmoid = nn.Sigmoid()
-            self.se_mul = P.Mul()
+            self.se_mul = ops.Mul()
         self.relu = nn.ReLU()
 
         self.down_sample = False
@@ -153,7 +245,6 @@ class ResidualBlock(nn.Cell):
         else:
             self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, use_se=self.use_se),
                                                         _bn(out_channel)])
-        self.add = P.Add()
 
     def construct(self, x):
         identity = x
@@ -176,13 +267,76 @@ class ResidualBlock(nn.Cell):
             out = self.relu(out)
             out = self.se_dense_1(out)
             out = self.se_sigmoid(out)
-            out = F.reshape(out, F.shape(out) + (1, 1))
+            out = ops.reshape(out, ops.shape(out) + (1, 1))
             out = self.se_mul(out, out_se)
 
         if self.down_sample:
             identity = self.down_sample_layer(identity)
 
-        out = self.add(out, identity)
+        out = out + identity
         out = self.relu(out)
 
         return out
+
+
+class ResidualBlockBase(nn.Cell):
+    """
+    ResNet V1 residual block definition.
+
+    Args:
+        in_channel (int): Input channel.
+        out_channel (int): Output channel.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net. Default: False.
+        res_base (bool): Enable parameter setting of resnet18. Default: True.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ResidualBlockBase(3, 256, stride=2)
+    """
+
+    def __init__(self,
+                 in_channel,
+                 out_channel,
+                 stride=1,
+                 use_se=False,
+                 se_block=False,
+                 res_base=True):
+        super(ResidualBlockBase, self).__init__()
+        self.res_base = res_base
+        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base)
+        self.bn1d = _bn(out_channel)
+        self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base)
+        self.bn2d = _bn(out_channel)
+        self.relu = nn.ReLU()
+
+        self.down_sample = False
+        if stride != 1 or in_channel != out_channel:
+            self.down_sample = True
+
+        self.down_sample_layer = None
+        if self.down_sample:
+            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
+                                                                 use_se=use_se, res_base=self.res_base),
+                                                        _bn(out_channel, res_base)])
+
+    def construct(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1d(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2d(out)
+
+        if self.down_sample:
+            identity = self.down_sample_layer(identity)
+
+        out = out + identity
+        out = self.relu(out)
+
+        return out
@@ -199,8 +353,10 @@ class ResNet(nn.Cell):
         out_channels (list): Output channel in each layer.
         strides (list): Stride size in each layer.
         num_classes (int): The number of classes that the training images are belonging to.
-        use_se (bool): enable SE-ResNet50 net. Default: False.
-        se_block(bool): use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
+        res_base (bool): Enable parameter setting of resnet18. Default: False.
+
     Returns:
         Tensor, output tensor.
 
@@ -220,27 +376,26 @@ class ResNet(nn.Cell):
                  out_channels,
                  strides,
                  num_classes,
-                 use_se=False):
+                 use_se=False,
+                 res_base=False):
         super(ResNet, self).__init__()
 
         if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
             raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
         self.use_se = use_se
+        self.res_base = res_base
         self.se_block = False
-        if self.use_se:
-            self.se_block = True
 
-        if self.use_se:
-            self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se)
-            self.bn1_0 = _bn(32)
-            self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se)
-            self.bn1_1 = _bn(32)
-            self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se)
+        self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base)
+        self.bn1 = _bn(64, self.res_base)
+        self.relu = ops.ReLU()
+
+        if self.res_base:
+            self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)))
+            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
         else:
-            self.conv1 = _conv7x7(3, 64, stride=2)
-            self.bn1 = _bn(64)
-            self.relu = P.ReLU()
-            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
         self.layer1 = self._make_layer(block,
                                        layer_nums[0],
                                        in_channel=in_channels[0],
@@ -268,7 +423,7 @@ class ResNet(nn.Cell):
                                        use_se=self.use_se,
                                        se_block=self.se_block)
 
-        self.mean = P.ReduceMean(keep_dims=True)
+        self.mean = ops.ReduceMean(keep_dims=True)
         self.flatten = nn.Flatten()
         self.end_point = _fc(out_channels[3], num_classes, use_se=self.use_se)
 
@@ -282,7 +437,7 @@ class ResNet(nn.Cell):
             in_channel (int): Input channel.
             out_channel (int): Output channel.
             stride (int): Stride size for the first convolutional layer.
-            se_block(bool): use se block in SE-ResNet50 net. Default: False.
+            se_block(bool): Use se block in SE-ResNet50 net. Default: False.
 
         Returns:
             SequentialCell, the output layer.
@@ -318,6 +473,8 @@ class ResNet(nn.Cell):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
+        if self.res_base:
+            x = self.pad(x)
        c1 = self.maxpool(x)
 
         c2 = self.layer1(c1)
@@ -332,6 +489,50 @@ class ResNet(nn.Cell):
         return out
 
 
+def resnet18(class_num=10):
+    """
+    Get ResNet18 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet18 neural network.
+
+    Examples:
+        >>> net = resnet18(10)
+    """
+    return ResNet(ResidualBlockBase,
+                  [2, 2, 2, 2],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  [1, 2, 2, 2],
+                  class_num,
+                  res_base=True)
+
+
+def resnet34(class_num=10):
+    """
+    Get ResNet34 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet34 neural network.
+
+    Examples:
+        >>> net = resnet34(10)
+    """
+    return ResNet(ResidualBlockBase,
+                  [3, 4, 6, 3],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  [1, 2, 2, 2],
+                  class_num,
+                  res_base=True)
+
+
 def resnet50(class_num=10):
     """
     Get ResNet50 neural network.
@@ -352,6 +553,7 @@ def resnet50(class_num=10):
                   [1, 2, 2, 2],
                   class_num)
 
+
 def se_resnet50(class_num=1001):
     """
     Get SE-ResNet50 neural network.
@@ -373,6 +575,7 @@ def se_resnet50(class_num=1001):
                   class_num,
                   use_se=True)
 
+
 def resnet101(class_num=1001):
     """
     Get ResNet101 neural network.
@@ -392,3 +595,24 @@ def resnet101(class_num=1001):
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
                   class_num)
+
+
+def resnet152(class_num=1001):
+    """
+    Get ResNet152 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet152 neural network.
+
+    Examples:
+        >>> net = resnet152(1001)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 8, 36, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
diff --git a/tests/st/networks/models/resnet50/src_thor/thor.py b/tests/st/networks/models/resnet50/src_thor/thor.py
index a8ddb37c0d7..1af8b9fece9 100644
--- a/tests/st/networks/models/resnet50/src_thor/thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/thor.py
@@ -37,7 +37,6 @@
 Embedding = 3
 LayerNorm = 4
 BatchNorm = 5
-
 _momentum_opt = C.MultitypeFuncGraph("momentum_opt")
 
 op_add = P.AddN()
@@ -59,6 +58,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment):
     success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
     return success
 
+
 C0 = 16
@@ -122,11 +122,13 @@ def find_net_layertype_recur(net, layertype_map):
     else:
         find_net_layertype_recur(subcell, layertype_map)
 
+
 def get_net_layertype_mask(net):
     layertype_map = []
     find_net_layertype_recur(net, layertype_map)
     return layertype_map
 
+
 def get_layer_counter(layer_type, layer_counter, params, idx):
     """get layer counter"""
     if layer_type in [Conv, FC, LayerNorm, BatchNorm]:
@@ -247,7 +249,6 @@ class THOR_Ascend(Optimizer):
             self.grad_reducer_A = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=6)
             self.grad_reducer_G = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=8)
 
-
     def _process_matrix_init_and_weight_idx_map(self, net):
         """process matrix init shape, and get weight idx map"""
         layer_type_map = get_net_layertype_mask(net)
diff --git a/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
index bdef7a1e0fe..6e03a0a1cbe 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
@@ -26,18 +26,17 @@
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import init
 from mindspore.context import ParallelMode
 from mindspore.train.callback import Callback
+from mindspore.train.model import Model
+from mindspore.train.train_thor import ConvertModelUtils
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.nn.optim import thor
 import mindspore.dataset as ds
 
-from tests.st.networks.models.resnet50.src.dataset import create_dataset
 from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
 from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
 from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
-from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset as create_dataset_thor
-from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
-from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
-
+from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset2 as create_dataset_thor
+from tests.st.networks.models.resnet50.src.resnet import resnet50
 
 MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
 dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
"/home/workspace/mindspore_dataset/imagenet/imagenet_original/train" @@ -89,11 +88,12 @@ class LossGet(Callback): self._per_print_times = per_print_times self._loss = 0.0 self.data_size = data_size + self._epoch = 0 def step_end(self, run_context): cb_params = run_context.original_args() loss = cb_params.net_outputs - + self._epoch = cb_params.cur_epoch_num if isinstance(loss, (tuple, list)): if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray): loss = loss[0] @@ -106,8 +106,11 @@ class LossGet(Callback): if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): raise ValueError("epoch: {} step: {}. Invalid loss, terminating training." .format(cb_params.cur_epoch_num, cur_step_in_epoch)) + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: self._loss = loss + print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, + cur_step_in_epoch, loss), flush=True) def epoch_begin(self, run_context): self.epoch_time = time.time() @@ -122,6 +125,9 @@ class LossGet(Callback): def get_per_step_time(self): return self._per_step_mseconds + def get_epoch(self): + return self._epoch + def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): os.system("mkdir " + str(device_id)) @@ -137,7 +143,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): init() # network - net = resnet50_thor(thor_config.class_num) + net = resnet50(thor_config.class_num) if not thor_config.label_smooth: thor_config.label_smooth_factor = 0.0 @@ -148,14 +154,10 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): # train dataset dataset = create_dataset_thor(dataset_path=dataset_path, do_train=True, - repeat_num=1, batch_size=thor_config.batch_size) - + batch_size=thor_config.batch_size, train_image_size=thor_config.train_image_size, + eval_image_size=thor_config.eval_image_size, target="Ascend", + distribute=True) step_size = dataset.get_dataset_size() - eval_interval = thor_config.eval_interval - - # evaluation dataset - eval_dataset = create_dataset(dataset_path=eval_path, do_train=False, - repeat_num=1, batch_size=thor_config.eval_batch_size) # loss scale loss_scale = FixedLossScaleManager(thor_config.loss_scale, drop_overflow_update=False) @@ -171,90 +173,30 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): # evaluation network dist_eval_network = ClassifyCorrectCell(net) # model - model = THOR_Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, amp_level="O2", - keep_batchnorm_fp32=False, - metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)}, - eval_network=dist_eval_network, frequency=thor_config.frequency) + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, + metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)}, + amp_level="O2", keep_batchnorm_fp32=False, + eval_network=dist_eval_network) - # model init - print("init_start", device_id) - model.init(dataset, eval_dataset) - print("init_stop", device_id) + model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, + loss_scale_manager=loss_scale, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False) # callbacks loss_cb = LossGet(1, step_size) # train and eval - acc = 0.0 - time_cost = 0.0 print("run_start", device_id) - for epoch_idx in range(0, 
-    for epoch_idx in range(0, int(epoch_size / eval_interval)):
-        model.train(eval_interval, dataset, callbacks=loss_cb)
-        eval_start = time.time()
-        output = model.eval(eval_dataset)
-        eval_cost = (time.time() - eval_start) * 1000
-        acc = float(output["acc"])
-        time_cost = loss_cb.get_per_step_time()
-        loss = loss_cb.get_loss()
-        print("the {} epoch's resnet result:\n "
-              "device{}, training loss {}, acc {}, "
-              "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms".format(
-                  epoch_idx, device_id, loss, acc, time_cost, eval_cost, time_cost * step_size + eval_cost))
-    q.put({'acc': acc, 'cost': time_cost})
-
-
-def test_resnet_thor_imagenet_8p_0():
-    """
-    Feature: Resnet50 thor network
-    Description: Train and evaluate resnet50 thor network on imagenet dataset
-    Expectation: accuracy > 0.28, time cost < 25.
-    """
-    context.set_context(enable_graph_kernel=False, enable_sparse=False)
-    context.reset_auto_parallel_context()
-    context.reset_ps_context()
-
-    q = Queue()
-
-    # resnet50_thor
-    device_num = 8
-    epoch_size = 1
-    enable_hccl = True
-    process = []
-    for i in range(device_num):
-        device_id = i
-        process.append(Process(target=train_process_thor,
-                               args=(q, device_id, epoch_size, device_num, enable_hccl)))
-
-    cpu_count = os.cpu_count()
-    each_cpu_count = cpu_count // device_num
-    for i in range(device_num):
-        process[i].start()
-        if each_cpu_count > 1:
-            cpu_start = each_cpu_count * i
-            cpu_end = each_cpu_count * (i + 1)
-            process_cpu = [x for x in range(cpu_start, cpu_end)]
-            pid = process[i].pid
-            os.sched_setaffinity(pid, set(process_cpu))
-
-    print("Waiting for all subprocesses done...")
-
-    for i in range(device_num):
-        process[i].join()
-
-    # THOR
-    thor_acc = 0.0
-    thor_cost = 0.0
-    for i in range(device_num):
-        output = q.get()
-        thor_acc += output['acc']
-        thor_cost += output['cost']
-    thor_acc = thor_acc / device_num
-    thor_cost = thor_cost / device_num
-
-    for i in range(0, device_num):
-        os.system("rm -rf " + str(i))
-    print("End training...")
-    assert thor_acc > 0.25
+    model.train(2, dataset, callbacks=loss_cb,
+                sink_size=dataset.get_dataset_size(), dataset_sink_mode=True)
+    time_cost = loss_cb.get_per_step_time()
+    loss = loss_cb.get_loss()
+    epoch_idx = loss_cb.get_epoch()
+    print("the {} epoch's resnet result:\n "
+          "device{}, training loss {}, "
+          "training per step cost {:.2f} ms, total_cost {:.2f} ms".format(epoch_idx, device_id,
+                                                                          loss, time_cost, time_cost * step_size))
+    q.put({'loss': loss, 'cost': time_cost})
 
 
 @pytest.mark.level1
@@ -275,7 +217,7 @@ def test_resnet_thor_imagenet_8p_1():
 
     # resnet50_thor
     device_num = 8
-    epoch_size = 1
+    epoch_size = 2
     enable_hccl = True
     process = []
     for i in range(device_num):
@@ -300,19 +242,17 @@ def test_resnet_thor_imagenet_8p_1():
         process[i].join()
 
     # THOR
-    thor_acc = 0.0
+    thor_loss = 0.0
     thor_cost = 0.0
     for i in range(device_num):
        output = q.get()
-        thor_acc += output['acc']
+        thor_loss += output['loss']
         thor_cost += output['cost']
-    thor_acc = thor_acc / device_num
+    thor_loss = thor_loss / device_num
     thor_cost = thor_cost / device_num
 
     for i in range(0, device_num):
        os.system("rm -rf " + str(i))
     print("End training...")
-    print('thor acc: ', thor_acc)
-    print('thor cost: ', thor_cost)
-    #assert thor_acc > 0.25
-    #assert thor_cost < 30
+    assert thor_loss < 7
+    assert thor_cost < 30
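
Reviewer's note: the following condensed, single-process sketch shows the training flow this patch switches to (a plain Model wrapped by ConvertModelUtils().convert_to_thor_model(), trained with dataset sinking), using only symbols the patch itself imports. The dataset path and the learning-rate/damping constants are illustrative placeholders, not values taken from the test, which builds per-step schedules and runs eight distributed processes instead.

    from mindspore import context
    from mindspore.common.tensor import Tensor
    from mindspore.train.model import Model
    from mindspore.train.train_thor import ConvertModelUtils
    from mindspore.train.loss_scale_manager import FixedLossScaleManager
    from mindspore.nn.optim import thor
    from tests.st.networks.models.resnet50.src.resnet import resnet50
    from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
    from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset2

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    net = resnet50(class_num=1001)
    # illustrative path; the test points at the shared-workspace ImageNet copy
    dataset = create_dataset2(dataset_path="/path/to/imagenet/train", do_train=True,
                              batch_size=32, train_image_size=224, eval_image_size=224,
                              target="Ascend", distribute=False)
    loss = CrossEntropySmooth(sparse=True, reduction="mean",
                              smooth_factor=0.1, num_classes=1001)
    # scalar lr/damping are placeholders; the test uses per-step schedule Tensors
    opt = thor(net, learning_rate=Tensor(0.045), damping=Tensor(0.03), momentum=0.9,
               loss_scale=128, frequency=834)
    loss_scale = FixedLossScaleManager(128, drop_overflow_update=False)
    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
                  amp_level="O2", keep_batchnorm_fp32=False)
    # the key step this patch adopts: convert the plain Model so that
    # model.train() runs THOR second-order updates at the configured frequency
    model = ConvertModelUtils().convert_to_thor_model(model=model, network=net,
                                                      loss_fn=loss, optimizer=opt,
                                                      loss_scale_manager=loss_scale,
                                                      metrics={'acc'}, amp_level="O2",
                                                      keep_batchnorm_fp32=False)
    model.train(2, dataset, dataset_sink_mode=True,
                sink_size=dataset.get_dataset_size())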