diff --git a/tests/st/networks/models/resnet50/src_thor/config.py b/tests/st/networks/models/resnet50/src_thor/config.py
index 17aaa8a0c07..60ff57468ce 100644
--- a/tests/st/networks/models/resnet50/src_thor/config.py
+++ b/tests/st/networks/models/resnet50/src_thor/config.py
@@ -35,5 +35,8 @@ config = ed({
     "label_smooth_factor": 0.1,
     "frequency": 834,
     "eval_interval": 1,
-    "eval_batch_size": 32
+    "eval_batch_size": 32,
+    "train_image_size": 224,
+    "eval_image_size": 224,
+    "device_target": "Ascend"
 })
diff --git a/tests/st/networks/models/resnet50/src_thor/dataset.py b/tests/st/networks/models/resnet50/src_thor/dataset.py
index 21cd223c0ad..0f7c3adbfc1 100644
--- a/tests/st/networks/models/resnet50/src_thor/dataset.py
+++ b/tests/st/networks/models/resnet50/src_thor/dataset.py
@@ -12,72 +12,170 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-
-"""create train or eval dataset."""
-
-import os
-import mindspore.common.dtype as mstype
+"""
+create train or eval dataset.
+"""
+import multiprocessing
+import mindspore as ms
 import mindspore.dataset as ds
-import mindspore.dataset.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
+from mindspore.communication.management import init, get_rank, get_group_size
 
 
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
+def create_dataset1(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
+                    target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
     """
-    create a train or eval dataset.
+    create a train or eval cifar10 dataset for resnet50
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        batch_size(int): the batch size of dataset. Default: 32
+        target(str): the device target. Default: Ascend
+        distribute(bool): data for distribute or not. Default: False
+        enable_cache(bool): whether tensor caching service is used for eval. Default: False
+        cache_session_id(int): If enable_cache, cache session_id needs to be provided. Default: None
+
+    Returns:
+        dataset
+    """
+    device_num, rank_id = _get_rank_info(distribute)
+    ds.config.set_prefetch_size(64)
+    if device_num == 1:
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
+    else:
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
+                                     num_shards=device_num, shard_id=rank_id)
+
+    # define map operations
+    trans = []
+    if do_train:
+        trans += [
+            ds.vision.c_transforms.RandomCrop((32, 32), (4, 4, 4, 4)),
+            ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
+        ]
+
+    trans += [
+        ds.vision.c_transforms.Resize((train_image_size, train_image_size)),
+        ds.vision.c_transforms.Rescale(1.0 / 255.0, 0.0),
+        ds.vision.c_transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
+        ds.vision.c_transforms.HWC2CHW()
+    ]
+
+    type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
+
+    data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                            num_parallel_workers=get_num_parallel_workers(8))
+    # only enable cache for eval
+    if do_train:
+        enable_cache = False
+    if enable_cache:
+        if not cache_session_id:
+            raise ValueError("A cache session_id must be provided to use cache.")
+        eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
+        data_set = data_set.map(operations=trans, input_columns="image",
+                                num_parallel_workers=get_num_parallel_workers(8), cache=eval_cache)
+    else:
+        data_set = data_set.map(operations=trans, input_columns="image",
+                                num_parallel_workers=get_num_parallel_workers(8))
+
+    # apply batch operations
+    data_set = data_set.batch(batch_size, drop_remainder=True)
+
+    return data_set
+
+
+def create_dataset2(dataset_path, do_train, batch_size=32, train_image_size=224, eval_image_size=224,
+                    target="Ascend", distribute=False, enable_cache=False, cache_session_id=None):
+    """
+    create a train or eval imagenet2012 dataset for resnet50
     Args:
         dataset_path(string): the path of dataset.
         do_train(bool): whether dataset is used for train or eval.
-        repeat_num(int): the repeat times of dataset. Default: 1
         batch_size(int): the batch size of dataset. Default: 32
+        target(str): the device target. Default: Ascend
+        distribute(bool): data for distribute or not. Default: False
+        enable_cache(bool): whether tensor caching service is used for eval. Default: False
+        cache_session_id(int): If enable_cache, cache session_id needs to be provided. Default: None
 
     Returns:
         dataset
     """
+    device_num, rank_id = _get_rank_info(distribute)
-    device_num = int(os.getenv("RANK_SIZE"))
-    rank_id = int(os.getenv("RANK_ID"))
-    if do_train:
-        if device_num == 1:
-            data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=16, shuffle=True)
-        else:
-            data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
-                                             num_shards=device_num, shard_id=rank_id)
+    ds.config.set_prefetch_size(64)
+    if device_num == 1:
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True)
     else:
-        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=False,
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=get_num_parallel_workers(12), shuffle=True,
                                          num_shards=device_num, shard_id=rank_id)
-    image_size = 224
     mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
     std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
 
     # define map operations
     if do_train:
         trans = [
-            C.Decode(),
-            C.Resize((256, 256)),
-            C.CenterCrop(image_size),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
+            ds.vision.c_transforms.RandomCropDecodeResize(train_image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
+            ds.vision.c_transforms.RandomHorizontalFlip(prob=0.5)
         ]
     else:
         trans = [
-            C.Decode(),
-            C.Resize((256, 256)),
-            C.CenterCrop(image_size),
-            C.Normalize(mean=mean, std=std),
-            C.HWC2CHW()
+            ds.vision.c_transforms.Decode(),
+            ds.vision.c_transforms.Resize(256),
+            ds.vision.c_transforms.CenterCrop(eval_image_size)
         ]
+    trans_norm = [ds.vision.c_transforms.Normalize(mean=mean, std=std), ds.vision.c_transforms.HWC2CHW()]
 
-    type_cast_op = C2.TypeCast(mstype.int32)
+    type_cast_op = ds.transforms.c_transforms.TypeCast(ms.int32)
 
-    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=24)
-    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
+    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=get_num_parallel_workers(12))
+    data_set = data_set.map(operations=trans_norm, input_columns="image",
+                            num_parallel_workers=get_num_parallel_workers(12))
+    # only enable cache for eval
+    if do_train:
+        enable_cache = False
+    if enable_cache:
+        if not cache_session_id:
+            raise ValueError("A cache session_id must be provided to use cache.")
+        eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
+        data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                                num_parallel_workers=get_num_parallel_workers(12),
+                                cache=eval_cache)
+    else:
+        data_set = data_set.map(operations=type_cast_op, input_columns="label",
+                                num_parallel_workers=get_num_parallel_workers(12))
 
     # apply batch operations
     data_set = data_set.batch(batch_size, drop_remainder=True)
 
-    # apply dataset repeat operation
-    data_set = data_set.repeat(repeat_num)
     return data_set
+
+
+def _get_rank_info(distribute):
+    """
+    get rank size and rank id
+    """
+    if distribute:
+        init()
+        rank_id = get_rank()
+        device_num = get_group_size()
+    else:
+        rank_id = 0
+        device_num = 1
+    return device_num, rank_id
+
+
+def get_num_parallel_workers(num_parallel_workers):
+    """
+    Get num_parallel_workers used in dataset operations.
+    If num_parallel_workers > the real CPU cores number, set num_parallel_workers = the real CPU cores number.
+ """ + cores = multiprocessing.cpu_count() + if isinstance(num_parallel_workers, int): + if cores < num_parallel_workers: + print("The num_parallel_workers {} is set too large, now set it {}".format(num_parallel_workers, cores)) + num_parallel_workers = cores + else: + print("The num_parallel_workers {} is invalid, now set it {}".format(num_parallel_workers, min(cores, 8))) + num_parallel_workers = min(cores, 8) + return num_parallel_workers diff --git a/tests/st/networks/models/resnet50/src_thor/model_thor.py b/tests/st/networks/models/resnet50/src_thor/model_thor.py index 4dacc818e0f..ce84b2f8c43 100644 --- a/tests/st/networks/models/resnet50/src_thor/model_thor.py +++ b/tests/st/networks/models/resnet50/src_thor/model_thor.py @@ -574,7 +574,7 @@ class Model: >>> model.train(2, dataset) """ repeat_count = train_dataset.get_repeat_count() - if epoch != repeat_count and dataset_sink_mode is True: + if epoch != repeat_count and dataset_sink_mode: logger.warning(f"The epoch_size {epoch} is not the same with dataset repeat_count {repeat_count}") dataset_sink_mode = Validator.check_bool(dataset_sink_mode) _device_number_check(self._parallel_mode, self._device_number) diff --git a/tests/st/networks/models/resnet50/src_thor/resnet.py b/tests/st/networks/models/resnet50/src_thor/resnet.py index 94f65f461e1..64bab0ef82b 100644 --- a/tests/st/networks/models/resnet50/src_thor/resnet.py +++ b/tests/st/networks/models/resnet50/src_thor/resnet.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,61 +13,149 @@ # limitations under the License. # ============================================================================ """ResNet.""" +import math import numpy as np from scipy.stats import truncnorm import mindspore.nn as nn +import mindspore.ops as ops import mindspore.common.dtype as mstype -from mindspore.ops import operations as P -from mindspore.ops import functional as F from mindspore.common.tensor import Tensor +from src.model_utils.config import config -def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): +def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): fan_in = in_channel * kernel_size * kernel_size scale = 1.0 scale /= max(1., fan_in) stddev = (scale ** 0.5) / .87962566103423978 + if config.net_name == "resnet152": + stddev = (scale ** 0.5) mu, sigma = 0, stddev weight = truncnorm(-2, 2, loc=mu, scale=sigma).rvs(out_channel * in_channel * kernel_size * kernel_size) weight = np.reshape(weight, (out_channel, in_channel, kernel_size, kernel_size)) return Tensor(weight, dtype=mstype.float32) + def _weight_variable(shape, factor=0.01): init_value = np.random.randn(*shape).astype(np.float32) * factor return Tensor(init_value) -def _conv3x3(in_channel, out_channel, stride=1, use_se=False): +def calculate_gain(nonlinearity, param=None): + """calculate_gain""" + linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d'] + res = 0 + if nonlinearity in linear_fns or nonlinearity == 'sigmoid': + res = 1 + elif nonlinearity == 'tanh': + res = 5.0 / 3 + elif nonlinearity == 'relu': + res = math.sqrt(2.0) + elif nonlinearity == 'leaky_relu': + if param is None: + neg_slope = 0.01 + elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float): + neg_slope = param + else: 
+            raise ValueError("neg_slope {} not a valid number".format(param))
+        res = math.sqrt(2.0 / (1 + neg_slope ** 2))
+    else:
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+    return res
+
+
+def _calculate_fan_in_and_fan_out(tensor):
+    """_calculate_fan_in_and_fan_out"""
+    dimensions = len(tensor)
+    if dimensions < 2:
+        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")
+    if dimensions == 2:  # Linear
+        fan_in = tensor[1]
+        fan_out = tensor[0]
+    else:
+        num_input_fmaps = tensor[1]
+        num_output_fmaps = tensor[0]
+        receptive_field_size = 1
+        if dimensions > 2:
+            receptive_field_size = tensor[2] * tensor[3]
+        fan_in = num_input_fmaps * receptive_field_size
+        fan_out = num_output_fmaps * receptive_field_size
+    return fan_in, fan_out
+
+
+def _calculate_correct_fan(tensor, mode):
+    mode = mode.lower()
+    valid_modes = ['fan_in', 'fan_out']
+    if mode not in valid_modes:
+        raise ValueError("Unsupported mode {}, please use one of {}".format(mode, valid_modes))
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
+    return fan_in if mode == 'fan_in' else fan_out
+
+
+def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    fan = _calculate_correct_fan(inputs_shape, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    return np.random.normal(0, std, size=inputs_shape).astype(np.float32)
+
+
+def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'):
+    fan = _calculate_correct_fan(inputs_shape, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
+    return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32)
+
+
+def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
     else:
         weight_shape = (out_channel, in_channel, 3, 3)
-        weight = _weight_variable(weight_shape)
-    return nn.Conv2d(in_channel, out_channel,
-                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
+                         padding=1, pad_mode='pad', weight_init=weight)
+    return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
+                     padding=0, pad_mode='same', weight_init=weight)
 
 
-def _conv1x1(in_channel, out_channel, stride=1, use_se=False):
+def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
     else:
         weight_shape = (out_channel, in_channel, 1, 1)
-        weight = _weight_variable(weight_shape)
-    return nn.Conv2d(in_channel, out_channel,
-                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
+                         padding=0, pad_mode='pad', weight_init=weight)
+    return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
+                     padding=0, pad_mode='same', weight_init=weight)
 
 
-def _conv7x7(in_channel, out_channel, stride=1, use_se=False):
+def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
     else:
         weight_shape = (out_channel, in_channel, 7, 7)
-        weight = _weight_variable(weight_shape)
+        weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
+    if res_base:
+        return nn.Conv2d(in_channel, out_channel,
+                         kernel_size=7, stride=stride, padding=3, pad_mode='pad', weight_init=weight)
     return nn.Conv2d(in_channel, out_channel,
                      kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
 
 
-def _bn(channel):
+def _bn(channel, res_base=False):
+    if res_base:
+        return nn.BatchNorm2d(channel, eps=1e-5, momentum=0.1,
+                              gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
     return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                           gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
 
@@ -79,11 +167,13 @@ def _bn_last(channel):
 
 def _fc(in_channel, out_channel, use_se=False):
     if use_se:
-        weight = np.random.normal(loc=0, scale=0.01, size=out_channel*in_channel)
+        weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel)
         weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=mstype.float32)
     else:
         weight_shape = (out_channel, in_channel)
-        weight = _weight_variable(weight_shape)
+        weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
+        if config.net_name == "resnet152":
+            weight = _weight_variable(weight_shape)
     return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
 
 
@@ -95,8 +185,8 @@ class ResidualBlock(nn.Cell):
         in_channel (int): Input channel.
         out_channel (int): Output channel.
         stride (int): Stride size for the first convolutional layer. Default: 1.
-        use_se (bool): enable SE-ResNet50 net. Default: False.
-        se_block(bool): use se block in SE-ResNet50 net. Default: False.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net. Default: False.
 
     Returns:
         Tensor, output tensor.
@@ -126,13 +216,15 @@ class ResidualBlock(nn.Cell):
         self.bn2 = _bn(channel)
 
         self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se)
-        self.bn3 = _bn_last(out_channel)
+        self.bn3 = _bn(out_channel)
+        if config.optimizer == "Thor" or config.net_name == "resnet152":
+            self.bn3 = _bn_last(out_channel)
         if self.se_block:
-            self.se_global_pool = P.ReduceMean(keep_dims=False)
-            self.se_dense_0 = _fc(out_channel, int(out_channel/4), use_se=self.use_se)
-            self.se_dense_1 = _fc(int(out_channel/4), out_channel, use_se=self.use_se)
+            self.se_global_pool = ops.ReduceMean(keep_dims=False)
+            self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se)
+            self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se)
             self.se_sigmoid = nn.Sigmoid()
-            self.se_mul = P.Mul()
+            self.se_mul = ops.Mul()
         self.relu = nn.ReLU()
 
         self.down_sample = False
@@ -153,7 +245,6 @@ class ResidualBlock(nn.Cell):
         else:
             self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, use_se=self.use_se),
                                                         _bn(out_channel)])
-        self.add = P.Add()
 
     def construct(self, x):
         identity = x
@@ -176,13 +267,76 @@ class ResidualBlock(nn.Cell):
             out = self.relu(out)
             out = self.se_dense_1(out)
             out = self.se_sigmoid(out)
-            out = F.reshape(out, F.shape(out) + (1, 1))
+            out = ops.reshape(out, ops.shape(out) + (1, 1))
             out = self.se_mul(out, out_se)
 
         if self.down_sample:
             identity = self.down_sample_layer(identity)
 
-        out = self.add(out, identity)
+        out = out + identity
         out = self.relu(out)
 
         return out
+
+
+class ResidualBlockBase(nn.Cell):
+    """
+    ResNet V1 residual block definition.
+
+    Args:
+        in_channel (int): Input channel.
+        out_channel (int): Output channel.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net. Default: False.
+        res_base (bool): Enable parameter setting of resnet18. Default: True.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ResidualBlockBase(3, 256, stride=2)
+    """
+
+    def __init__(self,
+                 in_channel,
+                 out_channel,
+                 stride=1,
+                 use_se=False,
+                 se_block=False,
+                 res_base=True):
+        super(ResidualBlockBase, self).__init__()
+        self.res_base = res_base
+        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base)
+        self.bn1d = _bn(out_channel)
+        self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base)
+        self.bn2d = _bn(out_channel)
+        self.relu = nn.ReLU()
+
+        self.down_sample = False
+        if stride != 1 or in_channel != out_channel:
+            self.down_sample = True
+
+        self.down_sample_layer = None
+        if self.down_sample:
+            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride,
+                                                                 use_se=use_se, res_base=self.res_base),
+                                                        _bn(out_channel, res_base)])
+
+    def construct(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1d(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2d(out)
+
+        if self.down_sample:
+            identity = self.down_sample_layer(identity)
+
+        out = out + identity
+        out = self.relu(out)
+
+        return out
@@ -199,8 +353,10 @@ class ResNet(nn.Cell):
         out_channels (list): Output channel in each layer.
         strides (list): Stride size in each layer.
         num_classes (int): The number of classes that the training images are belonging to.
-        use_se (bool): enable SE-ResNet50 net. Default: False.
-        se_block(bool): use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
+        use_se (bool): Enable SE-ResNet50 net. Default: False.
+        se_block(bool): Use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False.
+        res_base (bool): Enable parameter setting of resnet18. Default: False.
+
     Returns:
         Tensor, output tensor.
 
@@ -220,27 +376,26 @@ class ResNet(nn.Cell):
                  out_channels,
                  strides,
                  num_classes,
-                 use_se=False):
+                 use_se=False,
+                 res_base=False):
         super(ResNet, self).__init__()
 
         if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
             raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
         self.use_se = use_se
+        self.res_base = res_base
         self.se_block = False
-        if self.use_se:
-            self.se_block = True
 
-        if self.use_se:
-            self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se)
-            self.bn1_0 = _bn(32)
-            self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se)
-            self.bn1_1 = _bn(32)
-            self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se)
+        self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base)
+        self.bn1 = _bn(64, self.res_base)
+        self.relu = ops.ReLU()
+
+        if self.res_base:
+            self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)))
+            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
         else:
-            self.conv1 = _conv7x7(3, 64, stride=2)
-            self.bn1 = _bn(64)
-            self.relu = P.ReLU()
-            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
         self.layer1 = self._make_layer(block,
                                        layer_nums[0],
                                        in_channel=in_channels[0],
@@ -268,7 +423,7 @@ class ResNet(nn.Cell):
                                        use_se=self.use_se,
                                        se_block=self.se_block)
 
-        self.mean = P.ReduceMean(keep_dims=True)
+        self.mean = ops.ReduceMean(keep_dims=True)
         self.flatten = nn.Flatten()
         self.end_point = _fc(out_channels[3], num_classes, use_se=self.use_se)
 
@@ -282,7 +437,7 @@ class ResNet(nn.Cell):
             in_channel (int): Input channel.
             out_channel (int): Output channel.
             stride (int): Stride size for the first convolutional layer.
-            se_block(bool): use se block in SE-ResNet50 net. Default: False.
+            se_block(bool): Use se block in SE-ResNet50 net. Default: False.
 
         Returns:
             SequentialCell, the output layer.
@@ -318,6 +473,8 @@ class ResNet(nn.Cell):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
+        if self.res_base:
+            x = self.pad(x)
        c1 = self.maxpool(x)
 
         c2 = self.layer1(c1)
@@ -332,6 +489,50 @@ class ResNet(nn.Cell):
         return out
 
 
+def resnet18(class_num=10):
+    """
+    Get ResNet18 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet18 neural network.
+
+    Examples:
+        >>> net = resnet18(10)
+    """
+    return ResNet(ResidualBlockBase,
+                  [2, 2, 2, 2],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  [1, 2, 2, 2],
+                  class_num,
+                  res_base=True)
+
+
+def resnet34(class_num=10):
+    """
+    Get ResNet34 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet34 neural network.
+
+    Examples:
+        >>> net = resnet34(10)
+    """
+    return ResNet(ResidualBlockBase,
+                  [3, 4, 6, 3],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  [1, 2, 2, 2],
+                  class_num,
+                  res_base=True)
+
+
 def resnet50(class_num=10):
     """
     Get ResNet50 neural network.
@@ -352,6 +553,7 @@ def resnet50(class_num=10):
                   [1, 2, 2, 2],
                   class_num)
 
+
 def se_resnet50(class_num=1001):
     """
     Get SE-ResNet50 neural network.
@@ -373,6 +575,7 @@ def se_resnet50(class_num=1001):
                   class_num,
                   use_se=True)
 
+
 def resnet101(class_num=1001):
     """
     Get ResNet101 neural network.
@@ -392,3 +595,24 @@ def resnet101(class_num=1001):
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
                   class_num)
+
+
+def resnet152(class_num=1001):
+    """
+    Get ResNet152 neural network.
+
+    Args:
+        class_num (int): Class number.
+
+    Returns:
+        Cell, cell instance of ResNet152 neural network.
+
+    Examples:
+        >>> net = resnet152(1001)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 8, 36, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
diff --git a/tests/st/networks/models/resnet50/src_thor/thor.py b/tests/st/networks/models/resnet50/src_thor/thor.py
index a8ddb37c0d7..1af8b9fece9 100644
--- a/tests/st/networks/models/resnet50/src_thor/thor.py
+++ b/tests/st/networks/models/resnet50/src_thor/thor.py
@@ -37,7 +37,6 @@
 Embedding = 3
 LayerNorm = 4
 BatchNorm = 5
-
 _momentum_opt = C.MultitypeFuncGraph("momentum_opt")
 
 op_add = P.AddN()
@@ -59,6 +58,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment):
     success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
     return success
 
+
 C0 = 16
@@ -122,11 +122,13 @@ def find_net_layertype_recur(net, layertype_map):
     else:
         find_net_layertype_recur(subcell, layertype_map)
 
+
 def get_net_layertype_mask(net):
     layertype_map = []
     find_net_layertype_recur(net, layertype_map)
     return layertype_map
 
+
 def get_layer_counter(layer_type, layer_counter, params, idx):
     """get layer counter"""
     if layer_type in [Conv, FC, LayerNorm, BatchNorm]:
@@ -247,7 +249,6 @@ class THOR_Ascend(Optimizer):
             self.grad_reducer_A = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=6)
             self.grad_reducer_G = DistributedGradReducer(self.matrix_A, mean, degree, fusion_type=8)
 
-
     def _process_matrix_init_and_weight_idx_map(self, net):
         """process matrix init shape, and get weight idx map"""
         layer_type_map = get_net_layertype_mask(net)
diff --git a/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
index bdef7a1e0fe..6e03a0a1cbe 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_thor_imagenet.py
@@ -26,18 +26,17 @@
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import init
 from mindspore.context import ParallelMode
 from mindspore.train.callback import Callback
+from mindspore.train.model import Model
+from mindspore.train.train_thor import ConvertModelUtils
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.nn.optim import thor
 import mindspore.dataset as ds
 
-from tests.st.networks.models.resnet50.src.dataset import create_dataset
 from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
 from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
 from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
-from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset as create_dataset_thor
-from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
-from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
-
+from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset2 as create_dataset_thor
+from tests.st.networks.models.resnet50.src.resnet import resnet50
 
 MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
 dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
"/home/workspace/mindspore_dataset/imagenet/imagenet_original/train" @@ -89,11 +88,12 @@ class LossGet(Callback): self._per_print_times = per_print_times self._loss = 0.0 self.data_size = data_size + self._epoch = 0 def step_end(self, run_context): cb_params = run_context.original_args() loss = cb_params.net_outputs - + self._epoch = cb_params.cur_epoch_num if isinstance(loss, (tuple, list)): if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray): loss = loss[0] @@ -106,8 +106,11 @@ class LossGet(Callback): if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)): raise ValueError("epoch: {} step: {}. Invalid loss, terminating training." .format(cb_params.cur_epoch_num, cur_step_in_epoch)) + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: self._loss = loss + print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, + cur_step_in_epoch, loss), flush=True) def epoch_begin(self, run_context): self.epoch_time = time.time() @@ -122,6 +125,9 @@ class LossGet(Callback): def get_per_step_time(self): return self._per_step_mseconds + def get_epoch(self): + return self._epoch + def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): os.system("mkdir " + str(device_id)) @@ -137,7 +143,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): init() # network - net = resnet50_thor(thor_config.class_num) + net = resnet50(thor_config.class_num) if not thor_config.label_smooth: thor_config.label_smooth_factor = 0.0 @@ -148,14 +154,10 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): # train dataset dataset = create_dataset_thor(dataset_path=dataset_path, do_train=True, - repeat_num=1, batch_size=thor_config.batch_size) - + batch_size=thor_config.batch_size, train_image_size=thor_config.train_image_size, + eval_image_size=thor_config.eval_image_size, target="Ascend", + distribute=True) step_size = dataset.get_dataset_size() - eval_interval = thor_config.eval_interval - - # evaluation dataset - eval_dataset = create_dataset(dataset_path=eval_path, do_train=False, - repeat_num=1, batch_size=thor_config.eval_batch_size) # loss scale loss_scale = FixedLossScaleManager(thor_config.loss_scale, drop_overflow_update=False) @@ -171,90 +173,30 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): # evaluation network dist_eval_network = ClassifyCorrectCell(net) # model - model = THOR_Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, amp_level="O2", - keep_batchnorm_fp32=False, - metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)}, - eval_network=dist_eval_network, frequency=thor_config.frequency) + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, + metrics={'acc': DistAccuracy(batch_size=thor_config.eval_batch_size, device_num=device_num)}, + amp_level="O2", keep_batchnorm_fp32=False, + eval_network=dist_eval_network) - # model init - print("init_start", device_id) - model.init(dataset, eval_dataset) - print("init_stop", device_id) + model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, + loss_scale_manager=loss_scale, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False) # callbacks loss_cb = LossGet(1, step_size) # train and eval - acc = 0.0 - time_cost = 0.0 print("run_start", device_id) - for epoch_idx in range(0, 
-    for epoch_idx in range(0, int(epoch_size / eval_interval)):
-        model.train(eval_interval, dataset, callbacks=loss_cb)
-        eval_start = time.time()
-        output = model.eval(eval_dataset)
-        eval_cost = (time.time() - eval_start) * 1000
-        acc = float(output["acc"])
-        time_cost = loss_cb.get_per_step_time()
-        loss = loss_cb.get_loss()
-        print("the {} epoch's resnet result:\n "
-              "device{}, training loss {}, acc {}, "
-              "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms".format(
-                  epoch_idx, device_id, loss, acc, time_cost, eval_cost, time_cost * step_size + eval_cost))
-    q.put({'acc': acc, 'cost': time_cost})
-
-
-def test_resnet_thor_imagenet_8p_0():
-    """
-    Feature: Resnet50 thor network
-    Description: Train and evaluate resnet50 thor network on imagenet dataset
-    Expectation: accuracy > 0.28, time cost < 25.
-    """
-    context.set_context(enable_graph_kernel=False, enable_sparse=False)
-    context.reset_auto_parallel_context()
-    context.reset_ps_context()
-
-    q = Queue()
-
-    # resnet50_thor
-    device_num = 8
-    epoch_size = 1
-    enable_hccl = True
-    process = []
-    for i in range(device_num):
-        device_id = i
-        process.append(Process(target=train_process_thor,
-                               args=(q, device_id, epoch_size, device_num, enable_hccl)))
-
-    cpu_count = os.cpu_count()
-    each_cpu_count = cpu_count // device_num
-    for i in range(device_num):
-        process[i].start()
-        if each_cpu_count > 1:
-            cpu_start = each_cpu_count * i
-            cpu_end = each_cpu_count * (i + 1)
-            process_cpu = [x for x in range(cpu_start, cpu_end)]
-            pid = process[i].pid
-            os.sched_setaffinity(pid, set(process_cpu))
-
-    print("Waiting for all subprocesses done...")
-
-    for i in range(device_num):
-        process[i].join()
-
-    # THOR
-    thor_acc = 0.0
-    thor_cost = 0.0
-    for i in range(device_num):
-        output = q.get()
-        thor_acc += output['acc']
-        thor_cost += output['cost']
-    thor_acc = thor_acc / device_num
-    thor_cost = thor_cost / device_num
-
-    for i in range(0, device_num):
-        os.system("rm -rf " + str(i))
-    print("End training...")
-    assert thor_acc > 0.25
+    model.train(2, dataset, callbacks=loss_cb,
+                sink_size=dataset.get_dataset_size(), dataset_sink_mode=True)
+    time_cost = loss_cb.get_per_step_time()
+    loss = loss_cb.get_loss()
+    epoch_idx = loss_cb.get_epoch()
+    print("the {} epoch's resnet result:\n "
+          "device{}, training loss {}, "
+          "training per step cost {:.2f} ms, total_cost {:.2f} ms".format(epoch_idx, device_id,
+                                                                          loss, time_cost, time_cost * step_size))
+    q.put({'loss': loss, 'cost': time_cost})
 
 
 @pytest.mark.level1
@@ -275,7 +217,7 @@ def test_resnet_thor_imagenet_8p_1():
 
     # resnet50_thor
     device_num = 8
-    epoch_size = 1
+    epoch_size = 2
     enable_hccl = True
     process = []
     for i in range(device_num):
@@ -300,19 +242,17 @@ def test_resnet_thor_imagenet_8p_1():
         process[i].join()
 
     # THOR
-    thor_acc = 0.0
+    thor_loss = 0.0
     thor_cost = 0.0
     for i in range(device_num):
        output = q.get()
-        thor_acc += output['acc']
+        thor_loss += output['loss']
         thor_cost += output['cost']
-    thor_acc = thor_acc / device_num
+    thor_loss = thor_loss / device_num
     thor_cost = thor_cost / device_num
 
     for i in range(0, device_num):
        os.system("rm -rf " + str(i))
     print("End training...")
-    print('thor acc: ', thor_acc)
-    print('thor cost: ', thor_cost)
-    #assert thor_acc > 0.25
-    #assert thor_cost < 30
+    assert thor_loss < 7
+    assert thor_cost < 30
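
Reviewer's note: the following condensed, single-process sketch shows the training flow this patch switches to (a plain Model wrapped by ConvertModelUtils().convert_to_thor_model(), trained with dataset sinking), using only symbols the patch itself imports. The dataset path and the learning-rate/damping constants are illustrative placeholders, not values taken from the test, which builds per-step schedules and runs eight distributed processes instead.

    from mindspore import context
    from mindspore.common.tensor import Tensor
    from mindspore.train.model import Model
    from mindspore.train.train_thor import ConvertModelUtils
    from mindspore.train.loss_scale_manager import FixedLossScaleManager
    from mindspore.nn.optim import thor
    from tests.st.networks.models.resnet50.src.resnet import resnet50
    from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
    from tests.st.networks.models.resnet50.src_thor.dataset import create_dataset2

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    net = resnet50(class_num=1001)
    # illustrative path; the test points at the shared-workspace ImageNet copy
    dataset = create_dataset2(dataset_path="/path/to/imagenet/train", do_train=True,
                              batch_size=32, train_image_size=224, eval_image_size=224,
                              target="Ascend", distribute=False)
    loss = CrossEntropySmooth(sparse=True, reduction="mean",
                              smooth_factor=0.1, num_classes=1001)
    # scalar lr/damping are placeholders; the test uses per-step schedule Tensors
    opt = thor(net, learning_rate=Tensor(0.045), damping=Tensor(0.03), momentum=0.9,
               loss_scale=128, frequency=834)
    loss_scale = FixedLossScaleManager(128, drop_overflow_update=False)
    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
                  amp_level="O2", keep_batchnorm_fp32=False)
    # the key step this patch adopts: convert the plain Model so that
    # model.train() runs THOR second-order updates at the configured frequency
    model = ConvertModelUtils().convert_to_thor_model(model=model, network=net,
                                                      loss_fn=loss, optimizer=opt,
                                                      loss_scale_manager=loss_scale,
                                                      metrics={'acc'}, amp_level="O2",
                                                      keep_batchnorm_fp32=False)
    model.train(2, dataset, dataset_sink_mode=True,
                sink_size=dataset.get_dataset_size())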