forked from OSSInnovation/mindspore
add smoke test
modify st code
modify step threshold const
modify threshold value
add resnet50 st
modify monitor init weights
modify batch size
delete loss scale and add init weight
modify loss precision
format code
modify dataset path
modify mobilenet dataset path
format code
delete ()
delete unused import
delete nn
parent 73b5ffb99c
commit 9dad633ca8
dataset.py
@@ -0,0 +1,67 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" create train dataset. """

from functools import partial

import mindspore.dataset as ds
import mindspore.common.dtype as mstype
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2


def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    create a train dataset

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    load_func = partial(ds.Cifar10Dataset, dataset_path)
    cifar_ds = load_func(num_parallel_workers=8, shuffle=False)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    # interpolation default BILINEAR
    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize(
        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)

    c_trans = [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=c_trans)

    # apply batch operations
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
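A minimal usage sketch for create_dataset (not part of the commit): the config only needs image_height and image_width, and the CIFAR-10 path and cfg names here are illustrative placeholders.

from easydict import EasyDict as ed
from dataset import create_dataset

# hypothetical config and local dataset path, for illustration only
cfg = ed({"image_height": 224, "image_width": 224})
train_ds = create_dataset("./cifar-10-batches-bin", cfg, repeat_num=1, batch_size=32)
print(train_ds.get_dataset_size())  # batches per epoch (drop_remainder=True)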
lr_generator.py
@@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""

import math
import numpy as np


def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    generate learning rate array

    Args:
        global_step(int): current global step; entries before it are dropped from the returned array
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, learning rate array
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        else:
            lr = lr_end + \
                (lr_max - lr_end) * \
                (1. + math.cos(math.pi * (i - warmup_steps) /
                               (total_steps - warmup_steps))) / 2.
        if lr < 0.0:
            lr = 0.0
        lr_each_step.append(lr)

    current_step = global_step
    lr_each_step = np.array(lr_each_step).astype(np.float32)
    learning_rate = lr_each_step[current_step:]

    return learning_rate
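A quick sanity-check sketch for the warmup-plus-cosine schedule above (illustrative numbers, not from the commit); slicing by global_step is how the training scripts below resume the schedule mid-run.

from lr_generator import get_lr  # module name inferred from the test imports

lr = get_lr(global_step=0, lr_init=0.0, lr_end=0.0, lr_max=0.3,
            warmup_epochs=1, total_epochs=3, steps_per_epoch=4)
print(len(lr))   # 12 values: 3 epochs * 4 steps
print(lr[:4])    # linear ramp 0 -> 0.3 over the 4 warmup steps
print(lr[-1])    # cosine decay ends close to lr_end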
mobilenetV2.py
@@ -0,0 +1,263 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 Quant model define"""

import numpy as np

import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore import Tensor

__all__ = ['mobilenetV2']


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class GlobalAvgPooling(nn.Cell):
    """
    Global avg pooling definition.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        x = self.mean(x, (2, 3))
        return x


class ConvBNReLU(nn.Cell):
    """
    Convolution/Depthwise fused with Batchnorm and ReLU block definition.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        groups (int): channel group. Convolution is 1 while Depthwise is input channel. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size,
                                   stride=stride,
                                   pad_mode='pad',
                                   padding=padding,
                                   group=groups,
                                   has_bn=True,
                                   activation='relu')

    def construct(self, x):
        x = self.conv(x)
        return x


class InvertedResidual(nn.Cell):
    """
    MobileNetV2 residual block definition.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        expand_ratio (int): expand ratio of input channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim,
                       stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1,
                           pad_mode='pad', padding=0, group=1, has_bn=True)
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = P.TensorAdd()

    def construct(self, x):
        out = self.conv(x)
        if self.use_res_connect:
            out = self.add(out, x)
        return out


class mobilenetV2(nn.Cell):
    """
    mobilenetV2 fusion architecture.

    Args:
        num_classes (int): number of classes. Default is 1000.
        width_mult (float): channels multiplier, rounded to a multiple of round_nearest. Default is 1.
        has_dropout (bool): whether dropout is used. Default is False.
        inverted_residual_setting (list): inverted residual settings. Default is None.
        round_nearest (int): round channel counts to a multiple of this value. Default is 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> mobilenetV2(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(mobilenetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # building first layer
        input_channel = _make_divisible(
            input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)

        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(
                    block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(
            input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
        head = ([GlobalAvgPooling(),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ] if not has_dropout else
                [GlobalAvgPooling(),
                 nn.Dropout(0.2),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ])
        self.head = nn.SequentialCell(head)

        # init weights
        self.init_parameters_data()
        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

    def _initialize_weights(self):
        """
        Initialize weights.

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        self.init_parameters_data()
        for _, m in self.cells_and_names():
            np.random.seed(1)
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.weight.data.shape).astype("float32"))
                m.weight.set_data(w)
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.Conv2dBnAct):
                n = m.conv.kernel_size[0] * \
                    m.conv.kernel_size[1] * m.conv.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.conv.weight.data.shape).astype("float32"))
                m.conv.weight.set_data(w)
                if m.conv.bias is not None:
                    m.conv.bias.set_data(
                        Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_data(
                    Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
                m.beta.set_data(
                    Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.DenseBnAct):
                m.dense.weight.set_data(
                    Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                if m.dense.bias is not None:
                    m.dense.bias.set_data(
                        Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
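A forward-pass sketch for the fusion network above (illustrative only; it assumes a MindSpore context is already configured and uses an arbitrary batch of random 224x224 inputs).

import numpy as np
from mindspore import Tensor
from mobilenetV2 import mobilenetV2

net = mobilenetV2(num_classes=10)
x = Tensor(np.random.randn(2, 3, 224, 224).astype(np.float32))
out = net(x)
print(out.shape)  # (2, 10): one logit vector per image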
@@ -0,0 +1,123 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Mobilenetv2_quant on Cifar10"""

import pytest
import numpy as np
from easydict import EasyDict as ed

from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.quant import quant
from mindspore.common import set_seed

from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropyWithLabelSmooth
from mobilenetV2 import mobilenetV2

config_ascend_quant = ed({
    "num_classes": 10,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 200,
    "step_threshold": 10,
    "data_load_mode": "mindata",
    "epoch_size": 1,
    "start_epoch": 200,
    "warmup_epochs": 1,
    "lr": 0.3,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
    "quantization_aware": True,
})

dataset_path = "/dataset/workspace/mindspore_dataset/cifar-10-batches-bin/"


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def train_on_ascend():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    network = quant.convert_quant_network(network,
                                          bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])

    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))

    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss


if __name__ == '__main__':
    train_on_ascend()
utils.py
@@ -0,0 +1,118 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 utils"""

import time
import numpy as np

from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype


class Monitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): train lr
        step_threshold (int): stop training after this many global steps. Default: 10.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05] * 100).asnumpy(), step_threshold=10)
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)
        self.step_threshold = step_threshold

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()


class CrossEntropyWithLabelSmooth(_Loss):
    """
    CrossEntropyWithLabelSmooth.

    Args:
        smooth_factor (float): smooth factor. Default: 0.
        num_classes (int): number of classes. Default: 1000.

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
                                    self.on_value, self.off_value)
        out_loss = self.ce(logit, one_hot_label)
        out_loss = self.mean(out_loss, 0)
        return out_loss
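A worked check of the smoothing constants above: with smooth_factor=0.1 and num_classes=10, on_value = 1 - 0.1 = 0.9 and off_value = 0.1 / 9 ≈ 0.0111, so each smoothed one-hot row still sums to 0.9 + 9 * 0.0111 = 1. A numpy sketch (illustrative, mirrors the OneHot arguments):

import numpy as np

smooth_factor, num_classes = 0.1, 10
on_value = 1.0 - smooth_factor
off_value = smooth_factor / (num_classes - 1)
target = np.full(num_classes, off_value)
target[3] = on_value              # label == 3
print(round(target.sum(), 6))     # 1.0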
dataset.py
@@ -0,0 +1,68 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" create train dataset. """

from functools import partial

import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.vision.c_transforms as C


def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    create a train dataset

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    load_func = partial(de.Cifar10Dataset, dataset_path)
    ds = load_func(num_parallel_workers=8, shuffle=False)

    resize_height = config.image_height
    resize_width = config.image_width

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    resize_op = C.Resize((resize_height, resize_width))
    normalize_op = C.Normalize(mean=mean, std=std)
    changeswap_op = C.HWC2CHW()
    c_trans = [resize_op, normalize_op, changeswap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=c_trans, input_columns="image",
                num_parallel_workers=8)
    ds = ds.map(operations=type_cast_op,
                input_columns="label", num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
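Unlike the MobileNetV2 pipeline above, this dataset skips Rescale and instead scales the ImageNet mean/std by 255, so Normalize works directly on the raw 0-255 pixel range. The two forms are equivalent: (x/255 - m)/s == (x - 255m)/(255s). A numpy check (illustrative only):

import numpy as np

x = np.array([0., 128., 255.])
m, s = 0.485, 0.229
a = (x / 255.0 - m) / s          # rescale first, then normalize
b = (x - m * 255.0) / (s * 255.0)  # normalize raw pixels with scaled stats
print(np.allclose(a, b))         # True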
lr_generator.py
@@ -0,0 +1,93 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""

import math
import numpy as np


def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    generate learning rate array

    Args:
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch
        lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default

    Returns:
        np.array, learning rate array
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    if lr_decay_mode == 'steps':
        decay_epoch_index = [0.3 * total_steps,
                             0.6 * total_steps, 0.8 * total_steps]
        for i in range(total_steps):
            if i < decay_epoch_index[0]:
                lr = lr_max
            elif i < decay_epoch_index[1]:
                lr = lr_max * 0.1
            elif i < decay_epoch_index[2]:
                lr = lr_max * 0.01
            else:
                lr = lr_max * 0.001
            lr_each_step.append(lr)
    elif lr_decay_mode == 'poly':
        if warmup_steps != 0:
            inc_each_step = (float(lr_max) - float(lr_init)) / \
                float(warmup_steps)
        else:
            inc_each_step = 0
        for i in range(total_steps):
            if i < warmup_steps:
                lr = float(lr_init) + inc_each_step * float(i)
            else:
                base = (1.0 - (float(i) - float(warmup_steps)) /
                        (float(total_steps) - float(warmup_steps)))
                lr = float(lr_max) * base * base
                if lr < 0.0:
                    lr = 0.0
            lr_each_step.append(lr)
    elif lr_decay_mode == 'cosine':
        decay_steps = total_steps - warmup_steps
        for i in range(total_steps):
            if i < warmup_steps:
                lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
                lr = float(lr_init) + lr_inc * (i + 1)
            else:
                linear_decay = (total_steps - i) / decay_steps
                cosine_decay = 0.5 * \
                    (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
                decayed = linear_decay * cosine_decay + 0.00001
                lr = lr_max * decayed
            lr_each_step.append(lr)
    else:
        for i in range(total_steps):
            if i < warmup_steps:
                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
            else:
                lr = lr_max - (lr_max - lr_end) * \
                    (i - warmup_steps) / (total_steps - warmup_steps)
            lr_each_step.append(lr)

    learning_rate = np.array(lr_each_step).astype(np.float32)

    return learning_rate
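Each decay branch above emits one value per training step. A sketch comparing two modes with illustrative parameters (not from the commit):

from lr_generator import get_lr  # module name inferred from the test imports

steps = get_lr(lr_init=0.01, lr_end=0.0, lr_max=0.1, warmup_epochs=0,
               total_epochs=10, steps_per_epoch=10, lr_decay_mode='steps')
cosine = get_lr(lr_init=0.01, lr_end=0.0, lr_max=0.1, warmup_epochs=0,
                total_epochs=10, steps_per_epoch=10, lr_decay_mode='cosine')
print(steps[0], steps[-1])  # 0.1 at the start, 0.1 * 0.001 past the last boundary
print(len(cosine))          # 100 values, one per step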
resnet_quant_manual.py
@@ -0,0 +1,354 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ResNet."""

import numpy as np

import mindspore.nn as nn
import mindspore.common.initializer as weight_init
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.nn import FakeQuantWithMinMax, Conv2dBnFoldQuant as Conv2dBatchNormQuant


_ema_decay = 0.999
_symmetric = True
_fake = True
_per_channel = True


def _weight_variable(shape, factor=0.01):
    init_value = np.random.randn(*shape).astype(np.float32) * factor
    return Tensor(init_value)


def _conv3x3(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 3, 3)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _conv1x1(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 1, 1)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _conv7x7(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 7, 7)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _bn(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)


def _bn_last(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)


def _fc(in_channel, out_channel):
    weight_shape = (out_channel, in_channel)
    weight = _weight_variable(weight_shape)
    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)


class ConvBNReLU(nn.Cell):
    """
    Convolution/Depthwise fused with Batchnorm and ReLU block definition.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        groups (int): channel group. Convolution is 1 while Depthwise is input channel. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        padding = (kernel_size - 1) // 2
        conv = Conv2dBatchNormQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
                                    group=groups, fake=_fake, per_channel=_per_channel, symmetric=_symmetric)
        layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()]
        self.features = nn.SequentialCell(layers)

    def construct(self, x):
        output = self.features(x)
        return output


class ResidualBlock(nn.Cell):
    """
    ResNet V1 residual block definition.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride size for the first convolutional layer. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, stride=2)
    """
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock, self).__init__()

        channel = out_channel // self.expansion
        self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1)
        self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride)
        self.conv3 = nn.SequentialCell([Conv2dBatchNormQuant(channel, out_channel, fake=_fake,
                                                             per_channel=_per_channel,
                                                             symmetric=_symmetric,
                                                             kernel_size=1, stride=1, pad_mode='same', padding=0),
                                        FakeQuantWithMinMax(
                                            ema=True, ema_decay=_ema_decay, symmetric=False)
                                        ]) if _fake else Conv2dBatchNormQuant(channel, out_channel, fake=_fake,
                                                                              per_channel=_per_channel,
                                                                              symmetric=_symmetric,
                                                                              kernel_size=1, stride=1,
                                                                              pad_mode='same', padding=0)

        self.down_sample = False

        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None

        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([Conv2dBatchNormQuant(in_channel, out_channel,
                                                                             per_channel=_per_channel,
                                                                             symmetric=_symmetric,
                                                                             kernel_size=1, stride=stride,
                                                                             pad_mode='same', padding=0),
                                                        FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay,
                                                                            symmetric=False)
                                                        ]) if _fake else Conv2dBatchNormQuant(in_channel, out_channel,
                                                                                              fake=_fake,
                                                                                              per_channel=_per_channel,
                                                                                              symmetric=_symmetric,
                                                                                              kernel_size=1,
                                                                                              stride=stride,
                                                                                              pad_mode='same',
                                                                                              padding=0)
        self.add = nn.TensorAddQuant()
        self.relu = P.ReLU()

    def construct(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = self.add(out, identity)
        out = self.relu(out)

        return out


class ResNet(nn.Cell):
    """
    ResNet architecture.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        strides (list): Stride size in each layer.
        num_classes (int): The number of classes that the training images belong to.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResNet(ResidualBlock,
        >>>        [3, 4, 6, 3],
        >>>        [64, 256, 512, 1024],
        >>>        [256, 512, 1024, 2048],
        >>>        [1, 2, 2, 2],
        >>>        10)
    """

    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet, self).__init__()

        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError(
                "the length of layer_num, in_channels, out_channels list must be 4!")

        self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])

        self.mean = P.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, per_channel=_per_channel,
                                       symmetric=_symmetric)
        self.output_fake = nn.FakeQuantWithMinMax(
            ema=True, ema_decay=_ema_decay)

        # init weights
        self._initialize_weights()

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """
        Make stage network of ResNet.

        Args:
            block (Cell): Resnet block.
            layer_num (int): Layer number.
            in_channel (int): Input channel.
            out_channel (int): Output channel.
            stride (int): Stride size for the first convolutional layer.

        Returns:
            SequentialCell, the output layer.

        Examples:
            >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
        """
        layers = []

        resnet_block = block(in_channel, out_channel, stride=stride)
        layers.append(resnet_block)

        for _ in range(1, layer_num):
            resnet_block = block(out_channel, out_channel, stride=1)
            layers.append(resnet_block)

        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        out = self.mean(c5, (2, 3))
        out = self.flatten(out)
        out = self.end_point(out)
        out = self.output_fake(out)
        return out

    def _initialize_weights(self):

        self.init_parameters_data()
        for _, m in self.cells_and_names():
            np.random.seed(1)

            if isinstance(m, nn.Conv2dBnFoldQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))
            elif isinstance(m, nn.DenseQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))
            elif isinstance(m, nn.Conv2dBnWithoutFoldQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))


def resnet50_quant(class_num=10):
    """
    Get ResNet50 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet50 neural network.

    Examples:
        >>> net = resnet50_quant(10)
    """
    return ResNet(ResidualBlock,
                  [3, 4, 6, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)


def resnet101_quant(class_num=1001):
    """
    Get ResNet101 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet101 neural network.

    Examples:
        >>> net = resnet101_quant(1001)
    """
    return ResNet(ResidualBlock,
                  [3, 4, 23, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)
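A shape-check sketch for the manually quantized ResNet-50 above (illustrative only; it assumes a MindSpore context on a backend that supports the fake-quant operators, and the 224x224 input matches the training config below):

import numpy as np
from mindspore import Tensor
from resnet_quant_manual import resnet50_quant

net = resnet50_quant(class_num=10)
x = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # (1, 10)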
@@ -0,0 +1,131 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Resnet50_quant on Cifar10"""

import pytest
import numpy as np
from easydict import EasyDict as ed

from mindspore import context
from mindspore import Tensor
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model
from mindspore.train.quant import quant
from mindspore import set_seed

from resnet_quant_manual import resnet50_quant
from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropy


config_quant = ed({
    "class_num": 10,
    "batch_size": 128,
    "step_threshold": 20,
    "loss_scale": 1024,
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "epoch_size": 1,
    "pretrained_epoch_size": 90,
    "buffer_size": 1000,
    "image_height": 224,
    "image_width": 224,
    "data_load_mode": "mindata",
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 50,
    "save_checkpoint_path": "./",
    "warmup_epochs": 0,
    "lr_decay_mode": "cosine",
    "use_label_smooth": True,
    "label_smooth_factor": 0.1,
    "lr_init": 0,
    "lr_max": 0.005,
})

dataset_path = "/dataset/workspace/mindspore_dataset/cifar-10-batches-bin/"


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def train_on_ascend():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(
        smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
    # loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    net = quant.convert_quant_network(net,
                                      bn_fold=True,
                                      per_channel=[True, False],
                                      symmetric=[True, False])

    # get learning rate
    lr = Tensor(get_lr(lr_init=config.lr_init,
                       lr_end=0.0,
                       lr_max=config.lr_max,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=config.epoch_size,
                       steps_per_epoch=step_size,
                       lr_decay_mode='cosine'))

    # define optimization
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    # define model
    # model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)

    callbacks = [monitor]
    model.train(epoch_size, dataset, callbacks=callbacks,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))

    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss


if __name__ == '__main__':
    train_on_ascend()
utils.py
@@ -0,0 +1,105 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Resnet50 utils"""

import time
import numpy as np

from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype


class Monitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): train lr
        step_threshold (int): stop training after this many global steps. Default: 10.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05] * 100).asnumpy(), step_threshold=10)
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)
        self.step_threshold = step_threshold

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()


class CrossEntropy(_Loss):
    """the redefined loss function with SoftmaxCrossEntropyWithLogits"""

    def __init__(self, smooth_factor=0, num_classes=1001):
        super(CrossEntropy, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        one_hot_label = self.onehot(label, F.shape(
            logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, one_hot_label)
        loss = self.mean(loss, 0)
        return loss