diff --git a/model_zoo/official/cv/cnn_direction_model/eval.py b/model_zoo/official/cv/cnn_direction_model/eval.py
new file mode 100644
index 00000000000..52a93236375
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/eval.py
@@ -0,0 +1,69 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""eval CNN direction model."""
+import argparse
+import os
+import random
+
+import numpy as np
+from src.cnn_direction_model import CNNDirectionModel
+from src.config import config1 as config
+from src.dataset import create_dataset_eval
+
+from mindspore import context
+from mindspore import dataset as de
+from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
+from mindspore.train.model import Model
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+
+parser = argparse.ArgumentParser(description='Image classification')
+
+parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+if __name__ == '__main__':
+    # init context
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
+    device_id = int(os.getenv('DEVICE_ID', '0'))
+    context.set_context(device_id=device_id)
+
+    # create dataset
+    dataset = create_dataset_eval(args_opt.dataset_path + "/ocr_eval_pos.mindrecord", config=config)
+    step_size = dataset.get_dataset_size()
+
+    print("step_size ", step_size)
+
+    # define net
+    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])
+
+    # load checkpoint
+    param_dict = load_checkpoint(args_opt.checkpoint_path)
+    load_param_into_net(net, param_dict)
+    net.set_train(False)
+
+    # define loss, model
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")
+
+    # define model
+    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy'})
+
+    # eval model
+    res = model.eval(dataset, dataset_sink_mode=False)
+    print("result:", res, "ckpt=", args_opt.checkpoint_path)
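+    # `res` is a dict keyed by the metric names passed to Model above, e.g. {'top_1_accuracy': ...}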
(the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +if [ $# != 2 ] && [ $# != 3 ] +then + echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ $# == 3 ] +then + PATH3=$(get_real_path $3) +fi + +if [ ! -f $PATH1 ] +then + echo "error: RANK_TABLE_FILE=$PATH1 is not a file" +exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: DATASET_PATH=$PATH2 is not a directory" +exit 1 +fi + +if [ $# == 3 ] && [ ! -f $PATH3 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=8 +export RANK_TABLE_FILE=$PATH1 + +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) + +for((i=0; i<${DEVICE_NUM}; i++)) +do + export DEVICE_ID=$i + export RANK_ID=$((rank_start + i)) + rm -rf ./train_parallel$i + mkdir ./train_parallel$i + cp ../*.py ./train_parallel$i + cp *.sh ./train_parallel$i + cp -r ../src ./train_parallel$i + cd ./train_parallel$i || exit + echo "start training for rank $RANK_ID, device $DEVICE_ID" + env > env.log + + if [ $# == 2 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & + fi + + if [ $# == 3 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 &> log & + fi + + cd .. +done \ No newline at end of file diff --git a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh new file mode 100644 index 00000000000..40133b78d50 --- /dev/null +++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_eval_ascend.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH]" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=4 +export RANK_ID=0 +export RANK_SIZE=1 + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ ! 
+
+if [ ! -f $PATH2 ]
+then
+    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+if [ -d "eval" ];
+then
+    rm -rf ./eval
+fi
+
+mkdir ./eval
+cp ../*.py ./eval
+cp *.sh ./eval
+cp -r ../src ./eval
+cd ./eval || exit
+echo "start evaluation for device $DEVICE_ID"
+env > env.log
+
+python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
+
+cd ..
\ No newline at end of file
diff --git a/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh
new file mode 100644
index 00000000000..b57f5e70b08
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/scripts/run_standalone_train_ascend.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 1 ] && [ $# != 2 ]
+then
+    echo "Usage: sh run_standalone_train_ascend.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
+exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=3
+export RANK_ID=0
+export RANK_SIZE=1
+
+
+get_real_path(){
+    if [ "${1:0:1}" == "/" ]; then
+        echo "$1"
+    else
+        echo "$(realpath -m $PWD/$1)"
+    fi
+}
+
+PATH1=$(get_real_path $1)
+
+if [ $# == 2 ]
+then
+    PATH2=$(get_real_path $2)
+fi
+
+if [ $# == 2 ] && [ ! -f $PATH2 ]
+then
+    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+if [ -d "train" ];
+then
+    rm -rf ./train
+fi
+mkdir ./train
+cp ../*.py ./train
+cp *.sh ./train
+cp -r ../src ./train
+cd ./train || exit
+echo "start training for device $DEVICE_ID"
+env > env.log
+if [ $# == 1 ]
+then
+    python train.py --dataset_path=$PATH1 &> log &
+fi
+
+if [ $# == 2 ]
+then
+    python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
+fi
+
+cd ..
\ No newline at end of file
diff --git a/model_zoo/official/cv/cnn_direction_model/src/cnn_direction_model.py b/model_zoo/official/cv/cnn_direction_model/src/cnn_direction_model.py
new file mode 100644
index 00000000000..82ea8d61a5d
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/src/cnn_direction_model.py
@@ -0,0 +1,264 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +"""CNN direction model.""" +import math + +import mindspore.nn as nn +from mindspore.common.initializer import Uniform +from mindspore.ops import operations as P + + +class NetAddN(nn.Cell): + """ + Computes addition of all input tensors element-wise. + """ + + def __init__(self): + super(NetAddN, self).__init__() + self.addN = P.AddN() + + def construct(self, *z): + return self.addN(z) + + +class Conv(nn.Cell): + """ + A convolution layer + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + kernel (tuple): Size of the kernel. Default: (3, 3). + dilate (bool): If set to true a second convolution layer is added. Default: True. + act (string): The activation function. Default: 'relu'. + mp (int): Size of max pooling layer. Default: None. + + Returns: + Tensor, output tensor. + + Examples: + >>> Conv(3, 64) + """ + + def __init__(self, + in_channel, + out_channel, + kernel=(3, 3), + dilate=True, + act='relu', + mp=None): + super(Conv, self).__init__() + self.in_channel = in_channel + self.out_channel = out_channel + self.kernel = kernel + self.dilate = dilate + self.act = act + self.mp = mp + + self.conv1 = nn.Conv2d(self.in_channel, self.out_channel, kernel_size=self.kernel, pad_mode="same", + weight_init='he_normal') + + self.batch_norm1 = nn.BatchNorm2d(self.out_channel, eps=1e-3, momentum=0.99, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + if self.dilate: + self.dilate_relu = P.ReLU() + self.dilate_conv = nn.Conv2d(self.out_channel, self.out_channel, kernel_size=self.kernel, + dilation=(2, 2), pad_mode='same', weight_init='he_normal') + + self.dilate_batch_norm = nn.BatchNorm2d(self.out_channel, eps=1e-3, momentum=0.99, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + self.dilate_add = NetAddN() + + if self.act == 'relu': + self.act_layer = P.ReLU() + + if self.mp is not None: + self.mp_layer = nn.MaxPool2d(kernel_size=self.mp, stride=self.mp, pad_mode='valid') + + def construct(self, x): + + out = self.conv1(x) + out = self.batch_norm1(out) + out1 = out + + if self.dilate: + out = self.dilate_relu(out) + out = self.dilate_conv(out) + out = self.dilate_batch_norm(out) + out = self.dilate_add(out1, out) + + if self.act == 'relu': + out = self.act_layer(out) + + if self.mp is not None: + out = self.mp_layer(out) + + return out + + +class Block(nn.Cell): + """ + A Block of convolution operations. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + Returns: + Tensor, output tensor. + + Examples: + >>> Block(3, 64) + """ + + def __init__(self, + in_channel, + out_channel): + super(Block, self).__init__() + self.conv1 = Conv(in_channel, out_channel, act='relu') + self.conv2 = Conv(out_channel, out_channel, act=None) + self.add = NetAddN() + self.relu = P.ReLU() + + def construct(self, x): + y = self.conv1(x) + y = self.conv2(y) + out = self.add(x, y) + out = self.relu(out) + + return out + + +class ResidualBlock(nn.Cell): + """ + A residual block. + + Args: + block (Block) : The building block. + num_blocks (int): Number of blocks. + in_channel (int): Input channel. + out_channel (int): Output channel. + mp (int) : Size of the max pooling layer. Default: 2. + + Returns: + Tensor, output tensor. 
+
+    Examples:
+        >>> ResidualBlock(Block, 1, 3, 64)
+    """
+
+    def __init__(self,
+                 block,
+                 num_blocks,
+                 in_channel,
+                 out_channel,
+                 mp=2):
+
+        super(ResidualBlock, self).__init__()
+        self.num_blocks = num_blocks
+        self.in_channel = in_channel
+        self.out_channel = out_channel
+        self.mp = mp
+        self.conv1 = Conv(self.in_channel, self.out_channel, kernel=(3, 3), dilate=False)
+
+        layers = []
+        for _ in range(self.num_blocks):
+            res_block = block(out_channel, out_channel)
+            layers.append(res_block)
+
+        self.layer = nn.SequentialCell(layers)
+
+        if mp is not None:
+            self.max_pool = nn.MaxPool2d(kernel_size=mp, stride=mp, pad_mode='valid')
+
+    def construct(self, x):
+        out = self.conv1(x)
+        out = self.layer(out)
+        if self.mp is not None:
+            out = self.max_pool(out)
+
+        return out
+
+
+class CNNDirectionModel(nn.Cell):
+    """
+    CNN direction model.
+
+    Args:
+        in_channels (list): List of the dimensions of the input channels. The first element is the input
+            dimension of the first Conv layer, and the rest of the elements are the input dimensions of the
+            residual blocks, in order.
+        out_channels (list): List of the dimensions of the output channels. The first element is the output
+            dimension of the first Conv layer, and the rest of the elements are the output dimensions of the
+            residual blocks, in order.
+        dense_layers (list): Dimensions of the dense layers, in order.
+        image_size (list): Size of the input images as [height, width].
+        num_classes (int): Number of classes. Default: 2 for binary classification.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 dense_layers,
+                 image_size,
+                 num_classes=2
+                 ):
+        super(CNNDirectionModel, self).__init__()
+        self.num_classes = num_classes
+        self.image_h = image_size[0]
+        self.image_w = image_size[1]
+        self.conv1 = Conv(in_channels[0], out_channels[0], kernel=(7, 7), dilate=False, mp=2)
+        self.residual_block1 = ResidualBlock(Block, 1, in_channels[1], out_channels[1])
+        self.residual_block2 = ResidualBlock(Block, 1, in_channels[2], out_channels[2])
+        self.residual_block3 = ResidualBlock(Block, 2, in_channels[3], out_channels[3])
+        self.residual_block4 = ResidualBlock(Block, 1, in_channels[4], out_channels[4])
+
+        # The 5 pooling stages above each halve height and width (mp=2), so the feature map is 1/32 of the input.
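+        # e.g. with config1 (im_size_h=64, im_size_w=512) this averages over a 2x16 feature map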
+        self.avg_pool = nn.AvgPool2d(kernel_size=(int(self.image_h / 32), int(self.image_w / 32)))
+
+        # Glorot (Xavier) uniform init: scale = sqrt(6 / (fan_in + fan_out)), i.e. weight_init='glorot_uniform'
+        scale = math.sqrt(6 / (out_channels[-1] + dense_layers[0]))
+        self.dense1 = nn.Dense(out_channels[-1], dense_layers[0], weight_init=Uniform(scale=scale), activation='relu')
+
+        scale = math.sqrt(6 / (dense_layers[0] + dense_layers[1]))
+        self.dense2 = nn.Dense(dense_layers[0], dense_layers[1], weight_init=Uniform(scale=scale), activation='relu')
+
+        scale = math.sqrt(6 / (dense_layers[1] + num_classes))
+        self.dense3 = nn.Dense(dense_layers[1], num_classes, weight_init=Uniform(scale=scale), activation='softmax')
+
+    def construct(self, x):
+        out = self.conv1(x)
+
+        out = self.residual_block1(out)
+
+        out = self.residual_block2(out)
+        out = self.residual_block3(out)
+        out = self.residual_block4(out)
+
+        out = self.avg_pool(out)
+
+        out = P.Reshape()(out, (out.shape[0], out.shape[1]))
+
+        out = self.dense1(out)
+        out = self.dense2(out)
+        out = self.dense3(out)
+
+        return out
diff --git a/model_zoo/official/cv/cnn_direction_model/src/config.py b/model_zoo/official/cv/cnn_direction_model/src/config.py
new file mode 100644
index 00000000000..80ddb905caa
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/src/config.py
@@ -0,0 +1,37 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Network config setting, used in train.py and eval.py.
+"""
+from easydict import EasyDict as ed

+config1 = ed({
+    "batch_size": 8,
+    "epoch_size": 5,
+    "pretrain_epoch_size": 0,
+    "save_checkpoint": True,
+    "save_checkpoint_epochs": 10,
+    "keep_checkpoint_max": 20,
+    "save_checkpoint_path": "./",
+    "warmup_epochs": 5,
+    "lr_decay_mode": "poly",
+    "lr": 1e-4,
+    "work_nums": 4,
+    "im_size_w": 512,
+    "im_size_h": 64,
+    "pos_samples_size": 100,
+    "augment_severity": 0.1,
+    "augment_prob": 0.3
+})
diff --git a/model_zoo/official/cv/cnn_direction_model/src/dataset.py b/model_zoo/official/cv/cnn_direction_model/src/dataset.py
new file mode 100644
index 00000000000..13644c663d0
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/src/dataset.py
@@ -0,0 +1,246 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Data operations, used in train.py and eval.py.
+"""
+import os
+
+import mindspore.dataset.engine as de
+import mindspore.dataset.vision.c_transforms as C
+from src.dataset_utils import lucky, noise_blur, noise_speckle, noise_gamma, noise_gaussian, noise_salt_pepper, \
+    shift_color, enhance_brightness, enhance_sharpness, enhance_contrast, enhance_color, gaussian_blur, \
+    randcrop, resize, rdistort, rgeometry, rotate_about_center, whole_rdistort, warp_perspective, random_contrast, \
+    unify_img_label
+
+import cv2
+import numpy as np
+cv2.setNumThreads(0)
+
+image_height = None
+image_width = None
+
+
+class Augmentor:
+    """
+    Augment image with random noise and transformation.
+
+    Controlled by severity level [0, 1].
+
+    Usage:
+        augmentor = Augmentor(severity=0.3,
+                              prob=0.5,
+                              enable_transform=True,
+                              enable_crop=False)
+        image_new = augmentor.process(image)
+    """
+
+    def __init__(self, severity, prob, enable_transform=True, enable_crop=False):
+        """
+        severity: in [0, 1], from min to max level of noise/transformation
+        prob: in [0, 1], probability to apply each operator
+        enable_transform: enable all transformation operators
+        enable_crop: enable crop operator
+        """
+        self.severity = np.clip(severity, 0, 1)
+        self.prob = np.clip(prob, 0, 1)
+        self.enable_transform = enable_transform
+        self.enable_crop = enable_crop
+
+    def add_noise(self, im):
+        """randomly add noise to image"""
+
+        severity = self.severity
+        prob = self.prob
+
+        if lucky(prob):
+            im = noise_gamma(im, severity=severity)
+        if lucky(prob):
+            im = noise_blur(im, severity=severity)
+        if lucky(prob):
+            im = noise_gaussian(im, severity=severity)
+        if lucky(prob):
+            im = noise_salt_pepper(im, severity=severity)
+        if lucky(prob):
+            im = shift_color(im, severity=severity)
+        if lucky(prob):
+            im = gaussian_blur(im, severity=severity)
+        if lucky(prob):
+            im = noise_speckle(im, severity=severity)
+        if lucky(prob):
+            im = enhance_sharpness(im, severity=severity)
+        if lucky(prob):
+            im = enhance_contrast(im, severity=severity)
+        if lucky(prob):
+            im = enhance_brightness(im, severity=severity)
+        if lucky(prob):
+            im = enhance_color(im, severity=severity)
+        if lucky(prob):
+            im = random_contrast(im)
+
+        return im
+
+    def convert_color(self, im, cval):
+        """resolve a color spec ('median'/'md', 'mean', a scalar or an iterable) into concrete int value(s)"""
+        if cval in ['median', 'md']:
+            cval = np.median(im, axis=(0, 1)).astype(int)
+        elif cval == 'mean':
+            cval = np.mean(im, axis=(0, 1)).astype(int)
+        if hasattr(cval, '__iter__'):
+            cval = [int(i) for i in cval]
+        else:
+            cval = int(cval)
+        return cval
+
+    def transform(self, im, cval=255, **kw):
+        """Randomly deform the image according to the severity and probability configured at construction."""
+        severity = self.severity
+        prob = self.prob
+        cval = self.convert_color(im, cval)
+        if lucky(prob):
+            # affine transform
+            im = rgeometry(im, severity=severity, cval=cval)
+        if lucky(prob):
+            im = rdistort(im, severity=severity, cval=cval)
+        if lucky(prob):
+            im = warp_perspective(im, severity=severity, cval=cval)
+        if lucky(prob):
+            im = resize(im, fx=kw.get('fx'), fy=kw.get('fy'), severity=severity)
+        if lucky(prob):
+            im = rotate_about_center(im, severity=severity, cval=cval)
+        if lucky(prob):
+            # whole-image wave distortion
+            im = whole_rdistort(im, severity=severity)
+        if lucky(prob) and self.enable_crop:
+            # random crop
+            im = randcrop(im, severity=severity)
+        return im
+
+    def process(self, im, cval='median', **kw):
+        """Apply the configured transforms (if enabled) and then the noise operators;
+        extra keyword arguments are forwarded to transform()."""
+        if self.enable_transform:
+            im = self.transform(im, cval=cval, **kw)
+        im = self.add_noise(im)
+        return im
+
+
+def rotate_and_set_neg(img, label):
+    """rotate the image by 180 degrees and relabel it as a negative sample"""
+    label = label - 1
+    img_rotate = np.rot90(img)
+    img_rotate = np.rot90(img_rotate)
+    return img_rotate, np.array(label).astype(np.int32)
+
+
+def rotate(img, label):
+    """rotate the image by 180 degrees, keeping the label"""
+    img_rotate = np.rot90(img)
+    img_rotate = np.rot90(img_rotate)
+    return img_rotate, label
+
+
+def random_neg_with_rotate(img, label):
+    if lucky(0.5):
+        # 50% of samples are turned into negative samples
+        label = label - 1
+        # rotate by 180 degrees
+        img_rotate = np.rot90(img)
+        img = np.rot90(img_rotate)
+    return img, np.array(label).astype(np.int32)
+
+
+def transform_image(img, label):
+    # scale pixels to [-1, 1] and convert HWC -> CHW
+    data = np.array([img[...]], np.float32)
+    data = data / 127.5 - 1
+    return data.transpose((0, 3, 1, 2))[0], label
+
+
+def create_dataset_train(mindrecord_file_pos, config):
+    """
+    create a train dataset
+
+    Args:
+        mindrecord_file_pos(string): mindrecord file for positive samples.
+        config(dict): config of dataset.
+
+    Returns:
+        dataset
+    """
+    rank_size = int(os.getenv("RANK_SIZE", '1'))
+    rank_id = int(os.getenv("RANK_ID", '0'))
+    decode = C.Decode()
+
+    ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=4,
+                        num_shards=rank_size, shard_id=rank_id, shuffle=True)
+    ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=8)
+
+    augmentor = Augmentor(config.augment_severity, config.augment_prob)
+    operation = augmentor.process
+    ds = ds.map(operations=operation, input_columns=["image"],
+                num_parallel_workers=1, python_multiprocessing=True)
+    # randomly turn half of the samples into negative (rotated) samples
+    ds = ds.map(operations=[random_neg_with_rotate, unify_img_label, transform_image], input_columns=["image", "label"],
+                num_parallel_workers=8, python_multiprocessing=True)
+    # for training, double the dataset to account for positive and negative samples
+    ds = ds.repeat(2)
+
+    # apply batch operations
+    ds = ds.batch(config.batch_size, drop_remainder=True)
+    return ds
+
+
+def resize_image(img, label):
+    """resize to the target height, crop or right-pad with white to the target width, then rescale to [-1, 1]"""
+    color_fill = 255
+    scale = image_height / img.shape[0]
+    img = cv2.resize(img, None, fx=scale, fy=scale)
+    if img.shape[1] > image_width:
+        img = img[:, 0:image_width]
+    else:
+        blank_img = np.zeros((image_height, image_width, 3), np.uint8)
+        # fill the image with white
+        blank_img.fill(color_fill)
+        blank_img[:image_height, :img.shape[1]] = img
+        img = blank_img
+    data = np.array([img[...]], np.float32)
+    data = data / 127.5 - 1
+    return data.transpose((0, 3, 1, 2))[0], label
+
+
+def create_dataset_eval(mindrecord_file_pos, config):
+    """
+    create an eval dataset
+
+    Args:
+        mindrecord_file_pos(string): mindrecord file for positive samples.
+        config(dict): config of dataset.
+ + Returns: + dataset + """ + rank_size = int(os.getenv("RANK_SIZE", '1')) + rank_id = int(os.getenv("RANK_ID", '0')) + decode = C.Decode() + + ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=1, + num_shards=rank_size, shard_id=rank_id, shuffle=False) + ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=8) + + global image_height + global image_width + image_height = config.im_size_h + image_width = config.im_size_w + ds = ds.map(operations=resize_image, input_columns=["image", "label"], num_parallel_workers=config.work_nums, + python_multiprocessing=False) + # apply batch operations + ds = ds.batch(1, drop_remainder=True) + + return ds diff --git a/model_zoo/official/cv/cnn_direction_model/src/dataset_utils.py b/model_zoo/official/cv/cnn_direction_model/src/dataset_utils.py new file mode 100644 index 00000000000..3b49d4c111a --- /dev/null +++ b/model_zoo/official/cv/cnn_direction_model/src/dataset_utils.py @@ -0,0 +1,641 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from __future__ import absolute_import, division, print_function, unicode_literals + +from math import ceil, sin, pi +from random import choice, random +from random import randint, uniform + +import cv2 +import numpy as np +from numpy.random import randn +from PIL import ImageEnhance, Image +from scipy.ndimage import filters, interpolation +from scipy.ndimage.interpolation import map_coordinates +from skimage.transform import PiecewiseAffineTransform, warp + +nprandint = np.random.randint + + +def lucky(p=0.3, rand_func=random): + """ return True with probability p """ + return rand_func() < p + + +def rgeometry(im, eps=0.04, delta=0.8, cval=None, severity=1): + """ + affine transform + """ + if severity == 0: + return im + + if cval is None: + cval = [0] * im.shape[2] + elif isinstance(cval, (float, int)): + cval = [cval] * im.shape[2] + + severity = abs(severity) + eps = severity * eps + delta = severity * delta + m = np.array([[1 + eps * randn(), 0.0], [eps * randn(), 1.0 + eps * randn()]]) + c = np.array(im.shape[:2]) * 0.5 + d = c - np.dot(m, c) + np.array([randn() * delta, randn() * delta]) + + im = cv2.split(im) + im = [interpolation.affine_transform(i, m, offset=d, order=1, mode='constant', cval=cval[e]) + for e, i in enumerate(im)] + im = cv2.merge(im) + + return np.array(im) + + +def rdistort(im, distort=4.0, dsigma=10.0, cval=None, severity=1): + """distort""" + if severity == 0: + return im + + if cval is None: + cval = [0] * im.shape[2] + elif isinstance(cval, (float, int)): + cval = [cval] * im.shape[2] + + severity = abs(severity) + distort = severity * distort + dsigma = dsigma * (1 - severity) + + h, w = im.shape[:2] + hs, ws = randn(h, w), randn(h, w) + hs = filters.gaussian_filter(hs, dsigma) + ws = filters.gaussian_filter(ws, dsigma) + hs *= distort / np.abs(hs).max() + ws *= distort / np.abs(ws).max() + # 
When "ij" is passed in, the first array determines the column, the second array determines the row, by default, + # the first array determines the row, and the second array determines the column + ch, cw = np.meshgrid(np.arange(h), np.arange(w), indexing='ij') + coordinates = np.array([ch + hs, cw + ws]) + + im = cv2.split(im) + im = [map_coordinates(img, coordinates, order=1, cval=cval[i]) for i, img in enumerate(im)] + im = cv2.merge(im) + return np.array(im) + + +def reverse_color(im): + """ Pixel inversion """ + return 255 - im + + +def resize(im, fx=None, fy=None, delta=0.3, severity=1): + """ scaling in the two directions of width fx and height fy, + If the zoom factor is not specified, the maximum change amount of 0.3 is randomly selected from 1 to 1""" + + if fx is None: + fx = 1 + delta * severity * uniform(-1, 1) + if fy is None: + fy = 1 + delta * severity * uniform(-1, 1) + return np.array(cv2.resize(im, None, fx=fx, fy=fy)) + + +def warp_perspective(im, theta=20, delta=10, cval=0, severity=1): + """ perspective mapping """ + if severity == 0: + return im + + if cval is None: + cval = [0] * im.shape[2] + elif isinstance(cval, (float, int)): + cval = [cval] * im.shape[2] + + delta = delta * severity + rows, cols = im.shape[:2] + pts_im = np.float32([[0, 0], [cols, 0], [cols, rows], [0, rows]]) + + # Distort randomly and constrain the scope of change + pts_warp = pts_im + np.random.uniform(-1, 1, pts_im.shape) * theta * severity + pts_warp = np.maximum(pts_warp, delta) # Constrain the change to the part >=3 + pts_warp[[1, 2], 0] = np.minimum(pts_warp[[1, 2], 0], pts_im[[1, 2], 0] - delta) + pts_warp[[2, 3], 1] = np.minimum(pts_warp[[2, 3], 1], pts_im[[2, 3], 1] - delta) + pts_warp = np.float32(pts_warp) + + M = cv2.getPerspectiveTransform(pts_im, pts_warp) + res = np.array(cv2.warpPerspective(im, M, (cols, rows), borderValue=cval)) + + return res + + +def noise_salt_pepper(image, percentage=0.001, severity=1): + """ Salt and pepper noise, percentage represents the percentage of salt and pepper noise""" + percentage *= severity + amount = int(percentage * image.shape[0] * image.shape[1]) + if amount == 0: + return image + _, _, deep = image.shape + # Salt mode + coords = [np.random.randint(0, i - 1, amount) for i in image.shape[:2]] + salt = nprandint(200, 255, amount) + salt = salt.repeat(deep, axis=0) + image[coords[0], coords[1], :] = salt.reshape(amount, deep) + + # pepper mode + coords = [np.random.randint(0, i - 1, amount) for i in image.shape[:2]] + pepper = nprandint(0, 50, amount) + pepper = pepper.repeat(deep, axis=0) + image[coords[0], coords[1], :] = pepper.reshape(amount, deep) + return image + + +def noise_gaussian(im, sigma=20, severity=1): + """ add Gaussian noise""" + sigma = sigma * abs(severity) + return cvt_uint8(np.float32(im) + sigma * np.random.randn(*im.shape)) + + +def noise_gamma(im, extend=30, severity=1): + """ add gamma noise """ + s = int(extend * abs(severity)) + n = np.random.gamma(shape=2, scale=s, size=im.shape) + n = n - np.mean(n) + im = cvt_uint8(np.float32(im) + n) + return im + + +def noise_speckle(img, extend=40, severity=1): + """ this creates larger 'blotches' of noise which look + more realistic than just adding gaussian noise """ + severity = abs(severity) * extend + blur = filters.gaussian_filter(np.random.randn(*img.shape) * severity, 1) + return cvt_uint8(img + blur) + + +def noise_blur(im, severity=1): + """add blur by shrinking an image and then enlarging to original size""" + severity = abs(severity) + f = 1 - 0.2 * severity + h, w 
+    hmin = 19.0
+    f = max(f, hmin / h)
+    im = cv2.resize(im, None, fx=f, fy=f)
+    return np.array(cv2.resize(im, (w, h)))
+
+
+def add_noise(img):
+    """combine noises in np array"""
+    img0 = img
+    if lucky(0.1):
+        img = noise_salt_pepper(img, uniform(0.3, 0.6))
+    if lucky(0.2):
+        img = noise_gaussian(img, uniform(0.3, 0.6))
+    if lucky(0.5):
+        img = noise_blur(img, uniform(0.3, 0.6))
+    if lucky(0.5):
+        img = noise_speckle(img, uniform(0.3, 0.6))
+    if lucky(0.3):
+        img = img // 2 + img0 // 2
+    return img
+
+
+def gaussian_blur(im, sigma=1, kernel_size=None, severity=1):
+    """Gaussian blur. If kernel_size is passed in explicitly, severity is ignored."""
+    if kernel_size is None:
+        step = 11
+        kernel_size = int(step * severity)
+    if kernel_size < 3.0:
+        return im
+    if kernel_size % 2 == 0:
+        kernel_size -= 1
+    return np.array(cv2.GaussianBlur(im, (kernel_size, kernel_size), sigma))
+
+
+def rotate_shrink(im, max_angle=6, severity=0.5, cval=255):
+    """rotate about center, shrink to keep the same size without cropping image"""
+    max_angle = int(abs(severity) * max_angle)
+    angle = randint(-max_angle, max_angle)
+    h, w = im.shape[:2]
+    rangle = np.deg2rad(angle)  # angle in radians
+    # now calculate new image width and height
+    nw = abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)
+    nh = abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)
+    scale = min(w / nw, h / nh)
+    mat = cv2.getRotationMatrix2D((w // 2, h // 2), angle, scale)
+    im = cv2.warpAffine(im, mat, (w, h), borderValue=cval)
+    return np.array(im)
+
+
+def rotate_about_center(im, angle=4, scale=1, b_mode=None, cval=None, severity=1):
+    """Rotate about the image center. For color images it is recommended to pass a non-None b_mode,
+    so that the border is filled by replicating edge pixels instead of a constant color."""
+    angle = severity * angle
+    if angle == 0:
+        return im
+    w = im.shape[1]
+    h = im.shape[0]
+    rangle = np.deg2rad(angle)  # angle in radians
+    # now calculate new image width and height
+    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
+    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
+    # ask OpenCV for the rotation matrix
+    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
+    # calculate the move from the old center to the new center combined
+    # with the rotation
+    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
+    # the move only affects the translation, so update the translation
+    # part of the transform
+    rot_mat[0, 2] += rot_move[0]
+    rot_mat[1, 2] += rot_move[1]
+
+    if cval is None:
+        cval = [0] * im.shape[2]
+    elif isinstance(cval, (int, float)):
+        cval = [cval] * im.shape[2]
+
+    if b_mode is None:
+        src = cv2.warpAffine(im, rot_mat, (int(ceil(nw)), int(ceil(nh))), flags=cv2.INTER_LANCZOS4,
+                             borderMode=cv2.BORDER_CONSTANT, borderValue=cval)
+    else:
+        src = cv2.warpAffine(im, rot_mat, (int(ceil(nw)), int(ceil(nh))), flags=cv2.INTER_LANCZOS4,
+                             borderMode=cv2.BORDER_REPLICATE)
+    return np.array(src)
+
+
+def randcrop(img, max_per=0.15, severity=1):
+    """Random crop"""
+    perc = max_per * severity
+    rows, cols = img.shape[:2]
+    k = int(rows * cols * perc / (rows + cols))
+    roi = img[randint(0, k):rows - randint(0, k), randint(0, k):cols - randint(0, k)]
+    return np.array(roi)
+
+
+def enhance_sharpness(img, r=None, severity=1):
+    """
+    adjust the sharpness of an image. An
+    enhancement factor of 0.0 gives a blurred image, a factor of 1.0 gives the
+    original image, and a factor of 2.0 gives a sharpened image.
+ """ + if r is None: + severity = abs(severity) + r = uniform(1 - 0.5 * severity, 1) if lucky(0.5) else uniform(1, 1 + severity) + img = Image.fromarray(img) + img = np.array(ImageEnhance.Sharpness(img).enhance(r)) + + return img + + +def enhance_contrast(img, r=None, severity=1): + """ + control the contrast of an image, similar + to the contrast control on a TV set. An enhancement factor of 0.0 + gives a solid grey image. A factor of 1.0 gives the original image. + """ + if r is None: + severity = abs(severity) + r = uniform(1 - 0.5 * severity, 1) if lucky(0.5) else uniform(1, 1 + severity) + img = Image.fromarray(img) + img = np.array(ImageEnhance.Contrast(img).enhance(r)) + + return img + + +def enhance_brightness(img, r=None, severity=1): + """ + control the brightness of an image. An + enhancement factor of 0.0 gives a black image. A factor of 1.0 gives the + original image. + """ + + if r is None: + severity = abs(severity) + r = uniform(1 - 0.2 * severity, 1) if lucky(0.5) else uniform(1, 1 + severity * 0.5) + img = Image.fromarray(img) + img = np.array(ImageEnhance.Brightness(img).enhance(r)) + + return img + + +def enhance_color(img, r=None, severity=1): + """ + adjust the colour balance of an image, in + a manner similar to the controls on a colour TV set. An enhancement + factor of 0.0 gives a black and white image. A factor of 1.0 gives + the original image. + """ + if r is None: + severity = abs(severity) + r = uniform(1 - 0.5 * severity, 1) if lucky(0.5) else uniform(1, 1 + severity) + + img = Image.fromarray(img) + img = np.array(ImageEnhance.Color(img).enhance(r)) + + return img + + +def enhance(img): + """combine image enhancement in the Image type, reduce conversions to np array""" + if lucky(0.3): + img = enhance_sharpness(img) + if lucky(0.3): + img = enhance_contrast(img) + if lucky(0.3): + img = enhance_brightness(img) + return np.array(img) + + +def draw_line(im): + """draw a line randomly""" + h, w = im.shape[:2] + p1 = (randint(0, w // 3), randint(0, h - 1)) # from left 1/3 + p2 = (randint(w // 3 * 2, w - 1), randint(0, h - 1)) # to right 1/3 + color = [randint(0, 255) for i in range(3)] + lw = lucky_choice((1, 2), (0.8, 0.2)) + cv2.line(im, p1, p2, color, lw, cv2.LINE_AA) + return np.array(im) + + +def center_im(im_outter, im_inner, shrink=True, vertical='center'): + """center an image in a container image. 
`im_outter` can be the shape of it"""
+    if not isinstance(im_outter, np.ndarray):
+        shape = tuple(im_outter)
+        if im_inner.ndim > len(shape):
+            shape += im_inner.shape[len(shape):]
+        im_outter = np.zeros(shape, np.uint8)
+
+    H, W = im_outter.shape[:2]
+    h, w = im_inner.shape[:2]
+    if h > H or w > W:
+        if shrink:
+            rate = min(H / h, W / w)
+            # scale both axes by `rate` (cv2.resize expects fx/fy here, not a bare scalar)
+            im_inner = cv2.resize(im_inner, None, fx=rate, fy=rate)
+        im_inner = im_inner[:H, :W]
+        h, w = im_inner.shape[:2]
+
+    vertical = vertical.lower()
+    if vertical == 'center':
+        dh = (H - h) // 2
+    elif vertical == 'top':
+        dh = 0
+    elif vertical == 'bottom':
+        dh = H - h
+
+    im = im_outter.copy()
+    dw = (W - w) // 2
+    im[dh:dh + h, dw:dw + w] = im_inner
+    return np.array(im)
+
+
+def enhance_light(img):
+    """combine image enhancement in the Image type, reduce conversions to np array"""
+    if lucky(0.3):
+        img = enhance_sharpness(img, uniform(0.5, 1.5))
+    if lucky(0.3):
+        img = enhance_contrast(img, uniform(0.7, 1.3))
+    if lucky(0.3):
+        img = enhance_brightness(img, uniform(0.85, 1.15))
+    return np.array(img)
+
+
+def gaussian2d(w, h):
+    """A 2-D Gaussian bump; its level sets are ellipses."""
+    h = h // 2
+    w = w // 2
+    x = np.arange(-w, w)
+    y = np.arange(-h, h)
+    x, y = np.meshgrid(x, y)
+    mean_x = np.mean(x)
+    mean_y = np.mean(y)
+    std_x = np.std(x)
+    std_y = np.std(y)
+    z = np.exp(
+        -((y - mean_y) ** 2 / (std_y ** 2) + (x - mean_x) ** 2 / (std_x ** 2)) / 2
+    )
+    z /= (np.sqrt(2 * np.pi) * std_y)
+    z *= 1 / (np.max(z) - np.min(z))
+    return z
+
+
+def add_stain(img, theta=200, severity=0.5, bright_spot=False, iteration=1):
+    """Generate black stains or white bright spots"""
+
+    for _ in range(0, iteration):
+        img = np.float32(img)
+        theta = theta * abs(severity)
+        cols_big, rows_big = img.shape[:2]
+        temp = min([cols_big, rows_big])
+
+        if temp < 80:
+            temp = 80
+        if temp > 300:
+            temp = 300
+
+        if not bright_spot:
+            gaussian_img = gaussian2d(randint(temp // 3, temp // 2), randint(temp // 3, temp // 2)) * theta
+        else:
+            gaussian_img = gaussian2d(randint(int(temp // 1.5), int(temp / 0.8)),
+                                      randint(int(temp // 1.5), int(temp / 0.8)))
+
+        cols_small, rows_small = gaussian_img.shape[:2]
+        tmp_min = int(min(cols_small, rows_small))
+        # heavily distort the elliptical blob; cval should stay small here
+        gaussian_img = rdistort(gaussian_img, randint(tmp_min // 10, tmp_min // 6), cval=0)
+        x1 = randint(0, rows_big - 5 if rows_big - 5 > 0 else 0)
+        y1 = randint(0, cols_big - 5 if cols_big - 5 > 0 else 0)
+
+        if y1 + cols_small > cols_big:
+            y2 = int(cols_big - 1)
+        else:
+            y2 = int(y1 + cols_small)
+
+        if x1 + rows_small > rows_big:
+            x2 = int(rows_big - 1)
+        else:
+            x2 = int(x1 + rows_small)
+
+        row, col = gaussian_img.shape
+        gaussian_img = gaussian_img.repeat(img.shape[2], axis=1)
+        gaussian_img = gaussian_img.reshape(row, col, img.shape[2])
+
+        gaussian_img = np.float32(gaussian_img[:(y2 - y1), :(x2 - x1)])
+        if not bright_spot:
+            img[y1:y2, x1:x2] -= gaussian_img
+        else:
+            temp1 = min([np.median(gaussian_img), 255 - np.mean(img[y1:y2, x1:x2])])
+            gaussian_img = np.clip(gaussian_img - temp1, 0, 255)
+            img[y1:y2, x1:x2] = np.clip(img[y1:y2, x1:x2] + gaussian_img, 0, 255)
+        img = cvt_uint8(img)
+
+    return np.array(img)
+
+
+def shift_color(im, delta_max=10, severity=0.5):
+    """randomly shift image color"""
+    if severity == 0:
+        return im
+
+    # unpack a (min, max) tuple before scaling, so int() never sees a tuple
+    if isinstance(delta_max, tuple):
+        delta_min, delta_max = delta_max
+    else:
+        delta_min = -delta_max
+    delta_min = int(delta_min * severity)
+    delta_max = int(delta_max * severity)
+
+    im = np.float32(im)
+    delta = np.random.randint(delta_min, delta_max, (1, 1, im.shape[2]))
+    im += delta
+
+    return np.array(cvt_uint8(im))
+
+
+def random_contrast(img, contrast_delta=0.3, bright_delta=0.1):
+    """randomly change image contrast and brightness"""
+    if isinstance(contrast_delta, tuple):
+        contrast_delta_min, contrast_delta = contrast_delta
+    else:
+        contrast_delta_min = -contrast_delta
+    if isinstance(bright_delta, tuple):
+        bright_delta_min, bright_delta = bright_delta
+    else:
+        bright_delta_min = -bright_delta
+    fc = 1 + uniform(contrast_delta_min, contrast_delta)
+    fb = 1 + uniform(bright_delta_min, bright_delta)
+    im = img.astype(np.float32)
+    if img.ndim == 2:
+        im = im[:, :, None]
+    mn = im.mean(axis=(0, 1), keepdims=True)
+    im = (im - mn) * fc + mn * fb
+    im = im.clip(0, 255).astype(np.uint8)
+    return np.array(im)
+
+
+def period_map(xi, times, extent):
+    """piecewise sine profile used by whole_rdistort to build the wave offsets"""
+    if times < 1:
+        return None
+    times = float(times)
+    theta = randint(extent, extent + 10) * choice([1, -1])
+
+    def back(x):
+        if x < times / 2.0:
+            # only a sine profile for now; more period shapes could be added later
+            return theta * sin(pi * (3 / 2.0 + x / times))  # monotonically increasing
+        return theta * sin(pi * (1 / 2.0 + x / times))
+
+    xi = np.fabs(xi)
+    xi = xi % times
+    yi = np.array(list(map(back, xi)))
+    return yi
+
+
+def whole_rdistort(im, severity=1, scop=40):
+    """
+    Wave-like distortion of the whole image, implemented with skimage's PiecewiseAffineTransform.
+    skimage's `warp` works on images normalized to [0, 1], so the result is rescaled back to [0, 255].
+    """
+
+    if severity == 0:
+        return im
+
+    theta = severity * scop
+    rows, cols = im.shape[:2]
+    colpoints = max(int(cols * severity * 0.05), 3)
+    rowpoints = max(int(rows * severity * 0.05), 3)
+
+    src_cols = np.linspace(0, cols, colpoints)
+    src_rows = np.linspace(0, rows, rowpoints)
+    src_rows, src_cols = np.meshgrid(src_rows, src_cols)
+    src = np.dstack([src_cols.flat, src_rows.flat])[0]
+
+    # The key location for wave distortion effect
+    dst_rows = src[:, 1] - period_map(np.linspace(0, 100, src.shape[0]), 50, 20)
+
+    # dst columns
+    dst_cols = src[:, 0] - np.sin(np.linspace(0, 3 * np.pi, src.shape[0])) * theta
+
+    dst = np.vstack([dst_cols, dst_rows]).T
+    tform = PiecewiseAffineTransform()
+    tform.estimate(src, dst)
+    image = warp(im, tform, mode='edge', output_shape=(rows, cols)) * 255
+    return np.array(cvt_uint8(image))
+
+
+def lucky_choice(seq, ps=None, rand_func=random):
+    """randomly choose an element from `seq` according to their probability distribution `ps`"""
+    if not seq:
+        return None
+    if ps is None:
+        return choice(seq)
+    cumps = np.cumsum(ps)
+    r = rand_func() * cumps[-1]
+    idx = (cumps < r).sum()
+    idx = min(idx, len(seq) - 1)
+    return seq[idx]
+
+
+def cvt_uint8(im):
+    """convert image type to `np.uint8`"""
+    if im.dtype == np.uint8:
+        return im
+    return np.round(im).clip(0, 255).astype(np.uint8)
+
+
+def to_image(im):
+    """convert `im` to `Image` type"""
+    if not isinstance(im, Image.Image):
+        if im.ndim == 3:
+            im = im[:, :, ::-1]  # reverse channels: BGR in cv2 to RGB in Image
+        im = Image.fromarray(im)
+    return im
+
+
+def to_array(im):
+    """convert `im` to `np.array` type"""
+    if isinstance(im, Image.Image):
+        im = np.array(im)
+        if im.ndim == 3:
+            im = im[:, :, ::-1]  # reverse channels: RGB in Image to BGR in cv2
+    return im
+
+
+def unify_img(img, img_height=64, max_length=512, img_channel=3):
+    """resize to img_height, then crop or right-pad with white to max_length"""
+    color_fill = 255
+    img_shape = img.shape
+
+    img_width = int(float(img_shape[1]) / img_shape[0] * img_height)
+    img = cv2.resize(img, (img_width, img_height))
+    if img_width > max_length:
+        img = img[:, 0:max_length]
+    else:
+        blank_img = np.zeros((img_height, max_length, img_channel), np.uint8)
+        # fill the image with white
+        blank_img.fill(color_fill)
+        blank_img[0:img_height, 0:img_width] = img
+        img = blank_img
+    return np.array(img)
+
+
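+# unify_img_label mirrors unify_img but keeps the (img, label) signature that dataset.map() expects;
+# e.g. with the defaults the returned image is always (64, 512, 3), whatever the input aspect ratio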
+def unify_img_label(img, label, img_height=64, max_length=512, min_length=192, img_channel=3):
+    color_fill = 255
+    img_shape = img.shape
+
+    img_width = int(float(img_shape[1]) / img_shape[0] * img_height)
+    img = cv2.resize(img, (img_width, img_height))
+    if img_width > max_length:
+        img = img[:, 0:max_length]
+    else:
+        blank_img = np.zeros((img_height, max_length, img_channel), np.uint8)
+        # fill the image with white
+        blank_img.fill(color_fill)
+        blank_img[0:img_height, 0:img_width] = img
+        img = blank_img
+
+    return np.array(img), label
diff --git a/model_zoo/official/cv/cnn_direction_model/train.py b/model_zoo/official/cv/cnn_direction_model/train.py
new file mode 100644
index 00000000000..633c90a6893
--- /dev/null
+++ b/model_zoo/official/cv/cnn_direction_model/train.py
@@ -0,0 +1,108 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""train CNN direction model."""
+import argparse
+import os
+import random
+
+from src.cnn_direction_model import CNNDirectionModel
+from src.config import config1 as config
+from src.dataset import create_dataset_train
+
+import numpy as np
+
+import mindspore as ms
+from mindspore import Tensor
+from mindspore import context
+from mindspore import dataset as de
+from mindspore.communication.management import init
+from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
+from mindspore.nn.metrics import Accuracy
+from mindspore.nn.optim.adam import Adam
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
+from mindspore.train.model import Model, ParallelMode
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
+parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
+
+args_opt = parser.parse_args()
+
+random.seed(11)
+np.random.seed(11)
+de.config.set_seed(11)
+ms.common.set_seed(11)
+
+if __name__ == '__main__':
+
+    target = args_opt.device_target
+    ckpt_save_dir = config.save_checkpoint_path
+
+    # init context
+    device_id = int(os.getenv('DEVICE_ID', '0'))
+    rank_id = int(os.getenv('RANK_ID', '0'))
+    rank_size = int(os.getenv('RANK_SIZE', '1'))
+    run_distribute = rank_size > 1
+    context.set_context(mode=context.GRAPH_MODE,
+                        device_target=target,
+                        device_id=device_id,
+                        save_graphs=False)
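+    # DEVICE_ID, RANK_ID and RANK_SIZE are exported by the scripts under scripts/;
+    # without them a single-card run on device 0 is assumed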
+
+    print("train args: ", args_opt, "\ncfg: ", config,
+          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))
+
+    if run_distribute:
+        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL)
+        init()
+
+    # create dataset
+    dataset = create_dataset_train(args_opt.dataset_path + "/ocr_pos.mindrecord0", config=config)
+    step_size = dataset.get_dataset_size()
+
+    # define net
+    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])
+
+    # init weight
+    if args_opt.pre_trained:
+        param_dict = load_checkpoint(args_opt.pre_trained)
+        load_param_into_net(net, param_dict)
+
+    lr = Tensor(config.lr, ms.float32)
+
+    # define opt
+    opt = Adam(params=net.trainable_params(), learning_rate=lr, eps=1e-07)
+
+    # define loss, model
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")
+
+    model = Model(net, loss_fn=loss, optimizer=opt, metrics={"Accuracy": Accuracy()})
+
+    # define callbacks
+    time_cb = TimeMonitor(data_size=step_size)
+    loss_cb = LossMonitor()
+    cb = [time_cb, loss_cb]
+    if config.save_checkpoint:
+        config_ck = CheckpointConfig(save_checkpoint_steps=2500,
+                                     keep_checkpoint_max=config.keep_checkpoint_max)
+        ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck)
+        cb += [ckpt_cb]
+
+    # train model
+    model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False)
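+    # checkpoints are written to config.save_checkpoint_path ("./" by default) with prefix "cnn_direction_model"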