cnn direction model
This commit is contained in:
parent
f6450a614b
commit
830b8f3e93
|
@ -0,0 +1,69 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""eval cnn direction model."""
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
from src.cnn_direction_model import CNNDirectionModel
|
||||
from src.config import config1 as config
|
||||
from src.dataset import create_dataset_eval
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import dataset as de
|
||||
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||
|
||||
parser = argparse.ArgumentParser(description='Image classification')

parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Seed every random source touched by the data pipeline so repeated runs agree.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

if __name__ == '__main__':
    # Both options default to None; stop with a usage error here instead of
    # failing later with a TypeError while the dataset file name is built.
    if args_opt.dataset_path is None or args_opt.checkpoint_path is None:
        parser.error("--dataset_path and --checkpoint_path are required")

    # init context
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    # DEVICE_ID is exported by the launch scripts; fall back to device 0 when it is not set.
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(device_id=device_id)

    # create dataset
    eval_record = os.path.join(args_opt.dataset_path, "ocr_eval_pos.mindrecord")
    dataset = create_dataset_eval(eval_record, config=config)
    step_size = dataset.get_dataset_size()

    print("step_size ", step_size)

    # define net; the image size comes from the same config the eval dataset
    # uses for resizing (64 x 512 in config1), so the two cannot drift apart.
    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64],
                            [config.im_size_h, config.im_size_w])

    # load checkpoint
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")

    # define model
    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy'})

    # eval model
    res = model.eval(dataset, dataset_sink_mode=False)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
|
|
@ -0,0 +1,5 @@
|
|||
mindspore
|
||||
numpy
|
||||
Pillow
|
||||
opencv-python
|
||||
scikit-image
|
|
@ -0,0 +1,88 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# Exactly two or three positional arguments are accepted; otherwise print usage and abort.
if [ $# -ne 2 ] && [ $# -ne 3 ]; then
    echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
    exit 1
fi
|
||||
|
||||
# Print an absolute form of the path passed as $1.
# A path that already starts with "/" is echoed unchanged; anything else is
# resolved against $PWD with `realpath -m` (the path does not have to exist).
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that a path containing spaces is passed as a single word.
    realpath -m "$PWD/$1"
  fi
}
|
||||
|
||||
# Resolve the positional arguments to absolute paths. All expansions are
# quoted so paths containing spaces survive word splitting.
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")

if [ $# == 3 ]
then
    PATH3=$(get_real_path "$3")
fi

if [ ! -f "$PATH1" ]
then
    echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
    exit 1
fi

if [ ! -d "$PATH2" ]
then
    echo "error: DATASET_PATH=$PATH2 is not a directory"
    exit 1
fi

if [ $# == 3 ] && [ ! -f "$PATH3" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file"
    exit 1
fi

ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export RANK_TABLE_FILE="$PATH1"

export SERVER_ID=0
rank_start=$((DEVICE_NUM * SERVER_ID))

# Launch one background training process per device, each in its own
# freshly created working directory holding a copy of the sources.
for((i=0; i<${DEVICE_NUM}; i++))
do
    export DEVICE_ID=$i
    export RANK_ID=$((rank_start + i))
    rm -rf ./train_parallel$i
    mkdir ./train_parallel$i
    cp ../*.py ./train_parallel$i
    cp *.sh ./train_parallel$i
    cp -r ../src ./train_parallel$i
    cd ./train_parallel$i || exit
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    env > env.log

    if [ $# == 2 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$PATH2" &> log &
    fi

    if [ $# == 3 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$PATH2" --pre_trained="$PATH3" &> log &
    fi

    cd ..
done
|
|
@ -0,0 +1,62 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# Exactly two positional arguments are required. The usage line names this
# script (it previously printed the training script's name).
if [ $# != 2 ]
then
    echo "Usage: sh run_standalone_eval.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH]"
    exit 1
fi

ulimit -u unlimited
# Single-device evaluation on a fixed device id.
export DEVICE_NUM=1
export DEVICE_ID=4
export RANK_ID=0
export RANK_SIZE=1
|
||||
|
||||
|
||||
# Print an absolute form of the path passed as $1.
# A path that already starts with "/" is echoed unchanged; anything else is
# resolved against $PWD with `realpath -m` (the path does not have to exist).
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that a path containing spaces is passed as a single word.
    realpath -m "$PWD/$1"
  fi
}
|
||||
|
||||
# Resolve both arguments to absolute paths (quoted so spaces are preserved).
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")

# eval.py appends the mindrecord file name to the dataset path, so it has to
# be a directory; check it up front like run_distribute_train.sh does.
if [ ! -d "$PATH1" ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

if [ ! -f "$PATH2" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
    exit 1
fi

# Start from a clean working directory containing a copy of the sources.
if [ -d "eval" ];
then
    rm -rf ./eval
fi

mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
echo "start evaluation for device $DEVICE_ID"
env > env.log

python eval.py --dataset_path="$PATH1" --checkpoint_path="$PATH2" #&> log &

cd ..
|
|
@ -0,0 +1,72 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# One or two positional arguments are accepted; anything else prints usage and aborts.
if [ $# -ne 1 ] && [ $# -ne 2 ]; then
    echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
    exit 1
fi

ulimit -u unlimited
# Single-device training: one rank on a fixed device id.
export DEVICE_NUM=1 DEVICE_ID=3 RANK_ID=0 RANK_SIZE=1
|
||||
|
||||
|
||||
# Print an absolute form of the path passed as $1.
# A path that already starts with "/" is echoed unchanged; anything else is
# resolved against $PWD with `realpath -m` (the path does not have to exist).
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that a path containing spaces is passed as a single word.
    realpath -m "$PWD/$1"
  fi
}
|
||||
|
||||
# Resolve the arguments to absolute paths (quoted so spaces are preserved).
PATH1=$(get_real_path "$1")

if [ $# == 2 ]
then
    PATH2=$(get_real_path "$2")
fi

# Same dataset check as run_distribute_train.sh performs before calling train.py.
if [ ! -d "$PATH1" ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

if [ $# == 2 ] && [ ! -f "$PATH2" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
    exit 1
fi

# Start from a clean working directory containing a copy of the sources.
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
if [ $# == 1 ]
then
    python train.py --dataset_path="$PATH1" &> log &
fi

if [ $# == 2 ]
then
    python train.py --dataset_path="$PATH1" --pre_trained="$PATH2" &> log &
fi

cd ..
|
|
@ -0,0 +1,264 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""CNN direction model."""
|
||||
import math
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore.common.initializer import Uniform
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetAddN(nn.Cell):
    """
    Element-wise sum of all input tensors.

    Wraps the AddN primitive in a Cell so the sum can be used like a layer.
    """

    def __init__(self):
        super().__init__()
        self.addN = P.AddN()

    def construct(self, *z):
        # *z collects the operands into one tuple, which is handed to AddN as is.
        total = self.addN(z)
        return total
|
||||
|
||||
|
||||
class Conv(nn.Cell):
    """
    Convolution followed by batch norm, with optional dilated branch,
    activation and max pooling.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        kernel (tuple): Size of the kernel. Default: (3, 3).
        dilate (bool): If True, a ReLU -> dilated convolution (dilation 2x2) -> batch norm
            branch is computed from the first batch-norm output and added back to it.
            Default: True.
        act (string): The activation function; only 'relu' adds an activation. Default: 'relu'.
        mp (int): Size (and stride) of the max pooling layer, or None for no pooling. Default: None.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> Conv(3, 64)
    """

    def __init__(self, in_channel, out_channel, kernel=(3, 3), dilate=True, act='relu', mp=None):
        super().__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.kernel = kernel
        self.dilate = dilate
        self.act = act
        self.mp = mp

        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=kernel, pad_mode="same",
                               weight_init='he_normal')
        self.batch_norm1 = nn.BatchNorm2d(out_channel, eps=1e-3, momentum=0.99,
                                          gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)

        # Sub-layers below exist only when the matching option is enabled;
        # construct() checks the same flags before using them.
        if dilate:
            self.dilate_relu = P.ReLU()
            self.dilate_conv = nn.Conv2d(out_channel, out_channel, kernel_size=kernel,
                                         dilation=(2, 2), pad_mode='same', weight_init='he_normal')
            self.dilate_batch_norm = nn.BatchNorm2d(out_channel, eps=1e-3, momentum=0.99,
                                                    gamma_init=1, beta_init=0, moving_mean_init=0,
                                                    moving_var_init=1)
            self.dilate_add = NetAddN()

        if act == 'relu':
            self.act_layer = P.ReLU()

        if mp is not None:
            self.mp_layer = nn.MaxPool2d(kernel_size=mp, stride=mp, pad_mode='valid')

    def construct(self, x):
        out = self.batch_norm1(self.conv1(x))

        if self.dilate:
            # Dilated branch, summed with the plain conv + batch-norm output.
            branch = self.dilate_relu(out)
            branch = self.dilate_conv(branch)
            branch = self.dilate_batch_norm(branch)
            out = self.dilate_add(out, branch)

        if self.act == 'relu':
            out = self.act_layer(out)

        if self.mp is not None:
            out = self.mp_layer(out)

        return out
|
||||
|
||||
|
||||
class Block(nn.Cell):
    """
    Two stacked Conv layers with a skip connection around them.

    The input is added to the output of the second Conv and passed through
    ReLU, so the input and output are expected to have the same shape.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> Block(64, 64)
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.conv1 = Conv(in_channel, out_channel, act='relu')
        # No activation here: ReLU is applied after the skip addition instead.
        self.conv2 = Conv(out_channel, out_channel, act=None)
        self.add = NetAddN()
        self.relu = P.ReLU()

    def construct(self, x):
        residual = self.conv2(self.conv1(x))
        summed = self.add(x, residual)
        return self.relu(summed)
|
||||
|
||||
|
||||
class ResidualBlock(nn.Cell):
    """
    A non-dilated Conv, a chain of building blocks, and optional max pooling.

    Args:
        block (Block): The building block class; instantiated as block(out_channel, out_channel).
        num_blocks (int): Number of blocks.
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        mp (int): Size (and stride) of the max pooling layer, or None to skip pooling. Default: 2.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(Block, 1, 3, 64)
    """

    def __init__(self, block, num_blocks, in_channel, out_channel, mp=2):
        super().__init__()
        self.num_blocks = num_blocks
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.mp = mp
        # The leading Conv changes the channel count; the blocks keep it at out_channel.
        self.conv1 = Conv(in_channel, out_channel, kernel=(3, 3), dilate=False)

        self.layer = nn.SequentialCell([block(out_channel, out_channel) for _ in range(num_blocks)])

        if mp is not None:
            self.max_pool = nn.MaxPool2d(kernel_size=mp, stride=mp, pad_mode='valid')

    def construct(self, x):
        out = self.layer(self.conv1(x))
        if self.mp is not None:
            out = self.max_pool(out)

        return out
|
||||
|
||||
|
||||
class CNNDirectionModel(nn.Cell):
    """
    CNN direction model.

    Args:
        in_channels (list): Input channel counts. The first element is the input dimension
            of the first Conv layer, and the remaining four are the input dimensions of the
            residual blocks, in order.
        out_channels (list): Output channel counts. The first element is the output dimension
            of the first Conv layer, and the remaining four are the output dimensions of the
            residual blocks, in order.
        dense_layers (list): Dimensions of the two hidden dense layers, in order.
        image_size (list): Size of the input images as [height, width].
        num_classes (int): Number of classes. Default: 2 for binary classification.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])
    """

    def __init__(self, in_channels, out_channels, dense_layers, image_size, num_classes=2):
        super().__init__()
        self.num_classes = num_classes
        self.image_h = image_size[0]
        self.image_w = image_size[1]
        self.conv1 = Conv(in_channels[0], out_channels[0], kernel=(7, 7), dilate=False, mp=2)
        self.residual_block1 = ResidualBlock(Block, 1, in_channels[1], out_channels[1])
        self.residual_block2 = ResidualBlock(Block, 1, in_channels[2], out_channels[2])
        self.residual_block3 = ResidualBlock(Block, 2, in_channels[3], out_channels[3])
        self.residual_block4 = ResidualBlock(Block, 1, in_channels[4], out_channels[4])

        # conv1 and the four residual blocks each pool by 2, so height and width
        # are 1/32 of the input here; the average pool covers that whole map.
        pooled_h = int(self.image_h / 32)
        pooled_w = int(self.image_w / 32)
        self.avg_pool = nn.AvgPool2d(kernel_size=(pooled_h, pooled_w))

        # Dense weights use a uniform range of sqrt(6 / (fan_in + fan_out)),
        # i.e. the 'glorot_uniform' rule.
        features = out_channels[-1]
        hidden1 = dense_layers[0]
        hidden2 = dense_layers[1]

        scale = math.sqrt(6 / (features + hidden1))
        self.dense1 = nn.Dense(features, hidden1, weight_init=Uniform(scale=scale), activation='relu')

        scale = math.sqrt(6 / (hidden1 + hidden2))
        self.dense2 = nn.Dense(hidden1, hidden2, weight_init=Uniform(scale=scale), activation='relu')

        # NOTE(review): this layer already applies softmax, and eval.py wraps the net
        # with SoftmaxCrossEntropyWithLogits - confirm the combination is intended.
        scale = math.sqrt(6 / (hidden2 + num_classes))
        self.dense3 = nn.Dense(hidden2, num_classes, weight_init=Uniform(scale=scale), activation='softmax')

    def construct(self, x):
        feat = self.conv1(x)
        feat = self.residual_block1(feat)
        feat = self.residual_block2(feat)
        feat = self.residual_block3(feat)
        feat = self.residual_block4(feat)

        pooled = self.avg_pool(feat)
        # Keep only the first two axes of the pooled map as a 2-D input for the dense layers.
        flat = P.Reshape()(pooled, (pooled.shape[0], pooled.shape[1]))

        return self.dense3(self.dense2(self.dense1(flat)))
|
|
@ -0,0 +1,37 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
network config setting, will be used in train.py and eval.py
|
||||
"""
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
# Settings shared by train.py and eval.py. Within the code visible alongside
# this file, batch_size, work_nums, im_size_w/im_size_h and augment_severity/
# augment_prob are read by src/dataset.py; the remaining keys are presumably
# consumed by train.py - confirm there.
config1 = ed(dict(
    batch_size=8,
    epoch_size=5,
    pretrain_epoch_size=0,
    save_checkpoint=True,
    save_checkpoint_epochs=10,
    keep_checkpoint_max=20,
    save_checkpoint_path="./",
    warmup_epochs=5,
    lr_decay_mode="poly",
    lr=1e-4,
    work_nums=4,
    im_size_w=512,
    im_size_h=64,
    pos_samples_size=100,
    augment_severity=0.1,
    augment_prob=0.3,
))
|
|
@ -0,0 +1,246 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Data operations, will be used in train.py and eval.py
|
||||
"""
|
||||
import os
|
||||
|
||||
import mindspore.dataset.engine as de
|
||||
import mindspore.dataset.vision.c_transforms as C
|
||||
from src.dataset_utils import lucky, noise_blur, noise_speckle, noise_gamma, noise_gaussian, noise_salt_pepper, \
|
||||
shift_color, enhance_brightness, enhance_sharpness, enhance_contrast, enhance_color, gaussian_blur, \
|
||||
randcrop, resize, rdistort, rgeometry, rotate_about_center, whole_rdistort, warp_perspective, random_contrast, \
|
||||
unify_img_label
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
# Set OpenCV's thread count to 0; presumably so OpenCV does not spawn its own
# threads inside the dataset map workers - confirm.
cv2.setNumThreads(0)

# Target size used by resize_image(); both are assigned by create_dataset_eval()
# from the config before the eval pipeline runs.
image_height = None
image_width = None
|
||||
|
||||
|
||||
class Augmentor():
    """
    Augment image with random noise and transformation

    Controlled by severity level [0, 1]

    Usage:
        augmentor = Augmentor(severity=0.3,
                              prob=0.5,
                              enable_transform=True,
                              enable_crop=False)
        image_new = augmentor.process(image)
    """

    def __init__(self, severity, prob, enable_transform=True, enable_crop=False):
        """
        severity: in [0, 1], from min to max level of noise/transformation
        prob: in [0, 1], probability to apply each operator
        enable_transform: enable all transformation operators
        enable_crop: enable crop operator

        severity and prob are clipped into [0, 1].
        """
        self.severity = np.clip(severity, 0, 1)
        self.prob = np.clip(prob, 0, 1)
        self.enable_transform = enable_transform
        self.enable_crop = enable_crop

    def add_noise(self, im):
        """randomly add noise to image"""
        severity = self.severity
        prob = self.prob

        # Operators run in this fixed order; each one fires independently with
        # probability `prob`, with one lucky() draw made right before it.
        severity_ops = (
            noise_gamma,
            noise_blur,
            noise_gaussian,
            noise_salt_pepper,
            shift_color,
            gaussian_blur,
            noise_speckle,
            enhance_sharpness,
            enhance_contrast,
            enhance_brightness,
            enhance_color,
        )
        for op in severity_ops:
            if lucky(prob):
                im = op(im, severity=severity)

        # random_contrast is called without a severity argument.
        if lucky(prob):
            im = random_contrast(im)

        return im

    def convert_color(self, im, cval):
        """
        Turn a fill-colour specification into plain Python ints.

        cval may be 'median' / 'md' or 'mean' (computed over the first two axes
        of im), an iterable of numbers, or a single number. Returns a list of
        ints for iterable values and a single int otherwise.
        """
        # Compare against the keywords only when cval is a string: an ndarray
        # tested with `in` is compared element-wise and raises on truth testing.
        if isinstance(cval, str):
            if cval in ('median', 'md'):
                cval = np.median(im, axis=(0, 1)).astype(int)
            elif cval == 'mean':
                cval = np.mean(im, axis=(0, 1)).astype(int)
        if hasattr(cval, '__iter__'):
            return [int(i) for i in cval]
        return int(cval)

    def transform(self, im, cval=255, **kw):
        """According to the parameters initialized by the class, deform the incoming image"""
        severity = self.severity
        prob = self.prob
        cval = self.convert_color(im, cval)
        if lucky(prob):
            # affine transform
            im = rgeometry(im, severity=severity, cval=cval)
        if lucky(prob):
            im = rdistort(im, severity=severity, cval=cval)
        if lucky(prob):
            im = warp_perspective(im, severity=severity, cval=cval)
        if lucky(prob):
            # optional fixed scale factors can be supplied through the fx / fy keywords
            im = resize(im, fx=kw.get('fx'), fy=kw.get('fy'), severity=severity)
        if lucky(prob):
            im = rotate_about_center(im, severity=severity, cval=cval)
        if lucky(prob):
            # the overall distortion of the image.
            im = whole_rdistort(im, severity=severity)
        # lucky() is evaluated first on purpose: the random draw happens even
        # when cropping is disabled, as in the original ordering.
        if lucky(prob) and self.enable_crop:
            # random crop
            im = randcrop(im, severity=severity)
        return im

    def process(self, im, cval='median', **kw):
        """ Execute code according to the effect of initial setting, and support variable parameters"""
        if self.enable_transform:
            im = self.transform(im, cval=cval, **kw)
        im = self.add_noise(im)
        return im
|
||||
|
||||
|
||||
def rotate_and_set_neg(img, label):
    """Rotate img by 180 degrees and return it with (label - 1) as an int32 array."""
    flipped = np.rot90(img, 2)
    neg_label = np.array(label - 1).astype(np.int32)
    return flipped, neg_label
|
||||
|
||||
|
||||
def rotate(img, label):
    """Rotate img by 180 degrees; the label is passed through untouched."""
    return np.rot90(img, 2), label
|
||||
|
||||
|
||||
def random_neg_with_rotate(img, label):
    """
    With probability 0.5 turn the sample into a negative one.

    A negative sample is the image rotated by 180 degrees with its label
    decreased by 1. The label is always returned as an int32 array.
    """
    make_negative = lucky(0.5)
    if make_negative:
        label = label - 1
        # two quarter turns = rotation by 180 degrees
        img = np.rot90(img, 2)
    return img, np.array(label).astype(np.int32)
|
||||
|
||||
|
||||
def transform_image(img, label):
    """
    Scale pixel values with x / 127.5 - 1 (as float32) and move the last axis
    of the 3-D image to the front. The label is passed through untouched.
    """
    scaled = np.asarray(img, np.float32) / 127.5 - 1
    return scaled.transpose((2, 0, 1)), label
|
||||
|
||||
|
||||
def create_dataset_train(mindrecord_file_pos, config):
    """
    create a train dataset

    The shard count and shard id are read from the RANK_SIZE and RANK_ID
    environment variables (defaults 1 and 0).

    Args:
        mindrecord_file_pos(string): mindrecord file for positive samples.
        config(dict): config of dataset; augment_severity, augment_prob and batch_size are read.

    Returns:
        dataset
    """
    shard_count = int(os.getenv("RANK_SIZE", '1'))
    shard_index = int(os.getenv("RANK_ID", '0'))

    ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=4,
                        num_shards=shard_count, shard_id=shard_index, shuffle=True)
    ds = ds.map(operations=C.Decode(), input_columns=["image"], num_parallel_workers=8)

    # random transformation / noise on the image column only
    augmentor = Augmentor(config.augment_severity, config.augment_prob)
    ds = ds.map(operations=augmentor.process, input_columns=["image"],
                num_parallel_workers=1, python_multiprocessing=True)

    # randomly turn half of the samples into negative samples, then unify and normalise
    sample_ops = [random_neg_with_rotate, unify_img_label, transform_image]
    ds = ds.map(operations=sample_ops, input_columns=["image", "label"],
                num_parallel_workers=8, python_multiprocessing=True)

    # for training, double the dataset to account for positive and negative samples
    ds = ds.repeat(2)

    # apply batch operations
    return ds.batch(config.batch_size, drop_remainder=True)
|
||||
|
||||
|
||||
def resize_image(img, label):
    """
    Bring an image to image_height x image_width and normalise it.

    The image is scaled uniformly so its height matches the module-level
    image_height. A result wider than image_width is cut on the right; otherwise
    it is pasted at the left of a white canvas. Pixel values are then scaled with
    x / 127.5 - 1 and the channel axis is moved to the front. The label is
    returned unchanged.
    """
    white = 255
    factor = image_height / img.shape[0]
    img = cv2.resize(img, None, fx=factor, fy=factor)
    if img.shape[1] > image_width:
        img = img[:, 0:image_width]
    else:
        # white 3-channel canvas; the resized image fills its left part
        canvas = np.full((image_height, image_width, 3), white, np.uint8)
        canvas[:image_height, :img.shape[1]] = img
        img = canvas
    normalized = np.array([img[...]], np.float32) / 127.5 - 1
    return normalized.transpose((0, 3, 1, 2))[0], label
|
||||
|
||||
|
||||
def create_dataset_eval(mindrecord_file_pos, config):
    """
    create an eval dataset

    Also stores config.im_size_h / config.im_size_w in the module-level
    image_height / image_width, which resize_image reads.

    Args:
        mindrecord_file_pos(string): mindrecord file for positive samples.
        config(dict): config of dataset; im_size_h, im_size_w and work_nums are read.

    Returns:
        dataset
    """
    global image_height
    global image_width

    shard_count = int(os.getenv("RANK_SIZE", '1'))
    shard_index = int(os.getenv("RANK_ID", '0'))

    ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=1,
                        num_shards=shard_count, shard_id=shard_index, shuffle=False)
    ds = ds.map(operations=C.Decode(), input_columns=["image"], num_parallel_workers=8)

    # resize_image picks the target size up from these module globals
    image_height = config.im_size_h
    image_width = config.im_size_w
    ds = ds.map(operations=resize_image, input_columns=["image", "label"],
                num_parallel_workers=config.work_nums, python_multiprocessing=False)

    # apply batch operations: one sample per batch
    return ds.batch(1, drop_remainder=True)
|
|
@ -0,0 +1,641 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from math import ceil, sin, pi
|
||||
from random import choice, random
|
||||
from random import randint, uniform
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
from PIL import ImageEnhance, Image
|
||||
from scipy.ndimage import filters, interpolation
|
||||
from scipy.ndimage.interpolation import map_coordinates
|
||||
from skimage.transform import PiecewiseAffineTransform, warp
|
||||
|
||||
# Module-level alias for numpy's random integer sampler.
nprandint = np.random.randint
|
||||
|
||||
|
||||
def lucky(p=0.3, rand_func=random):
    """
    Return True with probability p.

    rand_func is called once with no arguments; the result is True when the
    value it returns is strictly below p.
    """
    draw = rand_func()
    return draw < p
|
||||
|
||||
|
||||
def rgeometry(im, eps=0.04, delta=0.8, cval=None, severity=1):
    """
    Random affine transform, applied to each channel separately.

    eps scales the random perturbation of the 2x2 matrix and delta the random
    offset; both are multiplied by |severity|. cval is the fill value for
    points sampled outside the image: None means 0 for every channel, a single
    number is reused for every channel. severity == 0 returns im unchanged.
    """
    if severity == 0:
        return im

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (float, int)):
        cval = [cval] * im.shape[2]

    strength = abs(severity)
    jitter = strength * eps
    spread = strength * delta

    # Random draws happen in a fixed order: three for the matrix, two for the offset.
    m00 = 1 + jitter * randn()
    m10 = jitter * randn()
    m11 = 1.0 + jitter * randn()
    matrix = np.array([[m00, 0.0], [m10, m11]])
    centre = np.array(im.shape[:2]) * 0.5
    offset = centre - np.dot(matrix, centre) + np.array([randn() * spread, randn() * spread])

    planes = []
    for idx, plane in enumerate(cv2.split(im)):
        planes.append(interpolation.affine_transform(plane, matrix, offset=offset, order=1,
                                                     mode='constant', cval=cval[idx]))

    return np.array(cv2.merge(planes))
|
||||
|
||||
|
||||
def rdistort(im, distort=4.0, dsigma=10.0, cval=None, severity=1):
    """
    Random smooth distortion, applied to each channel separately.

    Two Gaussian-filtered noise fields are rescaled so that their largest
    absolute value is distort * |severity| and are used as per-pixel row and
    column displacements. The filter sigma is dsigma * (1 - |severity|). cval
    is the fill value for samples outside the image: None means 0 for every
    channel, a single number is reused for every channel. severity == 0
    returns im unchanged.
    """
    if severity == 0:
        return im

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (float, int)):
        cval = [cval] * im.shape[2]

    strength = abs(severity)
    max_shift = strength * distort
    sigma = dsigma * (1 - strength)

    rows, cols = im.shape[:2]
    row_shift = randn(rows, cols)
    col_shift = randn(rows, cols)
    row_shift = filters.gaussian_filter(row_shift, sigma)
    col_shift = filters.gaussian_filter(col_shift, sigma)
    row_shift *= max_shift / np.abs(row_shift).max()
    col_shift *= max_shift / np.abs(col_shift).max()
    # indexing='ij' gives (rows, cols)-shaped grids: the first holds the row
    # index of every pixel, the second its column index.
    grid_r, grid_c = np.meshgrid(np.arange(rows), np.arange(cols), indexing='ij')
    coordinates = np.array([grid_r + row_shift, grid_c + col_shift])

    warped = []
    for idx, plane in enumerate(cv2.split(im)):
        warped.append(map_coordinates(plane, coordinates, order=1, cval=cval[idx]))
    return np.array(cv2.merge(warped))
|
||||
|
||||
|
||||
def reverse_color(im):
    """Invert pixel values: every value v becomes 255 - v."""
    inverted = 255 - im
    return inverted
|
||||
|
||||
|
||||
def resize(im, fx=None, fy=None, delta=0.3, severity=1):
    """Scale the image horizontally by `fx` and vertically by `fy`.

    A factor that is not given is drawn at random from
    [1 - delta * severity, 1 + delta * severity].
    """
    max_change = delta * severity
    if fx is None:
        fx = 1 + max_change * uniform(-1, 1)
    if fy is None:
        fy = 1 + max_change * uniform(-1, 1)
    scaled = cv2.resize(im, None, fx=fx, fy=fy)
    return np.array(scaled)
|
||||
|
||||
|
||||
def warp_perspective(im, theta=20, delta=10, cval=0, severity=1):
    """Apply a random perspective warp by jittering the four image corners.

    Args:
        im: image array with a channel axis.
        theta: base maximum corner displacement in pixels.
        delta: base margin; warped corners are kept at least this far inside
            the image on the clamped sides.
        cval: border fill value; a scalar is replicated per channel.
        severity: multiplier for `theta` and `delta`; 0 returns `im` untouched.

    Returns:
        The warped image (same width and height) as a numpy array.
    """
    if severity == 0:
        return im

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (float, int)):
        cval = [cval] * im.shape[2]

    margin = delta * severity
    rows, cols = im.shape[:2]
    # Corner order: top-left, top-right, bottom-right, bottom-left.
    corners = np.float32([[0, 0], [cols, 0], [cols, rows], [0, rows]])

    # Jitter the corners, then clamp them to limit the amount of change.
    jitter = np.random.uniform(-1, 1, corners.shape) * theta * severity
    moved = np.maximum(corners + jitter, margin)
    right, bottom = [1, 2], [2, 3]
    moved[right, 0] = np.minimum(moved[right, 0], corners[right, 0] - margin)
    moved[bottom, 1] = np.minimum(moved[bottom, 1], corners[bottom, 1] - margin)
    moved = np.float32(moved)

    transform = cv2.getPerspectiveTransform(corners, moved)
    return np.array(cv2.warpPerspective(im, transform, (cols, rows), borderValue=cval))
|
||||
|
||||
|
||||
def noise_salt_pepper(image, percentage=0.001, severity=1):
    """Add salt-and-pepper noise to `image` IN PLACE and return it.

    Args:
        image: 3-D image array (H, W, C); it is modified in place.
        percentage: fraction of the H*W pixels that receive salt, and the
            same fraction again that receive pepper.
        severity: multiplier applied to `percentage`.

    Returns:
        The same `image` object. Each noisy pixel gets one random value
        (salt: 200..254, pepper: 0..49) copied to all of its channels.
    """
    percentage *= severity
    height, width = image.shape[:2]
    amount = int(percentage * height * width)
    if amount <= 0:
        return image
    deep = image.shape[2]

    # Salt first, then pepper, so pepper wins where both hit the same pixel.
    for low, high in ((200, 255), (0, 50)):
        # randint's upper bound is exclusive: use the full extent so that
        # every row and column (including the last) can be selected.
        rows = np.random.randint(0, height, amount)
        cols = np.random.randint(0, width, amount)
        values = nprandint(low, high, amount)
        image[rows, cols, :] = values.repeat(deep, axis=0).reshape(amount, deep)

    return image
|
||||
|
||||
|
||||
def noise_gaussian(im, sigma=20, severity=1):
    """Add zero-mean Gaussian noise with std `sigma * |severity|`; returns uint8."""
    scaled_sigma = sigma * abs(severity)
    noise = scaled_sigma * np.random.randn(*im.shape)
    noisy = np.float32(im) + noise
    return cvt_uint8(noisy)
|
||||
|
||||
|
||||
def noise_gamma(im, extend=30, severity=1):
    """Add mean-centred gamma-distributed noise; returns a uint8 image.

    The gamma scale is `int(extend * |severity|)` and the shape parameter is 2.
    """
    scale = int(extend * abs(severity))
    noise = np.random.gamma(shape=2, scale=scale, size=im.shape)
    # Remove the sample mean so the noise does not shift overall brightness.
    noise = noise - np.mean(noise)
    return cvt_uint8(np.float32(im) + noise)
|
||||
|
||||
|
||||
def noise_speckle(img, extend=40, severity=1):
    """Add Gaussian-smoothed random noise (amplitude `|severity| * extend`).

    Smoothing the noise produces larger blotches that look more realistic
    than per-pixel Gaussian noise. Returns a uint8 image.
    """
    amplitude = abs(severity) * extend
    raw_noise = np.random.randn(*img.shape) * amplitude
    blotches = filters.gaussian_filter(raw_noise, 1)
    return cvt_uint8(img + blotches)
|
||||
|
||||
|
||||
def noise_blur(im, severity=1):
    """Blur by shrinking the image and enlarging it back to its original size.

    The shrink factor is `1 - 0.2 * |severity|`, but never so small that the
    shrunken height would fall below 19 pixels.
    """
    h, w = im.shape[:2]
    min_height = 19.0
    factor = max(1 - 0.2 * abs(severity), min_height / h)
    shrunk = cv2.resize(im, None, fx=factor, fy=factor)
    restored = cv2.resize(shrunk, (w, h))
    return np.array(restored)
|
||||
|
||||
|
||||
def add_noise(img):
    """Randomly combine several noise effects on a numpy image.

    Each effect is applied when `lucky(p)` is true (presumably with
    probability p -- `lucky` is defined elsewhere in this file) with a random
    severity in [0.3, 0.6]. Finally, with `lucky(0.3)`, the noisy result is
    averaged with the original image.

    The random strength is passed as `severity=`. Passing it positionally
    would land in `percentage` / `sigma` / `extend` and give 30-60%
    salt-and-pepper coverage but almost no Gaussian or speckle noise.
    """
    img0 = img
    if lucky(0.1):
        # noise_salt_pepper works in place; operate on a copy so that `img0`
        # stays the clean original for the final blend.
        img = noise_salt_pepper(img.copy(), severity=uniform(0.3, 0.6))
    if lucky(0.2):
        img = noise_gaussian(img, severity=uniform(0.3, 0.6))
    if lucky(0.5):
        img = noise_blur(img, severity=uniform(0.3, 0.6))
    if lucky(0.5):
        img = noise_speckle(img, severity=uniform(0.3, 0.6))
    if lucky(0.3):
        img = img // 2 + img0 // 2
    return img
|
||||
|
||||
|
||||
def gaussian_blur(im, sigma=1, kernel_size=None, severity=1):
    """Gaussian blur with a square kernel.

    When `kernel_size` is given, `severity` is ignored; otherwise the kernel
    size is `int(11 * severity)`. Kernels smaller than 3 leave the image
    unchanged, and even sizes are reduced by one to make them odd.
    """
    if kernel_size is None:
        step = 11
        kernel_size = int(step * severity)

    if kernel_size < 3.0:
        return im

    if kernel_size % 2 == 0:
        kernel_size -= 1

    blurred = cv2.GaussianBlur(im, (kernel_size, kernel_size), sigma)
    return np.array(blurred)
|
||||
|
||||
|
||||
def rotate_shrink(im, max_angle=6, severity=0.5, cval=255):
    """Rotate about the center by a random angle, shrinking so nothing is cropped.

    The angle is an integer drawn from [-a, a] degrees, where
    a = int(|severity| * max_angle). The output keeps the input size;
    uncovered border pixels are filled with `cval`.
    """
    angle_limit = int(abs(severity) * max_angle)
    angle = randint(-angle_limit, angle_limit)
    h, w = im.shape[:2]
    rangle = np.deg2rad(angle)  # angle in radians
    sin_a, cos_a = np.sin(rangle), np.cos(rangle)
    # Bounding box of the rotated image; scale it down to fit the original size.
    nw = abs(sin_a * h) + abs(cos_a * w)
    nh = abs(cos_a * h) + abs(sin_a * w)
    scale = min(w / nw, h / nh)
    mat = cv2.getRotationMatrix2D((w // 2, h // 2), angle, scale)
    rotated = cv2.warpAffine(im, mat, (w, h), borderValue=cval)
    return np.array(rotated)
|
||||
|
||||
|
||||
def rotate_about_center(im, angle=4, scale=1, b_mode=None, cval=None, severity=1):
    """Rotate the image by `severity * angle` degrees, enlarging the canvas to fit.

    Args:
        im: image array with a channel axis.
        angle: base rotation angle in degrees.
        scale: isotropic scale applied together with the rotation.
        b_mode: when None the border is filled with `cval`; any other value
            replicates the edge pixels (recommended for color images).
        cval: constant border value; a scalar is replicated per channel,
            None means 0 for every channel.
        severity: multiplier for `angle`; a resulting angle of 0 returns `im`.

    Returns:
        The rotated image as a numpy array (its size may differ from the input).
    """
    angle = severity * angle
    if angle == 0:
        return im

    h, w = im.shape[0], im.shape[1]
    rangle = np.deg2rad(angle)  # angle in radians
    # Size of the canvas that contains the whole rotated image.
    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Move from the old center to the new one, combined with the rotation;
    # only the translation column of the matrix is affected.
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (int, float)):
        cval = [cval] * im.shape[2]

    out_size = (int(ceil(nw)), int(ceil(nh)))
    if b_mode is None:
        src = cv2.warpAffine(im, rot_mat, out_size, flags=cv2.INTER_LANCZOS4,
                             borderMode=cv2.BORDER_CONSTANT, borderValue=cval)
    else:
        src = cv2.warpAffine(im, rot_mat, out_size, flags=cv2.INTER_LANCZOS4,
                             borderMode=cv2.BORDER_REPLICATE)
    return np.array(src)
|
||||
|
||||
|
||||
def randcrop(img, max_per=0.15, severity=1):
    """Crop a random margin (0..k pixels) from each of the four sides.

    k = int(rows * cols * max_per * severity / (rows + cols)).
    """
    perc = max_per * severity
    rows, cols = img.shape[:2]
    k = int(rows * cols * perc / (rows + cols))
    # Draw order: top, bottom, left, right.
    top = randint(0, k)
    bottom = rows - randint(0, k)
    left = randint(0, k)
    right = cols - randint(0, k)
    return np.array(img[top:bottom, left:right])
|
||||
|
||||
|
||||
def enhance_sharpness(img, r=None, severity=1):
    """Adjust image sharpness with PIL's ImageEnhance.Sharpness.

    A factor of 0.0 gives a blurred image, 1.0 the original image and 2.0 a
    sharpened image. When `r` is None, the factor is drawn from
    [1 - 0.5*|severity|, 1] or from [1, 1 + |severity|], chosen by `lucky(0.5)`.
    Takes and returns a numpy array.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Sharpness(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
|
||||
|
||||
|
||||
def enhance_contrast(img, r=None, severity=1):
    """Adjust image contrast with PIL's ImageEnhance.Contrast.

    A factor of 0.0 gives a solid grey image and 1.0 the original image.
    When `r` is None, the factor is drawn from [1 - 0.5*|severity|, 1] or
    from [1, 1 + |severity|], chosen by `lucky(0.5)`.
    Takes and returns a numpy array.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Contrast(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
|
||||
|
||||
|
||||
def enhance_brightness(img, r=None, severity=1):
    """Adjust image brightness with PIL's ImageEnhance.Brightness.

    A factor of 0.0 gives a black image and 1.0 the original image.
    When `r` is None, the factor is drawn from [1 - 0.2*|severity|, 1] or
    from [1, 1 + 0.5*|severity|], chosen by `lucky(0.5)`.
    Takes and returns a numpy array.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.2 * strength, 1)
        else:
            r = uniform(1, 1 + strength * 0.5)
    enhancer = ImageEnhance.Brightness(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
|
||||
|
||||
|
||||
def enhance_color(img, r=None, severity=1):
    """Adjust the colour balance with PIL's ImageEnhance.Color.

    A factor of 0.0 gives a black-and-white image and 1.0 the original image.
    When `r` is None, the factor is drawn from [1 - 0.5*|severity|, 1] or
    from [1, 1 + |severity|], chosen by `lucky(0.5)`.
    Takes and returns a numpy array.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Color(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
|
||||
|
||||
|
||||
def enhance(img):
    """Randomly apply sharpness, contrast and brightness enhancement.

    Each step runs when `lucky(0.3)` is true and uses the step's default
    random factor. The result is returned as a numpy array.
    """
    for step in (enhance_sharpness, enhance_contrast, enhance_brightness):
        if lucky(0.3):
            img = step(img)
    return np.array(img)
|
||||
|
||||
|
||||
def draw_line(im):
    """Draw one random anti-aliased line on `im` (modified in place).

    The line starts in the left third and ends in the right third of the
    image, has a random colour and a width of 1 (weight 0.8) or 2 (weight 0.2).
    """
    h, w = im.shape[:2]
    # Draw order matters for reproducibility: start x, start y, end x, end y.
    start_x = randint(0, w // 3)
    start_y = randint(0, h - 1)
    end_x = randint(w // 3 * 2, w - 1)
    end_y = randint(0, h - 1)
    color = [randint(0, 255) for _ in range(3)]
    line_width = lucky_choice((1, 2), (0.8, 0.2))
    cv2.line(im, (start_x, start_y), (end_x, end_y), color, line_width, cv2.LINE_AA)
    return np.array(im)
|
||||
|
||||
|
||||
def center_im(im_outter, im_inner, shrink=True, vertical='center'):
    """Paste `im_inner` horizontally centered into a copy of `im_outter`.

    Args:
        im_outter: container image, or just its shape (any non-ndarray
            iterable); in the latter case a zero-filled uint8 container is
            created, inheriting trailing dimensions from `im_inner`.
        im_inner: image to place inside the container.
        shrink: when the inner image is larger than the container, scale it
            down (keeping aspect ratio) before any remaining overflow is
            cropped; when False it is only cropped.
        vertical: vertical placement, one of 'center', 'top' or 'bottom'
            (case-insensitive).

    Returns:
        A new numpy array; `im_outter` itself is not modified.

    Raises:
        ValueError: if `vertical` is not a supported placement.
    """
    if not isinstance(im_outter, np.ndarray):
        shape = tuple(im_outter)
        if im_inner.ndim > len(shape):
            shape += im_inner.shape[len(shape):]
        im_outter = np.zeros(shape, np.uint8)

    H, W = im_outter.shape[:2]
    h, w = im_inner.shape[:2]
    if h > H or w > W:
        if shrink:
            rate = min(H / h, W / w)
            # cv2.resize takes the target size as its second argument; a
            # scale factor must be passed through fx/fy instead.
            im_inner = cv2.resize(im_inner, None, fx=rate, fy=rate)
        # Crop whatever still overflows (rounding, or shrink=False).
        im_inner = im_inner[:H, :W]
        h, w = im_inner.shape[:2]

    vertical = vertical.lower()
    if vertical == 'center':
        dh = (H - h) // 2
    elif vertical == 'top':
        dh = 0
    elif vertical == 'bottom':
        dh = H - h
    else:
        raise ValueError("vertical must be 'center', 'top' or 'bottom', got %r" % vertical)

    im = im_outter.copy()
    dw = (W - w) // 2
    im[dh:dh + h, dw:dw + w] = im_inner
    return np.array(im)
|
||||
|
||||
|
||||
def enhance_light(img):
    """Randomly apply mild sharpness, contrast and brightness enhancement.

    Each step runs when `lucky(0.3)` is true, with factors drawn from
    [0.5, 1.5], [0.7, 1.3] and [0.85, 1.15] respectively. The result is
    returned as a numpy array.
    """
    if lucky(0.3):
        sharp_factor = uniform(0.5, 1.5)
        img = enhance_sharpness(img, sharp_factor)
    if lucky(0.3):
        contrast_factor = uniform(0.7, 1.3)
        img = enhance_contrast(img, contrast_factor)
    if lucky(0.3):
        bright_factor = uniform(0.85, 1.15)
        img = enhance_brightness(img, bright_factor)
    return np.array(img)
|
||||
|
||||
|
||||
def gaussian2d(w, h):
    """Return a 2-D Gaussian bump (an ellipse-shaped blob) on a grid.

    The grid has 2 * (h // 2) rows and 2 * (w // 2) columns. The result is
    scaled so that max - min equals 1.
    """
    half_h = h // 2
    half_w = w // 2
    grid_x, grid_y = np.meshgrid(np.arange(-half_w, half_w), np.arange(-half_h, half_h))
    mean_x, mean_y = np.mean(grid_x), np.mean(grid_y)
    std_x, std_y = np.std(grid_x), np.std(grid_y)

    dist_y = (grid_y - mean_y) ** 2 / (std_y ** 2)
    dist_x = (grid_x - mean_x) ** 2 / (std_x ** 2)
    z = np.exp(-(dist_y + dist_x) / 2)
    z /= (np.sqrt(2 * np.pi) * std_y)
    # Rescale so that the value range (max - min) is exactly 1.
    z *= 1 / (np.max(z) - np.min(z))
    return z
|
||||
|
||||
|
||||
def add_stain(img, theta=200, severity=0.5, bright_spot=False, iteration=1):
    """Add dark stains (or bright spots) at random positions of a 3-channel image.

    Args:
        img: image array of shape (H, W, C).
        theta: base stain intensity; scaled once by |severity|.
        severity: strength multiplier for `theta`.
        bright_spot: add a bright spot instead of subtracting a dark stain.
        iteration: number of stains to add.

    Returns:
        The stained image as a numpy array (uint8 once at least one stain was
        added).
    """
    # Scale the intensity once. Doing it inside the loop would compound the
    # factor and weaken every further stain when iteration > 1.
    theta = theta * abs(severity)

    for _ in range(iteration):
        img = np.float32(img)
        img_h, img_w = img.shape[:2]
        # Base stain size follows the shorter image side, limited to [80, 300].
        base = min(max(min(img_h, img_w), 80), 300)

        if not bright_spot:
            gaussian_img = gaussian2d(randint(base // 3, base // 2), randint(base // 3, base // 2)) * theta
        else:
            # randint needs integer bounds; `base // 1.5` is a float.
            low, high = int(base // 1.5), int(base / 0.8)
            # NOTE(review): the bright-spot mask is not scaled by `theta`, so
            # it adds only about one intensity level -- confirm this is intended.
            gaussian_img = gaussian2d(randint(low, high), randint(low, high))

        stain_h, stain_w = gaussian_img.shape[:2]
        tmp_min = int(min(stain_h, stain_w))
        # Strongly distort the elliptical blob; keep cval small.
        gaussian_img = rdistort(gaussian_img, randint(tmp_min // 10, tmp_min // 6), cval=0)

        x1 = randint(0, max(img_w - 5, 0))
        y1 = randint(0, max(img_h - 5, 0))
        # Clip the stain to the image bounds (the end index is exclusive).
        y2 = int(min(y1 + stain_h, img_h))
        x2 = int(min(x1 + stain_w, img_w))

        # Replicate the single-channel mask across all image channels.
        row, col = gaussian_img.shape
        gaussian_img = gaussian_img.repeat(img.shape[2], axis=1)
        gaussian_img = gaussian_img.reshape(row, col, img.shape[2])
        gaussian_img = np.float32(gaussian_img[:(y2 - y1), :(x2 - x1)])

        if not bright_spot:
            img[y1:y2, x1:x2] -= gaussian_img
        else:
            temp1 = min([np.median(gaussian_img), 255 - np.mean(img[y1:y2, x1:x2])])
            gaussian_img = np.clip(gaussian_img - temp1, 0, 255)
            img[y1:y2, x1:x2] = np.clip(img[y1:y2, x1:x2] + gaussian_img, 0, 255)
        img = cvt_uint8(img)

    return np.array(img)
|
||||
|
||||
|
||||
def shift_color(im, delta_max=10, severity=0.5):
    """Add one random integer offset per channel to a 3-D image.

    Args:
        im: image array of shape (H, W, C).
        delta_max: maximum absolute shift, or a `(min, max)` tuple giving the
            shift range explicitly. Both bounds are inclusive.
        severity: multiplier for the shift range (its absolute value is
            used); 0 returns `im` untouched.

    Returns:
        The shifted image as a uint8 numpy array.
    """
    if severity == 0:
        return im

    strength = abs(severity)
    # Resolve a tuple range before scaling: int(tuple * float) is a TypeError.
    if isinstance(delta_max, tuple):
        delta_min, delta_max = (int(bound * strength) for bound in delta_max)
    else:
        delta_max = int(delta_max * strength)
        delta_min = -delta_max
    if delta_min > delta_max:
        delta_min, delta_max = delta_max, delta_min

    im = np.float32(im)
    # np.random.randint excludes `high`; +1 makes the range inclusive and
    # keeps it valid when the scaled range collapses to a single value.
    delta = np.random.randint(delta_min, delta_max + 1, (1, 1, im.shape[2]))
    im += delta

    return np.array(cvt_uint8(im))
|
||||
|
||||
|
||||
def random_contrast(img, contrast_delta=0.3, bright_delta=0.1):
    """Randomly change image contrast and brightness around the channel means.

    Args:
        img: 2-D or 3-D image array; a 2-D image gets a trailing channel axis.
        contrast_delta: maximum contrast change d (range [-d, d]) or an
            explicit `(min, max)` tuple.
        bright_delta: maximum brightness change, same convention.

    Returns:
        A uint8 numpy array of shape (H, W, C).
    """
    if isinstance(contrast_delta, tuple):
        contrast_low, contrast_high = contrast_delta
    else:
        contrast_low, contrast_high = -contrast_delta, contrast_delta

    if isinstance(bright_delta, tuple):
        bright_low, bright_high = bright_delta
    else:
        bright_low, bright_high = -bright_delta, bright_delta

    contrast_gain = 1 + uniform(contrast_low, contrast_high)
    bright_gain = 1 + uniform(bright_low, bright_high)

    pixels = img.astype(np.float32)
    if img.ndim == 2:
        pixels = pixels[:, :, None]
    channel_mean = pixels.mean(axis=(0, 1), keepdims=True)
    # Stretch deviations from the mean (contrast) and scale the mean (brightness).
    adjusted = (pixels - channel_mean) * contrast_gain + channel_mean * bright_gain
    return np.array(adjusted.clip(0, 255).astype(np.uint8))
|
||||
|
||||
|
||||
def period_map(xi, times, extent):
    """Map sample positions to a periodic sine-shaped offset.

    Args:
        xi: array of sample positions; absolute values are folded into
            [0, times).
        times: period length; values below 1 yield None.
        extent: lower bound of the random amplitude, drawn from
            [extent, extent + 10] with a random sign.

    Returns:
        A numpy array with one offset per sample, or None when `times` < 1.
    """
    if times < 1:
        return None
    times = float(times)
    theta = randint(extent, extent + 10) * choice([1, -1])
    half_period = times / 2.0

    def wave(x):
        # Only a sine-shaped effect is implemented; the first half of the
        # period uses a phase offset of 3/2, the second half 1/2.
        phase = 3 / 2.0 if x < half_period else 1 / 2.0
        return theta * sin(pi * (phase + x / times))

    folded = np.fabs(xi) % times
    return np.array([wave(x) for x in folded])
|
||||
|
||||
|
||||
def whole_rdistort(im, severity=1, scop=40):
    """Warp the whole image with a wave-like piecewise-affine transform.

    A grid of control points is laid over the image; rows are displaced by
    `period_map` and columns by a sine wave of amplitude `severity * scop`.
    The warped result is multiplied by 255 before conversion to uint8
    (presumably because `warp` returns values normalized to 0-1 -- verify).

    Args:
        im: image array.
        severity: distortion strength; 0 returns `im` untouched.
        scop: base amplitude of the column displacement.

    Returns:
        The distorted image as a uint8 numpy array.
    """
    if severity == 0:
        return im

    amplitude = severity * scop
    rows, cols = im.shape[:2]
    # At least 3 control points per axis.
    n_col_pts = max(int(cols * severity * 0.05), 3)
    n_row_pts = max(int(rows * severity * 0.05), 3)

    grid_rows, grid_cols = np.meshgrid(np.linspace(0, rows, n_row_pts), np.linspace(0, cols, n_col_pts))
    src = np.stack([grid_cols.ravel(), grid_rows.ravel()], axis=1)
    n_points = src.shape[0]

    # The key step for the wave distortion effect.
    dst_rows = src[:, 1] - period_map(np.linspace(0, 100, n_points), 50, 20)
    dst_cols = src[:, 0] - np.sin(np.linspace(0, 3 * np.pi, n_points)) * amplitude
    dst = np.vstack([dst_cols, dst_rows]).T

    tform = PiecewiseAffineTransform()
    tform.estimate(src, dst)
    warped = warp(im, tform, mode='edge', output_shape=(rows, cols)) * 255
    return np.array(cvt_uint8(warped))
|
||||
|
||||
|
||||
def lucky_choice(seq, ps=None, rand_func=random):
    """Pick an element of `seq` at random, weighted by `ps`.

    Args:
        seq: sequence to choose from; an empty one yields None.
        ps: per-element weights (need not sum to 1); None picks uniformly.
        rand_func: callable returning a random number, expected in [0, 1).

    Returns:
        The chosen element, or None for an empty `seq`.
    """
    if not seq:
        return None
    if ps is None:
        return choice(seq)

    cumulative = np.cumsum(ps)
    threshold = rand_func() * cumulative[-1]
    # Count the cumulative weights strictly below the threshold; clamp in
    # case `ps` has more entries than `seq`.
    position = (cumulative < threshold).sum()
    last = len(seq) - 1
    return seq[min(position, last)]
|
||||
|
||||
|
||||
def cvt_uint8(im):
    """Convert an array to `np.uint8`, rounding and clipping to [0, 255].

    An array that is already uint8 is returned as is (same object).
    """
    if im.dtype != np.uint8:
        im = np.round(im).clip(0, 255).astype(np.uint8)
    return im
|
||||
|
||||
|
||||
def to_image(im):
    """Convert `im` to a PIL `Image`; an `Image` input is returned unchanged."""
    if isinstance(im, Image.Image):
        return im
    if im.ndim == 3:
        # Reverse the channel order: BGR (cv2) -> RGB (Image).
        im = im[:, :, ::-1]
    return Image.fromarray(im)
|
||||
|
||||
|
||||
def to_array(im):
    """Convert a PIL `Image` to a numpy array; other inputs are returned unchanged."""
    if not isinstance(im, Image.Image):
        return im
    arr = np.array(im)
    if arr.ndim == 3:
        # Reverse the channel order: RGB (Image) -> BGR (cv2).
        arr = arr[:, :, ::-1]
    return arr
|
||||
|
||||
|
||||
def unify_img(img, img_height=64, max_length=512, img_channel=3):
    """Resize to a fixed height and crop or pad to a fixed width.

    The image is resized to `img_height`, keeping its aspect ratio. A result
    wider than `max_length` is cropped on the right; otherwise it is placed
    at the left of a white (255) canvas of shape
    (img_height, max_length, img_channel).
    """
    color_fill = 255
    src_h, src_w = img.shape[0], img.shape[1]

    img_width = int(float(src_w) / src_h * img_height)
    resized = cv2.resize(img, (img_width, img_height))
    if img_width > max_length:
        return np.array(resized[:, 0:max_length])

    # White canvas, image pasted at the top-left corner.
    canvas = np.full((img_height, max_length, img_channel), color_fill, np.uint8)
    canvas[0:img_height, 0:img_width] = resized
    return np.array(canvas)
|
||||
|
||||
|
||||
def unify_img_label(img, label, img_height=64, max_length=512, min_length=192, img_channel=3):
    """Normalize the image size like `unify_img` and pass the label through.

    Args:
        img: input image array.
        label: returned unchanged.
        img_height: target height.
        max_length: target width (wider images are cropped, narrower ones
            padded with white).
        min_length: currently unused; kept for interface compatibility.
        img_channel: number of channels of the padded canvas.

    Returns:
        Tuple `(image, label)`.
    """
    # The resize/crop/pad logic is identical to unify_img; share it.
    img = unify_img(img, img_height=img_height, max_length=max_length, img_channel=img_channel)
    return img, label
|
|
@ -0,0 +1,108 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""train CNN direction model."""
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
|
||||
from src.cnn_direction_model import CNNDirectionModel
|
||||
from src.config import config1 as config
|
||||
from src.dataset import create_dataset_train
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore import dataset as de
|
||||
from mindspore.communication.management import init
|
||||
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
|
||||
from mindspore.nn.metrics import Accuracy
|
||||
from mindspore.nn.optim.adam import Adam
|
||||
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
|
||||
from mindspore.train.model import Model, ParallelMode
|
||||
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||
|
||||
def _str2bool(value):
    """Parse a command-line boolean; `type=bool` would treat "False" as True."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', '1', 'yes', 'y'):
        return True
    if text in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=_str2bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')

parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')

args_opt = parser.parse_args()

# Fix every random source so that training runs are reproducible.
random.seed(11)
np.random.seed(11)
de.config.set_seed(11)
ms.common.set_seed(11)
|
||||
|
||||
if __name__ == '__main__':
    if args_opt.dataset_path is None:
        raise ValueError("--dataset_path is required")

    target = args_opt.device_target
    ckpt_save_dir = config.save_checkpoint_path

    # init context; device/rank layout comes from the launcher's environment
    device_id = int(os.getenv('DEVICE_ID', '0'))
    rank_id = int(os.getenv('RANK_ID', '0'))
    rank_size = int(os.getenv('RANK_SIZE', '1'))
    run_distribute = rank_size > 1
    # Honor --device_target (default 'Ascend') instead of a hard-coded value.
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=target,
                        device_id=device_id, save_graphs=False)

    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    if run_distribute:
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL)
        init()

    # create dataset
    dataset = create_dataset_train(os.path.join(args_opt.dataset_path, "ocr_pos.mindrecord0"), config=config)
    step_size = dataset.get_dataset_size()

    # define net
    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])

    # init weight from an optional pre-trained checkpoint
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)

    lr = config.lr
    lr = Tensor(lr, ms.float32)

    # define opt
    opt = Adam(params=net.trainable_params(), learning_rate=lr, eps=1e-07)

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")

    model = Model(net, loss_fn=loss, optimizer=opt, metrics={"Accuracy": Accuracy()})

    # define callbacks
    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()
    cb = [time_cb, loss_cb]
    if config.save_checkpoint:
        config_ck = CheckpointConfig(save_checkpoint_steps=2500,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck)
        cb += [ckpt_cb]

    # train model
    model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False)
|
Loading…
Reference in New Issue