forked from mindspore-Ecosystem/mindspore
76 lines
3.1 KiB
Python
Executable File
76 lines
3.1 KiB
Python
Executable File
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ============================================================================
|
|
"""Generate train and test dataset"""
|
|
import os
|
|
import shutil
|
|
import math as m
|
|
import random
|
|
from multiprocessing import Process
|
|
from captcha.image import ImageCaptcha
|
|
|
|
|
|
def _generate_captcha_per_process(path, total, start, end, img_width, img_height, max_digits):
    """
    Worker routine: render the captcha images with indices in [start, end).

    Each image shows a random digit string whose length is drawn from
    [1, max_digits]. Files are written into `path` and named
    '<zero-padded index>-<digits>.png', where the index is left-padded with
    zeros to the number of characters in str(total).
    """
    generator = ImageCaptcha(width=img_width, height=img_height)
    # Pad every index to the same width so that file names line up.
    pad_width = len(str(total))
    prefix_template = '{:0>' + str(pad_width) + '}-'
    for index in range(start, end):
        length = random.randint(1, max_digits)
        label = ''.join(str(random.randint(0, 9)) for _ in range(0, length))
        file_name = prefix_template.format(index) + label + '.png'
        generator.write(label, os.path.join(path, file_name))
|
|
|
|
|
|
def generate_captcha(name, img_num, img_width, img_height, max_digits, process_num=16):
    """
    Generate captcha images in parallel and save them under data/<name>.

    The "data" folder is created next to this script when it is missing. If the
    target folder data/<name> already exists it is removed and recreated, so
    every call starts from an empty folder. The index range [0, img_num) is
    split into contiguous chunks and each chunk is rendered by its own process.

    Args:
        name(str): name of the folder under which the captcha images are saved
        img_num(int): number of captcha images to generate
        img_width(int): width of the generated captcha images
        img_height(int): height of the generated captcha images
        max_digits(int): max number of digits in each captcha image. For each captcha image, the number of
            digits is in range [1, max_digits]
        process_num(int): number of processes used to generate captcha images, default is 16

    Raises:
        ValueError: if process_num is less than 1.
    """
    if process_num < 1:
        raise ValueError("process_num must be at least 1, but got {}".format(process_num))

    cur_script_path = os.path.dirname(os.path.realpath(__file__))
    path_data = os.path.join(cur_script_path, "data")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path_data, exist_ok=True)
    path = os.path.join(path_data, name)
    print("Generating dataset [{}] under {}...".format(name, path))
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)
    img_num_per_process = m.ceil(img_num / process_num)

    processes = []
    for i in range(process_num):
        start = i * img_num_per_process
        # Because the chunk size is rounded up, a chunk may reach past img_num
        # well before the last process; clamp every chunk, not only the last.
        end = min(start + img_num_per_process, img_num)
        if start >= end:
            # All images are already assigned; remaining processes would be idle.
            break
        p = Process(target=_generate_captcha_per_process,
                    args=(path, img_num, start, end, img_width, img_height, max_digits))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    print("Generating dataset [{}] finished, total number is {}!".format(name, img_num))
|
|
|
|
|
|
if __name__ == '__main__':
    # Build the test split first, then the train split, with identical image settings.
    for dataset_name, dataset_size in (("test", 10000), ("train", 50000)):
        generate_captcha(dataset_name, img_num=dataset_size, img_width=160, img_height=64, max_digits=4)
|