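# For study and exchange purposes only.
# Chinese text is rendered with the 方正黑体 (FangZheng HeiTi) font; the ID number uses the "OCR-B 10 BT.ttf" font.
# In add_txt(), `size` is the font size and `draw_x`, `draw_y` are the drawing coordinates; adjust them to match your own template images.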
import os
import cv2
import random
import numpy as np
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageFont
def mkdir(path):
    """Create directory ``path`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` so there is no window between an existence check
    and the creation in which another process could create the directory
    and make the call fail.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
class Person(object):
    """Text fields printed on one synthetic ID card.

    Every field except ``public`` is stored as a one-element tuple that
    holds the stringified value; ``public`` is stored as a plain ``str``.
    The drawing code in this file flattens fields with ``to_str`` and reads
    the raw value with ``field[0]``, so the tuple shape is part of the
    contract of this class.
    """

    def __init__(self, name, sex, national, years, month, day, address1, address2, id_card, public):
        # One-element tuples: consumers index [0] or join them with to_str().
        self.name = (str(name),)
        self.sex = (str(sex),)
        self.national = (str(national),)
        self.years = (str(years),)
        self.month = (str(month),)
        self.day = (str(day),)
        self.address1 = (str(address1),)
        self.address2 = (str(address2),)
        self.id_card = (str(id_card),)
        # The issuing authority is the only field kept as a bare string.
        self.public = str(public)
def first_name():
    """Return one randomly chosen surname (Hundred Family Surnames list).

    Reads ``first_name.txt`` (UTF-8, one entry per line) from the directory
    named by the module-level ``ROOT`` and picks one line uniformly at
    random.  The file is re-read on every call.
    """
    with open(ROOT + 'first_name.txt', 'r', encoding='utf-8') as handle:
        surnames = [raw.rstrip('\n') for raw in handle]
    # randrange(n) draws from 0..n-1, the same range as randint(0, n - 1).
    pick = random.randrange(len(surnames))
    return surnames[pick]
def name():
    """Return one randomly chosen entry from the character list file.

    Reads ``name.txt`` (UTF-8, one entry per line) from the directory named
    by the module-level ``ROOT``; per the original note the list is modelled
    on the Chinese recognition dictionary shipped with PaddleOCR.  The file
    is re-read on every call.
    """
    with open(ROOT + 'name.txt', 'r', encoding='utf-8') as src:
        charset = [entry.rstrip('\n') for entry in src]
    # randrange(n) draws from 0..n-1, the same range as randint(0, n - 1).
    return charset[random.randrange(len(charset))]
def GBK2312():
    """Return one random Chinese character decoded from a two-byte GB2312 code.

    The lead byte is drawn from 0xB0-0xF7 and the trail byte from 0xA1-0xF9.
    The trail range stops at 0xF9 because, per the original note, the last
    five positions of row 55 are not valid characters; trimming every row
    keeps the code simple.
    """
    lead = random.randint(0xb0, 0xf7)
    trail = random.randint(0xa1, 0xf9)
    # Both values are >= 0xA1, so they form a valid two-byte sequence directly.
    return bytes([lead, trail]).decode('gb2312')
def second_name():
    """Return the optional middle part of a generated name.

    A single draw from 0..30 selects the outcome:

    * 0-14  -> a random character from ``name()``
    * 15-28 -> the empty string (the name has no middle character)
    * 29-30 -> ``'\u00B7'``, the middle dot used inside some names (the
      original note cites names from Xinjiang)

    ``name()`` reads its list from disk, so it is only called in the branch
    that actually uses its result.
    """
    roll = random.randint(0, 30)
    if roll < 15:
        return name()
    if roll < 29:
        return ''
    return '\u00B7'
def last_name():
    """Return a random character for the final position of a generated name.

    Thin wrapper around ``name()``.
    """
    final_char = name()
    return final_char
def create_name():
    """Return a random full name: surname + optional middle part + final character.

    The three parts are produced, in this order, by ``first_name()``,
    ``second_name()`` and ``last_name()``.
    """
    parts = [first_name(), second_name(), last_name()]
    return ''.join(parts)
def sex_word():
    """Return '男' or '女', chosen by a single ``randint(0, 1)`` draw."""
    # 0 -> '男', 1 -> '女'
    return '女' if random.randint(0, 1) else '男'
def national_name():
    """Return one randomly chosen ethnicity name.

    Reads ``nation.txt`` (UTF-8, one entry per line) from the directory
    named by the module-level ``ROOT`` and picks one line uniformly at
    random.  The file is re-read on every call.
    """
    with open(ROOT + 'nation.txt', 'r', encoding='utf-8') as handle:
        nations = [row.rstrip('\n') for row in handle]
    last_index = len(nations) - 1
    return nations[random.randint(0, last_index)]
def address_line1():
    """Return the first address line: a province/city prefix padded to 11 characters.

    A prefix is picked uniformly at random from ``address.txt`` (UTF-8, one
    entry per line, located under the module-level ``ROOT``).  The file may
    be user-supplied, so the pick covers however many lines it actually
    contains rather than a fixed count.  If the prefix is shorter than 11
    characters it is padded with random characters from ``name()``; longer
    prefixes are returned unchanged.

    Raises:
        IndexError: if ``address.txt`` contains no lines.
    """
    with open(ROOT + 'address.txt', 'r', encoding='utf-8') as f:
        addresses = [line.rstrip('\n') for line in f]
    prefix = random.choice(addresses)
    # The first address line on the card holds 11 characters; a negative
    # difference yields an empty range, i.e. no padding.
    padding = ''.join(name() for _ in range(11 - len(prefix)))
    return prefix + padding
def address_line2():
    """Return the second address line: 5 to 7 random characters from ``name()``."""
    length = random.randint(5, 7)
    return ''.join(name() for _ in range(length))
def random_id_card():
    """Return a random 18-character ID number string.

    The first 17 characters are random digits.  The last character is 'X'
    when a draw from 0..10 equals 10, otherwise another random digit.  No
    check-digit computation is performed.
    """
    chars = [str(random.randint(0, 9)) for _ in range(17)]
    if random.randint(0, 10) == 10:
        chars.append('X')
    else:
        chars.append(str(random.randint(0, 9)))
    return ''.join(chars)
def public():
    """Return a random issuing-authority name for the national-emblem side.

    The result is 3 to 6 random characters from ``name()`` followed by the
    fixed suffix '公安局'.
    """
    prefix_len = random.randint(3, 6)
    pieces = [name() for _ in range(prefix_len)]
    pieces.append('公安局')
    return ''.join(pieces)
def to_str(per):
    """Join the string items of ``per`` (e.g. a tuple of strings) into one ``str``.

    A plain string passes through unchanged, since joining its characters
    rebuilds the same string.
    """
    return ''.join(per)
# Text-drawing helper.
def add_txt(image, size, draw_x, draw_y, txt, Font='方正黑体.ttf'):
    """Draw ``txt`` onto ``image`` with fill ``(0, 0, 0)`` and return the image.

    Args:
        image: PIL image to draw on; it is modified in place.
        size: font size passed to ``ImageFont.truetype``.
        draw_x, draw_y: coordinates passed to ``ImageDraw.text``.
        txt: the text to draw.
        Font: font file name, looked up under ``ROOT + 'IDTemplate/'``.

    The font file is loaded from disk on every call.
    """
    font_path = ROOT + 'IDTemplate/%s' % Font
    typeface = ImageFont.truetype(font_path, size)
    ImageDraw.Draw(image).text((draw_x, draw_y), txt, font=typeface, fill=(0, 0, 0))
    return image
def draw_txt(ori_image, img):
    """Composite the dark (text) pixels of ``img`` over ``ori_image``.

    Args:
        ori_image: the BGR image array that serves as the background.
        img: the PIL image that carries the drawn text; it is converted to
            a BGR array before processing.

    Returns:
        The BGR array obtained by adding the text pixels taken from ``img``
        to the remaining pixels taken from ``ori_image``.
    """
    text_bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    text_gray = cv2.cvtColor(text_bgr, cv2.COLOR_BGR2GRAY)
    # A light Gaussian blur softens the glyph edges before thresholding.
    softened = cv2.GaussianBlur(text_gray, (3, 3), 0)
    # Binarise: pixels brighter than 200 become 255 (background), the rest 0.
    _, background_mask = cv2.threshold(softened, 200, 255, cv2.THRESH_BINARY)
    text_mask = cv2.bitwise_not(background_mask)
    # Keep only the text pixels from the text-bearing image ...
    text_only = cv2.bitwise_and(text_bgr, text_bgr, mask=text_mask)
    # ... and everything except the text pixels from the original image.
    background_only = cv2.bitwise_and(ori_image, ori_image, mask=background_mask)
    # The two parts cover disjoint pixels, so a plain add merges them.
    return cv2.add(text_only, background_only)
# Photo side (front) of the card.
def make_maskA(person, nums, template_path, output_path):
    """Render the photo side of an ID card and write it to ``output_path``.

    One of the templates ``IDA1.jpg`` / ``IDA2.jpg`` under ``template_path``
    is chosen at random, the person's fields are drawn on it, and the result
    is saved as ``A_<nums>.jpg`` under ``output_path``.

    Args:
        person: object exposing ``name``, ``sex``, ``national``, ``years``,
            ``month``, ``day``, ``address1``, ``address2`` and ``id_card``;
            each field is flattened with ``to_str``, so both plain strings
            and tuples of strings are accepted.
        nums: identifier inserted into the output file name.
        template_path: directory that holds the template images.
        output_path: directory that receives the generated image.

    Raises:
        FileNotFoundError: if the chosen template cannot be read.
        OSError: if the output image cannot be written.
    """
    template_file = f'{template_path}/IDA{random.randint(1, 2)}.jpg'
    ori_image = cv2.imread(template_file)
    # cv2.imread signals failure by returning None rather than raising.
    if ori_image is None:
        raise FileNotFoundError('template image missing or unreadable: %s' % template_file)

    # Text has to be drawn with PIL: OpenCV cannot render Chinese glyphs.
    img = Image.fromarray(cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB))
    img = add_txt(img, 58, 210, 95, to_str(person.name))
    img = add_txt(img, 50, 210, 205, to_str(person.sex))
    img = add_txt(img, 50, 490, 205, to_str(person.national))
    img = add_txt(img, 50, 210, 308, to_str(person.years))

    # Two-digit months/days start further left than single-digit ones.
    month_txt = to_str(person.month)
    month_x = 410 if int(month_txt) > 9 else 425
    img = add_txt(img, 50, month_x, 308, month_txt)

    day_txt = to_str(person.day)
    if int(day_txt) > 9:
        img = add_txt(img, 50, 535, 308, day_txt)
    else:
        # NOTE(review): size 52 here vs. 50 for every other date field;
        # kept as found -- confirm whether it is intentional.
        img = add_txt(img, 52, 550, 308, day_txt)

    img = add_txt(img, 50, 210, 415, to_str(person.address1))
    img = add_txt(img, 50, 210, 485, to_str(person.address2))
    img = add_txt(img, 58, 420, 682, to_str(person.id_card), 'OCR-B 10 BT.ttf')

    final = draw_txt(ori_image, img)
    out_file = f'{output_path}/A_{nums}.jpg'
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(out_file, final):
        raise OSError('failed to write image: %s' % out_file)
# National-emblem side (back) of the card.
def make_maskB(person, nums, template_path, output_path):
    """Render the national-emblem side of an ID card and write it to ``output_path``.

    One of the templates ``IDB1.jpg`` / ``IDB2.jpg`` under ``template_path``
    is chosen at random, the issuing authority and the validity period are
    drawn on it, and the result is saved as ``B_<nums>.jpg`` under
    ``output_path``.

    The validity period is ``YYYY.MM.DD-YYYY.MM.DD``: it starts at the
    person's ``years``/``month``/``day`` and ends on the same month and day
    a random 10 to 30 years later.  Month and day are zero-padded to two
    digits.

    Args:
        person: object exposing ``public``, ``years``, ``month`` and
            ``day``; each field is flattened with ``to_str``, so both plain
            strings and tuples of strings are accepted.
        nums: identifier inserted into the output file name.
        template_path: directory that holds the template images.
        output_path: directory that receives the generated image.

    Raises:
        FileNotFoundError: if the chosen template cannot be read.
        OSError: if the output image cannot be written.
    """
    template_file = f'{template_path}/IDB{random.randint(1, 2)}.jpg'
    ori_image = cv2.imread(template_file)
    # cv2.imread signals failure by returning None rather than raising.
    if ori_image is None:
        raise FileNotFoundError('template image missing or unreadable: %s' % template_file)

    img = Image.fromarray(cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB))
    img = add_txt(img, 48, 515, 583, to_str(person.public))

    years_txt = to_str(person.years)
    month = to_str(person.month).zfill(2)
    day = to_str(person.day).zfill(2)
    end_year = int(years_txt) + random.randint(10, 30)
    date = f'{years_txt}.{month}.{day}-{end_year}.{month}.{day}'
    img = add_txt(img, 48, 515, 688, date)

    final = draw_txt(ori_image, img)
    out_file = f'{output_path}/B_{nums}.jpg'
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(out_file, final):
        raise OSError('failed to write image: %s' % out_file)
if __name__ == '__main__':
    # ROOT is read as a module-level global by the list-loading and font
    # helpers in this file, so it must be set before any of them is called.
    ROOT = '../PaddleOCR-2.7.1/dataset/'
    template_path = ROOT + 'IDTemplate'  # directory holding the templates
    output_path = ROOT + 'test'  # train or test
    mkdir(template_path)
    mkdir(output_path)

    # Generate 1000 random identities and render both card sides for each.
    for i in tqdm(range(0, 1000)):
        person = Person(
            name=create_name(),
            sex=sex_word(),
            national=national_name(),
            years=random.randint(1940, 2024),
            month=random.randint(1, 12),
            day=random.randint(1, 31),
            address1=address_line1(),
            address2=address_line2(),
            id_card=random_id_card(),
            public=public(),
        )
        serial = str(i).zfill(5)  # zero-padded index used in the file names
        make_maskA(person, serial, template_path, output_path)
        make_maskB(person, serial, template_path, output_path)