今日有空,写了个随机生成文字定位训练集的脚本。一个一个手工标注太费事,用别人现成的工具又不好修改,还是自己动手,丰衣足食。废话不多说,直接上代码:
# coding: utf-8
import cv2,os
import numpy as np
from PIL import Image,ImageDraw,ImageFont,ImageOps
import random,math,time
import sys,codecs,json
# Generation settings shared by the functions and the script below.
totalnum = 10  # number of random text labels to generate
mw = mh = 450  # width / height of the area in which glyphs are placed
width = height = 60  # width / height of a single glyph tile
hnum = 2  # number of images generated for each label
zoomint = 0.5  # font size as a fraction of the glyph tile width
# Anticlockwise rotation
def anticlockwise_rotate(point, center, angle):
    """Rotate ``point`` around ``center`` by ``angle`` degrees.

    The direction is anticlockwise when viewed in image coordinates
    (y axis pointing down): rotating (10, 0) about the origin by 90
    degrees yields (0, -10).

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair used as the pivot.
        angle: rotation angle in degrees.

    Returns:
        ``[x, y]`` list holding the rotated coordinates rounded to ints.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point relative to the pivot.
    dx = px - pivot_x
    dy = py - pivot_y
    rotated_x = round(dx * cos_t + dy * sin_t + pivot_x)
    rotated_y = round(dy * cos_t - dx * sin_t + pivot_y)
    return [int(rotated_x), int(rotated_y)]
# Clockwise rotation
def clockwise_rotate(point, center, angle):
    """Rotate ``point`` around ``center`` by ``angle`` degrees.

    The direction is clockwise when viewed in image coordinates
    (y axis pointing down): rotating (10, 0) about the origin by 90
    degrees yields (0, 10).

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair used as the pivot.
        angle: rotation angle in degrees.

    Returns:
        ``[x, y]`` list holding the rotated coordinates rounded to ints.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point relative to the pivot.
    dx = px - pivot_x
    dy = py - pivot_y
    rotated_x = round(dx * cos_t - dy * sin_t + pivot_x)
    rotated_y = round(dx * sin_t + dy * cos_t + pivot_y)
    return [int(rotated_x), int(rotated_y)]
# Distort a glyph image by skewing it a few pixels in several directions
def distortion(img, value=5):
    """Build perspective-warped variants of a glyph tile.

    The source quad is the full ``width`` x ``height`` tile (module-level
    globals), listed as top-left, bottom-left, bottom-right, top-right.
    Each destination quad moves those corners by ``value`` pixels.

    Args:
        img: image array passed to ``cv2.warpPerspective``.
        value: corner displacement in pixels (default 5).

    Returns:
        A list of six images: ``img`` itself followed by the five warped
        versions, each rendered at ``(width, height)``.
    """
    tile_quad = np.float32([[0, 0], [0, height], [width, height], [width, 0]])
    half = int(value / 2)
    # Destination corners, in the same order as ``tile_quad``.
    target_quads = (
        # lean left
        [[-value, 0], [value, height], [width + value, height], [width - value, 0]],
        # lean right
        [[value, 0], [-value, height], [width - value, height], [width + value, 0]],
        # slope down towards the right
        [[0, -value], [0, height - value], [width, height + value], [width, value]],
        # slope down towards the left
        [[0, value], [0, height + value], [width, height - value], [width, -value]],
        # stretch: only the two bottom corners move, by half of ``value``
        [[0, 0], [0, height - half], [width, height + half], [width, 0]],
    )
    variants = [img]
    for quad in target_quads:
        transform = cv2.getPerspectiveTransform(tile_quad, np.float32(quad))
        variants.append(cv2.warpPerspective(img, transform, (width, height)))
    return variants
# Pick a random background image
def create_an_image(bground_path):
    """Load a randomly chosen file from ``bground_path`` as a colour image.

    Args:
        bground_path: directory containing the background images. It may
            be given with or without a trailing path separator.

    Returns:
        The image returned by ``cv2.imread`` with flag ``1`` (colour).
        NOTE(review): per the OpenCV documentation ``imread`` yields
        ``None`` for files it cannot decode, so the directory should
        contain only readable images.

    Raises:
        IndexError: if the directory is empty (from ``random.choice``).
    """
    candidates = os.listdir(bground_path)
    chosen = random.choice(candidates)
    # os.path.join keeps the path valid whether or not the directory
    # argument ends with a separator.
    return cv2.imread(os.path.join(bground_path, chosen), 1)
# Rotate a glyph image
def createfont(fontlabels, fontsize):
    """Pick one glyph variant at random and rotate it by a random angle.

    The angle magnitude is drawn from 0..50 degrees and its sign is
    chosen at random. Rotation is about the centre of the
    ``width`` x ``height`` tile (module-level globals).

    Args:
        fontlabels: non-empty sequence of glyph images to choose from.
        fontsize: number of pixels added to both output dimensions.

    Returns:
        ``(rotated, angle)``: the rotated image of size
        ``(width + fontsize, height + fontsize)`` and the signed angle in
        degrees that was passed to ``cv2.getRotationMatrix2D``.
    """
    # Keep the three draws in this order so seeded runs are reproducible.
    magnitude = random.randint(0, 50)
    flip_sign = random.randint(0, 1)
    choice = random.randint(0, len(fontlabels) - 1)
    angle = magnitude if flip_sign == 0 else -magnitude

    pivot = (width / 2, height / 2)  # tile centre
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1)
    out_size = (width + fontsize, height + fontsize)
    rotated = cv2.warpAffine(
        fontlabels[choice],
        matrix,
        out_size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated, angle
# Randomly pick characters and join them into text labels
def get_label_dict(totalnum):
    """Generate ``totalnum`` random labels from ``char_std_5990.txt``.

    The character file is read from the current working directory, one
    entry per line. Blank lines are skipped so that a label never
    contains an empty "character", and the file is decoded with
    ``utf-8-sig`` so a leading byte-order mark does not stick to the
    first entry.

    Args:
        totalnum: number of labels to generate.

    Returns:
        Dict mapping ``0 .. totalnum - 1`` to a string built from 4 to 8
        distinct entries sampled from the character file.

    Raises:
        OSError: if the character file cannot be opened.
        ValueError: if the file holds fewer entries than a label needs
            (raised by ``random.sample``).
    """
    charset = []
    with open("char_std_5990.txt", encoding="utf-8-sig") as file:
        for line in file:
            entry = line.strip()
            if entry:
                charset.append(entry)

    result_labels = {}
    for i in range(totalnum):
        label_len = random.randint(4, 8)
        result_labels[i] = "".join(random.sample(charset, label_len))
    return result_labels
# Compute the four corner coordinates of a glyph box after rotation
def rotate_point(center, angle, wh):
    """Return the rotated corners of the box around ``center`` as text.

    The unrotated box extends ``wh / 3`` from the centre on each axis and
    every corner is then shifted by -5 on both axes (presumably empirical
    tuning of the box to the rendered glyph; confirm before changing).
    Angles >= 0 are rotated with ``anticlockwise_rotate``, negative
    angles with ``clockwise_rotate`` using the absolute value.

    Args:
        center: ``(x, y)`` centre of the glyph in the output image.
        angle: signed rotation angle in degrees.
        wh: size of the glyph patch; one third of it is the half-extent.

    Returns:
        Comma-separated string ``"x1,y1,x2,y2,x3,y3,x4,y4"`` with the
        corners in the order top-left, top-right, bottom-right,
        bottom-left.
    """
    extent = wh / 3
    left = center[0] - extent - 5
    right = center[0] + extent - 5
    top = center[1] - extent - 5
    bottom = center[1] + extent - 5

    # Select the rotation helper once; both take a non-negative angle here.
    if angle >= 0:
        rotate, magnitude = anticlockwise_rotate, angle
    else:
        rotate, magnitude = clockwise_rotate, -angle

    # Output order: top-left, top-right, bottom-right, bottom-left.
    ordered_corners = ((left, top), (right, top), (right, bottom), (left, bottom))
    flat = []
    for corner in ordered_corners:
        flat.extend(rotate(corner, center, magnitude))
    return ",".join(map(str, flat))
# Script entry: generate the synthetic text-localisation training set.
# For every label, ``hnum`` images are produced. Each image gets
#   <path>/train_images/<name>.png      - background with blended glyphs
#   <path>/train_gts/<name>.png.txt     - one "x1,y1,...,x4,y4 char" line per glyph
# and its file name is appended to <path>/train_list.txt.
labels = get_label_dict(totalnum)
# Font file
font_path = "fonts/msyh.ttf"
# Font size is a fraction of the glyph tile width
font = ImageFont.truetype(font_path, int(width * zoomint))
# Output root directory
path = "D:/newfont"
# Millisecond timestamp, embedded in the file names of this run
curtime = int(time.time() * 1000)

# Create the output directories once instead of re-checking per glyph.
train_images = path + "/train_images"
train_gts = path + "/train_gts"
for out_dir in (path, train_images, train_gts):
    os.makedirs(out_dir, exist_ok=True)
purlist = os.path.join(path, "train_list.txt")

for i in labels:
    text = labels[i]
    # Horizontal spacing between glyph centres for this label
    step = int(mw / len(text))
    # Generate ``hnum`` images for the same label
    for k in range(hnum):
        output = create_an_image('./img/')
        pu2 = "{}_{}_{}.png".format(i, k, curtime)
        purl1 = os.path.join(train_images, pu2)
        purltxt = os.path.join(train_gts, "{}.txt".format(pu2))
        # The context manager guarantees the annotation file is closed.
        with open(purltxt, 'a+', encoding='utf-8') as gt_file:
            # Render and blend the characters one by one
            for num, char in enumerate(text):
                img = Image.new("RGB", (width, height), 'black')  # black tile
                draw = ImageDraw.Draw(img)
                mcolor = (random.randint(0, 180), random.randint(43, 255), random.randint(46, 255))
                draw.text((10, 10), char, mcolor, font=font)
                img = np.array(img)
                # Random skew amount for the perspective variants
                ranqie = random.randint(4, 9)
                fontlabels = distortion(img, value=ranqie)
                # Random enlargement of the rotated patch
                fontsize = random.randint(5, 8)
                rotated, angle = createfont(fontlabels, fontsize)
                mask = 255 * np.ones((height + fontsize, width + fontsize), np.uint8)
                # Glyph centre: evenly spaced in x, random in y.
                # NOTE(review): assumes the background is large enough for a
                # patch centred here to fit inside it - TODO confirm.
                cx = step * (num + 1)
                cy = random.randint(40, mh - 40)  # vertical jitter range
                center = (cx, cy)
                # Corner coordinates of the rotated glyph box
                points = rotate_point(center, angle, rotated.shape[0])
                print(points)
                # Blend the glyph into the background
                output = cv2.seamlessClone(rotated, output, mask, center, cv2.MIXED_CLONE)
                gt_file.write('' + points + ' ' + char + '\n')
        # Write the finished image once all glyphs are blended;
        # imencode + tofile is used instead of cv2.imwrite.
        cv2.imencode('.png', output)[1].tofile(purl1)
        # Register the image exactly once in the list file
        with open(purlist, 'a+', encoding='utf-8') as list_file:
            list_file.write('' + pu2 + '\n')
字体我用的是系统自带的微软雅黑(msyh.ttf);字符集文件 char_std_5990.txt 我就不上传了,大约 6000 个文字,代码按行读取,请自行搜集,格式如下:
效果如下:
用生成的数据训练模型后,检测了一下文字定位的识别结果,感觉还不错^_^: