深度学习之自动生成文字定位坐标训练集

今天有空,写了一个随机生成文字定位训练集的脚本。逐个手工标注太费事,用别人现成的工具又不好修改,还是自己动手、丰衣足食。废话不多说,直接上代码:

# coding: utf-8
import cv2,os
import numpy as np
from PIL import Image,ImageDraw,ImageFont,ImageOps
import random,math,time
import sys,codecs,json

totalnum=10     # number of text labels generated by get_label_dict
mw=450         # width/height of the area in which glyph centres are placed
mh=450
width=60       # width/height of the canvas a single glyph is drawn on
height=60
hnum=2    # number of images generated for each label
zoomint=0.5    # font size as a fraction of the glyph canvas width

# Counter-clockwise rotation
def anticlockwise_rotate(point, center, angle):
    """Rotate ``point`` about ``center`` by ``angle`` degrees.

    The offset ``(dx, dy)`` from the centre is mapped to
    ``(dx*cos + dy*sin, dy*cos - dx*sin)``; with the y axis pointing
    down (image coordinates) this turns the point counter-clockwise.

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair to rotate around.
        angle: rotation angle in degrees.

    Returns:
        ``[x, y]`` with both coordinates rounded to ``int``.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    off_x = px - pivot_x
    off_y = py - pivot_y
    new_x = off_x * cos_t + off_y * sin_t + pivot_x
    new_y = off_y * cos_t - off_x * sin_t + pivot_y
    return [int(round(new_x)), int(round(new_y))]

# Clockwise rotation
def clockwise_rotate(point, center, angle):
    """Rotate ``point`` about ``center`` by ``angle`` degrees.

    The offset ``(dx, dy)`` from the centre is mapped to
    ``(dx*cos - dy*sin, dx*sin + dy*cos)``; with the y axis pointing
    down (image coordinates) this turns the point clockwise.

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair to rotate around.
        angle: rotation angle in degrees.

    Returns:
        ``[x, y]`` with both coordinates rounded to ``int``.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    off_x = px - pivot_x
    off_y = py - pivot_y
    new_x = off_x * cos_t - off_y * sin_t + pivot_x
    new_y = off_x * sin_t + off_y * cos_t + pivot_y
    return [int(round(new_x)), int(round(new_y))]

# Distort a glyph image by shifting its corners a few pixels
def distortion(img,value=5):
    """Build perspective-warped variants of ``img``.

    Every variant maps the corners of the module-level ``width`` x
    ``height`` rectangle onto a slightly displaced quadrilateral and is
    rendered on a ``(width, height)`` canvas.

    Args:
        img: image array accepted by ``cv2.warpPerspective``.
        value: corner displacement in pixels.

    Returns:
        A list of six images: ``img`` itself followed by the five warped
        variants, in the order of ``target_quads`` below.
    """
    # Corner order everywhere: top-left, bottom-left, bottom-right, top-right.
    source_quad = np.float32([[0, 0], [0, height], [width, height], [width, 0]])
    half = int(value / 2)
    target_quads = (
        # top edge moved left, bottom edge moved right
        [[-value, 0], [value, height], [width + value, height], [width - value, 0]],
        # top edge moved right, bottom edge moved left
        [[value, 0], [-value, height], [width - value, height], [width + value, 0]],
        # left edge moved up, right edge moved down
        [[0, -value], [0, height - value], [width, height + value], [width, value]],
        # left edge moved down, right edge moved up
        [[0, value], [0, height + value], [width, height - value], [width, -value]],
        # stretch: only the bottom corners move, by half of value
        [[0, 0], [0, height - half], [width, height + half], [width, 0]],
    )
    variants = [img]
    for quad in target_quads:
        matrix = cv2.getPerspectiveTransform(source_quad, np.float32(quad))
        variants.append(cv2.warpPerspective(img, matrix, (width, height)))
    return variants

# Pick a random background image
def create_an_image(bground_path):
    """Load a randomly chosen background image from ``bground_path``.

    Directory entries are tried in random order until one can be decoded,
    so stray non-image files or sub-directories no longer make the
    function return ``None``. The path is built with ``os.path.join`` and
    therefore works with or without a trailing separator.

    Args:
        bground_path: directory containing the background images.

    Returns:
        The image array returned by ``cv2.imread(..., 1)``.

    Raises:
        FileNotFoundError: if no entry in the directory can be read as an
            image (this includes an empty directory).
    """
    candidates = os.listdir(bground_path)
    # random.sample over the full length yields a shuffled copy, so the
    # first candidate is still chosen uniformly at random.
    for name in random.sample(candidates, len(candidates)):
        bground = cv2.imread(os.path.join(bground_path, name), 1)
        # cv2.imread signals an unreadable file by returning None.
        if bground is not None:
            return bground
    raise FileNotFoundError(
        "no readable background image in {!r}".format(bground_path)
    )

# Rotate a randomly chosen glyph variant
def createfont(fontlabels,fontsize):
    """Rotate one random image from ``fontlabels`` by a random angle.

    The angle is drawn from 0..50 degrees and negated with probability
    one half. Rotation is about ``(width / 2, height / 2)`` and the result
    is rendered on a ``(width + fontsize, height + fontsize)`` canvas with
    cubic interpolation and replicated borders.

    Args:
        fontlabels: sequence of glyph images to choose from.
        fontsize: number of pixels added to both canvas dimensions.

    Returns:
        ``(rotated, angle)``: the rotated image and the signed angle in
        degrees that was passed to ``cv2.getRotationMatrix2D``.
    """
    # Keep the three random draws in this order: magnitude, sign, variant.
    magnitude = random.randint(0, 50)
    negate = random.randint(0, 1)
    choice = random.randrange(len(fontlabels))
    angle = -magnitude if negate else magnitude

    pivot = (width / 2, height / 2)
    canvas = (width + fontsize, height + fontsize)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1)
    rotated = cv2.warpAffine(
        fontlabels[choice],
        matrix,
        canvas,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated, angle

# Randomly pick characters and join them into labels
def get_label_dict(totalnum):
    """Build ``totalnum`` random labels from ``char_std_5990.txt``.

    The file is read line by line (UTF-8); each stripped line is one
    candidate entry. Every label joins 4 to 8 entries sampled without
    replacement.

    Args:
        totalnum: number of labels to generate.

    Returns:
        A dict mapping ``0 .. totalnum - 1`` to the generated label string.
    """
    with open("char_std_5990.txt", encoding="utf-8") as charset_file:
        charset = [raw.strip() for raw in charset_file]
    # The label length is drawn first, then the sample itself.
    return {
        index: "".join(random.sample(charset, random.randint(4, 8)))
        for index in range(totalnum)
    }

# Corner coordinates of the glyph box after rotation
def rotate_point(center,angle,wh):
    """Return the rotated corner coordinates of a glyph box as text.

    The box corners lie ``wh / 3`` away from ``center`` along each axis
    and are all shifted by 5 pixels towards the origin. They are rotated
    about ``center`` with ``anticlockwise_rotate`` when ``angle >= 0`` and
    with ``clockwise_rotate`` (by ``-angle``) otherwise.

    Args:
        center: ``(x, y)`` centre of the glyph.
        angle: signed rotation angle in degrees.
        wh: size from which the box extent is derived.

    Returns:
        A comma-separated string ``"x1,y1,x2,y2,x3,y3,x4,y4"`` listing the
        corners in the order top-left, top-right, bottom-right,
        bottom-left.
    """
    extent = wh / 3
    left = center[0] - extent - 5
    right = center[0] + extent - 5
    top = center[1] - extent - 5
    bottom = center[1] + extent - 5

    if angle >= 0:
        rotate, degrees = anticlockwise_rotate, angle
    else:
        rotate, degrees = clockwise_rotate, -angle

    coords = []
    for corner in ((left, top), (right, top), (right, bottom), (left, bottom)):
        coords.extend(rotate(corner, center, degrees))
    return ",".join(map(str, coords))

# Script body: generate the synthetic text-localisation dataset.
#
# For every label, hnum images are produced. Each character is drawn on its
# own small canvas, warped, rotated and blended into a random background;
# the rotated box corners are written to a ground-truth file named after
# the image, and the image name is appended to train_list.txt.
labels=get_label_dict(totalnum)
# Font file used to render the glyphs
font_path="fonts/msyh.ttf"
# Font size is a fraction (zoomint) of the glyph canvas width
font = ImageFont.truetype(font_path, int(width * zoomint))
# Output root directory
path="D:/newfont"
# Millisecond timestamp of this run; embedded in every output file name
curtime=int(time.time() * 1000)

# Create the output directories once instead of re-checking them per glyph.
train_images=path+"/train_images"
train_gts=path + "/train_gts"
for out_dir in (path, train_images, train_gts):
    os.makedirs(out_dir, exist_ok=True)
purlist = os.path.join(path, "train_list.txt")

for i in labels:
    text = labels[i]
    # A label can be empty when the character file contains blank lines;
    # there is nothing to render, so do not list an image for it.
    if not text:
        continue
    # Horizontal distance between neighbouring glyph centres
    step = int(mw / len(text))
    # Generate hnum images for the same label
    for k in range(hnum):
        output = create_an_image('./img/')
        pu2 = "{}_{}_{}.png".format(i, k, curtime)
        purl1 = os.path.join(train_images, pu2)
        purltxt = os.path.join(train_gts, "{}.txt".format(pu2))
        # The context manager closes the ground-truth file even if a glyph
        # fails to render.
        with open(purltxt, 'a+', encoding='utf-8') as gt_file:
            for num, char in enumerate(text):
                # Draw the character in a random colour on a black canvas
                img = Image.new("RGB", (width, height),'black')
                draw = ImageDraw.Draw(img)
                mcolor=(random.randint(0,180),random.randint(43,255),random.randint(46,255))
                draw.text((10, 10), char, mcolor,font=font)
                img = np.array(img)
                # Random corner displacement for the warped variants
                ranqie=random.randint(4,9)
                fontlabels=distortion(img,value=ranqie)
                # Random enlargement of the rotated glyph canvas
                fontsize=random.randint(5,8)
                rotated,angle=createfont(fontlabels,fontsize)
                # All-white mask with the same height/width as the canvas
                # requested from createfont
                mask = 255 * np.ones((height+fontsize, width+fontsize), np.uint8)
                # Glyph centre: evenly spaced in x, random in y
                center = (step + step * num, random.randint(40, mh-40))
                # Corner coordinates of the rotated glyph box
                points=rotate_point(center,angle,rotated.shape[0])
                print(points)
                # NOTE(review): presumably the backgrounds in ./img/ are
                # large enough for the patch placed at `center` - confirm.
                output = cv2.seamlessClone(rotated, output, mask, center, cv2.MIXED_CLONE)
                gt_file.write(points + ' ' + char + '\n')
        # Encode the finished image once, after all glyphs are blended in;
        # earlier intermediate writes would only be overwritten.
        cv2.imencode('.png', output)[1].tofile(purl1)
        with open(purlist, 'a+', encoding='utf-8') as list_file:
            list_file.write(pu2 + '\n')

字体我用的是系统自带的微软雅黑(msyh.ttf)。字符表 char_std_5990.txt 我就不上传了,里面大约有 6000 个文字,请自行搜集(代码按行读取,每行作为一个条目),格式如下:

效果如下:

用生成的数据训练模型后,检测了一下文字定位的结果,感觉还不错^_^:

 

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值