深度学习之自动生成文字定位坐标训练集

最新推荐文章于 2023-01-03 18:45:00 发布

飞奔的猫

最新推荐文章于 2023-01-03 18:45:00 发布

阅读量1.1k

点赞数 1

分类专栏： python 深度学习文章标签： python 深度学习

本文链接：https://blog.csdn.net/jylonger/article/details/105712516

版权

python 同时被 2 个专栏收录

62 篇文章 4 订阅

订阅专栏

深度学习

4 篇文章 0 订阅

订阅专栏

今日有空，写了个随机生成文字定位训练集的脚本。一个一个手工标注太费事，用别人现成的工具又不好修改，还是自己动手，丰衣足食。废话不多说，直接上代码：

# coding: utf-8
import cv2,os
import numpy as np
from PIL import Image,ImageDraw,ImageFont,ImageOps
import random,math,time
import sys,codecs,json

# Number of random labels (text strips) to generate.
totalnum = 10
# Width / height of the area the text is laid out in; used to derive the
# centre coordinates of each glyph.
mw = 450
mh = 450
# Width / height of the canvas a single glyph is drawn on.
width = 60
height = 60
# How many images are generated for each label.
hnum = 2
# Font size as a fraction of the glyph canvas width.
zoomint = 0.5

def anticlockwise_rotate(point, center, angle):
    """Rotate ``point`` around ``center`` anticlockwise in image coordinates.

    Image coordinates have the y axis pointing down, so rotating ``(1, 0)``
    about the origin by 90 degrees yields ``[0, -1]``.

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair used as the pivot.
        angle: Rotation angle in degrees.

    Returns:
        ``[x, y]`` list with both coordinates rounded to integers.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point relative to the pivot.
    dx = px - pivot_x
    dy = py - pivot_y
    rotated_x = round(dx * cos_t + dy * sin_t + pivot_x)
    rotated_y = round(dy * cos_t - dx * sin_t + pivot_y)
    return [int(rotated_x), int(rotated_y)]

def clockwise_rotate(point, center, angle):
    """Rotate ``point`` around ``center`` clockwise in image coordinates.

    Image coordinates have the y axis pointing down, so rotating ``(1, 0)``
    about the origin by 90 degrees yields ``[0, 1]``.

    Args:
        point: ``(x, y)`` pair to rotate.
        center: ``(x, y)`` pair used as the pivot.
        angle: Rotation angle in degrees.

    Returns:
        ``[x, y]`` list with both coordinates rounded to integers.
    """
    px, py = point
    pivot_x, pivot_y = center
    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Offsets of the point relative to the pivot.
    dx = px - pivot_x
    dy = py - pivot_y
    rotated_x = round(dx * cos_t - dy * sin_t + pivot_x)
    rotated_y = round(dx * sin_t + dy * cos_t + pivot_y)
    return [int(rotated_x), int(rotated_y)]

def distortion(img,value=5):
    """Build perspective-warped variants of a glyph image.

    The source quad is always the full ``width`` x ``height`` glyph canvas
    (module-level globals); each variant maps it onto a differently skewed
    target quad and renders the result back onto a canvas of the same size.

    Args:
        img: Glyph image to warp (passed straight to ``cv2.warpPerspective``).
        value: Skew amount in pixels (default 5).

    Returns:
        A list of six images: the untouched ``img`` followed by the five
        warped variants, in the order listed below.
    """
    source_quad = np.float32([[0, 0], [0, height], [width, height], [width, 0]])
    half = int(value / 2)
    # Corner order in every quad: top-left, bottom-left, bottom-right, top-right.
    target_quads = (
        # Top edge shifted left, bottom edge shifted right.
        [[-value, 0], [value, height], [width + value, height], [width - value, 0]],
        # Top edge shifted right, bottom edge shifted left.
        [[value, 0], [-value, height], [width - value, height], [width + value, 0]],
        # Left edge shifted up, right edge shifted down.
        [[0, -value], [0, height - value], [width, height + value], [width, value]],
        # Left edge shifted down, right edge shifted up.
        [[0, value], [0, height + value], [width, height - value], [width, -value]],
        # Stretch: only the bottom corners move, by half the skew amount.
        [[0, 0], [0, height - half], [width, height + half], [width, 0]],
    )

    variants = [img]
    for quad in target_quads:
        matrix = cv2.getPerspectiveTransform(source_quad, np.float32(quad))
        variants.append(cv2.warpPerspective(img, matrix, (width, height)))
    return variants

def create_an_image(bground_path):
    """Load one randomly chosen background image from a directory.

    Args:
        bground_path: Directory containing the candidate background images.
            A trailing path separator is optional.

    Returns:
        The image as returned by ``cv2.imread(..., 1)`` (flag 1 requests a
        3-channel colour image).
        NOTE(review): ``cv2.imread`` yields ``None`` rather than raising when
        the chosen entry is not a readable image - callers should check.

    Raises:
        IndexError: If the directory is empty (from ``random.choice``).
    """
    candidates = os.listdir(bground_path)
    chosen = random.choice(candidates)
    # os.path.join instead of string concatenation, so the directory works
    # with or without a trailing separator.
    return cv2.imread(os.path.join(bground_path, chosen), 1)

def createfont(fontlabels,fontsize):
    """Pick one glyph variant at random and rotate it by a random angle.

    The rotation pivot is the centre of the ``width`` x ``height`` glyph
    canvas (module-level globals); the output canvas is enlarged by
    ``fontsize`` pixels in both directions and padded by replicating the
    border pixels.

    Args:
        fontlabels: Sequence of glyph images to choose from.
        fontsize: Number of pixels added to the output canvas width/height.

    Returns:
        ``(rotated, angle)``: the rotated image and the signed angle in
        degrees (between -50 and 50) that was passed to OpenCV.
    """
    # Keep the three random draws in this order: magnitude, sign, variant.
    magnitude = random.randint(0, 50)
    flip_sign = random.randint(0, 1) != 0
    variant_index = random.randint(0, len(fontlabels) - 1)
    angle = -magnitude if flip_sign else magnitude

    pivot = (width / 2, height / 2)
    # NOTE(review): OpenCV documents positive angles as counter-clockwise -
    # confirm against getRotationMatrix2D.
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1)
    canvas_size = (width + fontsize, height + fontsize)
    rotated = cv2.warpAffine(
        fontlabels[variant_index],
        rotation,
        canvas_size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated, angle

def get_label_dict(totalnum):
    """Build random labels from the character list in ``char_std_5990.txt``.

    Every line of the file (whitespace-stripped) is one entry of the
    character pool. Each label is made of 4 to 8 distinct pool entries
    joined into a single string.

    Args:
        totalnum: Number of labels to generate.

    Returns:
        Dict mapping ``0 .. totalnum - 1`` to the generated label strings.

    Raises:
        ValueError: If the pool holds fewer entries than the drawn label
            length (from ``random.sample``).
    """
    with open("char_std_5990.txt", encoding="utf-8") as handle:
        pool = [row.strip() for row in handle]

    generated = {}
    for index in range(totalnum):
        length = random.randint(4, 8)
        generated[index] = "".join(random.sample(pool, length))
    return generated

def rotate_point(center,angle,wh):
    """Return the four rotated corner coordinates of a glyph box as text.

    The unrotated box extends ``wh / 3`` from ``center`` in every direction
    and is then shifted by 5 pixels towards the top-left before its corners
    are rotated around ``center``.

    Args:
        center: ``(x, y)`` pivot / centre of the glyph.
        angle: Signed rotation in degrees; non-negative values use
            ``anticlockwise_rotate``, negative values use
            ``clockwise_rotate`` with the absolute angle.
        wh: Size of the rotated glyph image the box is derived from.

    Returns:
        Comma-separated string of eight integers in the order top-left,
        top-right, bottom-right, bottom-left (``x,y`` for each corner).
    """
    half = wh / 3
    left = center[0] - half - 5
    right = center[0] + half - 5
    top = center[1] - half - 5
    bottom = center[1] + half - 5

    if angle >= 0:
        rotate, degrees = anticlockwise_rotate, angle
    else:
        rotate, degrees = clockwise_rotate, -angle

    coords = []
    # Output order: top-left, top-right, bottom-right, bottom-left.
    for corner in ((left, top), (right, top), (right, bottom), (left, bottom)):
        coords.extend(rotate(corner, center, degrees))
    return ",".join(map(str, coords))

# ---- Script body: generate the synthetic text-localisation training set ----
# Random labels keyed by index.
labels=get_label_dict(totalnum)
# Font file used to draw every glyph.
font_path="fonts/msyh.ttf"
# Font size is a fixed fraction of the glyph canvas width.
font = ImageFont.truetype(font_path, int(width * zoomint))
# Output root directory.
path="D:/newfont"
# Millisecond timestamp of this run; it is part of every output file name.
curtime=int(time.time() * 1000)

# Output locations are loop-invariant, so create them once up front instead
# of re-checking them for every single character.
train_images = path + "/train_images"
train_gts = path + "/train_gts"
purlist = os.path.join(path, "train_list.txt")
for out_dir in (path, train_images, train_gts):
    os.makedirs(out_dir, exist_ok=True)

for i, label in labels.items():
    # Each label is rendered hnum times, each time on a fresh background.
    for k in range(hnum):
        output = create_an_image('./img/')
        if output is None:
            # cv2.imread returns None for unreadable files; fail with a clear
            # message instead of an opaque OpenCV error further down.
            raise RuntimeError("could not read a background image from ./img/")

        pu2 = "{}_{}_{}.png".format(i, k, curtime)
        purl1 = os.path.join(train_images, pu2)
        # Ground-truth file is named after the image, with ".txt" appended.
        purltxt = os.path.join(train_gts, "{}.txt".format(pu2))

        gt_lines = []
        for num, char in enumerate(label):
            # Draw the character in a random colour on a black canvas.
            img = Image.new("RGB", (width, height), 'black')
            draw = ImageDraw.Draw(img)
            mcolor = (random.randint(0, 180), random.randint(43, 255), random.randint(46, 255))
            draw.text((10, 10), char, mcolor, font=font)
            img = np.array(img)
            # Random skew amount for the perspective-warped variants.
            ranqie = random.randint(4, 9)
            fontlabels = distortion(img, value=ranqie)
            # Random enlargement of the rotated glyph canvas.
            fontsize = random.randint(5, 8)
            rotated, angle = createfont(fontlabels, fontsize)
            # All-white mask: the whole rotated canvas is cloned.
            mask = 255 * np.ones((height + fontsize, width + fontsize), np.uint8)
            # Glyph centres are spread evenly along x; y is random.
            # NOTE(review): the last glyph's cx is close to mw, so backgrounds
            # presumably need to be wider than mw - confirm.
            cx = int(mw / len(label)) * (num + 1)
            cy = random.randint(40, mh - 40)
            center = (cx, cy)
            # Corner coordinates of the rotated glyph box.
            points = rotate_point(center, angle, rotated.shape[0])
            print(points)
            # Blend the glyph into the background.
            output = cv2.seamlessClone(rotated, output, mask, center, cv2.MIXED_CLONE)
            gt_lines.append(points + ' ' + char + '\n')

        if gt_lines:
            # Context manager guarantees the handle is closed (the handles
            # were previously leaked).
            with open(purltxt, 'a', encoding='utf-8') as gt_file:
                gt_file.writelines(gt_lines)
            # Write the finished image once, after all glyphs are blended in.
            cv2.imencode('.png', output)[1].tofile(purl1)

        with open(purlist, 'a', encoding='utf-8') as list_file:
            list_file.write(pu2 + '\n')
#cv2.waitKey(0)

字体我用的是系统自带的微软雅黑（msyh.ttf，放在 fonts/ 目录下）；文字表 char_std_5990.txt 我就不上传了，大约 6000 个文字，自己搜集一下即可，格式为每行一个字，如下：

效果如下：

通过模型训练后检测下识别的定位结果，感觉还不错^_^:

飞奔的猫

关注

1
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
深度学习之自动生成文字定位坐标训练集

今日有空，写了下随机生成文字定位训练集，一个一个标注太费事了，用别人现成的又不好修改，还是自己动手，丰衣足食，废话不多说，直接上代码：#conding:utf-8import cv2,osimport numpy as npfrom PIL import Image,ImageDraw,ImageFont,ImageOpsimport random,math,timeimport ...
复制链接

扫一扫

专栏目录