端对端文字识别训练集生成

最新推荐文章于 2023-08-28 09:36:21 发布

飞奔的猫

最新推荐文章于 2023-08-28 09:36:21 发布

阅读量745

点赞数 1

分类专栏：python、深度学习 | 文章标签：python

本文链接：https://blog.csdn.net/jylonger/article/details/91047922

版权

python 同时被 2 个专栏收录

62 篇文章 4 订阅

订阅专栏

深度学习

4 篇文章 0 订阅

订阅专栏

借用了网上的方法并做了一些修改，在此做个备份：

from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import glob
import numpy as np
import os,re
import cv2


'''
1. 从文字库随机选择10个字符
2. 生成图片
3. 随机使用函数
'''

# Pick a random run of consecutive characters from the corpus.
def sto_choice_from_info_str(quantity=10):
    """Return ``quantity`` consecutive characters from a random corpus offset.

    The corpus is the module-level string ``info_str``, which must be
    assigned before this function is called.

    Args:
        quantity: Number of characters to return. Defaults to 10.

    Returns:
        A substring of ``info_str`` exactly ``quantity`` characters long.

    Raises:
        ValueError: If ``quantity`` is negative or the corpus holds fewer
            than ``quantity`` characters.
    """
    # Largest offset that still leaves room for a full-length window.
    last_start = len(info_str) - quantity
    if quantity < 0 or last_start < 0:
        raise ValueError(
            'cannot take %r characters from a corpus of length %d'
            % (quantity, len(info_str))
        )
    start = random.randint(0, last_start)
    return info_str[start:start + quantity]

def random_word_color():
    """Return a grey RGB colour with a small random offset on each channel.

    A base colour is picked from a three-entry palette in which (54, 54, 54)
    appears twice, so it is chosen twice as often as (105, 105, 105). Each
    channel then gets an independent integer offset in ``[0, 10]``.

    Returns:
        A 3-tuple of ``int`` channel values.
    """
    palette = ((54, 54, 54), (54, 54, 54), (105, 105, 105))
    base = random.choice(palette)
    return tuple(channel + random.randint(0, 10) for channel in base)

# Build one background image by cropping a random window from a random file.
def create_an_image(bground_path, width, height):
    """Return a ``width`` x ``height`` crop of a randomly chosen background.

    Args:
        bground_path: Directory holding the background images. A trailing
            path separator is accepted but not required.
        width: Width of the crop in pixels.
        height: Height of the crop in pixels.

    Returns:
        The cropped image, as returned by ``Image.crop``.

    Raises:
        ValueError: If the chosen background is smaller than the requested
            crop in either dimension.
    """
    bground_choice = random.choice(os.listdir(bground_path))
    # os.path.join keeps the path valid whether or not the caller supplied a
    # trailing separator; plain concatenation silently produced a wrong path.
    bground = Image.open(os.path.join(bground_path, bground_choice))

    bg_width, bg_height = bground.size
    if bg_width < width or bg_height < height:
        # Same exception type random.randint would raise on the empty range,
        # but naming the offending file.
        raise ValueError(
            'background %r is %dx%d, smaller than the requested %dx%d crop'
            % (bground_choice, bg_width, bg_height, width, height)
        )

    # Top-left corner of the crop window, chosen so the window stays inside.
    x = random.randint(0, bg_width - width)
    y = random.randint(0, bg_height - height)
    return bground.crop((x, y, x + width, y + height))

# Placeholder for selecting which processing functions to apply.
def random_choice_in_process_func():
    """Do nothing and return ``None``; the selection logic is not implemented."""
    return None

# Blur helper (despite the name, nothing in here darkens the image).
def darken_func(image):
    """Apply one randomly selected smoothing filter to ``image``.

    The filter is drawn uniformly from ``ImageFilter.SMOOTH``,
    ``ImageFilter.SMOOTH_MORE`` and ``ImageFilter.GaussianBlur(radius=1.3)``.

    Args:
        image: Object exposing a ``filter`` method that accepts one of the
            filters above.

    Returns:
        Whatever ``image.filter`` returns for the chosen filter.
    """
    candidates = (
        ImageFilter.SMOOTH,
        ImageFilter.SMOOTH_MORE,
        ImageFilter.GaussianBlur(radius=1.3),
    )
    return image.filter(random.choice(candidates))


# Choose a random top-left anchor for the text from background and font size.
def random_x_y(bground_size, font_size):
    """Return a random ``(x, y)`` position at which to start drawing text.

    Args:
        bground_size: ``(width, height)`` of the background.
        font_size: Font size used for the text.

    Returns:
        A tuple ``(x, y)`` where ``x`` lies in
        ``[0, width - font_size * 10]`` and ``y`` lies in
        ``[0, int((height - font_size) / 4)]``.
    """
    bg_w, bg_h = bground_size
    # Reserve ten font-size-wide slots horizontally so the text does not
    # overflow the right edge.
    left = random.randint(0, bg_w - font_size * 10)
    # Only the upper quarter of the vertical slack is used.
    top = random.randint(0, int((bg_h - font_size) / 4))
    return left, top

def random_font_size():
    """Return a random integer font size between 20 and 26, both inclusive."""
    return random.randint(20, 26)

def random_font(font_path):
    """Return the path of a randomly chosen entry in ``font_path``.

    Args:
        font_path: Directory to pick from. A trailing path separator is
            accepted but not required.

    Returns:
        ``font_path`` joined with the name of one directory entry.

    Raises:
        IndexError: If the directory is empty (raised by ``random.choice``).
    """
    chosen = random.choice(os.listdir(font_path))
    # os.path.join keeps the result valid without a trailing separator;
    # plain concatenation glued the file name onto the directory name.
    return os.path.join(font_path, chosen)

# Rotate one image by a small random angle.
def createfont(fontlabels, fontsize, width, height):
    """Rotate a randomly chosen image from ``fontlabels`` by a tiny angle.

    The angle is a random multiple of 0.01 with magnitude at most 1.0 and a
    random sign; it is handed to ``cv2.getRotationMatrix2D``, which takes
    degrees. The rotation centre is ``(width / 2, height / 2)`` and the
    output canvas is ``(width + fontsize, height + fontsize)``.

    Args:
        fontlabels: Sequence of images accepted by ``cv2.warpAffine``.
        fontsize: Amount added to ``width`` and ``height`` for the output size.
        width: Reference width for the rotation centre and output size.
        height: Reference height for the rotation centre and output size.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    # The three draws happen in this fixed order (magnitude, sign, index) so
    # that a seeded run yields the same sequence as before.
    hundredths = random.randint(0, 100)
    sign_flag = random.randint(0, 1)
    pick = random.randint(0, len(fontlabels) - 1)

    angle = hundredths / 100 if sign_flag == 0 else -hundredths / 100

    pivot = (width / 2, height / 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1)
    out_size = (width + fontsize, height + fontsize)
    # Cubic interpolation; pixels outside the source replicate the border.
    return cv2.warpAffine(
        fontlabels[pick],
        rotation,
        out_size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )


def readfile(filename):
    """Load a UTF-8 text file into a dict keyed by zero-based line index.

    Args:
        filename: Path of the file to read.

    Returns:
        A dict mapping ``0, 1, 2, ...`` to the corresponding line with every
        newline character removed.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        stripped = [re.sub('\n', '', row) for row in handle]
    return dict(enumerate(stripped))

# Translate characters into their numeric codes.
def getnum(words, dic):
    """Encode ``words`` as a space-separated string of dictionary keys.

    For each element of ``words``, in order, every key of ``dic`` whose value
    equals that element is emitted (in the dictionary's iteration order).
    Elements with no matching value are skipped silently, so the result can
    contain fewer codes than ``words`` has characters.

    Args:
        words: String (or other sequence) of characters to encode.
        dic: Mapping from code to character, e.g. the result of ``readfile``.
            Its values must be hashable.

    Returns:
        The matching keys converted with ``str`` and joined by single
        spaces; an empty string when nothing matches.
    """
    # Build the value -> keys index once instead of rescanning the whole
    # dictionary for every character.
    codes_by_char = {}
    for code, char in dic.items():
        codes_by_char.setdefault(char, []).append(code)

    codes = [str(code) for char in words for code in codes_by_char.get(char, ())]
    return ' '.join(codes).strip()


def main(save_path, num, file, dic):
    """Render one synthetic text-line sample and record its label.

    A 10-character string is drawn onto a 280x150 background crop, the
    result is blurred and slightly rotated, a label line is written to
    ``file``, and a 32-row strip of the image is saved as a PNG.

    Args:
        save_path: Prefix (directory path including its trailing separator)
            prepended to ``<num>.png`` when saving the image.
        num: Sample identifier; converted with ``str`` for the file name.
        file: Writable text stream receiving ``"<num>.png <codes>\\n"``.
        dic: Code-to-character mapping passed to ``getnum``.
    """
    canvas_w, canvas_h = 280, 150

    # The helpers below consume random numbers; their call order is kept
    # fixed so a seeded run reproduces the same samples.
    text = sto_choice_from_info_str(10)
    canvas = create_an_image('./background/', canvas_w, canvas_h)
    size = random_font_size()
    font_file = random_font('./font/')
    ink = random_word_color()

    # Text starts at a fixed x of 10; y is derived from the canvas height
    # and the font size.
    text_top = (canvas_h - size) // 2
    typeface = ImageFont.truetype(font_file, size)
    ImageDraw.Draw(canvas).text((10, text_top), text, fill=ink, font=typeface)

    # Blur, then rotate. createfont receives the canvas size reduced by the
    # font size and adds the font size back for its output dimensions.
    canvas = darken_func(canvas)
    rotated = createfont(
        [np.array(canvas)], size, canvas_w - size, canvas_h - size
    )

    # The label line is written before the image file is saved.
    label = getnum(text, dic)
    file.write(str(num) + '.png ' + label + '\n')

    # Keep the 32 rows starting one pixel above the text's y position.
    strip = rotated[text_top - 1:text_top + 31, 0:canvas_w]
    cv2.imencode('.png', strip)[1].tofile(save_path + str(num) + '.png')

if __name__ == '__main__':
    # Fraction of the samples (the first ones generated) that go to the
    # validation set; the rest go to the training set.
    rint = 0.1
    dic = readfile('char_std_5990.txt')
    # Build the corpus: strip each line of info.txt, drop tab characters and
    # concatenate everything into one string. info_str is read as a
    # module-level global by sto_choice_from_info_str.
    with open('info.txt', 'r', encoding='utf-8') as file:
        info_list = [part.strip().replace('\t', '') for part in file.readlines()]
        info_str = ''.join(info_list)
    total = 400000
    # Label files are opened in append mode. Both are managed by the with
    # statement so the training labels are flushed and closed as well
    # (previously only the validation file was closed).
    with open('data_set/val_set.txt', 'a+', encoding='utf-8') as file, \
            open('data_set/train_set.txt', 'a+', encoding='utf-8') as file2:
        for num in range(0, total):
            # File name: random prefix plus a sequential counter.
            numname = '{}_{}'.format(random.randint(10000, 99999999), 10000000 + num)
            if num < total * rint:
                main('data_set/val_set/', numname, file, dic)
            else:
                main('data_set/train_set/', numname, file2, dic)
            if num % 1000 == 0:
                print('[%d/%d]' % (num, total))

效果如下：

飞奔的猫

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
端对端文字识别训练集生成

借用了网上的方法修改了下，做个备份：from PIL import Image, ImageDraw, ImageFont, ImageFilterimport randomimport globimport numpy as npimport os,reimport cv2'''1. 从文字库随机选择10个字符2. 生成图片3. 随机使用函数'''# 从文字库中...
复制链接

扫一扫