Python 随机合成字符串图像，并将字符串的边界框坐标保存为 YOLO 标注格式

HowHardYouAre

已于 2022-06-08 15:53:22 修改

阅读量389

点赞数

分类专栏：图像处理　文章标签：python、计算机视觉

于 2022-06-07 16:59:00 首次发布

本文链接：https://blog.csdn.net/feelingjun/article/details/125168911

版权

图像处理专栏收录该内容

24 篇文章 2 订阅

订阅专栏

import os
from cv2 import WINDOW_NORMAL
import numpy as np
import cv2 as cv
import random
import PIL
from PIL import Image, ImageFont, ImageDraw
from tqdm import tqdm

# Width and height of every synthetic image, in pixels.
img_width = 1280
img_height = 960

# Character pool for the random strings: a-z, A-Z, the digits 0-9 and '-'.
# The digit '8' was missing from the original list, so no generated string
# could ever contain it.
words = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-']

def draw_text_cv(img, gen_word):
    """Draw ``gen_word`` on ``img`` with a random Hershey font and display it.

    Debug helper. With OpenCV it is hard to know the position and size the
    rendered text actually occupies, so this variant returns no bounding box.
    The text is drawn in place at a fixed origin, the result is shown in a
    window, and the call blocks until a key is pressed.

    Args:
        img: Image array accepted by ``cv.putText``; modified in place.
        gen_word: Text to render.

    Returns:
        The same ``img`` object, now containing the text.
    """
    hershey_fonts = (
        cv.FONT_HERSHEY_COMPLEX,
        cv.FONT_HERSHEY_DUPLEX,
        cv.FONT_HERSHEY_PLAIN,
        cv.FONT_HERSHEY_SIMPLEX,
        cv.FONT_HERSHEY_TRIPLEX,
    )
    random_font = random.choice(hershey_fonts)
    print('random font:', random_font)

    font_scale = 6
    # Fixed origin (100, 200), colour (10, 1, 100), stroke thickness 4.
    cv.putText(img, gen_word, (100, 200), random_font, font_scale, (10, 1, 100), 4)

    cv.namedWindow('img', WINDOW_NORMAL)
    cv.imshow('img', img)
    cv.waitKey(0)  # 0 = wait for a key press with no timeout
    return img

def draw_text_PIL(img, gen_word, font_paths=None):
    """Render ``gen_word`` in black at a random position and report its box.

    Args:
        img: Array accepted by ``Image.fromarray`` (the caller in this script
            passes an ``(img_height, img_width, 3)`` uint8 array).
        gen_word: Text to render.
        font_paths: Optional sequence of TrueType font files to choose from.
            Defaults to four Windows system fonts, so on other platforms a
            list must be supplied.

    Returns:
        ``(image, xywh)``: ``image`` is the PIL image containing the text and
        ``xywh`` is ``[center_x, center_y, width, height]`` of the rendered
        text in pixels.
    """
    if font_paths is None:
        # FangSong, SimHei, KaiTi and LiSu. symbol.ttf was tried by the
        # original author and noted as unsupported.
        font_paths = (
            'C:\\Windows\\Fonts\\simfang.ttf',
            'C:\\Windows\\Fonts\\simhei.ttf',
            'C:\\Windows\\Fonts\\simkai.ttf',
            'C:\\Windows\\Fonts\\SIMLI.TTF',
        )
    font_path = random.choice(font_paths)

    # Font size is drawn uniformly from [100, 240).
    font_size = random.randrange(100, 240)
    font = ImageFont.truetype(font_path, font_size)

    canvas = Image.fromarray(img)
    draw = ImageDraw.Draw(canvas)

    # Random anchor point handed to draw.text().
    x = random.randrange(100, 300)
    y = random.randrange(100, 600)
    draw.text((x, y), gen_word, fill=(0, 0, 0), font=font)

    # ImageFont.getsize()/getoffset() no longer exist in Pillow >= 10.
    # textbbox() gives the absolute (left, top, right, bottom) box of the
    # text drawn at the same anchor with the same font.
    left, top, right, bottom = draw.textbbox((x, y), gen_word, font=font)
    width = right - left
    height = bottom - top
    xywh = [int(left + width / 2), int(top + height / 2), width, height]
    return canvas, xywh

def gen_word(words):
    """Return a random string of 6, 7 or 8 items drawn from ``words``.

    Each position is chosen independently and uniformly; items are converted
    with ``str`` before being concatenated.

    Args:
        words: Non-empty sequence of candidate characters.

    Returns:
        The generated string.

    Raises:
        IndexError: If ``words`` is empty.
    """
    word_len = random.choice((6, 7, 8))
    # The previous implementation indexed with ``np.uint8(v * len(words)) - 1``.
    # That only worked because index -1 wraps to the last item, and with
    # NumPy 2 scalar arithmetic ``uint8(0) - 1`` becomes 255, an out-of-range
    # index. random.choice() has neither problem.
    return ''.join(str(random.choice(words)) for _ in range(word_len))




if __name__ == '__main__':
    # Number of generation attempts. Attempts whose text box overflows the
    # canvas are skipped, so fewer files may end up on disk.
    synthetic_img_num = 2000
    # os.path.join keeps the output location valid on non-Windows systems,
    # where a literal '.\\img\\' prefix would not name a directory.
    save_syn_img_path = os.path.join('.', 'img')
    os.makedirs(save_syn_img_path, exist_ok=True)

    for i in tqdm(range(synthetic_img_num)):
        # Light, randomly tinted background: each channel is drawn from
        # [200, 255).
        background = [random.randrange(200, 255) for _ in range(3)]
        img = np.full((img_height, img_width, 3), background, dtype=np.uint8)

        word = gen_word(words)
        out_img, xywh = draw_text_PIL(img, word)
        center_x, center_y, box_w, box_h = xywh
        canvas_w, canvas_h = out_img.size
        # Drop samples whose box crosses the right or bottom edge.
        if center_x + box_w / 2 > canvas_w or center_y + box_h / 2 > canvas_h:
            continue

        # One label line: class id 0, then box centre and size, each divided
        # by the image width or height.
        fields = (
            0,
            center_x / img_width,
            center_y / img_height,
            box_w / img_width,
            box_h / img_height,
        )
        data = ' '.join(str(value) for value in fields) + '\n'

        out_Img = np.array(out_img)  # PIL.Image -> ndarray for cv.imwrite
        out_stem = os.path.join(save_syn_img_path, 'img_' + str(i))
        cv.imwrite(out_stem + '.jpg', out_Img)
        with open(out_stem + '.txt', 'w') as f:
            f.write(data)

第二种合成方法：不再生成纯色背景，而是在已有的图像上叠加随机字符串。

import os
from cv2 import WINDOW_NORMAL
import numpy as np
import cv2 as cv
import random
import PIL
from PIL import Image, ImageFont, ImageDraw
from tqdm import tqdm

# Nominal width and height of the synthetic images, in pixels.
img_width = 1280
img_height = 960

# Character pool for the random strings: a-z, A-Z, the digits 0-9 and '-'.
# The digit '8' was missing from the original list, so no generated string
# could ever contain it.
words = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-']

def get_img_name_by_dir(file_dir, file_property):
    """Collect files under ``file_dir`` whose extension is in ``file_property``.

    The tree is walked recursively. Each entry is the file's path relative to
    ``file_dir``, so joining it back onto ``file_dir`` always names the file
    (previously only the bare name was returned, which lost the location of
    files found in sub-directories). For files directly inside ``file_dir``
    the entry is just the file name.

    Args:
        file_dir: Directory to search.
        file_property: Container of accepted extensions, each including its
            leading dot (e.g. ``['.jpg']``). Matching is case-sensitive.

    Returns:
        List of relative paths in ``os.walk`` order.
    """
    matches = []
    for root, _dirs, files in os.walk(file_dir):
        for name in files:
            if os.path.splitext(name)[1] in file_property:
                full_path = os.path.join(root, name)
                matches.append(os.path.relpath(full_path, file_dir))
    return matches

def draw_text_cv(img, gen_word):
    """Draw ``gen_word`` on ``img`` with a random Hershey font and display it.

    Debug helper. With OpenCV it is hard to know the position and size the
    rendered text actually occupies, so this variant returns no bounding box.
    The text is drawn in place at a fixed origin, the result is shown in a
    window, and the call blocks until a key is pressed.

    Args:
        img: Image array accepted by ``cv.putText``; modified in place.
        gen_word: Text to render.

    Returns:
        The same ``img`` object, now containing the text.
    """
    hershey_fonts = (
        cv.FONT_HERSHEY_COMPLEX,
        cv.FONT_HERSHEY_DUPLEX,
        cv.FONT_HERSHEY_PLAIN,
        cv.FONT_HERSHEY_SIMPLEX,
        cv.FONT_HERSHEY_TRIPLEX,
    )
    random_font = random.choice(hershey_fonts)
    print('random font:', random_font)

    font_scale = 6
    # Fixed origin (100, 200), colour (10, 1, 100), stroke thickness 4.
    cv.putText(img, gen_word, (100, 200), random_font, font_scale, (10, 1, 100), 4)

    cv.namedWindow('img', WINDOW_NORMAL)
    cv.imshow('img', img)
    cv.waitKey(0)  # 0 = wait for a key press with no timeout
    return img

def draw_text_PIL(img, gen_word, font_paths=None):
    """Render ``gen_word`` in black at a random position on a PIL image.

    Args:
        img: PIL image to draw on; it is modified in place.
        gen_word: Text to render.
        font_paths: Optional sequence of TrueType font files to choose from.
            Defaults to four Windows system fonts, so on other platforms a
            list must be supplied.

    Returns:
        ``(image, xywh)``: ``image`` is the same object as ``img`` and
        ``xywh`` is ``[center_x, center_y, width, height]`` of the rendered
        text in pixels.
    """
    if font_paths is None:
        # FangSong, SimHei, KaiTi and LiSu. symbol.ttf was tried by the
        # original author and noted as unsupported.
        font_paths = (
            'C:\\Windows\\Fonts\\simfang.ttf',
            'C:\\Windows\\Fonts\\simhei.ttf',
            'C:\\Windows\\Fonts\\simkai.ttf',
            'C:\\Windows\\Fonts\\SIMLI.TTF',
        )
    font_path = random.choice(font_paths)

    # Font size is drawn uniformly from [100, 240).
    font_size = random.randrange(100, 240)
    font = ImageFont.truetype(font_path, font_size)

    draw = ImageDraw.Draw(img)

    # Random anchor point handed to draw.text().
    x = random.randrange(100, 300)
    y = random.randrange(100, 600)
    draw.text((x, y), gen_word, fill=(0, 0, 0), font=font)

    # ImageFont.getsize()/getoffset() no longer exist in Pillow >= 10.
    # textbbox() gives the absolute (left, top, right, bottom) box of the
    # text drawn at the same anchor with the same font.
    left, top, right, bottom = draw.textbbox((x, y), gen_word, font=font)
    width = right - left
    height = bottom - top
    xywh = [int(left + width / 2), int(top + height / 2), width, height]
    return img, xywh

def gen_word(words):
    """Return a random string of 6, 7 or 8 items drawn from ``words``.

    Each position is chosen independently and uniformly; items are converted
    with ``str`` before being concatenated.

    Args:
        words: Non-empty sequence of candidate characters.

    Returns:
        The generated string.

    Raises:
        IndexError: If ``words`` is empty.
    """
    word_len = random.choice((6, 7, 8))
    # The previous implementation indexed with ``np.uint8(v * len(words)) - 1``.
    # That only worked because index -1 wraps to the last item, and with
    # NumPy 2 scalar arithmetic ``uint8(0) - 1`` becomes 255, an out-of-range
    # index. random.choice() has neither problem.
    return ''.join(str(random.choice(words)) for _ in range(word_len))




if __name__ == '__main__':
    # NOTE(review): this value is not used by the loop below; the number of
    # outputs is len(files) * copies_per_image at most.
    synthetic_img_num = 5000
    # Number of synthetic variants attempted per source image.
    copies_per_image = 5

    # os.path.join keeps these locations valid on non-Windows systems, where
    # a literal '.\\img1\\' prefix would not name a directory.
    save_syn_img_path = os.path.join('.', 'img1')
    os.makedirs(save_syn_img_path, exist_ok=True)
    file_property = ['.jpg']
    ori_img_path = os.path.join('.', 'oriImg')
    files = get_img_name_by_dir(ori_img_path, file_property)

    for file in tqdm(files):
        # Decode and rotate the source once, then work on copies, because
        # draw_text_PIL draws on the image it is given.
        with Image.open(os.path.join(ori_img_path, file)) as src:
            # convert('RGB') makes the RGB fill/outline colours and the
            # RGB->BGR conversion below valid for non-RGB sources too.
            # 2 is the transpose method code used by the original script
            # (presumably Image.ROTATE_90 - confirm).
            base_img = src.convert('RGB').transpose(2)

        # splitext strips the extension whatever its length; the parent
        # directory is created in case ``file`` carries a sub-directory.
        out_stem = os.path.join(save_syn_img_path, os.path.splitext(file)[0])
        os.makedirs(os.path.dirname(out_stem), exist_ok=True)

        for i in range(copies_per_image):
            word = gen_word(words)
            out_img, xywh = draw_text_PIL(base_img.copy(), word)
            center_x, center_y, box_w, box_h = xywh
            canvas_w, canvas_h = out_img.size
            # Drop samples whose box crosses the right or bottom edge.
            if center_x + box_w / 2 > canvas_w or center_y + box_h / 2 > canvas_h:
                continue

            # Outline the labelled box. The bottom edge previously reused
            # ``center_y - box_h / 2``, which collapsed the box to a line.
            # NOTE(review): the outline ends up in the saved image; confirm
            # this is wanted outside of debugging.
            draw = ImageDraw.Draw(out_img)
            draw.rectangle(
                xy=(
                    center_x - box_w / 2,
                    center_y - box_h / 2,
                    center_x + box_w / 2,
                    center_y + box_h / 2,
                ),
                fill=None,
                outline=(0, 0, 255),
            )

            # One label line: class id 0, then box centre and size, each
            # divided by the actual image width or height.
            fields = (
                0,
                center_x / canvas_w,
                center_y / canvas_h,
                box_w / canvas_w,
                box_h / canvas_h,
            )
            data = ' '.join(str(value) for value in fields) + '\n'

            out_Img = np.array(out_img)  # PIL.Image -> ndarray
            out_Img = cv.cvtColor(out_Img, cv.COLOR_RGB2BGR)
            sample_stem = out_stem + '_' + str(i)
            cv.imwrite(sample_stem + '.jpg', out_Img)
            with open(sample_stem + '.txt', 'w') as f:
                f.write(data)