import os
from cv2 import WINDOW_NORMAL
import numpy as np
import cv2 as cv
import random
import PIL
from PIL import Image, ImageFont, ImageDraw
from tqdm import tqdm
# Width and height (in pixels) of the synthetic images.
img_width = 1280
img_height = 960
# Characters the random strings are drawn from: a-z, A-Z, the digits 0-9
# and '-'. The previous hand-written list skipped '8', so that digit could
# never appear in a generated string.
words = list(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '-'
)
# Draw a string on an image with OpenCV.
# With OpenCV drawing it is hard to know the exact position and size the
# rendered text occupies.
def draw_text_cv(img, gen_word):
    """Draw ``gen_word`` on ``img`` with a random Hershey font and display it.

    The text is drawn in place at (100, 200) with scale 6, colour
    (10, 1, 100) and thickness 4. The chosen font id is printed, the image
    is shown in a resizable window named 'img', and the call blocks until a
    key is pressed.

    Args:
        img: Image array passed to ``cv.putText`` (modified in place).
        gen_word: Text to draw.

    Returns:
        The same ``img`` object that was passed in.
    """
    hershey_fonts = (
        cv.FONT_HERSHEY_COMPLEX,
        cv.FONT_HERSHEY_DUPLEX,
        cv.FONT_HERSHEY_PLAIN,
        cv.FONT_HERSHEY_SIMPLEX,
        cv.FONT_HERSHEY_TRIPLEX,
    )
    # Pick one of the candidate fonts at random.
    chosen_font = random.choice(hershey_fonts)
    print('random font:', chosen_font)

    text_origin = (100, 200)
    text_scale = 6
    text_colour = (10, 1, 100)
    text_thickness = 4
    cv.putText(img, gen_word, text_origin, chosen_font, text_scale,
               text_colour, text_thickness)

    window_name = 'img'
    cv.namedWindow(window_name, WINDOW_NORMAL)
    cv.imshow(window_name, img)
    cv.waitKey(0)
    return img
# Draw a string on an image with Pillow.
def draw_text_PIL(img, gen_word):
    """Draw ``gen_word`` in black with a random font, size and position.

    The font file is picked at random from four Windows fonts, the font
    size from 100..239 and the text origin from x in 100..299 and
    y in 100..599.

    Args:
        img: Image as an array accepted by ``Image.fromarray``.
        gen_word: Text to draw.

    Returns:
        A tuple ``(Img, xywh)``: the PIL image with the text drawn on it and
        the text bounding box as ``[center_x, center_y, width, height]`` in
        pixels.
    """
    # Candidate fonts: FangSong, SimHei, KaiTi and LiSu (regular weight).
    # symbol.ttf does not support these characters and ARIALUNI.TTF
    # (Arial Unicode MS) is intentionally not in the list.
    local_font_list = ['C:\\Windows\\Fonts\\simfang.ttf',
                       'C:\\Windows\\Fonts\\simhei.ttf',
                       'C:\\Windows\\Fonts\\simkai.ttf',
                       'C:\\Windows\\Fonts\\SIMLI.TTF'
                       ]
    local_font = random.choice(local_font_list)
    # Plain Python int: ImageFont.truetype expects an integer pixel size.
    random_scale_font = random.randrange(100, 240)
    font = ImageFont.truetype(local_font, random_scale_font)

    Img = Image.fromarray(img)
    draw = ImageDraw.Draw(Img)

    # Random top-left anchor of the text inside the image.
    x = random.randrange(100, 300)
    y = random.randrange(100, 600)
    draw.text((x, y), gen_word, fill=(0, 0, 0), font=font)

    if hasattr(draw, 'textbbox'):
        # font.getsize()/getoffset() were removed in Pillow 10; textbbox
        # returns the box of the rendered text directly in image coordinates.
        left, top, right, bottom = (
            int(round(v)) for v in draw.textbbox((x, y), gen_word, font=font)
        )
        width = right - left
        height = bottom - top
        center_x = left + width // 2
        center_y = top + height // 2
    else:
        # Legacy path for Pillow releases that predate textbbox.
        offsetx, offsety = font.getoffset(gen_word)
        width, height = font.getsize(gen_word)
        center_x = int(offsetx + x + width / 2)
        center_y = int(offsety + y + height / 2)

    xywh = [center_x, center_y, width, height]
    return Img, xywh
# Generate a random string.
def gen_word(words):
    """Return a random string of 6, 7 or 8 items drawn from ``words``.

    Every item of ``words`` is equally likely at each position. Randomness
    comes from the standard ``random`` module only.

    Args:
        words: Non-empty sequence of candidate characters; each picked item
            is converted with ``str`` before being appended.

    Returns:
        The concatenation of the randomly picked items.

    Raises:
        IndexError: If ``words`` is empty.
    """
    word_len = random.choice((6, 7, 8))
    # random.choice always yields a valid index. The previous
    # np.uint8(v * len(words)) - 1 wrapped to 255 under NumPy 2 whenever the
    # drawn index was 0, and could not address pools larger than 255 items.
    return ''.join(str(random.choice(words)) for _ in range(word_len))
if __name__ == '__main__':
    # Script entry point: synthesise images with a plain random light
    # background and one random string each, and write a one-line label
    # file ("0 cx cy w h", normalised by the image size) next to every image.
    synthetic_img_num = 2000
    save_syn_img_path = '.\\img\\'
    if not os.path.exists(save_syn_img_path):
        os.makedirs(save_syn_img_path)

    for i in tqdm(range(synthetic_img_num)):
        # One random value in 200..254 per channel, drawn in B, G, R order.
        b_value = random.randrange(200, 255)
        g_value = random.randrange(200, 255)
        r_value = random.randrange(200, 255)
        img = np.full((img_height, img_width, 3),
                      (b_value, g_value, r_value), dtype=np.uint8)

        word = gen_word(words)
        out_img, xywh = draw_text_PIL(img, word)
        center_x, center_y, box_w, box_h = xywh

        # Keep the sample only if the box stays inside the right and bottom
        # image borders.
        fits = ((center_x + box_w / 2) <= out_img.size[0]
                and (center_y + box_h / 2) <= out_img.size[1])
        if not fits:
            continue

        label_fields = (
            0,
            center_x / img_width,
            center_y / img_height,
            box_w / img_width,
            box_h / img_height,
        )
        data = ' '.join(str(field) for field in label_fields) + '\n'

        # Convert the PIL image back to an array for OpenCV.
        out_Img = np.array(out_img)
        sample_stem = 'img_' + str(i)
        cv.imwrite(save_syn_img_path + sample_stem + '.jpg', out_Img)
        with open(save_syn_img_path + sample_stem + '.txt', 'w') as f:
            f.write(data)
# Second synthesis method: add the random string onto existing images.
import os
from cv2 import WINDOW_NORMAL
import numpy as np
import cv2 as cv
import random
import PIL
from PIL import Image, ImageFont, ImageDraw
from tqdm import tqdm
# Width and height (in pixels) of the synthetic images.
img_width = 1280
img_height = 960
# Characters the random strings are drawn from: a-z, A-Z, the digits 0-9
# and '-'. The previous hand-written list skipped '8', so that digit could
# never appear in a generated string.
words = list(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '-'
)
def get_img_name_by_dir(file_dir, file_property):
    """Collect the names of files under ``file_dir`` with a wanted extension.

    The directory tree is walked recursively with ``os.walk``. Only the bare
    file names are returned, without their directory part.

    Args:
        file_dir: Root directory to walk.
        file_property: Container of extensions including the leading dot
            (e.g. ``['.jpg']``); matching is an exact ``in`` test.

    Returns:
        List of matching file names in walk order.
    """
    return [
        entry
        for _root, _dirs, entries in os.walk(file_dir)
        for entry in entries
        if os.path.splitext(entry)[1] in file_property
    ]
# Draw a string on an image with OpenCV.
# With OpenCV drawing it is hard to know the exact position and size the
# rendered text occupies.
def draw_text_cv(img, gen_word):
    """Draw ``gen_word`` on ``img`` with a random Hershey font and display it.

    The text is drawn in place at (100, 200) with scale 6, colour
    (10, 1, 100) and thickness 4. The chosen font id is printed, the image
    is shown in a resizable window named 'img', and the call blocks until a
    key is pressed.

    Args:
        img: Image array passed to ``cv.putText`` (modified in place).
        gen_word: Text to draw.

    Returns:
        The same ``img`` object that was passed in.
    """
    hershey_fonts = (
        cv.FONT_HERSHEY_COMPLEX,
        cv.FONT_HERSHEY_DUPLEX,
        cv.FONT_HERSHEY_PLAIN,
        cv.FONT_HERSHEY_SIMPLEX,
        cv.FONT_HERSHEY_TRIPLEX,
    )
    # Pick one of the candidate fonts at random.
    chosen_font = random.choice(hershey_fonts)
    print('random font:', chosen_font)

    text_origin = (100, 200)
    text_scale = 6
    text_colour = (10, 1, 100)
    text_thickness = 4
    cv.putText(img, gen_word, text_origin, chosen_font, text_scale,
               text_colour, text_thickness)

    window_name = 'img'
    cv.namedWindow(window_name, WINDOW_NORMAL)
    cv.imshow(window_name, img)
    cv.waitKey(0)
    return img
# Draw a string on an image with Pillow.
def draw_text_PIL(img, gen_word):
    """Draw ``gen_word`` in black with a random font, size and position.

    The font file is picked at random from four Windows fonts, the font
    size from 100..239 and the text origin from x in 100..299 and
    y in 100..599. The text is drawn directly onto ``img``.

    Args:
        img: PIL image to draw on (modified in place).
        gen_word: Text to draw.

    Returns:
        A tuple ``(Img, xywh)``: the same image object that was passed in
        and the text bounding box as ``[center_x, center_y, width, height]``
        in pixels.
    """
    # Candidate fonts: FangSong, SimHei, KaiTi and LiSu (regular weight).
    # symbol.ttf does not support these characters and ARIALUNI.TTF
    # (Arial Unicode MS) is intentionally not in the list.
    local_font_list = ['C:\\Windows\\Fonts\\simfang.ttf',
                       'C:\\Windows\\Fonts\\simhei.ttf',
                       'C:\\Windows\\Fonts\\simkai.ttf',
                       'C:\\Windows\\Fonts\\SIMLI.TTF'
                       ]
    local_font = random.choice(local_font_list)
    # Plain Python int: ImageFont.truetype expects an integer pixel size.
    random_scale_font = random.randrange(100, 240)
    font = ImageFont.truetype(local_font, random_scale_font)

    # The caller already passes a PIL image, so no array conversion here.
    Img = img
    draw = ImageDraw.Draw(Img)

    # Random top-left anchor of the text inside the image.
    x = random.randrange(100, 300)
    y = random.randrange(100, 600)
    draw.text((x, y), gen_word, fill=(0, 0, 0), font=font)

    if hasattr(draw, 'textbbox'):
        # font.getsize()/getoffset() were removed in Pillow 10; textbbox
        # returns the box of the rendered text directly in image coordinates.
        left, top, right, bottom = (
            int(round(v)) for v in draw.textbbox((x, y), gen_word, font=font)
        )
        width = right - left
        height = bottom - top
        center_x = left + width // 2
        center_y = top + height // 2
    else:
        # Legacy path for Pillow releases that predate textbbox.
        offsetx, offsety = font.getoffset(gen_word)
        width, height = font.getsize(gen_word)
        center_x = int(offsetx + x + width / 2)
        center_y = int(offsety + y + height / 2)

    xywh = [center_x, center_y, width, height]
    return Img, xywh
# Generate a random string.
def gen_word(words):
    """Return a random string of 6, 7 or 8 items drawn from ``words``.

    Every item of ``words`` is equally likely at each position. Randomness
    comes from the standard ``random`` module only.

    Args:
        words: Non-empty sequence of candidate characters; each picked item
            is converted with ``str`` before being appended.

    Returns:
        The concatenation of the randomly picked items.

    Raises:
        IndexError: If ``words`` is empty.
    """
    word_len = random.choice((6, 7, 8))
    # random.choice always yields a valid index. The previous
    # np.uint8(v * len(words)) - 1 wrapped to 255 under NumPy 2 whenever the
    # drawn index was 0, and could not address pools larger than 255 items.
    return ''.join(str(random.choice(words)) for _ in range(word_len))
if __name__ == '__main__':
    # Script entry point: for every source image, write several variants
    # with a random string drawn on top, plus a one-line label file
    # ("0 cx cy w h", normalised by the image size) next to each variant.
    variants_per_image = 5
    save_syn_img_path = '.\\img1\\'
    os.makedirs(save_syn_img_path, exist_ok=True)
    file_property = ['.jpg']
    ori_img_path = '.\\oriImg\\'
    files = get_img_name_by_dir(ori_img_path, file_property)

    for file in tqdm(files):
        # Strip the extension whatever its length instead of assuming
        # four characters.
        stem = os.path.splitext(file)[0]

        # Decode and rotate the source once per file and close the file
        # handle right away. Converting to RGB keeps the RGB text fill and
        # the RGB->BGR conversion below valid for grayscale/CMYK JPEGs.
        with Image.open(ori_img_path + file) as src:
            base_img = src.convert('RGB').transpose(Image.ROTATE_90)

        for i in range(variants_per_image):
            # Work on a copy so the text of one variant does not leak into
            # the next one.
            img = base_img.copy()
            word = gen_word(words)
            out_img, xywh = draw_text_PIL(img, word)
            center_x, center_y, box_w, box_h = xywh
            canvas_w, canvas_h = out_img.size

            # Skip samples whose box crosses the right or bottom border.
            if ((center_x + box_w / 2) > canvas_w) or ((center_y + box_h / 2) > canvas_h):
                continue

            # Outline the labelled box. The bottom edge previously reused
            # "center_y - box_h / 2", which collapsed the box to a line.
            # NOTE(review): the outline is burned into the saved image;
            # confirm this is wanted for training data.
            draw = ImageDraw.Draw(out_img)
            draw.rectangle(
                xy=(center_x - box_w / 2, center_y - box_h / 2,
                    center_x + box_w / 2, center_y + box_h / 2),
                fill=None, outline=(0, 0, 255))

            data = (str(0) + ' ' + str(center_x / canvas_w) + ' '
                    + str(center_y / canvas_h) + ' ' + str(box_w / canvas_w)
                    + ' ' + str(box_h / canvas_h) + '\n')

            # PIL image -> array, then RGB -> BGR because OpenCV writes BGR.
            out_Img = cv.cvtColor(np.array(out_img), cv.COLOR_RGB2BGR)
            synthetic_img_name = stem + '_' + str(i) + '.jpg'
            synthetic_img_txt = stem + '_' + str(i) + '.txt'

            # cv.imwrite reports failure through its return value; do not
            # leave a label file without its image.
            if not cv.imwrite(save_syn_img_path + synthetic_img_name, out_Img):
                tqdm.write('failed to write ' + save_syn_img_path + synthetic_img_name)
                continue
            with open(save_syn_img_path + synthetic_img_txt, 'w') as f:
                f.write(data)