本次代码开发调试基于简化版的CycleGAN,相关代码说明见文档:
CycleGAN二次解读_benben044的博客-CSDN博客
一、直接使用高仿车牌生成车牌
高仿车牌生成的代码见:python+opencv生成较真实的车牌号码图片_benben044的博客-CSDN博客
从网上下载了一批真实的质量较高的车牌,每张车牌都有车牌号码的文字信息,如下图所示:
首先,根据上图中的车牌号码生成高仿的车牌,作为训练集的A目录。
然后,将真实车牌作为训练集的B目录。
我们使用CycleGAN对上述训练集进行训练,200次迭代后,生成效果不佳。
效果大致如下:
然后将迭代次数提高到400次,以及增加ResnetBlock的个数,效果依然不佳。
所以提出了疑问:
1、CycleGAN示例中的图片(比如苹果<->橘子)不涉及文字,那么对这种带文字的图片进行转换是否可行?
2、自己训练集中的图片是长方形的,而代码会将其resize为正方形,这是否会对结果产生影响?
带着这两个疑问,我们重新设计了训练集进行测试。
二、最简单的带文字的GAN试验
cycleGAN源码中图片最后会被resize到256*256,所以这次我们直接生成256*256的图片。
源图片和目标图片样式分别见下面两张图:
生成代码:
#!/usr/bin/env python
#coding=utf-8
import cv2
import os
import numpy as np
from PIL import ImageDraw, ImageFont, Image
# Province abbreviations; generate_license_plate_number() uses each one as the
# first character of the plates it builds.
char_province_list = ["京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖","闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新"]
# Pool for the six characters that follow the province character: the digits
# 0-9 plus upper-case letters (the list contains no "I" and no "O").
char_alphbet_list = ["0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"]
def generate_license_plate_number():
    """
    Build a batch of random license plate numbers.

    For every entry of ``char_province_list``, 100 plates are produced. Each
    plate is the province character followed by six characters sampled with
    ``np.random.choice`` from ``char_alphbet_list``.

    :return: list of plate strings, grouped by province in list order
    """
    plates = []
    for province in char_province_list:
        plates.extend(
            province + "".join(np.random.choice(char_alphbet_list, 6))
            for _ in range(100)
        )
    return plates
def write_chinese(img, font_type, font_size, color, position, content):
    """
    Draw text (e.g. Chinese characters) onto an OpenCV image by going through PIL.

    :param img: image array, treated as BGR (it is converted BGR -> RGB first)
    :param font_type: font file path handed to ``ImageFont.truetype``;
        on Linux, *.ttc fonts usually live under
        /usr/share/fonts/opentype/noto/ (find them with ``locate *.ttc``)
    :param font_size: font size handed to ``ImageFont.truetype``
    :param color: fill colour of the text; it is applied to the RGB PIL image
    :param position: text position handed to ``ImageDraw.text``
    :param content: the text to draw
    :return: a new image, converted back to BGR, with the text drawn on it
    """
    # OpenCV (BGR) -> PIL (RGB)
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    canvas = Image.fromarray(rgb_pixels)

    typeface = ImageFont.truetype(font_type, font_size)
    ImageDraw.Draw(canvas).text(position, content, font=typeface, fill=color)

    # PIL (RGB) -> OpenCV (BGR)
    return cv2.cvtColor(np.asarray(canvas), cv2.COLOR_RGB2BGR)
def generate_plate_rectangle(plate_str):
    """
    Render the plate text inside an outlined (unfilled) rectangle.

    :param plate_str: plate number to draw
    :return: 256x256, 3-channel uint8 image produced by ``write_chinese``
    """
    side = 256
    # white background
    canvas = np.full((side, side, 3), 255, dtype=np.uint8)
    top_left, bottom_right = (20, 20), (236, 236)
    # 4-pixel-thick border; (255, 0, 0) is blue in OpenCV's BGR channel order
    cv2.rectangle(canvas, top_left, bottom_right, (255, 0, 0), 4)
    return write_chinese(canvas, 'font/SimHei.ttf', 42, (0, 0, 0), (50, 100), plate_str)
def generate_plate_filled_rectangle(plate_str):
    """
    Render the plate text on top of a solid, filled rectangle.

    :param plate_str: plate number to draw
    :return: 256x256, 3-channel uint8 image produced by ``write_chinese``
    """
    side = 256
    # white background
    canvas = np.full((side, side, 3), 255, dtype=np.uint8)
    top_left, bottom_right = (20, 20), (236, 236)
    # thickness -1 makes OpenCV fill the rectangle; (255, 0, 0) is blue in BGR
    cv2.rectangle(canvas, top_left, bottom_right, (255, 0, 0), -1)
    return write_chinese(canvas, 'font/SimHei.ttf', 42, (0, 0, 0), (50, 100), plate_str)
def generate_plate_tuple(plate_str, index, dir_path=None):
    """
    Render one A/B training pair for a plate number and save both images.

    The outlined-rectangle image goes to ``<dir_path>/A`` and the
    filled-rectangle image to ``<dir_path>/B``; both files are named
    ``<index>_<plate_str>.png``.

    :param plate_str: plate number to render
    :param index: running number used as the file-name prefix
    :param dir_path: root output directory; when omitted, the original
        hard-coded location is used so existing callers are unaffected
    :return: None
    """
    if dir_path is None:
        dir_path = "D:\\workspace\\ncz-python-algo\\com\\ncz\\algo\\license-plate-generator\\simple_images"

    file_name = str(index) + "_" + plate_str + ".png"
    renderers = (
        ("A", generate_plate_rectangle),
        ("B", generate_plate_filled_rectangle),
    )
    for sub_dir, render in renderers:
        target_dir = os.path.join(dir_path, sub_dir)
        # tofile() does not create missing folders, so make sure they exist.
        os.makedirs(target_dir, exist_ok=True)
        image = render(plate_str)
        # NOTE(review): imencode + tofile is presumably used instead of
        # cv2.imwrite because the file name contains a Chinese character,
        # which imwrite may not handle on Windows - confirm.
        cv2.imencode('.png', image)[1].tofile(os.path.join(target_dir, file_name))
# To render a single sample pair instead of the whole batch, call for example:
#     generate_plate_tuple('浙A5B5T3', 1)

# Generate the random batch and write one A/B image pair per plate number.
license_plate_list = generate_license_plate_number()
for serial, plate_no in enumerate(license_plate_list, start=1):
    print(serial, plate_no)
    generate_plate_tuple(plate_no, serial)
经过200次迭代训练之后,对应的loss信息如下:
将模型进行测试效果如下(本车牌未在训练集中):
B生成A的图片:
A生成B的图片:
可以看到,GAN生成的图片除了少部分瑕疵,基本上是符合预期的,同时文字是清晰可见的,没有出现第一部分试验所看到的文字错乱的情况。
鉴于上述良好迹象,我们再往前推进一小步:如果B图中的文字是倾斜的,看文字能否被正确生成。
三、倾斜的带文字的GAN试验
训练集A图示例:
训练集B图示例:
生成训练集代码(在第二步的基础上稍作修改):
#!/usr/bin/env python
#coding=utf-8
import cv2
import os
import numpy as np
from PIL import ImageDraw, ImageFont, Image
# Province abbreviations; generate_license_plate_number() uses each one as the
# first character of the plates it builds.
char_province_list = ["京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖","闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新"]
# Pool for the six characters that follow the province character: the digits
# 0-9 plus upper-case letters (the list contains no "I" and no "O").
char_alphbet_list = ["0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"]
def generate_license_plate_number():
    """
    Build a batch of random license plate numbers.

    For every entry of ``char_province_list``, 100 plates are produced. Each
    plate is the province character followed by six characters sampled with
    ``np.random.choice`` from ``char_alphbet_list``.

    :return: list of plate strings, grouped by province in list order
    """
    plates = []
    for province in char_province_list:
        plates.extend(
            province + "".join(np.random.choice(char_alphbet_list, 6))
            for _ in range(100)
        )
    return plates
def write_chinese(img, font_type, font_size, color, position, content, degree):
    """
    Draw text onto an OpenCV image through PIL, then rotate the result.

    Note that the rotation is applied to the whole PIL image after the text
    has been drawn, so everything already on ``img`` is rotated with the text.

    :param img: image array, treated as BGR (it is converted BGR -> RGB first)
    :param font_type: font file path handed to ``ImageFont.truetype``;
        on Linux, *.ttc fonts usually live under
        /usr/share/fonts/opentype/noto/ (find them with ``locate *.ttc``)
    :param font_size: font size handed to ``ImageFont.truetype``
    :param color: fill colour of the text; it is applied to the RGB PIL image
    :param position: text position handed to ``ImageDraw.text``
    :param content: the text to draw
    :param degree: angle handed to ``Image.rotate``
    :return: a new image, converted back to BGR, with the text drawn and the
        rotation applied
    """
    # OpenCV (BGR) -> PIL (RGB)
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    canvas = Image.fromarray(rgb_pixels)

    typeface = ImageFont.truetype(font_type, font_size)
    ImageDraw.Draw(canvas).text(position, content, font=typeface, fill=color)
    rotated = canvas.rotate(degree)

    # PIL (RGB) -> OpenCV (BGR)
    return cv2.cvtColor(np.asarray(rotated), cv2.COLOR_RGB2BGR)
def generate_plate_rectangle(plate_str):
    """
    Render the plate text inside an outlined (unfilled) rectangle, unrotated.

    :param plate_str: plate number to draw
    :return: 256x256, 3-channel uint8 image produced by ``write_chinese``
    """
    side = 256
    # white background
    canvas = np.full((side, side, 3), 255, dtype=np.uint8)
    top_left, bottom_right = (20, 20), (236, 236)
    # 4-pixel-thick border; (255, 0, 0) is blue in OpenCV's BGR channel order
    cv2.rectangle(canvas, top_left, bottom_right, (255, 0, 0), 4)
    # rotation angle 0: this image stays upright
    return write_chinese(canvas, 'font/SimHei.ttf', 42, (0, 0, 0), (50, 100), plate_str, 0)
def generate_plate_filled_rectangle(plate_str):
    """
    Render the plate text on a solid, filled rectangle and rotate by 20 degrees.

    :param plate_str: plate number to draw
    :return: 256x256, 3-channel uint8 image produced by ``write_chinese``
    """
    side = 256
    # white background
    canvas = np.full((side, side, 3), 255, dtype=np.uint8)
    top_left, bottom_right = (20, 20), (236, 236)
    # thickness -1 makes OpenCV fill the rectangle; (255, 0, 0) is blue in BGR
    cv2.rectangle(canvas, top_left, bottom_right, (255, 0, 0), -1)
    # rotation angle 20 is forwarded to write_chinese
    return write_chinese(canvas, 'font/SimHei.ttf', 42, (0, 0, 0), (50, 100), plate_str, 20)
def generate_plate_tuple(plate_str, index, dir_path=None):
    """
    Render one A/B training pair for a plate number and save both images.

    The outlined-rectangle image goes to ``<dir_path>/A`` and the
    filled-rectangle image to ``<dir_path>/B``; both files are named
    ``<index>_<plate_str>.png``.

    :param plate_str: plate number to render
    :param index: running number used as the file-name prefix
    :param dir_path: root output directory; when omitted, the original
        hard-coded location is used so existing callers are unaffected
    :return: None
    """
    if dir_path is None:
        dir_path = "D:\\workspace\\ncz-python-algo\\com\\ncz\\algo\\license-plate-generator\\simple_images"

    file_name = str(index) + "_" + plate_str + ".png"
    renderers = (
        ("A", generate_plate_rectangle),
        ("B", generate_plate_filled_rectangle),
    )
    for sub_dir, render in renderers:
        target_dir = os.path.join(dir_path, sub_dir)
        # tofile() does not create missing folders, so make sure they exist.
        os.makedirs(target_dir, exist_ok=True)
        image = render(plate_str)
        # NOTE(review): imencode + tofile is presumably used instead of
        # cv2.imwrite because the file name contains a Chinese character,
        # which imwrite may not handle on Windows - confirm.
        cv2.imencode('.png', image)[1].tofile(os.path.join(target_dir, file_name))
# To render a single sample pair instead of the whole batch, call for example:
#     generate_plate_tuple('浙A5B5T3', 1)

# Generate the random batch and write one A/B image pair per plate number.
license_plate_list = generate_license_plate_number()
for serial, plate_no in enumerate(license_plate_list, start=1):
    print(serial, plate_no)
    generate_plate_tuple(plate_no, serial)
参考文档:基于亚像素卷积的改进型CycleGAN手写汉字生成研究_参考网
对model稍作修改如下:
算法迭代了30+次后数据和效果如下:
算法迭代了130+次后数据和效果如下:
本次试验失败!!
四、只有文字倾斜的GAN
进一步查看GAN是否能够生成倾斜的字体,背景设置为黑色。
训练了200次迭代之后,其损失如下:
最后,通过模型test效果如下: