前几天想训练一个可以识别字符的模型，但苦于找不到训练材料。今天做网站验证码的时候发现了一个思路：用验证码库自己生成带标签的字符图片，写下来分享一下。
from captcha.image import ImageCaptcha
import random,string
import numpy as np
# Generate a small labelled character-image dataset from captcha renderings.
#
# Each sample is one random character rendered by ImageCaptcha, converted to
# greyscale, binarised, denoised and flattened into a single CSV line of the
# form ``<label>,<p0>,<p1>,...,<pN>,`` (note the trailing comma, kept for
# compatibility with files produced by earlier versions of this script).

# --- Pipeline settings ------------------------------------------------------
SAMPLE_COUNT = 10           # number of captcha images (CSV lines) to generate
LABEL_LENGTH = 1            # number of characters drawn on each captcha image
BINARIZE_THRESHOLD = 200    # grey levels below this become 0, the rest 255
WHITE_LEVEL = 245           # a neighbour brighter than this counts as white
MIN_WHITE_NEIGHBOURS = 5    # whiten a pixel with at least this many white neighbours
DENOISE_PASSES = 16         # how many times the denoising sweep is repeated
OUTPUT_PATH = "train_test_letter_exam.csv"

# Offsets of the eight pixels surrounding a pixel.
_NEIGHBOUR_OFFSETS = (
    (0, -1), (0, 1), (-1, 0), (1, 0),
    (-1, -1), (-1, 1), (1, -1), (1, 1),
)


def binarize(pixels, width, height, threshold=BINARIZE_THRESHOLD):
    """Turn a greyscale pixel grid into pure black/white, in place.

    Args:
        pixels: object indexable as ``pixels[x, y]`` for reading and writing
            (for example the result of ``Image.load()``).
        width: number of columns in the grid.
        height: number of rows in the grid.
        threshold: values strictly below this become 0; all others become 255.

    Returns:
        The same ``pixels`` object, returned for convenience.
    """
    for y in range(height):
        for x in range(width):
            pixels[x, y] = 0 if pixels[x, y] < threshold else 255
    return pixels


def denoise(pixels, width, height, passes=DENOISE_PASSES,
            white_level=WHITE_LEVEL,
            min_white_neighbours=MIN_WHITE_NEIGHBOURS):
    """Whiten pixels that are mostly surrounded by white, in place.

    Every interior pixel (the one-pixel border is never touched) is set to
    255 when at least ``min_white_neighbours`` of its eight neighbours are
    brighter than ``white_level``. The sweep runs row by row and writes into
    the grid it is reading, so a pixel whitened earlier in a pass influences
    the pixels visited after it; repeating the sweep erodes thin noise lines.

    Args:
        pixels: object indexable as ``pixels[x, y]`` for reading and writing.
        width: number of columns in the grid.
        height: number of rows in the grid.
        passes: number of full sweeps over the grid.
        white_level: brightness above which a neighbour counts as white.
        min_white_neighbours: white-neighbour count required to whiten a pixel.

    Returns:
        The same ``pixels`` object, returned for convenience.
    """
    for _ in range(passes):
        for y in range(1, height - 1):
            for x in range(1, width - 1):
                white = sum(
                    1
                    for dx, dy in _NEIGHBOUR_OFFSETS
                    if pixels[x + dx, y + dy] > white_level
                )
                if white >= min_white_neighbours:
                    pixels[x, y] = 255
    return pixels


def format_csv_row(label, pixel_rows):
    """Return one CSV line: the label followed by every pixel value.

    Args:
        label: text written in the first column.
        pixel_rows: iterable of rows, each an iterable of pixel values.

    Returns:
        ``"<label>,<v>,<v>,...,\\n"``. Each image row contributes its values
        followed by a comma, so the line ends with a trailing comma.
    """
    body = "".join(
        ",".join(str(value) for value in row) + "," for row in pixel_rows
    )
    return label + "," + body + "\n"


def generate_sample(label_length=LABEL_LENGTH):
    """Render one random captcha and return ``(label, pixel_array)``.

    The label is ``label_length`` distinct characters sampled from ASCII
    letters and digits. The rendered image is converted to greyscale,
    binarised and denoised before being turned into a NumPy array.
    """
    alphabet = string.ascii_letters + string.digits
    label = "".join(random.sample(alphabet, label_length))

    # NOTE(review): the original author's comment says the captcha library's
    # source was modified locally (its stock width being 160), so the image
    # size seen here may differ from an unmodified install - confirm.
    image = ImageCaptcha().generate_image(label)

    gray = image.convert("L")
    pixels = gray.load()
    width, height = gray.size
    binarize(pixels, width, height)
    denoise(pixels, width, height)
    return label, np.array(gray)


def main(sample_count=SAMPLE_COUNT, output_path=OUTPUT_PATH):
    """Generate ``sample_count`` samples and write them to ``output_path``.

    The output file is overwritten on every run.
    """
    lines = [format_csv_row(*generate_sample()) for _ in range(sample_count)]
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(lines)


if __name__ == "__main__":
    main()
生成的图片如下:
本文将这些图片逐像素展开，连同对应的字符标签一起保存为 CSV 文件。