引用:https://www.jianshu.com/p/aa075424d2d2
本文整合了多个验证码识别教程,包含:生成验证码、使用自己的验证码训练集和测试集、模型训练和模型测试。
1. 实验环境
本次实验使用:python3.7,tensorflow1.14.0
共有6个py文件,需要在同一个文件夹下创建py文件。
2. 实验过程
2.1 生成验证码
下面的代码通过captcha库生成验证码:
# coding:utf-8
# name:captcha_gen.py
import random
import numpy as np
from PIL import Image
from captcha.image import ImageCaptcha
# Candidate character pools for captcha text.
NUMBER = list('0123456789')
LOW_CASE = list('abcdefghijklmnopqrstuvwxyz')
UP_CASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# The generator script draws digits only.
CAPTCHA_LIST = NUMBER
CAPTCHA_LEN = 4  # number of characters in one captcha
CAPTCHA_HEIGHT = 60  # captcha image height
CAPTCHA_WIDTH = 160  # captcha image width
def random_captcha_text(char_set=CAPTCHA_LIST, captcha_size=CAPTCHA_LEN):
    """
    Build a random fixed-length string.

    :param char_set: sequence of candidate characters to draw from
    :param captcha_size: number of characters to draw
    :return: the drawn characters joined into one string
    """
    return ''.join(random.choice(char_set) for _ in range(captcha_size))
def gen_captcha_text_and_image(width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT, save=None):
    """
    Generate one random captcha.

    :param width: image width handed to ImageCaptcha
    :param height: image height handed to ImageCaptcha
    :param save: when truthy, also store the image as ./img/<text>.jpg
    :return: (captcha text, captcha image as a numpy array)
    """
    import os  # local import: only needed for the optional save step

    generator = ImageCaptcha(width=width, height=height)
    captcha_text = random_captcha_text()
    # generate() hands back an in-memory stream holding the encoded picture.
    captcha = generator.generate(captcha_text)
    if save:
        save_dir = './img/'
        # Create the target folder on demand instead of failing when it is missing.
        os.makedirs(save_dir, exist_ok=True)
        # Persist the very picture that is returned below. Rendering again
        # (e.g. through ImageCaptcha.write) would produce a second, differently
        # distorted image for the same text.
        # NOTE(review): the stream uses generate()'s default encoding, so the
        # bytes may not be JPEG despite the extension - confirm if that matters.
        with open(save_dir + captcha_text + '.jpg', 'wb') as out:
            out.write(captcha.getvalue())
    # Decode the stream and expose it as a numpy array.
    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image
if __name__ == '__main__':
    # Smoke run: create one captcha, save it, then show its text and array shape.
    text, image_array = gen_captcha_text_and_image(save=True)
    print(text, image_array.shape)  # e.g. (60, 160, 3)
2.2 使用自己的验证码
可以直接通过生成的验证码进行训练,也可以先生成大量的验证码,自行分为train文件夹和test文件夹,然后进行模型训练。
本文偏向于后一种方式:已根据修改后的验证码生成程序,将图片的真实标签放入train_label.csv文件中,该部分读者可自行修改。
# coding:utf-8
# name:captcha_get.py
import random
import numpy as np
import pandas as pd
from PIL import Image
from captcha_process import get_gray_image, get_binary_image, del_noise
# Candidate character pools for captcha text.
NUMBER = list('0123456789')
LOW_CASE = list('abcdefghijklmnopqrstuvwxyz')
UP_CASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# Digits, lower-case and upper-case letters are all possible labels here.
CAPTCHA_LIST = NUMBER + LOW_CASE + UP_CASE
CAPTCHA_LEN = 4  # number of characters in one captcha
CAPTCHA_HEIGHT = 35  # captcha image height
CAPTCHA_WIDTH = 105  # captcha image width
CAPTCHA_AMOUNT = 20000  # number of captcha images available
CAPTCHA_PATH = './train/'  # folder holding the captcha images
# Label columns already read from disk, keyed by the folder they came from.
_LABEL_CACHE = {}


def _load_labels(path):
    """Return the 'label' column of <path>train_label.csv, reading the file once per path."""
    if path not in _LABEL_CACHE:
        # dtype=str keeps all-digit labels (e.g. '0042') as text instead of
        # letting pandas turn them into numbers.
        table = pd.read_csv(path + 'train_label.csv', dtype={'label': str})
        _LABEL_CACHE[path] = table['label']
    return _LABEL_CACHE[path]


def get_captcha_text_and_image(path=CAPTCHA_PATH):
    """
    Pick one stored captcha at random and return it preprocessed.

    The image <path><n>.jpg is paired with row n-1 of the 'label' column in
    <path>train_label.csv, where n is drawn from 1..CAPTCHA_AMOUNT.

    :param path: folder containing the numbered images and train_label.csv
    :return: (captcha text, preprocessed captcha image as a numpy array)
    """
    # Choose which captcha to use.
    index = random.randint(1, CAPTCHA_AMOUNT)
    # The label table is cached, so the CSV is not re-parsed for every sample.
    captcha_text = _load_labels(path)[index - 1]
    # Close the file handle as soon as the grayscale copy has been made.
    with Image.open(path + str(index) + '.jpg') as raw_image:
        captcha_image = get_gray_image(raw_image)
    # Binarize, then denoise.
    captcha_image = get_binary_image(captcha_image)
    captcha_image = del_noise(captcha_image)
    return captcha_text, np.array(captcha_image)
if __name__ == '__main__':
    # Smoke run: load one random captcha and show its label and array shape.
    text, image_array = get_captcha_text_and_image(CAPTCHA_PATH)
    print(text, image_array.shape)
2.3 数据预处理
生成的验证码图片可以进行灰值化、二值化、降噪等处理。读者也可以自行加入裁剪、重置尺寸等操作。
# coding:utf-8
# name:captcha_process.py
def get_gray_image(image):
    """
    Convert an image to grayscale.

    :param image: image to process (must offer convert())
    :return: the result of converting the image to mode 'L'
    """
    return image.convert('L')
def get_binary_image(image, threshold=200):
    """
    Binarize a grayscale image.

    :param image: image to process (must offer point())
    :param threshold: gray levels below this value map to 0, all others to 1
    :return: the image produced by point() with output mode '1'
    """
    # Lookup table over the 256 possible gray levels: dark levels (below the
    # threshold) map to 0, light levels map to 1.
    table = [0 if level < threshold else 1 for level in range(256)]
    return image.point(table, '1')
def del_noise(image, min_black_neighbors=1):
    """
    Remove isolated black specks from a binarized image, in place.

    An interior black pixel (value 0) is repainted white when fewer than
    min_black_neighbors of its four direct neighbours (up, left, down, right)
    are black. Pixels on the outer border are never modified.

    :param image: image offering size, getdata() and putpixel(); modified in place
    :param min_black_neighbors: minimum number of black direct neighbours a
        black pixel needs in order to be kept
    :return: the same image object
    """
    # Painting a black pixel with 0 again would change nothing, so a speck is
    # removed by writing white.
    # NOTE(review): 255 is assumed to be the white value for the image modes
    # used here ('1' / 'L') - confirm.
    white = 255
    width, height = image.size
    # Work from a snapshot so every decision is based on the untouched input,
    # not on pixels already repainted during this pass.
    pixels = list(image.getdata())
    for y in range(1, height - 1):
        row = width * y
        for x in range(1, width - 1):
            if pixels[row + x] != 0:
                continue  # only black pixels are candidates for removal
            neighbours = (
                pixels[row - width + x],  # up
                pixels[row + x - 1],      # left
                pixels[row + width + x],  # down
                pixels[row + x + 1],      # right
            )
            black_count = sum(1 for value in neighbours if value == 0)
            if black_count < min_black_neighbors:
                image.putpixel((x, y), white)
    return image
2.4 工具库
可以调用该自行编辑的工具库,实现预处理等方面的工作
# coding:utf-8
# name:util.py
import numpy as np
from captcha_get import get_captcha_text_and_image
from captcha_get import CAPTCHA_LIST, CAPTCHA_LEN, CAPTCHA_HEIGHT, CAPTCHA_WIDTH
def convert2gray(img):
    """
    Collapse a multi-channel image array to a single channel.

    Arrays with more than two dimensions are averaged over their last axis;
    arrays with two or fewer dimensions are returned unchanged.

    :param img: numpy array
    :return: numpy array without the channel axis
    """
    if img.ndim <= 2:
        return img
    return np.mean(img, -1)
def text2vec(text, captcha_len=CAPTCHA_LEN, captcha_list=CAPTCHA_LIST):
    """
    Encode captcha text as a flat one-hot vector.

    The vector has captcha_len consecutive segments of len(captcha_list)
    entries; segment i has a 1 at the index of text[i] within captcha_list.

    :param text: captcha text, at most captcha_len characters
    :param captcha_len: number of character positions in the vector
    :param captcha_list: list of allowed characters
    :return: numpy vector of length captcha_len * len(captcha_list)
    :raises ValueError: if text is longer than captcha_len, or contains a
        character that is not in captcha_list
    """
    if len(text) > captcha_len:
        # Report the limit actually in force rather than a fixed number.
        raise ValueError('验证码最长{}个字符'.format(captcha_len))
    segment = len(captcha_list)
    vector = np.zeros(captcha_len * segment)
    for position, char in enumerate(text):
        # Offset into this position's segment, then mark the character's slot.
        vector[position * segment + captcha_list.index(char)] = 1
    return vector
def vec2text(vec, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    """
    Turn a sequence of character indices back into captcha text.

    :param vec: iterable of indices into captcha_list (each is passed through int())
    :param captcha_list: list of allowed characters
    :param captcha_len: not used by this function
    :return: the characters selected by vec, joined into one string
    """
    chars = []
    for index in vec:
        chars.append(captcha_list[int(index)])
    return ''.join(chars)
def wrap_gen_captcha_text_and_image(shape=(CAPTCHA_HEIGHT, CAPTCHA_WIDTH)):
    """
    Fetch captchas until one has the requested array shape.

    :param shape: required value of the image array's shape attribute
    :return: (captcha text, image array) of the first sample that matches
    """
    text, image = get_captcha_text_and_image()
    # Keep drawing new samples while the shape does not match.
    while image.shape != shape:
        text, image = get_captcha_text_and_image()
    return text, image
def get_next_batch(batch_count=60, width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT):
    """
    Assemble one batch of flattened images and their one-hot labels.

    :param batch_count: number of samples in the batch (default 60)
    :param width: captcha width
    :param height: captcha height
    :return: (batch_x, batch_y); batch_x has shape [batch_count, width * height],
        batch_y has shape [batch_count, CAPTCHA_LEN * len(CAPTCHA_LIST)]
    """
    batch_x = np.zeros([batch_count, width * height])
    batch_y = np.zeros([batch_count, CAPTCHA_LEN * len(CAPTCHA_LIST)])
    for i in range(batch_count):
        # Ask for exactly the size the rows were allocated for, so a caller
        # passing a non-default width/height gets matching images.
        text, image = wrap_gen_captcha_text_and_image(shape=(height, width))
        image = convert2gray(image)  # drop a channel axis if there is one
        # Flatten the image into row i; the label vector goes into the same row of batch_y.
        # NOTE(review): images from get_captcha_text_and_image are binarized
        # before they reach this point, so dividing by 255 may shrink them to
        # a very small range - confirm the intended input scale.
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
if __name__ == '__main__':
    # Smoke run: build a batch containing a single sample and print it.
    sample_x, sample_y = get_next_batch(batch_count=1)
    print(sample_x, sample_y)
2.5 模型训练
该py文件中第175、176行(即 step = 0 和 max_acc = 0 两行)可以自行修改步数和上次训练的最大准确率值,从而在上次训练的基础上进行断点续训。
训练程序每当测试准确率创下新高时,就将模型保存到./model/文件夹下;准确率超过0.92后停止训练。
# -*- coding:utf-8 -*-
# name: model_train.py
import tensorflow as tf
from datetime import datetime
from util import get_next_batch
from captcha_get import CAPTCHA_HEIGHT, CAPTCHA_WIDTH, CAPTCHA_LEN, CAPTCHA_LIST
def weight_variable(shape, w_alpha=0.01):
    """
    Create a weight variable initialised with scaled random-normal noise.

    :param shape: shape of the variable
    :param w_alpha: factor applied to the random-normal initial values
    :return: tf.Variable
    """
    noise = tf.random_normal(shape)
    return tf.Variable(w_alpha * noise)
def bias_variable(shape, b_alpha=0.1):
    """
    Create a bias variable initialised with scaled random-normal noise.

    :param shape: shape of the variable
    :param b_alpha: factor applied to the random-normal initial values
    :return: tf.Variable
    """
    noise = tf.random_normal(shape)
    return tf.Variable(b_alpha * noise)
def conv2d(x, w):
    """
    Convolution layer with stride 1 and 'SAME' padding.

    :param x: input tensor
    :param w: filter tensor
    :return: result of tf.nn.conv2d
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """
    Max-pooling layer: 2x2 window, stride 2, 'SAME' padding.

    :param x: input tensor
    :return: result of tf.nn.max_pool
    """
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
def cnn_graph(x, keep_prob, size, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    """
    Build the network: three conv/pool/dropout stages, one fully connected
    layer and a linear output layer.

    :param x: flattened input images
    :param keep_prob: keep probability passed to every dropout layer
    :param size: image size as (height, width)
    :param captcha_list: list of allowed characters
    :param captcha_len: number of characters per captcha
    :return: y_conv, logits of width len(captcha_list) * captcha_len
    """
    image_height, image_width = size
    # The conv layers need a 4-D tensor: [batch, height, width, channels].
    layer = tf.reshape(x, shape=[-1, image_height, image_width, 1])

    # Three identical stages that differ only in channel counts:
    # 3x3 convolution -> ReLU -> 2x2 max pooling -> dropout.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        kernel = weight_variable([3, 3, in_channels, out_channels])
        bias = bias_variable([out_channels])
        activated = tf.nn.relu(conv2d(layer, kernel) + bias)
        layer = tf.nn.dropout(max_pool_2x2(activated), keep_prob)

    # Fully connected layer. The spatial size after three poolings is read
    # from the tensor's static shape rather than computed by hand.
    flat_size = int(layer.shape[1]) * int(layer.shape[2]) * 64
    w_fc = weight_variable([flat_size, 1024])
    b_fc = bias_variable([1024])
    flattened = tf.reshape(layer, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, w_fc) + b_fc)
    hidden = tf.nn.dropout(hidden, keep_prob)

    # Output layer: one logit per (position, character) pair.
    out_size = len(captcha_list) * captcha_len
    w_out = weight_variable([1024, out_size])
    b_out = bias_variable([out_size])
    return tf.matmul(hidden, w_out) + b_out
def optimize_graph(y, y_conv):
    """
    Build the training op.

    :param y: target labels
    :param y_conv: predicted logits
    :return: op that minimises the mean sigmoid cross-entropy with Adam (learning rate 1e-3)
    """
    # The logits are passed in raw; sigmoid_cross_entropy_with_logits applies
    # the sigmoid itself.
    per_unit_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_conv)
    loss = tf.reduce_mean(per_unit_loss)
    return tf.train.AdamOptimizer(1e-3).minimize(loss)
def accuracy_graph(y, y_conv, width=len(CAPTCHA_LIST), height=CAPTCHA_LEN):
    """
    Build the accuracy op.

    Labels and predictions are reshaped to [-1, height, width]; the argmax over
    the last axis gives one character index per position. The result is the
    mean of the position-wise matches.

    :param y: target labels
    :param y_conv: predicted logits
    :param width: number of candidate characters
    :param height: number of characters per captcha (default 4)
    :return: accuracy tensor
    """
    grid = [-1, height, width]
    predicted_idx = tf.argmax(tf.reshape(y_conv, grid), 2)
    label_idx = tf.argmax(tf.reshape(y, grid), 2)
    # 1.0 where the predicted character matches the label, 0.0 elsewhere.
    hits = tf.cast(tf.equal(predicted_idx, label_idx), tf.float32)
    return tf.reduce_mean(hits)
def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, y_size=len(CAPTCHA_LIST)*CAPTCHA_LEN):
    """
    Train the CNN until the test-batch accuracy exceeds the preset target.

    Every 100 steps a fresh batch of 100 samples is evaluated; each new best
    accuracy is checkpointed under ./model/. If a checkpoint already exists
    there, its weights are restored and the step counter continues from the
    step number in the checkpoint name.

    :param height: captcha height
    :param width: captcha width
    :param y_size: label vector length (number of candidate characters * captcha length)
    :return: None
    """
    import os  # local import: only used to prepare the checkpoint folder

    # Tip: if padding the input is ever wanted, border pixels can be added with
    # e.g. np.pad(image, ((2, 3), (2, 2)), 'constant', constant_values=(255,))
    acc_rate = 0.92  # accuracy at which training stops
    model_dir = "./model/"
    model_path = model_dir + "captcha.model"

    # Placeholders sized for the flattened image and the label vector.
    x = tf.placeholder(tf.float32, [None, height * width])
    y = tf.placeholder(tf.float32, [None, y_size])
    # Dropout keep probability: below 1 while training, 1.0 while evaluating.
    keep_prob = tf.placeholder(tf.float32)

    y_conv = cnn_graph(x, keep_prob, (height, width))
    optimizer = optimize_graph(y, y_conv)
    accuracy = accuracy_graph(y, y_conv)
    saver = tf.train.Saver()

    # Make sure the checkpoint folder exists before the first save.
    os.makedirs(model_dir, exist_ok=True)

    step = 0
    max_acc = 0
    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Resume from the latest checkpoint, if there is one.
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Checkpoints are saved with global_step, i.e. as
            # "<model_path>-<step>"; pick the counter up from that suffix.
            try:
                step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[-1])
            except ValueError:
                step = 0

        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
            # Evaluate once every hundred steps.
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={x: batch_x_test, y: batch_y_test, keep_prob: 1.0})
                print(datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
                # Save whenever the accuracy improves on the best seen so far.
                if acc > max_acc:
                    max_acc = acc
                    saver.save(sess, model_path, global_step=step)
                # Stop once the target accuracy has been exceeded.
                if acc > acc_rate:
                    break
            step += 1
# Script entry point: start training when this file is run directly.
if __name__ == '__main__':
    train()
2.6 模型测试
该部分根据需要进行修改,以读取test的验证码即可。
# -*- coding:utf-8 -*-
# name: model_test.py
import os
import csv
import cv2
import numpy as np
import tensorflow as tf
from model_train import cnn_graph
from captcha_gen import gen_captcha_text_and_image
from util import vec2text, convert2gray
from util import CAPTCHA_LIST, CAPTCHA_WIDTH, CAPTCHA_HEIGHT, CAPTCHA_LEN
from PIL import Image
def captcha2text(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH):
    """
    Predict the text of every image in ./test/ and write the results to
    test_label.csv (columns: ID, label).

    The network is rebuilt with cnn_graph and its weights are restored from
    the latest checkpoint in model/.

    :param height: image height the network is built for
    :param width: image width the network is built for
    :return: None
    """
    path = './test/'  # folder holding the captchas to recognise
    filelist = os.listdir(path)

    x = tf.placeholder(tf.float32, [None, height * width])
    keep_prob = tf.placeholder(tf.float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    # One character index per captcha position.
    predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
    saver = tf.train.Saver()

    rows = []  # (file name, predicted text) pairs destined for the csv file
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('model/'))
        for file in filelist:
            image = cv2.imread(path + file)
            if image is None:
                # imread yields None for anything it cannot decode
                # (sub-folders, non-image files); skip those entries.
                continue
            # Resize to the size the placeholder was built for. cv2.resize
            # takes the target size as (width, height).
            image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)
            # NOTE(review): training batches come from captcha_get, which also
            # binarizes and denoises; only grayscale conversion happens here -
            # confirm both pipelines are meant to differ.
            image = convert2gray(image)
            image = image.flatten() / 255
            vector_list = sess.run(predict, feed_dict={x: [image], keep_prob: 1}).tolist()
            # A single image was fed, so the first row is its prediction.
            rows.append((file, vec2text(vector_list[0])))

    # Write the predictions to the csv file.
    headers = ['ID', 'label']
    with open('test_label.csv', 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(rows)
if __name__ == '__main__':
    # text, image = gen_captcha_text_and_image() # generate a random test label and image
    captcha2text() # run the test routine