The original code was not written by me; someone passed it to me and the original author is unknown, sorry. I later found a similar article on Jianshu, TensorFlow练习20: 使用深度学习破解字符验证码. As a beginner, I plan to record my whole learning process from scratch here.
I. CAPTCHA Recognition
1. Download the images, save them, and convert them to arrays
import os, requests, re
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import numpy as np
from PIL import Image
from io import BytesIO
headers = {
'Accept':'image/webp,image/apng,image/*,*/*;q=0.8',
'Accept-Language':'zh-CN,zh;q=0.8',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
}
def get_captcha(url,i=1):
try:
resp = requests.get(url,headers=headers).content
# [Python中BytesIO的使用场景,什么时间应该使用BytesIO?](https://www.zhihu.com/question/49102468)
# [Python图像处理库PIL的Image模块介绍(一)](https://blog.csdn.net/icamera0/article/details/50654910)
pic = Image.open(BytesIO(resp))
# print(i)
name ='F:\验证码\\test\\' + str(i) + '.jpg'
pic.save(name)
        # Image.ANTIALIAS: high-quality downsampling filter
captcha_image = pic.resize((160, 60),Image.ANTIALIAS)
# [图像数组为什么要转换成字符数组](https://zhidao.baidu.com/question/1513565352058747300.html)
#[图片怎么转为矩阵数组](https://zhidao.baidu.com/question/1580093335915387140.html)
captcha_image = np.array(captcha_image)
return name,captcha_image
except Exception as e:
        print('Error type:', e)
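A minimal usage sketch (the URL is a placeholder, not the site from the original post): get_captcha returns the saved file name and the image as a numpy array, or None if the download fails.
result = get_captcha('https://example.com/captcha.jpg', i=1)
if result:
    name, captcha_image = result
    print(name, captcha_image.shape)   # e.g. F:\验证码\test\1.jpg (60, 160, 3)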
2. Convert the color images to grayscale
Color is of no use for recognizing the captcha; converting to grayscale removes redundant noise and also reduces the dimensionality of the image.
For an image generated by ImageCaptcha, img.shape is <class 'tuple'>: (60, 160, 3).
As explained in python PIL 图像处理 (二), the tuple describes the size of the image array (rows, columns, color channels), so the 3 here is the number of color channels and 60*160 is the pixel grid.
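A quick sanity check (a minimal sketch, not part of the original code) that a 160x60 RGB image really becomes a (60, 160, 3) array:
from PIL import Image
import numpy as np
img = Image.new('RGB', (160, 60))   # PIL sizes are given as (width, height)
print(np.array(img).shape)          # (60, 160, 3) -> (rows, columns, channels)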
def convert2gray(img):
if len(img.shape) > 2:
#
gray = np.mean(img, -1)
return gray
else:
plt.imshow(img)
plt.show()
return img
3. Flatten the image to one dimension
image = image.flatten() / 255
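Dividing by 255 also scales the pixel values into [0, 1]. A minimal sketch of what this does to a 60x160 grayscale array:
import numpy as np
gray = np.zeros((60, 160))
flat = gray.flatten() / 255
print(flat.shape)   # (9600,) -- one row of the training matrix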
4. Define the CNN model
When I first looked at the code below I was completely lost. I started with the article 深度学习之卷积神经网络CNN及tensorflow代码实现示例 to learn what a CNN is; the 《TensorFlow 官方文档》 says that convolutional neural networks are a model tailor-made for image recognition. Other references I used: 卷积神经网络CNN原理以及TensorFlow实现, 卷积神经网络全面解析, and CNN笔记:通俗理解卷积神经网络.
Understanding the CNN model itself is hard, so another angle is to work out why the parameters are passed the way they are.
# Declare three placeholders
# [【Tensorflow】tf.placeholder函数](https://blog.csdn.net/zj360202/article/details/70243127)
X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH])
Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])
keep_prob = tf.placeholder(tf.float32) # dropout
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
# [TensorFlow的reshape操作 tf.reshape](https://blog.csdn.net/lxg0807/article/details/53021859)
x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    # 3 convolutional layers
    # [tf.random_normal()函数](https://blog.csdn.net/dcrmg/article/details/79028043): draws the requested number of values from a normal distribution
    # The filter shape [3, 3, 1, 32] is [filter_height, filter_width, in_channels, out_channels]
w_c1 = tf.Variable(w_alpha*tf.random_normal([3, 3, 1, 32]))
b_c1 = tf.Variable(b_alpha*tf.random_normal([32]))
    # [tf.nn.bias_add和tf.add、tf.add_n](https://blog.csdn.net/weixin_38698649/article/details/80100737); the bias must be one-dimensional
# [TF-卷积函数 tf.nn.conv2d 介绍](https://www.cnblogs.com/qggg/p/6832342.html)
# [tensorflow激活函数relu()的详解](https://blog.csdn.net/jiao_mrswang/article/details/74537066)
# [ReLu(Rectified Linear Units)激活函数](http://www.mamicode.com/info-detail-873243.html)
conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
# [【TensorFlow】tf.nn.max_pool实现池化操作](https://blog.csdn.net/mao_xiao_feng/article/details/53453926)
conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# [TensorFlow学习---tf.nn.dropout防止过拟合](https://blog.csdn.net/huahuazhu/article/details/73649389)
conv1 = tf.nn.dropout(conv1, keep_prob)
w_c2 = tf.Variable(w_alpha*tf.random_normal([3, 3, 32, 64]))
b_c2 = tf.Variable(b_alpha*tf.random_normal([64]))
conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv2 = tf.nn.dropout(conv2, keep_prob)
w_c3 = tf.Variable(w_alpha*tf.random_normal([3, 3, 64, 64]))
b_c3 = tf.Variable(b_alpha*tf.random_normal([64]))
conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv3 = tf.nn.dropout(conv3, keep_prob)
    # Fully connected layer (see the sketch after this function for where 8*20*64 comes from)
w_d = tf.Variable(w_alpha*tf.random_normal([8*20*64, 1024]))
b_d = tf.Variable(b_alpha*tf.random_normal([1024]))
dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
dense = tf.nn.dropout(dense, keep_prob)
    # Output layer
w_out = tf.Variable(w_alpha*tf.random_normal([1024, MAX_CAPTCHA*CHAR_SET_LEN]))
b_out = tf.Variable(b_alpha*tf.random_normal([MAX_CAPTCHA*CHAR_SET_LEN]))
    # [tf.multiply()和tf.matmul()区别](https://www.cnblogs.com/mdumpling/p/8094310.html)
out = tf.add(tf.matmul(dense, w_out), b_out)
#out = tf.nn.softmax(out)
return out
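One of the "why these parameters" questions is the 8*20*64 in the fully connected layer. Each of the three 2x2 max-pool layers halves the height and width (SAME padding rounds up), and conv3 has 64 feature maps, so for a 60x160 input the flattened size is 8*20*64. A small sketch of that arithmetic:
import math
h, w = 60, 160                      # IMAGE_HEIGHT, IMAGE_WIDTH
for _ in range(3):                  # three 2x2 max-pool layers with stride 2
    h, w = math.ceil(h / 2), math.ceil(w / 2)
print(h, w, h * w * 64)             # 8 20 10240 -> first dimension of w_d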
5. Run the CNN model
def crack_captcha(captcha_image):
output = crack_captcha_cnn()
# [Tensorflow系列——Saver的用法](https://blog.csdn.net/u011500062/article/details/51728830)
saver = tf.train.Saver()
with tf.Session() as sess:
        # Reload the model parameters to continue training or to run on test data
        # tf.train.latest_checkpoint automatically finds the most recent checkpoint
saver.restore(sess, tf.train.latest_checkpoint('.\\'))
        # [tf.argmax()以及axis解析](https://blog.csdn.net/qq575379110/article/details/70538051/); returns the index of the largest value
predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1})
text = text_list[0].tolist()
vector = np.zeros(MAX_CAPTCHA*CHAR_SET_LEN)
i = 0
for n in text:
vector[i*CHAR_SET_LEN + n] = 1
i += 1
return vec2text(vector)
6. Convert the vector back to text
After stepping through with the debugger I was puzzled: the variable text already contains the captcha content, so why go to the trouble of converting a vector back to text?
My only guess is that this is the general path for captchas that mix digits and letters; next time I run into such a captcha I will debug it again.
Python's ord() function returns a character's ASCII code, e.g. ord('0') is 48.
text, image = get_image_text()
# print("验证码图像channel:", image.shape) # (60, 160, 3)
# 图像大小
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160
MAX_CAPTCHA = len(text) # If the captcha length is fixed, this can be set to a constant
number = ['0','1','2','3','4','5','6','7','8','9']
char_set = number + ['_'] # If the captcha is shorter than 4 characters, '_' pads it
CHAR_SET_LEN = len(char_set)
def vec2text(vec):
char_pos = vec.nonzero()[0]
text=[]
for i, c in enumerate(char_pos):
char_at_pos = i #c/63
char_idx = c % CHAR_SET_LEN
if char_idx < 10:
char_code = char_idx + ord('0')
elif char_idx <36:
char_code = char_idx - 10 + ord('A')
elif char_idx < 62:
char_code = char_idx- 36 + ord('a')
elif char_idx == 62:
char_code = ord('_')
else:
raise ValueError('error')
text.append(chr(char_code))
return "".join(text)
Why is the image 160*60? Looking at the source of from captcha.image import ImageCaptcha, the generated captcha is 160*60 pixels by default.
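A quick check (a minimal sketch, assuming the captcha package is installed):
from captcha.image import ImageCaptcha
img = ImageCaptcha().generate_image('1234')   # default width=160, height=60
print(img.size)                               # (160, 60)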
II. Training the Model
1. Build the CNN model
This was already covered above.
2. Build the training routine
def train_crack_captcha_cnn():
output = crack_captcha_cnn()
# [tensorflow学习之常用函数总结:tensorflow官方例子中的诸如tf.reduce_mean()这类函数](https://blog.csdn.net/qq_32166627/article/details/52734387)
#loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
# [Tensorflow四种交叉熵函数计算公式:tf.nn.cross_entropy](https://blog.csdn.net/QW_sunny/article/details/72885403)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    # What is the difference between softmax and sigmoid as the final classification layer?
    # Optimizer: to speed up training, the learning rate should start large and then decay gradually (see the sketch after this function)
# [TensorFlow学习(四):优化器Optimizer](https://blog.csdn.net/xierhacker/article/details/53174558)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
max_idx_p = tf.argmax(predict, 2)
max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
correct_pred = tf.equal(max_idx_p, max_idx_l)
# correct_pred = tf.equal(tf.argmax(y_conv,1), y_)
# [【Tensorflow】tf.cast 类型转换 函数](https://blog.csdn.net/zj360202/article/details/70260265)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
step = 0
while True:
batch_x, batch_y = get_next_batch(64)
_, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
print(step, loss_)
            # Evaluate accuracy every 100 steps
if step % 100 == 0:
batch_x_test, batch_y_test = get_next_batch(50)
acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                print('accuracy:', step, acc)
                # If accuracy exceeds 0.99995, save the model and finish training
if acc > 0.99995:
saver.save(sess, ".\\crack_capcha.model", global_step=step)
break
step += 1
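The comment above says the learning rate should start large and then decay, but the code keeps it fixed at 0.001. A hedged sketch (not in the original code) of how such a decay could be wired in with tf.train.exponential_decay:
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(0.01, global_step, decay_steps=1000, decay_rate=0.9, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)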
3. Run the training
# Generate one training batch
def get_next_batch(batch_size=128):
batch_x = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])
batch_y = np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])
    # Sometimes the generated image is not (60, 160, 3)
def wrap_gen_captcha_text_and_image():
        '''Fetch one image and check whether it matches the (60, 160, 3) shape.'''
while True:
try:
text, image = get_image_text()
            except Exception:
                continue
            if image.shape == (60, 160, 3):  # must match the image height/width defined at the top
return text, image
for i in range(batch_size):
text, image = wrap_gen_captcha_text_and_image()
        # Convert to grayscale
image = convert2gray(image)
        # Flatten the image array into one row; the text label goes into the same row of the label array
        batch_x[i,:] = image.flatten() / 255 # (image.flatten()-128)/128 would give zero mean
batch_y[i,:] = text2vec(text)
    # Return this training batch
return batch_x, batch_y
4. Evaluate the trained model
The 《TensorFlow官方文档》 section on model evaluation mentions tf.argmax, which returns the index of the largest value of a tensor along a given axis. Since the label vector consists of 0s and 1s, the index of the 1 is exactly the class label.
acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
This line computes the accuracy of the trained model on a test batch.
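A small numpy sketch of what that accuracy computation does: argmax over the last axis turns the one-hot labels and the logits into per-character indices, and the mean of their element-wise equality is the per-character accuracy.
import numpy as np
labels = np.array([[[0, 1, 0], [1, 0, 0]]])          # 1 sample, 2 characters, 3 classes
logits = np.array([[[0.1, 0.8, 0.1], [0.3, 0.5, 0.2]]])
acc = np.mean(np.argmax(labels, 2) == np.argmax(logits, 2))
print(acc)   # 0.5 -> first character right, second wrong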
5. CAPTCHA recognition with TensorFlow 2
After upgrading to tensorflow 2.11.0, the code is as follows:
import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os
import random
# print("验证码图像channel:", image.shape) # (33, 100, 3)
# 图像大小,根据验证码的像素设置参数
IMAGE_HEIGHT = 33
IMAGE_WIDTH = 100
# MAX_CAPTCHA = len(text)
MAX_CAPTCHA = 4
"""
CNNs perform best when the image dimensions are multiples of 2; if yours are not, you can pad the image edges with filler pixels.
np.pad(image, ((2, 3), (2, 2)), 'constant', constant_values=(255,)) # pad 2 rows on top, 3 on the bottom, 2 columns on the left and 2 on the right
"""
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
hanzi = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z']
other = []
char_set = number + hanzi + other
CHAR_SET_LEN = len(char_set)
value2code = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12,
'd': 13, 'e': 14, 'f': 15,
'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26,
'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32,
'x': 33, 'y': 34, 'z': 35
}
# Text to vector
def text2vec(text):
text_len = len(text)
if text_len > MAX_CAPTCHA:
        raise ValueError('the captcha is at most 4 characters long')
vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
    def char2pos(c):
        k = value2code.get(c)
        if k is None:
            raise ValueError('unknown character: ' + c)
        return k
for i, c in enumerate(text):
idx = i * CHAR_SET_LEN + char2pos(c)
vector[idx] = 1
return vector
code2value = {v: k for k, v in value2code.items()}
# Vector back to text
def vec2text(vec):
char_pos = vec.nonzero()[0]
text = []
for i, c in enumerate(char_pos):
# c/62
char_idx = c % CHAR_SET_LEN
char_pos = code2value.get(char_idx)
if char_pos:
char_pos = str(char_pos)
else:
raise ValueError('error')
text.append(char_pos)
return "".join(text)
# Convert the color image to grayscale (color is of no use for recognizing the captcha)
def convert2gray(img):
if len(img.shape) > 2:
gray = np.mean(img, -1)
        # The conversion above is faster; the standard formula is:
# r, g, b = img[:,:,0], img[:,:,1], img[:,:,2]
# gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
# plt.imshow(img)
# plt.show()
return gray
else:
plt.imshow(img)
plt.show()
return img
####################################################################
tf.compat.v1.reset_default_graph()
# The graph is executed inside a session, but TensorFlow 2.0 and later enables eager execution by default (operations run as soon as they are defined), so it has to be disabled for this graph-style code.
tf.compat.v1.disable_eager_execution()
X = tf.compat.v1.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
Y = tf.compat.v1.placeholder(tf.float32,[None,MAX_CAPTCHA*CHAR_SET_LEN])
keep_prob = tf.compat.v1.placeholder(tf.float32) # dropout
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
x = tf.reshape(X,shape=[-1,IMAGE_HEIGHT,IMAGE_WIDTH,1])
    # 3 convolutional layers
    w_c1 = tf.compat.v1.Variable(w_alpha*tf.compat.v1.random_normal([3,3,1,32])) # first-layer weights
    b_c1 = tf.compat.v1.Variable(b_alpha*tf.compat.v1.random_normal([32])) # first-layer bias
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x,w_c1,strides=[1,1,1,1],padding='SAME'),b_c1)) # convolution
    conv1 = tf.nn.max_pool(conv1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME') # max pooling
    conv1 = tf.nn.dropout(conv1,keep_prob) # dropout
print("conv1.shape------",conv1.shape)
w_c2 = tf.compat.v1.Variable(w_alpha * tf.compat.v1.random_normal([3, 3, 32, 64]))
b_c2 = tf.compat.v1.Variable(b_alpha * tf.compat.v1.random_normal([64]))
conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv2 = tf.nn.dropout(conv2, keep_prob)
print("conv2.shape------",conv2.shape)
w_c3 = tf.compat.v1.Variable(w_alpha * tf.compat.v1.random_normal([3, 3, 64, 64]))
b_c3 = tf.compat.v1.Variable(b_alpha * tf.compat.v1.random_normal([64]))
conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv3 = tf.nn.dropout(conv3, keep_prob)
print("conv3.shape------",conv3.shape)
    # Fully connected layer
    # 5*13*64 comes from conv3's output: three 2x2 poolings shrink 33 -> 17 -> 9 -> 5 and 100 -> 50 -> 25 -> 13, and conv3 has 64 feature maps
w_d = tf.compat.v1.Variable(w_alpha * tf.compat.v1.random_normal([5*13*64, 1024]))
b_d = tf.compat.v1.Variable(b_alpha * tf.compat.v1.random_normal([1024]))
dense = tf.reshape(conv3,[-1,w_d.get_shape().as_list()[0]])
dense = tf.nn.relu(tf.add(tf.matmul(dense,w_d),b_d))
dense = tf.nn.dropout(dense,keep_prob)
print("dense.shape------",dense.shape)
    # Output layer
w_out = tf.compat.v1.Variable(w_alpha * tf.compat.v1.random_normal([1024,MAX_CAPTCHA*CHAR_SET_LEN]))
b_out = tf.compat.v1.Variable(b_alpha * tf.compat.v1.random_normal([MAX_CAPTCHA*CHAR_SET_LEN]))
out = tf.add(tf.matmul(dense,w_out),b_out)
print("out.shape------",out.shape)
return out
def crack_captcha(output,captcha_image):
saver = tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
        # Path where the trained model is stored; it is loaded from the working directory here
saver.restore(sess, tf.train.latest_checkpoint('.'))
predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1})
text = text_list[0].tolist()
vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
i = 0
for n in text:
vector[i * CHAR_SET_LEN + n] = 1
i += 1
return vec2text(vector)
output = crack_captcha_cnn()
def cnn_predict(image):
image = image.resize((100, 33), Image.ANTIALIAS)
image = np.array(image)
image = convert2gray(image)
    # Flatten the image to one dimension
image = image.flatten() / 255
predict_text = crack_captcha(output, image) # 导入模型识别
print("\033[1;31;40m 预测值: {} \033[0m".format(predict_text))
return predict_text
def run(img):
predict_text = cnn_predict(img)
print(predict_text)
return predict_text
def get_name_and_image():
file_path = r'D:/appworks/aiwork/bsai/images/bw.bwjf.com/'
all_image = os.listdir(file_path)
    random_file = random.randint(0, len(all_image) - 1)
base = os.path.basename(file_path+all_image[random_file])
name = os.path.splitext(base)[0]
image = Image.open(file_path+all_image[random_file])
# image = image.resize((100, 40), Image.ANTIALIAS)
image = np.array(image)
return name,image
def get_next_batch(batch_size=64):
'''
    By default, sample 64 captcha images as one training batch.
:param batch_size:
:return:
'''
batch_x = np.zeros([batch_size,IMAGE_WIDTH*IMAGE_HEIGHT])
batch_y = np.zeros([batch_size,MAX_CAPTCHA*CHAR_SET_LEN])
for i in range(batch_size):
name,image = get_name_and_image()
image = convert2gray(image)
        # flatten: e.g. turn a [60,160,3] image into a 1-D vector of 60*160*3 values
        # 1*(image.flatten()) would instead give a single row of only 0s and 1s (a 1 x (114*450) matrix for a binarized image of that size)
batch_x[i,:] = image.flatten()/255
# batch_x[i,:] = 1*(image.flatten())
batch_y[i,:] = text2vec(name)
return batch_x,batch_y
def train_crack_captcha_cnn():
output = crack_captcha_cnn()
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output,labels=Y))
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
predict = tf.reshape(output,[-1,MAX_CAPTCHA,CHAR_SET_LEN])
max_idx_p = tf.argmax(predict,2)
max_idx_l = tf.argmax(tf.reshape(Y,[-1,MAX_CAPTCHA,CHAR_SET_LEN]),2)
correct_pred = tf.equal(max_idx_p,max_idx_l)
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
saver = tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
step = 0
while True:
batch_x,batch_y = get_next_batch(64)
            _, loss_, accuracy_ = sess.run([optimizer, loss, accuracy], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.3})
print("step{} loss={} accuracy={}".format(step,loss_,accuracy_))
if step%100==0:
batch_x_test,batch_y_test = get_next_batch(50)
acc = sess.run(accuracy,feed_dict={X:batch_x_test,Y:batch_y_test,keep_prob:1})
print(step,acc)
if acc>0.98:
saver.save(sess,"./model/crack_capcha.model", global_step=step)
break
step += 1
if __name__ == '__main__':
# name,image = get_name_and_image()
# print(name,image.shape)
train_crack_captcha_cnn()
Running this produced the following error:
File "C:\Python\Python310\lib\site-packages\tensorflow\python\client\session.py", line 1454, in _call_tf_sessionrun
return tf_session.TF_SessionRun_wrapper(self._session, options, feed_dict,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [26,248] vs. [64,248]
[[{{node logistic_loss/mul}}]]
During handling of the above exception, another exception occurred: