# Source code link: http://blog.topspeedsnail.com/archives/10858
# Captcha recognition: dimension derivation
# Download location for the 5000 captcha images and the CSV files: http://pan.baidu.com/s/1eSH4ACa
#! /usr/bin/env python
# -*- coding=utf-8 -*-
import random
from PIL import Image
import numpy as np
import tensorflow as tf
import logging
# Layout of one log record: timestamp, level name left-aligned in 7 columns, message.
FORMAT = '[%(asctime)s, %(levelname)-7s]: %(message)s'
# Log records go to a file; mode 'w' starts the file afresh on every run.
logging.basicConfig(filename="train_128.log", filemode='w', format=FORMAT)
# Named logger used by the rest of this script; records below INFO are dropped.
logger = logging.getLogger('Train_128')
logger.setLevel(logging.INFO)
# Size, in pixels, of the captcha images fed to the network.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 128
# MAX_CAPTCHA: number of characters in one captcha (fixed at 4 in this script).
# NOTE(review): the original note mentions padding shorter captchas with '_',
# but no such padding is implemented in the visible code.
MAX_CAPTCHA = 4
# Build the message as ONE string before printing.  The rest of this file uses
# the Python 2 print statement, under which print("text", value) prints the
# repr of a 2-tuple (with the text shown as escaped bytes) instead of the text.
print("%s %d" % ("验证码文本最长字符数", MAX_CAPTCHA))
# 把彩色图像转为灰度图像(色彩对识别验证码没有什么用)
def convert2gray(img):
    """Collapse a colour image array into a single grayscale channel.

    Arrays with more than two dimensions are averaged over their last axis;
    arrays with two or fewer dimensions are returned unchanged (the very same
    object, not a copy).
    """
    # Already single-channel: nothing to do.
    if len(img.shape) <= 2:
        return img
    # A plain channel mean is the quick conversion.  The conventional weighted
    # form would be:
    #   r, g, b = img[:,:,0], img[:,:,1], img[:,:,2]
    #   gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return np.mean(img, -1)
"""
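A CNN performs best when the image dimensions are multiples of 2. If your images are not, you can pad the image borders with filler pixels, for example:
np.pad(image,((2,3),(2,2)), 'constant', constant_values=(255,)) # pads 2 rows on top, 3 rows at the bottom, 2 columns on the left and 2 columns on the right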
"""
# Characters a captcha may contain: the 26 lowercase ASCII letters, in order.
alphabet = list('abcdefghijklmnopqrstuvwxyz')
# Character set used by the text <-> vector conversions (same list object).
char_set = alphabet
# Number of distinct characters (26); this is the width of one one-hot group.
CHAR_SET_LEN = len(char_set)
#每一个返回的vector都是一个shape为4*26维度的向量;
def text2vec(text):
    """Encode a captcha string as a flat one-hot vector.

    The result has MAX_CAPTCHA * CHAR_SET_LEN entries.  Character number ``i``
    of the (whitespace-stripped) text sets the entry at
    ``i * CHAR_SET_LEN + (ord(char) - ord('a'))`` to 1; every other entry is 0.
    Texts shorter than MAX_CAPTCHA simply leave the trailing groups all-zero.

    Raises:
        ValueError: if the stripped text is longer than MAX_CAPTCHA, or if it
            contains a character outside the lowercase range covered by the
            character set.
    """
    text = text.strip()
    if len(text) > MAX_CAPTCHA:
        raise ValueError('验证码最长4个字符')
    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
    base = ord('a')
    for i, c in enumerate(text):
        pos = ord(c) - base
        # Reject anything outside the character set.  Without this check a
        # character such as 'A' or '1' yields a negative offset that silently
        # marks a slot belonging to another position (or indexes from the end
        # of the vector), and a large code point fails with a bare IndexError.
        if not 0 <= pos < CHAR_SET_LEN:
            raise ValueError('unsupported captcha character: %r' % c)
        vector[i * CHAR_SET_LEN + pos] = 1
    return vector
# 向量转回文本
def vec2text(vec):
    """Decode a flat one-hot captcha vector back into its text.

    Every non-zero entry of ``vec`` contributes one character: its index
    modulo CHAR_SET_LEN is taken as the offset from 'a' (character code 97).
    Characters are emitted in the order the non-zero indices are reported.
    """
    hot_indices = vec.nonzero()[0]
    return "".join([chr(idx % CHAR_SET_LEN + 97) for idx in hot_indices])
"""
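# The vector (of size MAX_CAPTCHA*CHAR_SET_LEN) is a 0/1 encoding in which each group of 26 entries encodes one character, so it carries both the position and the character.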
"""
#数据类型,两列,第一列是图片的name,第二列是图片的label(一个长度为4的字符串)
def _load_pairs(csv_path):
    """Read a two-column CSV and return (image names, labels) as parallel lists.

    Each non-blank line is split on ','; the first field is taken as the image
    file name and the second as its label.  The label is stored exactly as
    read (including any trailing newline); consumers strip it themselves.

    Raises:
        IndexError: if a non-blank line contains no ',' separator.
    """
    names = []
    labels = []
    # The with-block closes the file even when a row turns out to be malformed
    # (the original never closed the test.csv handle).
    with open(csv_path) as handle:
        for line in handle:
            # A blank line (e.g. a trailing empty line) has no label column;
            # skip it instead of failing on value[1].
            if not line.strip():
                continue
            value = line.split(",")
            names.append(value[0])
            labels.append(value[1])
    return names, labels


# Test set: file names, labels, and the (name, label) pairs.
x_test, y_test = _load_pairs("test.csv")
# list(...) keeps the pairs indexable and shuffleable in place even where zip
# returns an iterator; where zip already returns a list this is just a copy.
test_data = list(zip(x_test, y_test))
# Training set, same layout.
x_train, y_train = _load_pairs("train.csv")
train_data = list(zip(x_train, y_train))
# Number of training samples; bounds the batch loop during training.
max_train = len(train_data)
print(max_train)
# Directory that holds the captcha images named in the CSV files.
ffle = "all_data_128_64"
# 生成一个训练batch
def get_next_batch(T_d, start, batch_size=64):
    """Build one batch of flattened grayscale images and one-hot labels.

    Args:
        T_d: indexable sequence of (image file name, label text) pairs.
        start: index in ``T_d`` of the first sample of the batch.
        batch_size: number of consecutive samples to load.

    Returns:
        (batch_x, batch_y): ``batch_x`` has shape
        [batch_size, IMAGE_HEIGHT*IMAGE_WIDTH] with pixel values scaled to
        0..1; ``batch_y`` has shape [batch_size, MAX_CAPTCHA*CHAR_SET_LEN].

    Raises:
        IndexError: if ``T_d`` holds fewer than ``start + batch_size`` items.
    """
    # Log the real index range; the original logged batch_size as the end index.
    logger.info("start to creat batch: from:%d to %d"%(start, start + batch_size))
    logger.info("the image of the shape:%d %d"%(IMAGE_HEIGHT,IMAGE_WIDTH))
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])
    for i in range(batch_size):
        image, text = T_d[start + i][0], T_d[start + i][1].strip()
        img = Image.open('%s/%s' % (ffle, image))
        arr = np.asarray(img, dtype="float32") / 255.0
        # Average the colour channels only when there are any.  On an image
        # that is already 2-D, a mean over the last axis would collapse the
        # width and leave a row of the wrong length for batch_x.
        if arr.ndim > 2:
            arr = np.mean(arr, -1)
        batch_x[i, :] = arr.flatten()
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
# Network input: one flattened image (IMAGE_HEIGHT*IMAGE_WIDTH values) per row.
X = tf.placeholder(dtype=tf.float32, shape=[None, IMAGE_HEIGHT * IMAGE_WIDTH])
# Targets: one flattened one-hot label (MAX_CAPTCHA*CHAR_SET_LEN values) per row.
Y = tf.placeholder(dtype=tf.float32, shape=[None, MAX_CAPTCHA * CHAR_SET_LEN])
# Dropout keep probability, supplied through feed_dict at run time.
keep_prob = tf.placeholder(dtype=tf.float32)
# 定义CNN
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    """Build the captcha CNN on top of the module-level placeholder ``X``.

    Architecture: three blocks of (3x3 convolution, ReLU, 2x2 max-pool,
    dropout) with 32, 64 and 64 filters, then a 1024-unit fully connected
    layer with dropout, then a linear output layer.

    Args:
        w_alpha: factor applied to the random-normal initial weights.
        b_alpha: factor applied to the random-normal initial biases.

    Returns:
        The un-normalised output tensor (logits) of shape
        (?, MAX_CAPTCHA*CHAR_SET_LEN); no softmax/sigmoid is applied here.
    """
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    # Debug output of the input shapes.
    print("xxxx")
    print(X.get_shape())  # (?, IMAGE_HEIGHT*IMAGE_WIDTH); 64*128 = 8192
    print(x.get_shape())  # (?, 64, 128, 1)

    # Layer 1: 3x3 convolution, 1 -> 32 channels.
    w_c1 = tf.Variable(w_alpha*tf.random_normal([3, 3, 1, 32]))
    b_c1 = tf.Variable(b_alpha*tf.random_normal([32]))
    # 'SAME' padding with stride 1 keeps the spatial size: (?, 64, 128, 32).
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    print(conv1.get_shape())
    # 2x2 max-pool with stride 2 halves height and width: (?, 32, 64, 32).
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    print(conv1.get_shape())
    conv1 = tf.nn.dropout(conv1, keep_prob)
    print(conv1.get_shape())

    # Layer 2: 32 -> 64 channels; (?, 32, 64, 64) after the convolution.
    w_c2 = tf.Variable(w_alpha*tf.random_normal([3, 3, 32, 64]))
    b_c2 = tf.Variable(b_alpha*tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    # After pooling: (?, 16, 32, 64).
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    # Layer 3: 64 -> 64 channels; (?, 16, 32, 64) after the convolution.
    w_c3 = tf.Variable(w_alpha*tf.random_normal([3, 3, 64, 64]))
    b_c3 = tf.Variable(b_alpha*tf.random_normal([64]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    # After pooling: (?, 8, 16, 64).
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)
    print(b_c3.get_shape())

    # Fully connected layer.  The flattened size is derived from conv3's
    # static shape rather than hard-coded: 8*16*64 = 8192 for 64x128 input.
    # With a fixed 8192, other image sizes would either fail or, worse, make
    # reshape(-1, 8192) silently fold pixels into the batch dimension.
    flat_dim = 1
    for dim in conv3.get_shape().as_list()[1:]:
        flat_dim *= dim
    w_d = tf.Variable(w_alpha*tf.random_normal([flat_dim, 1024]))
    b_d = tf.Variable(b_alpha*tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, flat_dim])
    print(dense.get_shape())  # (?, flat_dim)
    # (?, flat_dim) x (flat_dim, 1024) + (1024) -> (?, 1024)
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer: (?, 1024) x (1024, MAX_CAPTCHA*CHAR_SET_LEN) + bias,
    # i.e. (?, 104) for 4 characters over 26 letters.
    w_out = tf.Variable(w_alpha*tf.random_normal([1024, MAX_CAPTCHA*CHAR_SET_LEN]))
    b_out = tf.Variable(b_alpha*tf.random_normal([MAX_CAPTCHA*CHAR_SET_LEN]))
    out = tf.add(tf.matmul(dense, w_out), b_out)
    return out
def eval_once():
    """Restore the latest checkpoint from the "checkpoint" directory.

    Prints 'No checkpoint file found' and returns when there is nothing to
    restore.  Otherwise the variables are restored into a fresh session, a
    placeholder message is printed, and the session is closed again.  The
    function returns None in both cases; no evaluation is implemented yet.

    NOTE(review): the model graph has to be built before this is called,
    because the Saver is created from the variables of the current graph.
    """
    ckpt = tf.train.get_checkpoint_state("checkpoint")
    if not (ckpt and ckpt.model_checkpoint_path):
        print('No checkpoint file found')
        return
    # The original referenced a `saver` that is not defined in this scope
    # (the only Saver is local to the training function), so the restore
    # would have failed with a NameError.  Create one here instead.
    saver = tf.train.Saver()
    sess = tf.Session()
    try:
        # restore() is inside the try so the session is closed even if it fails.
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("aaaa")
    finally:
        sess.close()
# 训练#####
def train_crack_captcha_cnn():
    """Build the network, train it, and save a checkpoint when done.

    Runs up to 42 epochs over the module-level ``train_data`` in batches of
    64, reshuffling it before every epoch.  After the first batch of each
    epoch the accuracy is measured on one batch of the reshuffled
    ``test_data``; training stops and the model is saved as
    "crack_capcha.model" once that accuracy exceeds 0.6, or unconditionally
    at that check in the last epoch.

    The accuracy is the mean over individual character positions, not over
    whole captchas.
    """
    output = crack_captcha_cnn()
    # Loss: sigmoid cross-entropy on the raw logits.  (Older TensorFlow
    # releases named the `labels` argument `targets`.)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output,labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Regroup logits and labels as (batch, MAX_CAPTCHA, CHAR_SET_LEN) and take
    # the argmax over the last axis: one predicted / true letter index per
    # character position.
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    # Boolean match per position, cast to float and averaged.
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Saver stores/restores the model variables.
    saver = tf.train.Saver()
    sess = tf.Session()
    # try/finally so the session is released on every exit path, including
    # the early return after saving (the original never closed it).
    try:
        sess.run(tf.global_variables_initializer())
        step = 0
        for kk in range(42):
            # chee marks "accuracy not yet checked in this epoch".
            chee = True
            num = 0
            # In-place shuffle of the module-level list (no rebinding, so no
            # `global` declaration is needed).
            random.shuffle(train_data)
            # Only full batches are used; a trailing partial batch is skipped.
            while num + 64 <= max_train:
                batch_x, batch_y = get_next_batch(train_data,num,64)
                logger.info("start to get loss:epochs:%d"%(kk))
                _, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
                logger.info("train of the step:%d loss %f"%(step,loss_))
                num = num + 64
                # Accuracy is checked once per epoch, right after its first batch.
                if chee:
                    chee = False
                    random.shuffle(test_data)
                    # 64 test samples starting at index 10 of the shuffled list.
                    batch_x_test, batch_y_test = get_next_batch(test_data,10,64)
                    # Dropout is disabled (keep_prob 1.0) for evaluation.
                    acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                    # The value logged after "loss" here is the accuracy.
                    logger.info("the true of the test:%d loss %f"%(step,acc))
                    # Accuracy above 0.6, or last epoch reached: save and stop.
                    if acc > 0.6 or kk == 41:
                        print("out-out-out")
                        saver.save(sess, "crack_capcha.model", global_step=step)
                        return
                step += 1
    finally:
        sess.close()
# Script entry point: the graph is built and training starts as soon as this
# file is executed (this also happens if the file is imported as a module).
train_crack_captcha_cnn()
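'''
Saving the model with the Saver.save() method:
sess: the current session; it holds the current variable values
checkpoint_dir + 'model.ckpt': the name of the file the model is stored under
global_step: which step training is currently at
'''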