TensorFlow (3): CNN captcha recognition, getting started with TensorFlow

Source code link: http://blog.topspeedsnail.com/archives/10858

Captcha recognition: dimension derivation

5,000 captcha images and the CSV files: http://pan.baidu.com/s/1eSH4ACa


#! /usr/bin/env python
# -*- coding=utf-8 -*-
import random 
from PIL import Image
import numpy as np
import tensorflow as tf
import logging
FORMAT = '[%(asctime)s, %(levelname)-7s]: %(message)s'

# Write the log to a file
logging.basicConfig(format=FORMAT,filename="train_128.log",filemode='w')
logger = logging.getLogger('Train_128')
logger.setLevel(logging.INFO)


# Image size
# MAX_CAPTCHA: number of characters in the captcha
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 128
MAX_CAPTCHA = 4
print("Maximum number of characters in the captcha text:", MAX_CAPTCHA)   # Captchas here are at most 4 characters; I fix them all at 4, but this is not required. If a captcha is shorter than 4 characters, pad it with '_'
 
# Convert color images to grayscale (color is not useful for recognizing the captcha)
def convert2gray(img):
	if len(img.shape) > 2:
		gray = np.mean(img, -1)
		# The conversion above is faster; the canonical formula is:
		# r, g, b = img[:,:,0], img[:,:,1], img[:,:,2]
		# gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
		return gray
	else:
		return img
 
"""
cnn在图像大小是2的倍数时性能最高, 如果你用的图像大小不是2的倍数,可以在图像边缘补无用像素。
np.pad(image,((2,3),(2,2)), 'constant', constant_values=(255,))  # 在图像上补2行,下补3行,左补2行,右补2行
"""
alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# Text to vector
char_set = alphabet
# If a captcha were shorter than 4 characters, '_' would be used for padding; here CHAR_SET_LEN = 26
CHAR_SET_LEN = len(char_set)


# Each returned vector has 4*26 = 104 dimensions (MAX_CAPTCHA * CHAR_SET_LEN)
def text2vec(text):
	text = text.strip()
	text_len = len(text) 
	#print text
	#print text_len
	#print MAX_CAPTCHA
	if text_len > MAX_CAPTCHA:
		raise ValueError('The captcha is at most 4 characters long')
 
	vector = np.zeros(MAX_CAPTCHA*CHAR_SET_LEN)
	def char2pos(c):
		k = ord(c)-97
		return k
	for i, c in enumerate(text):
		idx = i * CHAR_SET_LEN + char2pos(c)
		vector[idx] = 1
	return vector
# Vector back to text

def vec2text(vec):
	char_pos = vec.nonzero()[0]
	text = []
	for c in char_pos:
		char_idx = c % CHAR_SET_LEN
		char_code = char_idx + 97
		text.append(chr(char_code))
	return "".join(text)
 
"""
#向量(大小MAX_CAPTCHA*CHAR_SET_LEN)用0,1编码每26个编码一个字符,这样位置也有,字符也有
"""
# Data format: two columns, the first is the image file name and the second is its label (a string of length 4)
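# For example, a line in train.csv / test.csv would look like (hypothetical file name):
#   img_0001.png,abcd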

fff = open("test.csv")
x_test = []
y_test = []
while 1:
	line = fff.readline()
	if not line:
		break
	value = line.strip().split(",")
	if len(value) < 2:
		continue  # skip blank or malformed lines
	x_test.append(value[0])
	y_test.append(value[1])
fff.close()
test_data = list(zip(x_test, y_test))  # list() so it can be shuffled and indexed under Python 3

y_train = []
x_train = []

ff = open("train.csv")
while 1:
	line = ff.readline()
	if not line:
		break
	value = line.strip().split(",")
	if len(value) < 2:
		continue  # skip blank or malformed lines
	x_train.append(value[0])
	y_train.append(value[1])
ff.close()
train_data = list(zip(x_train, y_train))  # list() so it can be shuffled and indexed under Python 3

max_train = len(train_data)
print(max_train)

ffle = "all_data_128_64"  # directory holding the captcha images
# Generate one training batch
def get_next_batch(T_d, start, batch_size=64):
	logger.info("start to create batch: from %d, size %d" % (start, batch_size))
	logger.info("image shape: %d x %d" % (IMAGE_HEIGHT, IMAGE_WIDTH))
	batch_x = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])
	batch_y = np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])
 
	for i in range(batch_size):
		image,text = T_d[start+i][0],T_d[start+i][1].strip()
		img = Image.open('%s/%s' % (ffle, image))
		arr = np.asarray(img, dtype="float32")/255.0
		
		batch_x[i, :] = convert2gray(arr).flatten()  # convert2gray handles both RGB and grayscale inputs; alternatively (image.flatten()-128)/128 would zero-center the data
		batch_y[i,:] = text2vec(text)
 
	return batch_x, batch_y
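
# Hedged usage note: get_next_batch(train_data, 0, 64) would return batch_x of shape (64, 8192)
# (flattened 64x128 grayscale images scaled to [0, 1]) and batch_y of shape (64, 104).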
 
 
X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH])
Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])
keep_prob = tf.placeholder(tf.float32) # dropout
 
# Define the CNN
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
	x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
 
	#w_c1_alpha = np.sqrt(2.0/(IMAGE_HEIGHT*IMAGE_WIDTH)) #
	#w_c2_alpha = np.sqrt(2.0/(3*3*32)) 
	#w_c3_alpha = np.sqrt(2.0/(3*3*64)) 
	#w_d1_alpha = np.sqrt(2.0/(8*32*64))
	#out_alpha = np.sqrt(2.0/1024)
 
	# 3 conv layers
	print("xxxx")
	print(X.get_shape())   # flattened image size: 64*128 = 8192
	print(x.get_shape())   # (batch, 64, 128, 1)
	
	#layer_1
	w_c1 = tf.Variable(w_alpha*tf.random_normal([3, 3, 1, 32]))
	b_c1 = tf.Variable(b_alpha*tf.random_normal([32]))
	
	#tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, name=None)
	# x: (?, 64, 128, 1), w_c1: (3, 3, 1, 32)
	# After the convolution there are 32 feature maps; the shape is (?, 64, 128, 32)
	conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
	
	print(conv1.get_shape())
	# After max_pool the shape becomes (?, 32, 64, 32)
	# Unlike convolution, max pooling gives some translation invariance and reduces the amount of computation
	conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
	print(conv1.get_shape())
	
	conv1 = tf.nn.dropout(conv1, keep_prob)
	print(conv1.get_shape())

	#layer_2
	w_c2 = tf.Variable(w_alpha*tf.random_normal([3, 3, 32, 64]))
	b_c2 = tf.Variable(b_alpha*tf.random_normal([64]))
	conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
	# After conv2d: (?, 32, 64, 64)
	conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
	# After pooling: (?, 16, 32, 64)
	conv2 = tf.nn.dropout(conv2, keep_prob)

	#layer_3
	w_c3 = tf.Variable(w_alpha*tf.random_normal([3, 3, 64, 64]))
	b_c3 = tf.Variable(b_alpha*tf.random_normal([64]))
	conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
	# After conv2d: (?, 16, 32, 64)
	conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
	# After pooling: (?, 8, 16, 64)
	conv3 = tf.nn.dropout(conv3, keep_prob)
	print(b_c3.get_shape())
	
	# Fully connected layer: 8*16*64 = 8192 inputs -> 1024 outputs
	w_d = tf.Variable(w_alpha*tf.random_normal([8192, 1024]))
	b_d = tf.Variable(b_alpha*tf.random_normal([1024]))

	# reshape to (-1, 8192); 8*16*64 = 8192
	dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
	# dense has shape (?, 8192) and w_d has shape (8192, 1024), so the output is 1024-dimensional
	
	print(dense.get_shape())
	dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
	
	# dense = (?, 8192) x (8192, 1024) + (1024,)
	# dense = (?, 1024)
	dense = tf.nn.dropout(dense, keep_prob)
	
	# The fully connected layer outputs 1024 dimensions
	# It is followed by an output layer: 1024 -> (4*26)
	w_out = tf.Variable(w_alpha*tf.random_normal([1024, MAX_CAPTCHA*CHAR_SET_LEN]))
	b_out = tf.Variable(b_alpha*tf.random_normal([MAX_CAPTCHA*CHAR_SET_LEN]))
	#dense = (?, 1024)
	#w_out = (1024,104)
	#b_out = (104)
	out = tf.add(tf.matmul(dense, w_out), b_out)
	# out has shape (?, 104)
	
	#out = tf.nn.softmax(out)
	return out
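
# Dimension derivation for one 64x128 input (summarizing the shapes traced above):
#   input            (?, 64, 128, 1)
#   conv1 + pool ->  (?, 32, 64, 32)
#   conv2 + pool ->  (?, 16, 32, 64)
#   conv3 + pool ->  (?, 8, 16, 64)   -> flattened: 8*16*64 = 8192
#   fully connected -> (?, 1024) -> output layer -> (?, 104) = 4 characters x 26 classes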
	
def eval_once():
    # NOTE: unfinished evaluation stub from the original post; it only restores the latest
    # checkpoint and does not run any predictions yet.
    output = crack_captcha_cnn()
    saver = tf.train.Saver()
    sess = tf.Session()
    # Assumes the checkpoints were saved to the current directory, as train_crack_captcha_cnn() does
    ckpt = tf.train.get_checkpoint_state(".")
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print("restored checkpoint at step", global_step)
    else:
        print('No checkpoint file found')
        sess.close()
        return

    try:
        print("aaaa")  # placeholder left by the original author
    finally:
        sess.close()
# Training #####
def train_crack_captcha_cnn():
	output = crack_captcha_cnn()
	
	# loss
	# Before the TensorFlow upgrade this argument was called targets; since upgrading to 1.3 it is labels
	loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
	# What is the difference between using softmax and sigmoid in the final classification layer?
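	# A hedged note (not from the original post): sigmoid cross-entropy treats each of the 104 outputs
	# as an independent binary label, which works here because the target vector simply has four 1s in it.
	# A per-character softmax would instead normalize each block of 26 logits so that exactly one class
	# per character position gets most of the probability. A sketch of that alternative (assumption,
	# left commented out and not verified on this data):
	# loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
	# 	logits=tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]),
	# 	labels=tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN])))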
	
	# Optimizer. To speed up training, the learning_rate should start large and then decay gradually
	optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
 
	predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
	
	# tf.argmax returns the index of the largest value along the specified axis
	# The output is reshaped to (?, 4, 26), so argmax over axis 2 yields 4 predicted indices that are compared with the labels
	max_idx_p = tf.argmax(predict, 2)
	max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
	
	# tf.equal compares the predicted indices with the true ones element-wise
	correct_pred = tf.equal(max_idx_p, max_idx_l)
	
	# This yields booleans; casting them to floats and averaging gives the (per-character) accuracy
	accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
	
	global train_data
	# TensorFlow provides the Saver class to save the model and restore its variables
	saver = tf.train.Saver()


	sess = tf.Session()
	sess.run(tf.global_variables_initializer())
	step = 0
	
	#sess.close()
	#return 
	for kk in range(42):  # epochs
		chee = True
		num = 0
		random.shuffle(train_data)
		while True:
			if num + 64 > max_train:
				break
			batch_x, batch_y = get_next_batch(train_data,num,64)
			logger.info("start to get loss:epochs:%d"%(kk))
			_, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
			logger.info("train of the step:%d loss  %f"%(step,loss_))
			num = num + 64
			# 每100 step计算一次准确率
			if chee:
				chee = False
				random.shuffle(test_data)
				batch_x_test, batch_y_test = get_next_batch(test_data,10,64)
                #feed_dict参数来计算准确率
				acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
				logger.info("the true of the test:%d loss  %f"%(step,acc))
				# 如果准确率大于50%,保存模型,完成训练
				if acc > 0.6 or kk == 41:
					print "out-out-out"
					saver.save(sess, "crack_capcha.model", global_step=step)
					return 
			step += 1
 
train_crack_captcha_cnn()
'''
Saving the model with Saver.save():
sess: the current session, which holds the current variable values
checkpoint_dir + 'model.ckpt' (here "crack_capcha.model"): the file name to save to
global_step: the current training step
'''
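
The script above only trains the network and saves a checkpoint; it never uses the model to recognize a new image. Below is a minimal sketch (not part of the original code) of how the saved checkpoint could be restored for prediction, assuming a fresh process (or a reset default graph) with the definitions above in scope and the checkpoint files in the current working directory:

```python
def crack_captcha(image_path):
	# Minimal sketch (assumption, not from the original post): restore the latest checkpoint
	# written by train_crack_captcha_cnn() and predict one captcha image.
	output = crack_captcha_cnn()
	saver = tf.train.Saver()
	with tf.Session() as sess:
		ckpt = tf.train.get_checkpoint_state(".")  # assumes the checkpoint files are in the current directory
		saver.restore(sess, ckpt.model_checkpoint_path)

		img = Image.open(image_path)
		arr = np.asarray(img, dtype="float32") / 255.0
		img_flat = convert2gray(arr).flatten()

		predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
		idx = sess.run(predict, feed_dict={X: [img_flat], keep_prob: 1.0})[0]

		# Indices 0-25 map back to 'a'-'z', mirroring char2pos() above
		return "".join(chr(c + 97) for c in idx)
```

Usage would be something like `print(crack_captcha("all_data_128_64/xxxx.png"))`, where the file name here is only a hypothetical example.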


Captcha recognition is a common image-recognition problem, and a convolutional neural network (CNN) is one of the usual ways to solve it. Below is a simple CNN captcha-recognition example implemented with TensorFlow 2.

First, import the necessary libraries:

```python
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
```

Next, define some hyperparameters:

```python
IMG_WIDTH = 60
IMG_HEIGHT = 20
BATCH_SIZE = 32
EPOCHS = 10
NUM_CLASSES = 4
```

`IMG_WIDTH` and `IMG_HEIGHT` are the width and height of the input image, `BATCH_SIZE` is the number of samples used per training step, `EPOCHS` is the number of training iterations, and `NUM_CLASSES` is the size of the captcha character set.

Then prepare the dataset. Suppose we have a dataset of 1000 captcha images, each a 60x20 grayscale image, stored in a `data` folder and named `captcha_{i}.png` (`i` from 1 to 1000). We split it into a training set and a test set and preprocess the images with the `ImageDataGenerator` class:

```python
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    'data',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='categorical',
    subset='training')

test_generator = train_datagen.flow_from_directory(
    'data',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='categorical',
    subset='validation')
```

`train_datagen` defines a series of image-augmentation operations, including rescaling, shearing, zooming, rotation, and shifting. `train_generator` and `test_generator` are the generators for the training and test sets.

Next, build the CNN model (the convolutions use `padding='same'` so that the 20-pixel-high input is not reduced to zero height by the three conv/pool stages):

```python
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(IMG_HEIGHT, IMG_WIDTH, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])
```

The model contains 3 convolutional layers, 3 pooling layers, and 2 fully connected layers; each convolutional layer is followed by a max-pooling layer. The last layer is a softmax layer of size `NUM_CLASSES`, used for classification.

Finally, compile the model and start training:

```python
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_generator, epochs=EPOCHS, validation_data=test_generator)
```

After training, the model can be used to predict new captcha images:

```python
def predict_captcha(filename):
    img = tf.keras.preprocessing.image.load_img(filename, color_mode='grayscale', target_size=(IMG_HEIGHT, IMG_WIDTH))
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img /= 255.
    prediction = model.predict(img)
    prediction = np.argmax(prediction, axis=1)
    return prediction[0]
```

The function takes the file name of a captcha image and returns the label of the captcha character predicted by the model.

That is a simple example of captcha recognition with TensorFlow 2.