# 卷积神经网络验证码版本
#
# 最新推荐文章于 2024-07-12 16:16:27 发布
# 小杨变老杨
# 阅读量91
# 点赞数
# 文章标签： cnn python tensorflow
# 本文链接：https://blog.csdn.net/m0_67084346/article/details/128066808
# 版权
import tensorflow as tf
import random
import os
import numpy as np
from PIL import Image  #PIL(Python Image Library)是python的第三方图像处理库

tf.set_random_seed(777) # set the TensorFlow graph-level random seed

# Dataset configuration
train_num = 1000 # maximum number of training images to load
test_num = 100 # maximum number of test images to load

IMG_HEIGHT = 60 # image height in pixels
IMG_WIDTH = 160 # image width in pixels
char_num = 4  # number of characters in one captcha
characters = range(10)  # the character set: digits 0-9
labellen = char_num * len(characters)  # length of the one-hot label for a 4-digit captcha: 4 * 10 = 40
def label2vec(label):
    """
    Encode a captcha label as a flat one-hot vector.

    Position ``i`` of the label owns a slot of ``len(characters)`` entries, and
    the entry matching its digit is set to 1. With 4 digits drawn from 0-9 the
    result has shape (40,).

    :param label: digit sequence, e.g. "1327", which encodes to
        [0,1,0,0,0,0,0,0,0,0,
        0,0,0,1,0,0,0,0,0,0,
        0,0,1,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,1,0,0]
    :return: numpy array of length ``char_num * len(characters)``
    """
    slot_width = len(characters)
    encoded = np.zeros(char_num * slot_width)  # start from an all-zero vector
    for position, digit in enumerate(label):
        encoded[position * slot_width + int(digit)] = 1
    return encoded

def convert2gray(img):
    """Convert a colour image array to grayscale.

    Arrays with two or fewer dimensions are returned unchanged. Otherwise the
    first three channels of the last axis are combined as
    0.2989 * R + 0.5870 * G + 0.1140 * B.
    """
    if len(img.shape) <= 2:
        return img  # already single-channel
    red = img[:, :, 0]
    green = img[:, :, 1]
    blue = img[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

def get_all_files(file_path, num):
    """Load up to `num` captcha images from a directory together with their labels.

    The file name without its extension is taken as the captcha text,
    e.g. ``0982.jpg`` -> ``0982``.

    :param file_path: a string, the directory that contains the images
    :param num: maximum number of images to load; 0 or less loads nothing
    :return: tuple ``(images, labels)`` of numpy arrays. Each image is converted
        to grayscale, divided by 255.0 and reshaped to
        (IMG_HEIGHT, IMG_WIDTH, 1); each label is the vector from label2vec.
    """
    image_list = []   # image data (x)
    label_list = []   # one-hot label data (y)
    # os.listdir yields bare file names such as 0982.jpg; slicing applies the
    # limit before any file is opened, so num == 0 really loads nothing.
    for item in os.listdir(file_path)[:max(num, 0)]:
        # os.path.join picks the right separator on every platform.
        item_path = os.path.join(file_path, item)
        # The context manager closes the underlying file once the pixels are read.
        with Image.open(item_path) as image:
            gray = convert2gray(np.array(image))
        image_array = np.array(gray) / 255.0   # scale pixel values
        image_list.append(image_array.reshape(IMG_HEIGHT, IMG_WIDTH, 1))
        label = os.path.splitext(item)[0]   # '0982.jpg' -> '0982'
        print(label)
        label_list.append(label2vec(label))
    return np.array(image_list), np.array(label_list)
# Build the dataset paths with os.path.join so they resolve on any platform
# (on Windows this yields the same 'vcode_data\train' / 'vcode_data\test').
image_dir = os.path.join('vcode_data', 'train')
test_dir = os.path.join('vcode_data', 'test')
# Training images with their one-hot labels, then the held-out test set.
imgArr, Y_one_hot = get_all_files(image_dir, train_num)
imgArrTest, Y_test = get_all_files(test_dir, test_num)
print(imgArr.shape, len(Y_one_hot))

g_b = 0  # batch cursor: index of the first training sample of the next batch
# Hand-written next_batch: returns one batch of training data per call.
def next_batch(size):
    """Return the next `size` training samples and advance the global cursor.

    :param size: number of samples requested
    :return: tuple ``(xb, yb)`` sliced from imgArr and Y_one_hot. The last batch
        before the end of the data may be shorter than `size`.
    """
    global g_b
    # Wrap to the start once the data is exhausted. Without this the slices
    # below are empty, and the mean cost over zero samples is NaN.
    if g_b >= len(imgArr):
        g_b = 0
    xb = imgArr[g_b:g_b + size]
    yb = Y_one_hot[g_b:g_b + size]
    g_b += size
    return xb, yb

# Hyper-parameters
learning_rate = 0.001 # learning rate passed to the optimizer
training_epochs = 100  # total number of training epochs
batch_size = 100 # number of samples per training batch

# Placeholders for the network inputs
X = tf.placeholder(tf.float32, [None, IMG_HEIGHT, IMG_WIDTH, 1])
Y = tf.placeholder(tf.float32, [None, labellen])  # concatenated one-hot encodings of all digits

with tf.variable_scope('conv1'): # conv layer 1; input is (?, IMG_HEIGHT, IMG_WIDTH, 1)
    # 3x3 kernels, 1 input channel, 32 output channels. stddev=0.01 matches
    # conv2; the default stddev of 1.0 makes the first layer's initial weights
    # 100x larger than the second layer's.
    W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
    L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    L1 = tf.nn.relu(L1)
    # 2x2 max-pooling with stride 2 halves height and width.
    L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1], padding='SAME')

with tf.variable_scope('conv2'): # conv layer 2
    W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01)) # 3x3 kernels, 32 input channels, 64 output channels
    L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
    L2 = tf.nn.relu(L2)
    L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
with tf.variable_scope('fc1'): # fully connected layer 1
    # Flatten (height, width, channels) of the conv output into one feature axis.
    dim = L2.get_shape()[1].value * L2.get_shape()[2].value * L2.get_shape()[3].value
    L2_flat = tf.reshape(L2, [-1, dim])
    W4 = tf.get_variable(name="W4", shape=[dim, 1024], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    b4 = tf.Variable(tf.random_normal([1024]))
    L4 = tf.nn.relu(tf.matmul(L2_flat, W4) + b4, name="fc1")
with tf.variable_scope('softmax'): # output layer: one logit per label slot (labellen in total)
    W5 = tf.get_variable(name="W5", shape=[1024, labellen], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
    b5 = tf.Variable(tf.random_normal([labellen]))
    logits = tf.add(tf.matmul(L4, W5), b5)

# Cost function and optimizer.
# sigmoid cross-entropy suits classes that are independent but not mutually
# exclusive, e.g. an image that may contain both letters and digits.
# softmax cross-entropy suits classes that are independent and mutually
# exclusive, e.g. when digits and letters cannot appear together.
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # optimizer

# Accuracy of the model.
# The flat label vector is split into char_num groups of len(characters)
# entries; the shape comes from the constants instead of a hard-coded 4 x 10,
# so it stays correct if the captcha length or character set changes.
predict = tf.reshape(logits, [-1, char_num, len(characters)])
max_idx_p = tf.argmax(predict, 2)    # predicted digit per position
max_idx_y = tf.argmax(tf.reshape(Y, [-1, char_num, len(characters)]), 2)  # true digit per position
correct_pred = tf.equal(max_idx_p, max_idx_y)
# Mean over every character position, i.e. per-character accuracy, not the
# fraction of captchas that are entirely correct.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Create the session and initialise all global variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Training loop.
print('开始学习...')
# Number of full batches per epoch. Computed once, from the samples actually
# loaded, so a directory holding fewer than train_num images cannot lead to
# empty batches (and a NaN cost).
total_batch = len(imgArr) // batch_size
for epoch in range(training_epochs):
    avg_cost = 0
    g_b = 0  # rewind the batch cursor every epoch, otherwise cost=nan
    for i in range(total_batch):
        batch_xs, batch_ys = next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    if epoch % 10 == 0:
        print('Epoch:', (epoch + 1), 'cost =', avg_cost, 'acc=', sess.run(accuracy, feed_dict={X: imgArrTest, Y: Y_test}))
print('学习完成')

# Accuracy on the test set.
print('正确率:', sess.run(accuracy, feed_dict={X: imgArrTest, Y: Y_test}))

# Check one randomly chosen test sample.
# The index is bounded by the number of test images actually loaded.
r = random.randint(0, len(imgArrTest) - 1)
# The label is already a numpy array, so decode it with numpy rather than
# adding a new op to the graph.
print("标签: ", np.argmax(Y_test[r].reshape(char_num, len(characters)), axis=1))
# Reuse the existing prediction op; [0] drops the batch axis of the single sample.
pre = sess.run(max_idx_p, feed_dict={X: imgArrTest[r:r + 1]})[0]
print("预测: ", pre)