深度学习完全攻略！（连载七：先从基本的模型开始-端到端的验证码识别）

最新推荐文章于 2023-07-22 09:07:47 发布

Aoulun

最新推荐文章于 2023-07-22 09:07:47 发布

阅读量517

点赞数 1

分类专栏：深度学习

本文链接：https://blog.csdn.net/Aoulun/article/details/92846730

版权

深度学习专栏收录该内容

45 篇文章 5 订阅

订阅专栏

直接写FCN跳跃性有点大，那么这一节就用一个简单的例子来回顾一下卷积神经网络。下一节写一写RCNN，再下一节写一些yolo或SSD，再下一节就写FCN，这样保持连贯性。

先看本节内容。

我们把验证码的识别任务转换为一个分类任务，以便于神经网络干它最擅长的事情。这就是端到端的识别，而非传统的先对字符分割，分别训练，最后识别。所以本节任务就很简单，输入一堆验证码的图片，告诉标签，不用其他的操作，直接训练。然后对于模型，也是输入一个图，给个结果。这种方法的效果应该有限。为什么呢，比如说，由26个大写英文字母和1-9数字（共35个字符）构成的4位验证码，就有35^4（约150万）种情况，训练样本如果完全覆盖，不考虑各种变形的情况下，训练数据量就已经很大了。

1. 开始之前

你需要先安装 captcha 库：pip install captcha

2. 正式开始

这一部分参考了MNIST_data 手写字识别程序，以及其他的程序。

2.1 生成训练集和测试集

在使用的时候，训练集很大，我生成了120万张，而对于大写字母和1-9数字构成的验证码，总数超过150万张。

测试集可以随机生成1000张或其他数量。

MY_generate_image.py


from captcha.image import ImageCaptcha
from random import sample
import os
from PIL import Image
import numpy as np
import string

# Module-wide captcha renderer, created with the library's default settings.
image = ImageCaptcha() 
# Label alphabet: 26 upper-case letters followed by the digits 1-9 (35 symbols).
# A symbol's position in this list is its class index in the one-hot labels.
characters =  list("ABCDEFGHIJKLMNOPQRSTUVWXYZ123456789")
# Directory the generated images are written to and later listed from.
svPath = '.\\train_image'  # test_image (presumably the alternative target for the test set)

def generate_data(digits_num, output, total):
    """Render random captcha images into the ``output`` directory.

    Each captcha text consists of ``digits_num`` characters drawn without
    repetition from the module-level ``characters`` list, and is saved as
    ``<text>.jpg`` so that the file name doubles as the label.

    Args:
        digits_num: number of characters per captcha.
        output: directory that receives the images; created if missing.
        total: number of captchas to draw. A text that is drawn more than
            once overwrites the earlier file of the same name, so fewer
            than ``total`` files may end up on disk.
    """
    os.makedirs(output, exist_ok=True)
    for _ in range(total):
        cur_cap = ''.join(sample(characters, digits_num))
        print(cur_cap)
        # write() renders the image itself, so no separate generate() call
        # is needed (the original rendered every captcha twice).
        image.write(cur_cap, os.path.join(output, cur_cap + ".jpg"))


'''
10000000000000000000000000000000000-->A
01000000000000000000000000000000000-->B
......
00000000000000000000000000000000010-->8
00000000000000000000000000000000001-->9
所以：
AB9C可以表示为一个向量：
10000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000100100000000000000000000000000000000
A                                  B                                  9                                  C
'''

# generate label
def generate_lable(batch_size, imgs):
    """Build concatenated one-hot labels from captcha file names.

    The first four characters of every file name are treated as the captcha
    text. Each character becomes a one-hot vector over the module-level
    ``characters`` alphabet, and the four vectors are concatenated.

    Args:
        batch_size: how many entries of ``imgs`` to encode (the first ones).
        imgs: sequence of file names such as ``"AB9C.jpg"``.

    Returns:
        A float array of shape ``(batch_size, 4 * len(characters))``.

    Raises:
        ValueError: if a file name contains a character outside the
            alphabet. (``str.find`` used to return -1 here, which silently
            marked the last class instead.)
    """
    code_len = 4
    class_count = len(characters)
    # Build the lookup once instead of joining and searching per character.
    class_index = {ch: idx for idx, ch in enumerate(characters)}

    Y = np.zeros([batch_size, code_len, class_count])
    for i in range(batch_size):
        name = imgs[i]
        for pos in range(code_len):
            ch = name[pos]
            if ch not in class_index:
                raise ValueError(
                    "character %r in file name %r is not in the captcha alphabet"
                    % (ch, name))
            Y[i, pos, class_index[ch]] = 1
    return np.reshape(Y, (batch_size, code_len * class_count))
    
# Request 1,300,000 four-character captchas. A text that is drawn twice
# overwrites its earlier file, so the directory may hold fewer images.
generate_data(4, svPath, 1300000)

# Encode every generated file name as a one-hot label row. Rows follow the
# order returned by os.listdir().
imgs = os.listdir(svPath)
leng = len(imgs)
labels = generate_lable(leng, imgs)

# Make sure the label directory exists before writing into it.
label_dir = os.path.join('.', 'new')
os.makedirs(label_dir, exist_ok=True)
np.savetxt(os.path.join(label_dir, 'label.txt'), labels, fmt='%d')

2.2 构建模型

模型包括基本的前向网络和反向传播网络（求解损失函数），以及测试代码

MY_train_model.py

#总共 5 层网络，前 3 层为卷积层，第 4、5 层为全连接层。对 4 层隐藏层都进行 dropout。网络结构如下所示： input——>conv——>pool——>dropout——>conv——>pool——>dropout——>conv——>pool——>dropout——>fully connected layer——>dropout——>fully connected layer——>output


# -*- coding: utf-8 -*
import tensorflow as tf
import math
import os
import numpy as np
from PIL import Image, ImageFilter
import string
import sys
import random

# Label alphabet; a character's position in this string is its class index.
characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ123456789"
img_w = 160  # input image width
img_h = 60  # input image height
img_char_num = 4  # characters per captcha
char_tmp_len = len(characters)  # number of classes per character position
batch_size = 64  # samples per training batch

# Training images are listed once at import time and shuffled in place;
# get_x_y() indexes into this list.
svPath = './train_image'
imgs = os.listdir(svPath)
random.shuffle(imgs)
length = len(imgs)
# NOTE(review): `/` is true division on Python 3, so this is a float.
batchNum = length / batch_size
learn_alpha = 1e-4  # learning rate handed to the Adam optimizer

def get_x_y(batch_step, batch_size):
    """Load one batch of grayscale captcha images with their one-hot labels.

    Sample ``j`` of the batch is the file ``imgs[batch_step * batch_size + j]``
    inside ``svPath``. Indices wrap around the end of ``imgs``, so a batch
    that runs past the last file continues from the first one. The label is
    taken from the first ``img_char_num`` characters of the file name.

    Args:
        batch_step: index of the batch within the shuffled file list.
        batch_size: number of samples to load.

    Returns:
        X: array of shape ``(batch_size, img_h, img_w, 1)`` holding the
            pixel values divided by 255.
        Y: array of shape ``(batch_size, img_char_num * char_tmp_len)`` with
            one concatenated one-hot vector per sample.

    Raises:
        ValueError: if ``imgs`` is empty, or if a file name contains a
            character that is not in ``characters``.
    """
    if not imgs:
        raise ValueError("no images found in %s" % svPath)

    X = np.zeros([batch_size, img_h, img_w, 1])
    Y = np.zeros([batch_size, img_char_num, char_tmp_len])
    first = batch_step * batch_size
    # Start at 0: the original loop began at 1, which left sample 0 of every
    # batch as an all-zero image with an all-zero label.
    for j in range(batch_size):
        name = imgs[(first + j) % len(imgs)]
        img = Image.open(os.path.join(svPath, name)).convert('L')
        X[j] = np.reshape(np.array(img), [img_h, img_w, 1]) / 255.0
        for pos in range(img_char_num):
            # index() raises on an unknown character; find() returned -1,
            # which silently selected the last class.
            Y[j, pos, characters.index(name[pos])] = 1
    Y = np.reshape(Y, (batch_size, img_char_num * char_tmp_len))
    return X, Y


def weight_kernel(k_shape, regular):
    """Create a trainable weight variable of shape ``k_shape``.

    Initial values come from a truncated normal distribution with a
    standard deviation of 0.1. ``regular`` is accepted for a future L2
    regularisation hook but is currently unused.
    """
    initial_value = tf.truncated_normal(k_shape, stddev=0.1)
    return tf.Variable(initial_value)

def bias(b_shape):
    """Create a trainable bias variable of shape ``b_shape``.

    Initial values are drawn with ``tf.random_normal`` using its default
    parameters.
    """
    return tf.Variable(tf.random_normal(b_shape))

def conv2(X, wkernel):
    """Convolve ``X`` with ``wkernel`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, wkernel, strides=unit_strides, padding='SAME')

def max_pool(conv2D_R):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(conv2D_R, ksize=window, strides=window, padding='SAME')

def forward_model(X, keep_prob, regular):
    """Build the forward graph: three conv stages and two dense layers.

    Every conv stage is 5x5 convolution -> bias -> ReLU -> 2x2 max pool ->
    dropout, with 32, 64 and 64 output channels. The result is flattened,
    passed through a 1024-unit ReLU layer with dropout, and projected to
    ``img_char_num * char_tmp_len`` raw logits.

    Args:
        X: input image tensor whose last dimension has size 1.
        keep_prob: keep probability passed to every dropout layer.
        regular: forwarded to ``weight_kernel``.

    Returns:
        The logits tensor of the final dense layer (no activation applied).
    """
    feat_w, feat_h = img_w, img_h
    net = X

    # Variables are created weight-then-bias, stage by stage, so their
    # auto-generated names stay in the same order as before.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        kernel = weight_kernel([5, 5, in_channels, out_channels], regular)
        offset = bias([out_channels])
        activated = tf.nn.relu(tf.nn.bias_add(conv2(net, kernel), offset))
        net = tf.nn.dropout(max_pool(activated), keep_prob)
        # 'SAME' pooling with stride 2 halves each side, rounding up.
        feat_w = math.ceil(feat_w / 2)
        feat_h = math.ceil(feat_h / 2)

    # Dense layer with dropout.
    flat_len = 64 * feat_w * feat_h
    fc_weights = weight_kernel([flat_len, 1024], regular)
    fc_offset = bias([1024])
    flattened = tf.reshape(net, [-1, flat_len])
    hidden = tf.nn.relu(tf.nn.bias_add(tf.matmul(flattened, fc_weights), fc_offset))
    hidden = tf.nn.dropout(hidden, keep_prob)

    # Output layer.
    out_len = img_char_num * char_tmp_len
    out_weights = weight_kernel([1024, out_len], regular)
    out_offset = bias([out_len])
    return tf.add(tf.matmul(hidden, out_weights), out_offset)

def backward_model():
    """Train the captcha network and save it once accuracy exceeds 0.99.

    The loss is the mean sigmoid cross-entropy over all output units,
    minimised with Adam at ``learn_alpha``. Training cycles over the full
    batches of ``imgs`` indefinitely. Every 100 steps the per-character
    accuracy is measured on a double-sized batch of the same training
    images with dropout disabled; when it is above 0.99 the model is
    written to ``./train_model/train_model.ckpt`` and training stops.

    Raises:
        ValueError: if there are fewer than ``batch_size`` training images.
    """
    x = tf.placeholder(tf.float32, [None, img_h, img_w, 1])
    y_ = tf.placeholder(tf.float32, [None, img_char_num * char_tmp_len])
    keep_prob = tf.placeholder(tf.float32)
    y = forward_model(x, keep_prob, 0.)

    sigExp = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y)
    loss_ = tf.reduce_mean(sigExp)
    train_step = tf.train.AdamOptimizer(learn_alpha).minimize(loss_)

    # Compare the arg-max class of every character position.
    predict = tf.reshape(y, [-1, img_char_num, char_tmp_len])
    real = tf.reshape(y_, [-1, img_char_num, char_tmp_len])
    correct_prediction = tf.equal(tf.argmax(predict, 2), tf.argmax(real, 2))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Only whole batches are used so that no batch index runs past the end
    # of the file list (the float batch count used before allowed that).
    total = len(imgs)
    train_batches = total // batch_size
    if train_batches < 1:
        raise ValueError(
            "need at least %d training images, found %d" % (batch_size, total))
    eval_size = min(batch_size * 2, total)
    eval_batches = total // eval_size

    # Make sure the checkpoint directory exists before saving into it.
    os.makedirs("./train_model", exist_ok=True)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            # Wrap around to reuse the training examples pass after pass.
            batch_x, batch_y = get_x_y(step % train_batches, batch_size)
            _, loss = sess.run([train_step, loss_],
                               feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.7})
            step += 1
            print ('step:%d,loss:%f' % (step, loss))
            if step % 100 == 0:
                # Rotate through the evaluation batches, always in range.
                eval_index = (step // 100) % eval_batches
                batch_x_test, batch_y_test = get_x_y(eval_index, eval_size)
                acc = sess.run(accuracy,
                               feed_dict={x: batch_x_test, y_: batch_y_test, keep_prob: 1.})
                print ('----------------------->step:%d, accuracy:%f' % (step, acc))
                if acc > 0.99:
                    saver.save(sess, "./train_model/train_model.ckpt")
                    break
            
            
test_images_path = "./test_images"

# The listing is guarded so that importing this module for training does not
# fail when no test directory exists yet.
test_imgs = os.listdir(test_images_path) if os.path.isdir(test_images_path) else []
# NOTE: this rebinds the module-level `length` computed for the training set.
length = len(test_imgs)


def test():
    """Run the saved model on every image in ``test_images_path``.

    The images are loaded from the test directory, with the first
    ``img_char_num`` characters of each file name taken as the expected
    text. The checkpoint ``./train_model/train_model.ckpt`` is restored and
    one ``expected -> predicted`` line is printed per image.

    Raises:
        ValueError: if the test directory is missing or empty.
    """
    if not test_imgs:
        raise ValueError("no test images found in %s" % test_images_path)

    # Load the test set itself; get_x_y() only reads the training directory.
    test_images = np.zeros([len(test_imgs), img_h, img_w, 1])
    for i, name in enumerate(test_imgs):
        img = Image.open(os.path.join(test_images_path, name)).convert('L')
        test_images[i] = np.reshape(np.array(img), [img_h, img_w, 1]) / 255.0
    test_label = [name[:img_char_num] for name in test_imgs]

    x = tf.placeholder(tf.float32, [None, img_h, img_w, 1])
    keep_prob = tf.placeholder(tf.float32)

    y = forward_model(x, keep_prob, 0)
    # Arg-max over the class axis; without an explicit axis the reduction
    # runs over the batch dimension.
    predict = tf.argmax(tf.reshape(y, [-1, img_char_num, char_tmp_len]), axis=2)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # restore() assigns every saved variable, so no initializer is run.
        saver.restore(sess, "./train_model/train_model.ckpt")
        pre_list = sess.run(predict, feed_dict={x: test_images, keep_prob: 1.})

    for expected, class_ids in zip(test_label, pre_list):
        predicted = ''.join(characters[k] for k in class_ids)
        print('%s -> %s' % (expected, predicted))

2.3 调用函数

直接导入调用

MY_train.py

#-*- coding:utf-8 -*-
import MY_train_model


# Script entry point: build the graph and run the training loop.
if __name__ == '__main__':
    MY_train_model.backward_model()

MY_main.py

#-*- coding:utf-8 -*-


import MY_train_model


# Script entry point: run the test routine of the model module.
if __name__ == '__main__':
    MY_train_model.test()

2.4 在anaconda prompt 中

激活tensorflow

activate tensorflow-gpu

运行脚本

python MY_train.py

下一节，我们写一写RCNN。

本文已同步更新到公众号，欢迎订阅

Aoulun

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
深度学习完全攻略！（连载七：先从基本的模型开始-端到端的验证码识别）

直接写FCN跳跃性有点大，那么这一节就用一个简单的例子来回顾一下卷积神经网络。下一节写一写RCNN，再下一节写一些yolo或SSD，再下一节就写FCN，这样保持连贯性。先看本节内容。我们把验证码的识别任务转换为一个分类任务，以便于神经网络干他最擅长的事情。这就是端到端的识别，而非传统的先对字符分割，分别训练，最后识别。所以本节任务就很简单，输入一堆验证码的图片，告诉标签，不用其他的操作，直...
复制链接

扫一扫

专栏目录