记一次基于卷积神经网络(CNN)的验证码图片识别

前几天搭建好了 TensorFlow 2 的环境,今天来试验一下神奇的机器学习。
先简单编写一个 Java 程序,收集了 10000 多张验证码图片,并全部进行人工标注(训练素材点击下载),其中 600 多张用来检验预测结果。

先声明一下,验证码是从真实网站上爬取的,如下图,验证码中包含大写字母和数字,文字有旋转,背景和前景色是随机变化的,有斑点和曲线以及短划线干扰,字体看着有点像华文仿宋,大家可以根据是否对自己有用选择下载。
[图片:验证码示例]
所有图片都已经经过人工标注,如以上验证码图片被标记为 2s6h-1586808953525-23a8680c-a80a-4deb-b1ca-a959c73a6c2f.jpg

使用以下代码进行训练。开始运行时要加载所有图片,时间有点长,大概需要 1~2 分钟,请耐心等待。

# coding:utf-8

import numpy as np
import tensorflow as tf

# Directory prefix under which the trained model is saved and reloaded
SAVE_PATH = "E:/tensorflow/vcode-analyzer/"

# Captcha image size in pixels (width, height)
IMAGE_WIDTH, IMAGE_HEIGHT = 130, 40

# Characters that may appear in a captcha: digits 2-9 and upper-case letters
# without I, O and Z
number = list("23456789")
ALPHABET = list("ABCDEFGHJKLMNPQRSTUVWXY")

CHAR_SET = number + ALPHABET
CHAR_SET_LEN = len(CHAR_SET)

# Number of characters in one captcha
MAX_CAPTCHA = 4

# Samples per training batch, and epochs run on each batch
BATCH_SIZE = 128
EPOCH = 4

# Set to 1 to print every mispredicted sample after each training step
TRAIN_DEBUG = 0

def text2vec(text):
    """Encode a captcha label as a one-hot matrix.

    Args:
        text: Sequence of single characters, each either ``bytes`` (e.g. the
            result of ``tf.strings.bytes_split(...).numpy()``) or ``str``.
            Matching against ``CHAR_SET`` is case-insensitive.

    Returns:
        ``np.ndarray`` of shape ``(MAX_CAPTCHA, CHAR_SET_LEN)`` in which row
        ``i`` holds ``1.0`` at the ``CHAR_SET`` index of the i-th character.

    This is best-effort: if a character is not in ``CHAR_SET`` or the label
    is longer than ``MAX_CAPTCHA``, the label is printed and the rows encoded
    so far are returned, with the remaining rows left at zero.
    """
    vector = np.zeros([MAX_CAPTCHA, CHAR_SET_LEN])

    # Decode once up front so the error message below can never raise itself
    # (undecodable bytes become U+FFFD, which is simply "not in CHAR_SET").
    chars = [
        (c.decode("utf-8", errors="replace") if isinstance(c, bytes) else str(c)).upper()
        for c in text
    ]

    try:
        for i, ch in enumerate(chars):
            idx = CHAR_SET.index(ch)   # ValueError: unknown character
            vector[i][idx] = 1.0       # IndexError: more than MAX_CAPTCHA chars
    except (ValueError, IndexError):
        print("exception: " + "".join(chars))
    return vector


def vec2text(vec):
    """Turn a sequence of ``CHAR_SET`` indices back into the captcha string."""
    return "".join(CHAR_SET[index] for index in vec)


# Module-level sample store shared by the loader and the batch generator:
# the two lists are appended to in parallel (one image and one encoded label
# per file), and current_index is the position of the next sample to serve.
train_images, train_texts = [], []
current_index = 0

def load_all_train_data(image_path):
    """Load every ``*.jpg`` matching ``image_path + '*.jpg'`` into memory.

    Each file contributes one grayscale image tensor to ``train_images`` and
    one one-hot label matrix (see ``text2vec``) to ``train_texts``. The label
    is taken from the first ``MAX_CAPTCHA`` bytes of the file name.

    Any previously loaded samples are discarded and ``current_index`` is
    reset, so a later call (e.g. ``predict`` after ``train``) serves only the
    newly loaded directory instead of a mix of both.

    Args:
        image_path: Directory prefix; it is concatenated directly with
            ``'*.jpg'``, so it must end with a path separator.
    """
    import os  # local import: only needed here for basename extraction

    global current_index

    # Clear in place so existing references to the lists stay valid.
    train_images.clear()
    train_texts.clear()
    current_index = 0

    def load_image(image_file):
        image = tf.io.read_file(image_file)
        # Force three channels so rgb_to_grayscale also accepts JPEGs that
        # are stored as single-channel images.
        image = tf.image.decode_jpeg(image, channels=3)
        return tf.image.rgb_to_grayscale(image)

    train_files = tf.data.Dataset.list_files(image_path + '*.jpg')
    for x in train_files:
        # basename works for the platform's separators, unlike a hard-coded
        # split on '\\' which silently mislabels paths that use '/'.
        file_name = os.path.basename(x.numpy())
        label = file_name[:MAX_CAPTCHA]
        # text2vec expects a sequence of single-byte ``bytes`` items.
        label_chars = [label[i:i + 1] for i in range(len(label))]
        train_texts.append(text2vec(label_chars))
        train_images.append(load_image(x))
    return


def get_next_batch(batch_size=128):
    """Return the next ``batch_size`` samples, cycling through the loaded data.

    Samples are served sequentially from ``train_images`` / ``train_texts``
    starting at the module-level ``current_index``, which is advanced by one
    per sample and wraps to the beginning when the end is reached.

    Args:
        batch_size: Number of samples to return.

    Returns:
        Tuple ``(batch_x, batch_y)`` of ``np.ndarray`` with shapes
        ``(batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1)`` and
        ``(batch_size, MAX_CAPTCHA, CHAR_SET_LEN)``.

    Raises:
        IndexError: If no samples have been loaded.
    """
    global current_index

    total = len(train_images)
    if total == 0:
        raise IndexError(
            "no samples loaded; call load_all_train_data() before get_next_batch()")

    batch_x = np.zeros([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA, CHAR_SET_LEN])

    for i in range(batch_size):
        # Modulo keeps the index valid even if it is far beyond the current
        # data size (a single subtraction would not).
        current_index %= total
        batch_x[i, :] = train_images[current_index]
        batch_y[i, :] = train_texts[current_index]
        current_index += 1

    return batch_x, batch_y


def crack_captcha_cnn():
    """Build the (uncompiled) CNN used to recognise captchas.

    Three convolution + PReLU + pooling stages are followed by a dense layer
    whose output is reshaped to ``[MAX_CAPTCHA, CHAR_SET_LEN]`` and passed
    through a default-configured ``Softmax`` layer.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    return tf.keras.Sequential([
        # Stage 1: 32 filters, 2x2 kernel, average pooling
        layers.Conv2D(32, (2, 2)),
        layers.PReLU(),
        layers.AveragePooling2D((2, 2)),
        # Stage 2: 64 filters, 3x3 kernel, max pooling
        layers.Conv2D(64, (3, 3)),
        layers.PReLU(),
        layers.MaxPool2D((2, 2)),
        # Stage 3: 128 filters, 5x5 kernel, max pooling
        layers.Conv2D(128, (5, 5)),
        layers.PReLU(),
        layers.MaxPool2D((2, 2)),
        # Classifier head: one distribution over CHAR_SET per captcha position
        layers.Flatten(),
        layers.Dense(MAX_CAPTCHA * CHAR_SET_LEN),
        layers.Reshape([MAX_CAPTCHA, CHAR_SET_LEN]),
        layers.Softmax(),
    ])


def train(pat):
    """Train the captcha model on the labelled images under *pat*.

    Loads all images, resumes from the model saved under ``SAVE_PATH`` when
    it can be loaded (otherwise builds a fresh one), then runs 500000
    iterations of ``model.fit`` on successive batches. The model is saved
    every 50 iterations, starting with iteration 0.

    Args:
        pat: Image directory prefix, forwarded to ``load_all_train_data``.
    """
    load_all_train_data(pat)

    model_path = SAVE_PATH + 'model'
    try:
        model = tf.keras.models.load_model(model_path)
    except Exception as e:
        # No usable saved model: report why and start from scratch.
        print('#######Exception', e)
        model = crack_captcha_cnn()

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])

    for times in range(500000):
        batch_x, batch_y = get_next_batch(BATCH_SIZE)
        print('times=', times, ' batch_x.shape=',
              batch_x.shape, ' batch_y.shape=', batch_y.shape)
        model.fit(batch_x, batch_y, epochs=EPOCH)

        if TRAIN_DEBUG == 1:
            # Compare per-position argmax of prediction and label; print
            # every sample where any position differs.
            predicted = np.argmax(model.predict(batch_x), axis=2)
            expected = np.argmax(batch_y, axis=2)
            for guess, truth in zip(predicted, expected):
                if not (guess == truth).all():
                    print("TRAIN_DEBUG: 实际 " + vec2text(truth) + ", 预测 " + vec2text(guess))

        if times % 50 == 0:
            print("save model at times=", times)
            model.save(model_path)


def predict(pat, count=1000):
    """Evaluate the saved model on the labelled images under *pat*.

    Loads the images, loads the model saved under ``SAVE_PATH``, then predicts
    ``count`` samples one at a time, printing each result and finally the
    overall success rate. A sample counts as a success only when all
    characters match.

    Args:
        pat: Image directory prefix, forwarded to ``load_all_train_data``.
        count: Number of single-image predictions to run. Samples come from
            ``get_next_batch(1)``, which cycles through the loaded data, so
            images repeat when ``count`` exceeds the number of loaded images.
    """
    load_all_train_data(pat)

    model = tf.keras.models.load_model(SAVE_PATH + 'model')
    success = 0
    for _ in range(count):
        data_x, data_y = get_next_batch(1)
        prediction_value = model.predict(data_x)
        # Reduce the one-hot label / softmax output to character indices.
        data_y = vec2text(np.argmax(data_y, axis=2)[0])
        prediction_value = vec2text(np.argmax(prediction_value, axis=2)[0])

        if data_y.upper() == prediction_value.upper():
            print("y预测=", prediction_value, "y实际=", data_y, "预测成功。")
            success += 1
        else:
            print("y预测=", prediction_value, "y实际=", data_y, "预测失败。")

    # Guard the division so count=0 reports a rate instead of crashing.
    rate = success / count if count > 0 else 0.0
    print("预测", count, "次", "成功率 =", rate)


if __name__ == "__main__":
    # Directories holding the training and the evaluation images.
    # Every file must be named like 222s-xxxxxxxxxxxx.jpg, where "222s" is
    # the correct captcha text and "xxx" is arbitrary.
    train_dir = "D:\\projects\\vcode\\imgs-train\\"
    coded_dir = "D:\\projects\\vcode\\imgs-coded\\"

    # Pass coded_dir to train() instead to train on the evaluation set.
    train(train_dir)
    predict(coded_dir)

收敛得很快,大概几百次循环后 loss 就降到了 0.1 以下,accuracy 持续稳定在 0.9 以上。

save model at times= 300
times= 301  batch_x.shape= (128, 40, 130, 1)  batch_y.shape= (128, 4, 31)
Train on 128 samples
Epoch 1/4
128/128 [==============================] - 0s 656us/sample - loss: 0.1081 - accuracy: 0.9805
Epoch 2/4
128/128 [==============================] - 0s 593us/sample - loss: 0.0164 - accuracy: 0.9941
Epoch 3/4
128/128 [==============================] - 0s 617us/sample - loss: 0.0134 - accuracy: 0.9961
Epoch 4/4
128/128 [==============================] - 0s 570us/sample - loss: 0.0012 - accuracy: 1.0000
times= 302  batch_x.shape= (128, 40, 130, 1)  batch_y.shape= (128, 4, 31)
Train on 128 samples

350次训练后,切换到测试集进行预测

y预测= AX74 y实际= AX74 预测成功。
y预测= UWCB y实际= UWCB 预测成功。
y预测= 6YKS y实际= 6YKS 预测成功。
y预测= DLUT y实际= DLUT 预测成功。
y预测= RK86 y实际= RK86 预测成功。
y预测= YMFV y实际= YMFV 预测成功。
y预测= 9F3K y实际= 9E3K 预测失败。
y预测= JY5D y实际= JY5D 预测成功。
y预测= WT93 y实际= HT93 预测失败。
y预测= G9NG y实际= G9NG 预测成功。
预测 1000 次 成功率 = 0.76

76%的正确率,还不错!要知道,除去加载图片的时间,实际训练的时间还不到1分钟……

可以查看后一篇:发布验证码图片识别服务

  • 1
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值