残差网络实现验证码识别

前言:

数据及标签介绍
在这里插入图片描述

在这里插入图片描述
图片大致长上面这样,分辨率50*200,6位,每位有36种可能性。

这里共1.4w张,二八分割,2900张左右测试集,训练集1.1w张左右。

一、训练
在这里插入图片描述

这里使用的是精简版的ResNet50,有点ResNet18和ResNet50杂交版的感觉,图有点长,直接上代码,哈哈:

import numpy as np

import tensorflow as tf
import h5py

# The 36 classes a captcha position can take: lowercase letters a-z followed
# by digits 0-9. A model class index maps to its character via this list.
LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
          'v', 'w', 'x', 'y', 'z',
          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


def identity_block(X, f, filters, stage, block):
    """Residual block whose shortcut and main path share the same shape.

    The main path is a 1x1 -> fxf -> 1x1 bottleneck (Conv2D + BatchNorm,
    ReLU after the first two stages); the untouched input is added back
    before the final ReLU, so input and output tensors must have identical
    dimensions.

    :param X: input activation tensor, carried over the skip connection
    :param f: int, window size of the middle convolution
    :param filters: list of three ints, filter counts of the three convs
    :param stage: int, used to build unique layer names
    :param block: str, used to build unique layer names
    :return: output activation tensor of the residual block
    """
    conv_prefix = f"res{stage}{block}_branch"
    bn_prefix = f"bn{stage}{block}_branch"
    first_f, middle_f, last_f = filters
    # Weight initializer class; instantiated with a fixed seed per layer.
    init = tf.keras.initializers.GlorotUniform
    # Remember the input so it can be added back after the main path.
    shortcut = X

    # Main path, first component: 1x1 conv -> batch norm -> ReLU.
    out = tf.keras.layers.Conv2D(first_f, (1, 1), strides=(1, 1),
                                 padding='valid', name=f"{conv_prefix}2a",
                                 kernel_initializer=init(seed=0))(X)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2a")(out)
    out = tf.keras.layers.Activation('relu')(out)

    # Second component: fxf conv with 'same' padding keeps the spatial size.
    out = tf.keras.layers.Conv2D(middle_f, (f, f), strides=(1, 1),
                                 padding='same', name=f"{conv_prefix}2b",
                                 kernel_initializer=init(seed=0))(out)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2b")(out)
    out = tf.keras.layers.Activation('relu')(out)

    # Third component: 1x1 conv, no ReLU until after the addition.
    out = tf.keras.layers.Conv2D(last_f, (1, 1), padding='valid',
                                 name=f"{conv_prefix}2c",
                                 kernel_initializer=init(seed=0))(out)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2c")(out)

    # Skip connection: merge the saved input with the main-path output,
    # then apply the activation to the sum.
    out = tf.keras.layers.Add()([out, shortcut])
    return tf.keras.layers.Activation('relu')(out)


def convolutional_block(X, f, filters, stage, block, s=2):
    """Residual block whose shortcut is convolved to match the new shape.

    Same 1x1 -> fxf -> 1x1 bottleneck main path as ``identity_block``, but
    the first conv uses stride ``s``, so the shortcut goes through its own
    strided 1x1 conv + batch norm to make both branches agree in shape
    before the addition.

    :param X: input activation tensor
    :param f: int, window size of the middle convolution
    :param filters: list of three ints, filter counts of the three convs
    :param stage: int, used to build unique layer names
    :param block: str, used to build unique layer names
    :param s: int, stride applied by the first conv and the shortcut conv
    :return: output activation tensor of the residual block
    """
    conv_prefix = f'res{stage}{block}_branch'
    bn_prefix = f'bn{stage}{block}_branch'
    f1, f2, f3 = filters
    init = tf.keras.initializers.GlorotUniform
    shortcut = X

    # Main path, first component: strided 1x1 conv downsamples spatially.
    out = tf.keras.layers.Conv2D(f1, (1, 1), strides=(s, s), padding='valid',
                                 name=f"{conv_prefix}2a",
                                 kernel_initializer=init(seed=0))(X)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2a")(out)
    out = tf.keras.layers.Activation('relu')(out)

    # Second component: fxf conv with 'same' padding.
    out = tf.keras.layers.Conv2D(f2, (f, f), strides=(1, 1), padding='same',
                                 name=f"{conv_prefix}2b",
                                 kernel_initializer=init(seed=0))(out)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2b")(out)
    out = tf.keras.layers.Activation('relu')(out)

    # Third component: 1x1 conv, no ReLU until after the addition.
    out = tf.keras.layers.Conv2D(f3, (1, 1), strides=(1, 1), padding='valid',
                                 name=f"{conv_prefix}2c",
                                 kernel_initializer=init(seed=0))(out)
    out = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}2c")(out)

    # Convolve the shortcut too, so its dimensions match the main path.
    shortcut = tf.keras.layers.Conv2D(f3, (1, 1), strides=(s, s), padding='valid',
                                      name=f"{conv_prefix}1",
                                      kernel_initializer=init(seed=0))(shortcut)
    shortcut = tf.keras.layers.BatchNormalization(axis=3, name=f"{bn_prefix}1")(shortcut)

    out = tf.keras.layers.Add()([out, shortcut])
    return tf.keras.layers.Activation('relu')(out)


def ResNet(input_shape=(50, 200, 3), classes=36):
    """Build a slimmed-down residual network for 6-character captchas.

    :param input_shape: dimensions of the input image tensor
    :param classes: number of classes per character position
    :return: a Keras Model with six softmax heads, one per captcha character
    """
    inputs = tf.keras.layers.Input(input_shape)
    init = tf.keras.initializers.GlorotUniform

    # Zero-pad the image border, then Stage 1:
    # 7x7/2 conv -> batch norm -> ReLU -> 3x3/2 max pool.
    x = tf.keras.layers.ZeroPadding2D((3, 3))(inputs)
    x = tf.keras.layers.Conv2D(64, (7, 7), strides=(2, 2), name='conv1',
                               kernel_initializer=init(seed=0))(x)
    x = tf.keras.layers.BatchNormalization(axis=3, name='bn_conv1')(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D((3, 3), strides=(2, 2))(x)

    # Stage 2: one convolutional block (stride 1) + one identity block.
    x = convolutional_block(x, f=3, filters=[64, 64, 128], stage=2, block='a', s=1)
    x = identity_block(x, 3, [64, 64, 128], stage=2, block='b')

    # Stage 3: one convolutional block (stride 1) + two identity blocks.
    x = convolutional_block(x, f=3, filters=[64, 128, 256], stage=3, block='a', s=1)
    for blk in ('b', 'c'):
        x = identity_block(x, 3, [64, 128, 256], stage=3, block=blk)

    # Stage 4: one convolutional block (stride 2) + one identity block.
    x = convolutional_block(x, f=3, filters=[32, 64, 128], stage=4, block='a', s=2)
    x = identity_block(x, 3, [32, 64, 128], stage=4, block='b')

    # Average-pool, flatten, then one softmax head per captcha character.
    x = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), padding='same')(x)
    x = tf.keras.layers.Flatten()(x)
    heads = []
    for i in range(6):
        heads.append(
            tf.keras.layers.Dense(classes, kernel_initializer=init(seed=0),
                                  activation='softmax', name=f"digit_{i + 1}")(x))

    model = tf.keras.Model(inputs=inputs, outputs=heads, name='ResNet')
    # model.summary()
    # tf.keras.utils.plot_model(model, "model.png")
    return model


def read_h5_data(key, path="dataset.h5"):
    """Read one dataset from an HDF5 file into a NumPy array.

    The file path was previously hard-coded; it is now a parameter with the
    old value as default, so existing callers are unaffected.

    :param key: name of the dataset inside the HDF5 file
    :param path: path of the HDF5 file to read (default: "dataset.h5")
    :return: the dataset contents as a NumPy array
    """
    # The context manager guarantees the file handle is closed even on error;
    # np.array copies the data out so it remains valid after the file closes.
    with h5py.File(path, 'r') as f:
        return np.array(f[key])


def load_data_set(sample_path, refresh=False, train_ratio=0.8):
    """Load the train/verify splits from the prebuilt HDF5 dataset.

    NOTE(review): ``sample_path``, ``refresh`` and ``train_ratio`` are
    currently unused — the splits are read as-is from "dataset.h5". They are
    kept in the signature for caller compatibility; presumably an earlier
    version rebuilt the dataset from raw samples.

    :return: tuple (train_set_x, train_set_y, verify_set_x, verify_set_y)
    """
    keys = ('train_set_x', 'train_set_y', 'verify_set_x', 'verify_set_y')
    return tuple(read_h5_data(key) for key in keys)


if __name__ == "__main__":
    # sample_path / refresh are forwarded but currently ignored by
    # load_data_set, which reads straight from "dataset.h5".
    X_train_orig, Y_train_orig, X_test_orig, Y_test_orig = load_data_set(
        sample_path="/tmp/samples", refresh=False)
    # Scale pixel values from [0, 255] down to [0, 1].
    X_train = X_train_orig / 255
    X_test = X_test_orig / 255
    # Labels go from shape (samples, 6) to a list of 6 one-hot tensors,
    # one per character position, each of shape (samples, 36) — matching
    # the model's six softmax output heads.
    Y_train = [tf.one_hot(Y_train_orig[:, i], len(LABELS)) for i in range(6)]
    Y_test = [tf.one_hot(Y_test_orig[:, i], len(LABELS)) for i in range(6)]
    model = ResNet((50, 200, 3), classes=36)
    # categorical_crossentropy pairs with the one-hot labels built above.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # model = tf.keras.models.load_model('captcha_model_resnet.h5')
    model.fit(X_train, Y_train, epochs=20, batch_size=32)
    model.save("captcha_model_resnet.h5")
    preds = model.evaluate(X_test, Y_test)

在这里插入图片描述
大概在第17次epoch,达到一个比较优的状态。最终跑完20次epoch后,测试集准确率和损失如下:
在这里插入图片描述
可见6位都达到了99%

二、推理
这里采用6位合在一起做推理,评估一下准确率。

import os

import numpy as np
from PIL import Image
import tensorflow as tf

# The 36 classes a captcha position can take: lowercase letters a-z followed
# by digits 0-9. A model class index maps to its character via this list.
LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
          'v', 'w', 'x', 'y', 'z',
          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


def predict(model, img_path):
    """Run the captcha model on one image and return its 6-character text.

    :param model: trained Keras model with six softmax output heads
    :param img_path: path to a 50x200 captcha image
    :return: predicted captcha string, e.g. '3m66b2'
    """
    # Force 3 channels: PNG files often carry an alpha channel, which would
    # break the (50, 200, 3) reshape below.
    image = Image.open(img_path).convert('RGB')
    X = np.array(image)
    # Same [0, 1] scaling as used during training.
    X = X / 255.
    # The original code passed 36 (the class count) as predict()'s second
    # positional argument, i.e. batch_size — a mix-up; for a single image
    # the default batch size is fine.
    preds = model.predict(X.reshape(-1, 50, 200, 3))
    # One argmax per output head -> 6 class indices.
    predicted_digits = [np.argmax(pred, axis=1) for pred in preds]
    # Map each class index to its character and join into the captcha text.
    digits = []
    digits_str = ''
    for digit in predicted_digits:
        digits.append(digit[0])
        digits_str += LABELS[digit[0]]
    print(digits)
    return digits_str


if __name__ == "__main__":
    path = '/tmp/sample_verify'
    model = tf.keras.models.load_model("captcha_model_resnet.h5")

    imgs = []
    for root, dirs, files in os.walk(path):
        for file in files:
            img_path = os.path.join(root, file)
            imgs.append(img_path)
    total_num = 0
    correct_num = 0
    error_num = 0
    for img in imgs:
        total_num += 1
        left = img[:str(img).rindex("_")]
        real_value = left[left.rindex("/") + 1:]
        predict_value = predict(model, img)
        if real_value == predict_value:
            correct_num += 1
        else:
            error_num += 1
            print(f"ERR_样本{total_num}, 真实值:{real_value}, 预测值:{predict_value}")
            print(img)
        print(f"样本{total_num}, 准确数:{correct_num}, 错误数:{error_num}, 准确率: {correct_num / total_num}")

在这里插入图片描述

综合准确率在97.8%左右。

三、预测

import numpy as np
from PIL import Image
import tensorflow as tf

# The 36 classes a captcha position can take: lowercase letters a-z followed
# by digits 0-9. A model class index maps to its character via this list.
LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
          'v', 'w', 'x', 'y', 'z',
          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


def predict(model, img_path):
    """Run the captcha model on one image and return its 6-character text.

    :param model: trained Keras model with six softmax output heads
    :param img_path: path to a 50x200 captcha image
    :return: predicted captcha string, e.g. '3m66b2'
    """
    # Force 3 channels: PNG files often carry an alpha channel, which would
    # break the (50, 200, 3) reshape below.
    image = Image.open(img_path).convert('RGB')
    X = np.array(image)
    # Same [0, 1] scaling as used during training.
    X = X / 255.
    # The original code passed 36 (the class count) as predict()'s second
    # positional argument, i.e. batch_size — a mix-up; for a single image
    # the default batch size is fine.
    preds = model.predict(X.reshape(-1, 50, 200, 3))
    # One argmax per output head -> 6 class indices.
    predicted_digits = [np.argmax(pred, axis=1) for pred in preds]
    # Map each class index to its character and join into the captcha text.
    digits = []
    digits_str = ''
    for digit in predicted_digits:
        digits.append(digit[0])
        digits_str += LABELS[digit[0]]
    print(digits)
    return digits_str


if __name__ == "__main__":
    # The file name encodes the ground truth: this captcha reads '3m66b2'.
    img_path = '/tmp/sample_verify/3m66b2_f1289f385acea49c5a41c7bab135a607.png'
    # Reload the model trained and saved by the training script.
    model = tf.keras.models.load_model("captcha_model_resnet.h5")

    print("result:", predict(model, img_path))

在这里插入图片描述

在这里插入图片描述
没毛病

四、总结
残差,利用激活值a[l+2] = g( z[l+2] + a[l] ),来跳跃传递激活值,从而来避免梯度消失问题,从而可以加深网络的深度。从而学习到更多特征,提升准确率。
在这里插入图片描述
而残差块又分为输入与输出同维度的、不同维度的两种:

在这里插入图片描述
下面是ResNet50的结构:
在这里插入图片描述
x3表示3个identity_block

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值