基于开源验证码生成库,结合 CNN 和 RNN 算法识别常见字符验证码

提示:本篇文章仅供学习,请勿用于非法用途!


前言

本篇文章涉及两部分内容。
1.基于开源验证码生成库生成训练集、验证集(训练前准备)
2.结合 CNN 和 RNN 算法代码
关于 CNN 算法的介绍
本代码来源


一、常见开源验证码生成库合集

| 语言 | 验证码库名称 | 链接 | 样例 |
| --- | --- | --- | --- |
| Java | JCaptcha | 示例 | [图片] [图片] [图片] |
| Java | JCaptcha4Struts2 | 示例 | [图片] |
| Java | SimpleCaptcha | 示例 | [图片] [图片] [图片] |
| Java | kaptcha | 示例 | [图片] [图片] [图片] [图片] |
| Python | kaptcha | 示例 | [图片] [图片] [图片] [图片] |
| Java | patchca | 示例 | [图片] |
| Java | imageRandom | 示例 | [图片] |
| Java | iCaptcha | 示例 | [图片] |
| Java | SkewPassImage | 示例 | [图片] |
| Java | Cage | 示例 | [图片] [图片] |
| Python | captcha | 示例 | [图片] |
| Python | pycapt | 示例 | [图片] |
| PHP | Gregwar/Captcha | 示例 | [图片] |
| PHP | mewebstudio/captcha | 示例 | [图片] |

参考来源1

二、tensorflow库安装

Python 版本:3.7.4。该版本下可以直接使用 pip 安装,以下两条命令任选其一(tensorflow-cpu 为仅支持 CPU 的版本):
pip install tensorflow-cpu
pip install tensorflow

三、源代码

代码来源:Keras 官网示例
https://keras.io/examples/vision/captcha_ocr/

import os

import cv2
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import load_model
import time
import random

# 定义层结构
class CTCLayer(layers.Layer):
    """Pass-through layer that attaches the CTC loss to the model.

    The layer receives the ground-truth labels and the per-timestep
    predictions, registers the CTC batch cost through ``add_loss`` and
    returns the predictions unchanged.
    """

    def __init__(self, name=None, **kwargs):
        """Create the layer.

        Args:
            name: Optional layer name.
            **kwargs: Standard base-layer arguments (for example ``trainable``
                or ``dtype``). They are forwarded to ``layers.Layer`` so the
                layer can be re-created from a saved configuration, which
                passes these keys to the constructor.
        """
        super().__init__(name=name, **kwargs)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        """Register the CTC loss for this batch and return ``y_pred`` as is.

        Args:
            y_true: Batch of encoded labels; axis 0 is the batch and axis 1
                is used as the label length.
            y_pred: Batch of predictions; axis 1 is used as the number of
                time steps fed to the CTC loss.

        Returns:
            ``y_pred`` unchanged.
        """
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # The loss function takes one length per sample, so broadcast the
        # scalar lengths to (batch, 1) columns.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # The layer output is just the predictions; the loss is carried by
        # `add_loss` above.
        return y_pred

class captch():

    def __init__(
        self,
        data_dir=r"C:\Users\zywtf\Desktop\smile_captch",
        batch_size=16,
        img_width=110,
        img_height=40,
        max_length=4,
    ):
        """Index the labelled captcha images and build the character lookups.

        Image files are expected to be named ``<label>_<anything>.jpg`` (or
        just ``<label>.jpg``); the label is the file name up to ``.jpg``,
        cut at the first underscore.

        Args:
            data_dir: Folder containing the labelled ``.jpg`` images.
            batch_size: Batch size used for training and prediction.
            img_width: Width the images are resized to.
            img_height: Height the images are resized to.
            max_length: Maximum number of characters kept per decoded
                prediction.
        """
        # Folder holding all labelled images.
        self.data_dir = self.get_all_image(data_dir)
        # All image paths, sorted and then shuffled in place.
        self.images = sorted(map(str, self.data_dir.glob("*.jpg")))
        random.shuffle(self.images)
        # Label = file name without extension, cut at the first underscore.
        self.labels = [
            img.split(os.path.sep)[-1].split(".jpg")[0].split('_')[0]
            for img in self.images
        ]
        print(self.images[:10])
        print(self.labels[:10])
        # Sorted list of every distinct character that occurs in the labels.
        self.characters = sorted({char for label in self.labels for char in label})
        print(self.characters)

        # Batch size for training / validation / prediction.
        self.batch_size = batch_size
        # Target image width and height.
        self.img_width = img_width
        self.img_height = img_height
        # Maximum number of characters in one captcha.
        self.max_length = max_length
        # Characters -> integer ids.
        self.char_to_num = layers.StringLookup(
            vocabulary=list(self.characters), mask_token=None
        )
        # Integer ids -> characters.
        self.num_to_char = layers.StringLookup(
            vocabulary=self.char_to_num.get_vocabulary(), mask_token=None, invert=True
        )

    # 分割数据集,将全部数据分割成,训练集,验证集
    def split_data(self, images, labels, train_size=0.9, shuffle=True):
        """Split paired images and labels into training and validation parts.

        Args:
            images: Sequence or array of samples (here: image paths).
            labels: Sequence or array of labels, one per sample.
            train_size: Fraction of the samples that goes to the training
                part; the count is truncated with ``int``.
            shuffle: When true, the sample order is randomised with
                ``np.random.shuffle`` before splitting.

        Returns:
            ``(x_train, x_valid, y_train, y_valid)`` as NumPy arrays.

        Raises:
            ValueError: If ``images`` and ``labels`` differ in length.
        """
        # Index arrays only work on NumPy arrays, so accept plain lists too.
        images = np.asarray(images)
        labels = np.asarray(labels)
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )

        size = len(images)
        indices = np.arange(size)
        if shuffle:
            np.random.shuffle(indices)

        train_samples = int(size * train_size)
        train_idx = indices[:train_samples]
        valid_idx = indices[train_samples:]
        # The same index arrays are applied to both inputs so every sample
        # stays paired with its label.
        return images[train_idx], images[valid_idx], labels[train_idx], labels[valid_idx]

    # 开始训练
    def train(self):
        """Continue training the saved model on the indexed captcha images.

        The image paths and labels collected by the constructor are split
        into training and validation parts, turned into batched datasets and
        fed to a model loaded from disk. Training stops early when
        ``val_loss`` stops improving; the best model is checkpointed during
        training and the final model is saved afterwards.
        """

        def as_batches(paths, texts):
            # Path/label pairs -> decoded samples -> batched, prefetched dataset.
            pairs = tf.data.Dataset.from_tensor_slices((paths, texts))
            encoded = pairs.map(
                self.encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
            )
            return encoded.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

        # At this point the samples are still file paths.
        x_train, x_valid, y_train, y_valid = self.split_data(
            np.array(self.images), np.array(self.labels)
        )
        train_dataset = as_batches(x_train, y_train)
        validation_dataset = as_batches(x_valid, y_valid)

        # Resume from the model stored on disk. To train from scratch, build
        # a new one with `self.build_model()` instead; `self.show(...)` can be
        # used to preview `train_dataset`.
        model = load_model(r"C:\Users\zywtf\Desktop\110_model")
        model.summary()

        # Stop once the validation loss has not improved for 10 epochs and
        # roll back to the best weights seen.
        stop_early = keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=10, restore_best_weights=True
        )
        # Overwrite the checkpoint only when the validation loss improves.
        keep_best = keras.callbacks.ModelCheckpoint(
            filepath=r"C:\Users\zywtf\Desktop\110_model",
            monitor='val_loss',
            save_best_only=True,
        )

        model.fit(
            train_dataset,
            validation_data=validation_dataset,
            epochs=30,
            callbacks=[stop_early, keep_best],
        )
        # NOTE(review): the final save goes to "100_model" while loading and
        # checkpointing use "110_model" - confirm this difference is intended.
        model.save(r"C:\Users\zywtf\Desktop\100_model")

    # 开始验证
    def predict(self):
        """Recognise every ``.jpg`` in the prediction folder and plot the results.

        The saved model is loaded and cut down to the part between the
        ``image`` input and the ``dense2`` output. Each batch of images is
        passed through it, the output is decoded to text and the images are
        displayed with their predicted text as the title.

        Returns:
            None. If the folder contains no ``.jpg`` files a message is
            printed and nothing else happens.
        """
        load_options = tf.saved_model.LoadOptions(experimental_io_device="/job:localhost")
        # Load the trained model.
        model = load_model(r"C:\Users\zywtf\Desktop\fmodel", options=load_options)
        prediction_model = keras.models.Model(
            model.get_layer(name="image").input, model.get_layer(name="dense2").output
        )

        # Collect the images to recognise. Match on the file extension
        # (case-insensitively) rather than on "jpg" appearing anywhere in
        # the name.
        dir_list = r'C:\Users\zywtf\Desktop\验证码存放'
        x_predict = [
            os.path.join(dir_list, file_name)
            for file_name in os.listdir(dir_list)
            if file_name.lower().endswith(".jpg")
        ]
        if not x_predict:
            # An empty list would otherwise produce a dataset of the wrong
            # dtype and fail inside the input pipeline.
            print(f"No .jpg images found in {dir_list}")
            return
        # The pipeline needs a label for every image; the value is a
        # placeholder and is not used for prediction.
        y_predict = ["abs2"] * len(x_predict)

        test_set = tf.data.Dataset.from_tensor_slices((x_predict, y_predict))
        test_set = (
            test_set.map(
                self.encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
            )
            .batch(self.batch_size)
            .prefetch(buffer_size=tf.data.AUTOTUNE)
        )

        columns = 4
        # Predict batch by batch and show each batch in its own figure.
        for batch in test_set:
            batch_images = batch["image"]
            print(batch_images.shape)
            print(len(batch_images))
            # Run the model, then decode its output into text.
            preds = prediction_model.predict(batch_images)
            pred_texts = self.decode_batch_predictions(preds)

            # Keep the 4x4 layout for batches of up to 16 images and add rows
            # for larger batches so indexing never leaves the grid.
            rows = max(4, -(-len(pred_texts) // columns))
            _, ax = plt.subplots(rows, columns, figsize=(15, 5), squeeze=False)
            for i, text in enumerate(pred_texts):
                img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
                # Samples are stored transposed; flip back for display.
                img = img.T
                cell = ax[i // columns, i % columns]
                cell.imshow(img, cmap="gray")
                cell.set_title(f"Prediction: {text}")
                cell.axis("off")
            # Hide the cells left over when the batch does not fill the grid.
            for cell in ax.flat[len(pred_texts):]:
                cell.axis("off")

            plt.show()


    # 读取所有训练集、测试集图片
    def get_all_image(self, dir_path):
        """Return ``dir_path`` wrapped in a ``pathlib.Path``.

        No files are read here; the caller globs the returned path itself.
        """
        return Path(dir_path)

    # 将图片由路径读取成byte编码,然后转化为张量
    def encode_single_sample(self, img_path, label):
        """Load one image and encode its label for the model.

        Args:
            img_path: Path of the image file.
            label: Label text for that image.

        Returns:
            A dict with ``"image"`` (float32 grayscale tensor, resized and
            transposed so the width comes first) and ``"label"`` (the label's
            characters mapped to integer ids).
        """
        raw_bytes = tf.io.read_file(img_path)
        # Decode as a single-channel (grayscale) image.
        # NOTE(review): the dataset files are .jpg but the PNG decoder is
        # used; the TensorFlow docs state this op also accepts JPEG data -
        # confirm for the installed version.
        pixels = tf.io.decode_png(raw_bytes, channels=1)
        # Scale to float32 in the [0, 1] range.
        pixels = tf.image.convert_image_dtype(pixels, tf.float32)
        target_size = [self.img_height, self.img_width]
        pixels = tf.image.resize(pixels, target_size)
        # Swap height and width so the time dimension of the sequence model
        # runs along the image width.
        pixels = tf.transpose(pixels, perm=[1, 0, 2])
        # Split the label into characters and map each one to its id.
        chars = tf.strings.unicode_split(label, input_encoding="UTF-8")
        encoded_label = self.char_to_num(chars)
        # The model takes two named inputs, hence the dict.
        return {"image": pixels, "label": encoded_label}

    # 展示
    def show(self, train_dataset):
        """Plot samples from the first batch of ``train_dataset`` with their labels.

        At most 16 samples (a 4x4 grid) are shown; fewer if the batch is
        smaller.

        Args:
            train_dataset: Batched dataset whose elements are dicts with
                ``"image"`` and ``"label"`` entries.
        """
        rows, columns = 4, 4
        _, ax = plt.subplots(rows, columns, figsize=(10, 5))
        for batch in train_dataset.take(1):
            images = batch["image"]
            labels = batch["label"]
            # Never index past the grid or past the end of the batch (the
            # previous fixed count of 24 did both).
            shown = min(rows * columns, len(images))
            for i in range(shown):
                img = (images[i] * 255).numpy().astype("uint8")
                label = tf.strings.reduce_join(self.num_to_char(labels[i])).numpy().decode("utf-8")
                cell = ax[i // columns, i % columns]
                # Samples are stored transposed; flip back for display.
                cell.imshow(img[:, :, 0].T, cmap="gray")
                cell.set_title(label)
                cell.axis("off")
        plt.show()

    # 构建模型
    def build_model(self):
        """Build and compile the CNN + bidirectional-LSTM model with a CTC loss.

        Returns:
            A compiled Keras model named ``ocr_model_v1`` that takes the
            ``image`` and ``label`` inputs and outputs the per-timestep
            character probabilities (the CTC loss is attached by the final
            ``CTCLayer``).
        """

        def conv_pool(tensor, filters, conv_name, pool_name):
            # One 3x3 convolution followed by a 2x2 max-pool.
            tensor = layers.Conv2D(
                filters,
                (3, 3),
                activation="relu",
                kernel_initializer="he_normal",
                padding="same",
                name=conv_name,
            )(tensor)
            return layers.MaxPooling2D((2, 2), name=pool_name)(tensor)

        # Model inputs: the (width, height, 1) image and its encoded label.
        image_input = layers.Input(
            shape=(self.img_width, self.img_height, 1), name="image", dtype="float32"
        )
        label_input = layers.Input(name="label", shape=(None,), dtype="float32")

        # Two convolution blocks.
        features = conv_pool(image_input, 32, "Conv1", "pool1")
        features = conv_pool(features, 64, "Conv2", "pool2")

        # Two 2x2 max-pools shrink each spatial dimension by 4 and the last
        # convolution has 64 filters, so flatten height and channels into one
        # feature axis and keep the width as the sequence axis.
        time_steps = self.img_width // 4
        feature_size = (self.img_height // 4) * 64
        sequence = layers.Reshape(target_shape=(time_steps, feature_size), name="reshape")(features)
        sequence = layers.Dense(64, activation="relu", name="dense1")(sequence)
        sequence = layers.Dropout(0.2)(sequence)

        # Recurrent part.
        sequence = layers.Bidirectional(
            layers.LSTM(128, return_sequences=True, dropout=0.25)
        )(sequence)
        sequence = layers.Bidirectional(
            layers.LSTM(64, return_sequences=True, dropout=0.25)
        )(sequence)

        # Per-timestep distribution over the vocabulary plus one extra class.
        class_count = len(self.char_to_num.get_vocabulary()) + 1
        probabilities = layers.Dense(class_count, activation="softmax", name="dense2")(sequence)

        # Attach the CTC loss.
        output = CTCLayer(name="ctc_loss")(label_input, probabilities)

        model = keras.models.Model(
            inputs=[image_input, label_input], outputs=output, name="ocr_model_v1"
        )
        # No loss is passed to compile: it is added by the CTC layer.
        model.compile(optimizer=keras.optimizers.Adam())
        return model

    def decode_batch_predictions(self, pred):
        """Turn a batch of model outputs into a list of text strings.

        Args:
            pred: Model output for one batch; axis 0 is the batch and axis 1
                the time steps.

        Returns:
            One decoded string per sample, built from at most
            ``self.max_length`` decoded ids.
        """
        sample_count, time_steps = pred.shape[0], pred.shape[1]
        # Every sample uses the full number of time steps.
        input_len = np.ones(sample_count) * time_steps
        # Greedy decoding; beam search is an option for harder tasks.
        decoded = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0]
        results = decoded[0][:, :self.max_length]
        # Map the ids back to characters and join them per sample.
        return [
            tf.strings.reduce_join(self.num_to_char(res)).numpy().decode("utf-8")
            for res in results
        ]

# Create the helper; the constructor globs the image folder and prints the
# first paths, labels and the character set it found.
c = captch()
# Uncomment to train the model before predicting.
# c.train()
# Recognise the images in the prediction folder and plot the results.
c.predict()

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值