固定长度验证码识别TensorFlow版

识别固定长度的验证码 notebook

!pip install captcha
Requirement already satisfied: captcha in /usr/local/lib/python3.7/dist-packages (0.3)
Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from captcha) (7.0.0)
from captcha.image import ImageCaptcha

import random
import numpy as np

import matplotlib.pyplot as plt
import PIL.Image as Image
import shutil
import os

# Character pools available for captcha generation (only digits are used below).
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
             'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
             'V', 'W', 'X', 'Y', 'Z']

CAPTCHA_CHARSET = NUMBER  # captcha character set (digits only)
CAPTCHA_LEN = 4  # number of characters per captcha
CAPTCHA_HEIGHT = 60  # captcha image height in pixels
CAPTCHA_WIDTH = 160  # captcha image width in pixels

TRAIN_DATASET_SIZE = 25000  # number of training images
TEST_DATASET_SIZE = 10000  # number of test images
VALID_DATASET_SIZE = 10000  # number of validation images
TRAIN_DATA_DIR = './train-data/'  # output directories for the generated datasets
VALID_DATA_DIR='./valid-data/'
TEST_DATA_DIR = './test-data/'

def gen_random_text(charset=CAPTCHA_CHARSET, length=CAPTCHA_LEN):
    """Return a random captcha string of `length` characters drawn from `charset`."""
    return ''.join(random.choice(charset) for _ in range(length))


def create_captcha_dataset(size=100, data_dir='./data/',
                           height=60,
                           width=160,
                           image_format='.png'):
    """Generate `size` random captcha images into `data_dir`.

    Each file is named '<index>_<text><image_format>' so that the
    ground-truth text can later be recovered from the filename alone.

    Args:
        size: number of captcha images to generate.
        data_dir: output directory; recreated from scratch on every call.
        height: image height in pixels.
        width: image width in pixels.
        image_format: file extension, including the leading dot.

    Returns:
        None.
    """
    # Start from an empty directory so stale images from a previous run
    # cannot pollute the dataset.
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    # One ImageCaptcha renderer is reused for every image.
    captcha = ImageCaptcha(width=width, height=height)

    for i in range(size):
        # Random label for this image, drawn from the global charset.
        text = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LEN)
        # os.path.join works whether or not data_dir carries a trailing
        # slash (the original '+' concatenation silently required one).
        captcha.write(text, os.path.join(data_dir, '{}_{}{}'.format(i, text, image_format)))

    return None

生成验证码数据集

# Generate the train / validation / test captcha datasets on disk,
# in the same order as before.
for dataset_size, dataset_dir in ((TRAIN_DATASET_SIZE, TRAIN_DATA_DIR),
                                  (VALID_DATASET_SIZE, VALID_DATA_DIR),
                                  (TEST_DATASET_SIZE, TEST_DATA_DIR)):
    create_captcha_dataset(dataset_size, dataset_dir)

#!tar -xzf captcha_datasets.tar.gz
from PIL import Image
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

import glob
import pickle

import numpy as np

import matplotlib.pyplot as plt
import os

定义模型超参数


# Training hyperparameters.
BATCH_SIZE = 20
EPOCHS = 130
OPT = 'adam'  # optimizer name, resolved by Keras
LOSS = 'sparse_categorical_crossentropy'  # NOTE: rebound to a loss object further below

MODEL_DIR = './model/train_demo/'
MODEL_FORMAT = '.h5'
HISTORY_DIR = './history/train_demo/'
HISTORY_FORMAT = '.history'

# File-name template: <dir>captcha_<opt>_<loss>_bs_<batch>_epochs_<epochs><ext>
filename_str = "{}captcha_{}_{}_bs_{}_epochs_{}{}"

# Network-architecture visualization image
MODEL_VIS_FILE = 'captcha_classfication' + '.png'
# Saved model file
MODEL_FILE = filename_str.format(MODEL_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), MODEL_FORMAT)
# Training-history (pickle) file
HISTORY_FILE = filename_str.format(HISTORY_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), HISTORY_FORMAT)


def rgb2gray(img):
    """Convert an RGB(-A) image array to grayscale using ITU-R BT.601 luma.

    Only the first three channels are used; any alpha channel is ignored.
    Y' = 0.299 R + 0.587 G + 0.114 B
    https://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
    """
    luma_weights = np.array([0.299, 0.587, 0.114])
    return img[..., :3] @ luma_weights


def text2vec(text, length=CAPTCHA_LEN, charset=CAPTCHA_CHARSET):
    """One-hot encode a captcha string into a flat (length * len(charset),) vector.

    Position i of the text occupies slots [i*len(charset), (i+1)*len(charset)),
    with a single 1 at the index of its character within `charset`.

    Raises:
        ValueError: if `text` does not have exactly `length` characters.
    """
    if len(text) != length:
        raise ValueError('Error: length of captcha should be {}, but got {}'.format(length, len(text)))

    charset_size = len(charset)
    vec = np.zeros(length * charset_size)
    for position, ch in enumerate(text):
        # hot index = per-position offset + character index within charset
        vec[position * charset_size + charset.index(ch)] = 1
    return vec


def vec2text(vector):
    """Decode a one-hot (or probability) vector back into the captcha string.

    The input is reshaped to (CAPTCHA_LEN, charset_size); each row's argmax
    selects one character from CAPTCHA_CHARSET.
    """
    rows = np.asarray(vector).reshape(CAPTCHA_LEN, -1)
    return ''.join(CAPTCHA_CHARSET[np.argmax(row)] for row in rows)


def fit_keras_channels(batch, rows=CAPTCHA_HEIGHT, cols=CAPTCHA_WIDTH):
    """Reshape a single-channel image batch to match the Keras data format.

    Returns the reshaped batch and the per-sample input_shape tuple:
    (1, rows, cols) for 'channels_first', (rows, cols, 1) otherwise.
    """
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, rows, cols) if channels_first else (rows, cols, 1)
    batch = batch.reshape((batch.shape[0],) + input_shape)
    return batch, input_shape


def load_data(dir, limit=-1):
    """Load captcha images and their text labels from a directory of PNGs.

    Filenames follow '<index>_<text>.png'; the label is the last 4
    characters of the filename stem.

    Args:
        dir: directory path (trailing slash expected, as elsewhere in this
            file). Note: the parameter name shadows the builtin `dir` but is
            kept for caller compatibility.
        limit: if > 0, stop after loading this many images.

    Returns:
        (x, y): list of image arrays and list of 4-character label strings.
    """
    x = []
    y = []
    loaded = 0
    for filename in glob.glob(dir + '*.png'):
        x.append(np.array(Image.open(filename)))
        # splitext removes exactly the '.png' suffix; the original
        # rstrip('.png') stripped any trailing '.', 'p', 'n', 'g'
        # characters and would corrupt labels containing those letters.
        stem = os.path.splitext(os.path.basename(filename))[0]
        y.append(stem[-4:])
        loaded += 1
        if limit > 0 and loaded >= limit:
            break

    return x, y

# Build tf.data datasets of file paths for each split.
train_ds0 = tf.data.Dataset.list_files(TRAIN_DATA_DIR + '*.png')
valid_ds0 = tf.data.Dataset.list_files(VALID_DATA_DIR + '*.png')
test_ds0 = tf.data.Dataset.list_files(TEST_DATA_DIR + '*.png')

def parse_image(filename):
    """Map a captcha png path to an (image, label) tensor pair.

    Filenames look like '<index>_<4 digits>.png', so the last 8 characters
    of the basename are '<4 digits>.png'; the first 4 of those are the label.
    """
    basename = tf.strings.split(filename, os.sep)[-1]
    digits = tf.strings.substr(basename, tf.strings.length(basename) - 8, 4)
    # One int32 per character position, e.g. b'1234' -> [1, 2, 3, 4].
    label = tf.strings.to_number(tf.strings.bytes_split(digits), tf.int32)
    # Decode as a single-channel image and scale to float32 in [0, 1].
    image = tf.image.decode_png(tf.io.read_file(filename), 1)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, label


BUFFER_SIZE = 10000  # shuffle buffer size
# Decode images/labels, shuffle the train and validation splits each epoch,
# and group everything into batches; the test split keeps its order.
input_ds = train_ds0.map(parse_image).shuffle(BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE)
valid_ds = valid_ds0.map(parse_image).shuffle(BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE)
test_ds = test_ds0.map(parse_image).batch(BATCH_SIZE)

定义模型


def create_model():
    """Build the captcha CNN: 5 conv blocks, then a 4x10 classification head.

    Each block is Conv(3x3, no bias) -> BatchNorm -> ReLU -> MaxPool(2x2).
    The head emits 4 groups of 10 logits, one group per captcha digit
    (no softmax — the loss is configured with from_logits=True).
    """
    inputs = Input(shape=(60, 160, 1), name="inputs")

    # Convolutional feature extractor; layer names match the original
    # hand-unrolled version (conv1..conv5, bn1..bn5, ...).
    x = inputs
    for block_idx, n_filters in enumerate((16, 64, 128, 256, 512), start=1):
        x = Conv2D(n_filters, (3, 3), name="conv%d" % block_idx,
                   padding='same', use_bias=False)(x)
        x = BatchNormalization(name="bn%d" % block_idx)(x)
        x = Activation('relu', name="relu%d" % block_idx)(x)
        x = MaxPooling2D(pool_size=(2, 2), name="pool%d" % block_idx)(x)

    # Flatten the pooled feature map and classify.
    x = Flatten()(x)
    x = Dense(1280, activation='relu')(x)
    x = Dropout(0.2)(x)
    x = Dense(40)(x)
    # 4 digit positions x 10 classes each.
    outs = Reshape((4, 10))(x)

    return Model(inputs=inputs, outputs=outs)
# The final Dense layer emits raw logits (no softmax), so from_logits=True.
LOSS=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# The commented-out code below trains on a TPU; do not run it for now.
# tf.keras.backend.clear_session()
# resolver = tf.distribute.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
# tf.config.experimental_connect_to_cluster(resolver)

# # This is the TPU initialization code that has to be at the beginning.
# tf.tpu.experimental.initialize_tpu_system(resolver)
# print("All devices: ", tf.config.list_logical_devices('TPU'))

# strategy = tf.distribute.experimental.TPUStrategy(resolver)

# with strategy.scope():
#   model = create_model()
#   model.compile(optimizer=OPT, loss=LOSS, metrics=['accuracy'])
# model.summary()
# CPU/GPU path: build and compile the model.
tf.keras.backend.clear_session()
model = create_model()
model.compile(optimizer=OPT, loss=LOSS, metrics=['accuracy'])
model.summary()
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
inputs (InputLayer)          [(None, 60, 160, 1)]      0         
_________________________________________________________________
conv1 (Conv2D)               (None, 60, 160, 16)       144       
_________________________________________________________________
bn1 (BatchNormalization)     (None, 60, 160, 16)       64        
_________________________________________________________________
relu1 (Activation)           (None, 60, 160, 16)       0         
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 30, 80, 16)        0         
_________________________________________________________________
conv2 (Conv2D)               (None, 30, 80, 64)        9216      
_________________________________________________________________
bn2 (BatchNormalization)     (None, 30, 80, 64)        256       
_________________________________________________________________
relu2 (Activation)           (None, 30, 80, 64)        0         
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 15, 40, 64)        0         
_________________________________________________________________
conv3 (Conv2D)               (None, 15, 40, 128)       73728     
_________________________________________________________________
bn3 (BatchNormalization)     (None, 15, 40, 128)       512       
_________________________________________________________________
relu3 (Activation)           (None, 15, 40, 128)       0         
_________________________________________________________________
pool3 (MaxPooling2D)         (None, 7, 20, 128)        0         
_________________________________________________________________
conv4 (Conv2D)               (None, 7, 20, 256)        294912    
_________________________________________________________________
bn4 (BatchNormalization)     (None, 7, 20, 256)        1024      
_________________________________________________________________
relu4 (Activation)           (None, 7, 20, 256)        0         
_________________________________________________________________
pool4 (MaxPooling2D)         (None, 3, 10, 256)        0         
_________________________________________________________________
conv5 (Conv2D)               (None, 3, 10, 512)        1179648   
_________________________________________________________________
bn5 (BatchNormalization)     (None, 3, 10, 512)        2048      
_________________________________________________________________
relu5 (Activation)           (None, 3, 10, 512)        0         
_________________________________________________________________
pool5 (MaxPooling2D)         (None, 1, 5, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 1280)              3278080   
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 40)                51240     
_________________________________________________________________
reshape (Reshape)            (None, 4, 10)             0         
=================================================================
Total params: 4,890,872
Trainable params: 4,888,920
Non-trainable params: 1,952
_________________________________________________________________

from tensorflow.keras.callbacks import EarlyStopping

# Stop training once validation loss has not improved for 50 epochs.
earlystop = EarlyStopping(monitor="val_loss", patience=50, verbose=1)
# Keras expects `callbacks` to be a list of Callback instances.
history = model.fit(input_ds, epochs=EPOCHS,
                    verbose=2,
                    validation_data=valid_ds,
                    callbacks=[earlystop])

# Persist the trained model.
if not tf.io.gfile.exists(MODEL_DIR):
    tf.io.gfile.makedirs(MODEL_DIR)

model.save(MODEL_FILE)
print('Saved trained model at %s ' % MODEL_FILE)

# Persist the training history for later visualization.
if not tf.io.gfile.exists(HISTORY_DIR):
    tf.io.gfile.makedirs(HISTORY_DIR)

with open(HISTORY_FILE, 'wb') as f:
    pickle.dump(history.history, f)

print(HISTORY_FILE)
Epoch 1/130
1191/1191 - 31s - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0169 - val_accuracy: 0.9973
Epoch 2/130
1191/1191 - 31s - loss: 0.0044 - accuracy: 0.9988 - val_loss: 7.8253 - val_accuracy: 0.3675
Epoch 3/130
1191/1191 - 31s - loss: 0.0052 - accuracy: 0.9988 - val_loss: 0.0785 - val_accuracy: 0.9850
Epoch 4/130
1191/1191 - 31s - loss: 0.0039 - accuracy: 0.9990 - val_loss: 0.0440 - val_accuracy: 0.9932
Epoch 5/130
1191/1191 - 31s - loss: 0.0047 - accuracy: 0.9990 - val_loss: 0.0950 - val_accuracy: 0.9815
Epoch 6/130
1191/1191 - 31s - loss: 0.0046 - accuracy: 0.9990 - val_loss: 0.2780 - val_accuracy: 0.9583
Epoch 7/130
1191/1191 - 31s - loss: 0.0043 - accuracy: 0.9990 - val_loss: 0.0355 - val_accuracy: 0.9952

省略.....
	
Epoch 60/130
1191/1191 - 31s - loss: 0.0031 - accuracy: 0.9994 - val_loss: 0.0256 - val_accuracy: 0.9973
Epoch 61/130
1191/1191 - 31s - loss: 0.0021 - accuracy: 0.9995 - val_loss: 0.0251 - val_accuracy: 0.9976
Epoch 62/130
1191/1191 - 31s - loss: 0.0028 - accuracy: 0.9995 - val_loss: 0.0926 - val_accuracy: 0.9902
Epoch 63/130
1191/1191 - 31s - loss: 0.0030 - accuracy: 0.9995 - val_loss: 0.0207 - val_accuracy: 0.9976
Epoch 00063: early stopping
Saved trained model at ./model/train_demo/captcha_adam_sparse_categorical_crossentropy_bs_20_epochs_130.h5 
./history/train_demo/captcha_adam_sparse_categorical_crossentropy_bs_20_epochs_130.history
# model=tf.keras.models.load_model(MODEL_FILE)  # optionally reload the saved model
# Evaluate the trained model on the held-out test set.
test_loss, test_acc=model.evaluate(test_ds)
print('test loss:%.2f,test acc:%.2f'%(test_loss,test_acc))
500/500 [==============================] - 7s 14ms/step - loss: 0.0236 - accuracy: 0.9973
test loss:0.02,test acc:1.00
test_dataset_num=54  # number of samples to visualize (fills a 9x6 grid)
test_ds=tf.data.Dataset.list_files(TEST_DATA_DIR + '*.png').take(test_dataset_num)
# Materialize the single batch of decoded (image, label) tensors.
test_data=list(test_ds.map(parse_image).batch(test_dataset_num))[0]
test_dataX=test_data[0].numpy()  # image batch (single grayscale channel)
test_dataY=test_data[1].numpy()  # integer digit labels, 4 per sample
predicts=model.predict(test_data[0])
predictsY=np.argmax(predicts,axis=-1)  # predicted digit per position
%matplotlib inline
plt.figure(figsize=(9,8))
for i in range(len(test_dataX)):
  img=test_dataX[i].squeeze()
  plt.subplot(9,6,i+1)
  plt.xticks([])
  plt.yticks([])
  plt.grid(False)
  label=''.join(map(str,predictsY[i].tolist()))
  real_label=''.join(map(str,test_dataY[i].tolist()))
  # Fully correct predictions get a plain label; any mismatch is shown
  # as 'predicted(actual)' in red.
  if (predictsY[i]==test_dataY[i]).all():
     color = 'blue'
     plt.xlabel(label)
  else:
     color = 'red'
     plt.xlabel('{}({})'.format(label,real_label), color=color)
  
  plt.imshow(img)
plt.tight_layout()
plt.show()

预测结果

# Sanity check: count the generated files in each dataset directory.
len(os.listdir(TRAIN_DATA_DIR)),len(os.listdir(VALID_DATA_DIR)),len(os.listdir(TEST_DATA_DIR))
(25000, 10000, 10000)

训练结果可视化

# Load the pickled training history and plot accuracy and loss curves.
history_file = HISTORY_FILE
with open(history_file, 'rb') as f:
    history = pickle.load(f)

fig = plt.figure()
# One panel per metric: (metric key, legend location).
for panel, (metric, legend_loc) in enumerate(
        (('accuracy', 'lower right'), ('loss', 'upper right')), start=1):
    plt.subplot(2, 1, panel)
    plt.plot(history[metric])
    plt.plot(history['val_' + metric])
    plt.title('Model ' + metric.capitalize())
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
plt.tight_layout()

plt.show()

可视化

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
验证识别是一个比较复杂的任务,需要进行图像预处理、特征提取和分类等步骤。PyTorch是一个非常适合进行深度学习任务的框架,可以方便地搭建神经网络模型并进行训练和推理。 一般来说,验证识别可以分为以下几个步骤: 1. 数据收集:收集大量的验证码数据,并将其划分为训练集、验证集和测试集。 2. 数据预处理:对验证码图像进行预处理,包括灰度化、二值化、去噪等操作,以便于后续的特征提取和分类。 3. 特征提取:使用卷积神经网络(CNN)等方法对验证码图像进行特征提取,得到一个固定长度的特征向量。 4. 分类器设计:设计一个分类器,将特征向量映射到具体的验证码字符上。 5. 模型训练:使用训练集对模型进行训练,并在验证集上进行调参,以获得最佳的模型性能。 6. 模型评估:使用测试集对模型进行评估,计算准确率、召回率等指标。 下面是一个简单的验证识别模型的代码示例: ```python import torch import torch.nn as nn import torch.optim as optim from torchvision import transforms, datasets # 定义数据预处理 transform = transforms.Compose([ transforms.Grayscale(), transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)) ]) # 加载数据集 train_dataset = datasets.ImageFolder('train', transform=transform) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True) # 定义模型 class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 32, kernel_size=3) self.pool = nn.MaxPool2d(kernel_size=2) self.conv2 = nn.Conv2d(32, 64, kernel_size=3) self.fc1 = nn.Linear(64 * 5 * 5, 128) self.fc2 = nn.Linear(128, 10) def forward(self, x): x = self.pool(torch.relu(self.conv1(x))) x = self.pool(torch.relu(self.conv2(x))) x = x.view(-1, 64 * 5 * 5) x = torch.relu(self.fc1(x)) x = self.fc2(x) return x net = Net() # 定义损失函数和优化器 criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # 训练模型 for epoch in range(10): running_loss = 0.0 for i, data in enumerate(train_loader, 0): inputs, labels = data optimizer.zero_grad() outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() if i % 100 == 99: print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100)) running_loss = 0.0 # 保存模型 torch.save(net.state_dict(), 'model.pth') ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值