深度学习 卷积神经网络 CNN 验证码识别 VGG16 Lenet5(改了一点) AlexNet8

 

环境:TensorFlow2.1

self_model在实际过程中效果能好一点,测试集准确率能达到90%以上。本人是初学者,很多地方还不太明白,所以VGG16、Lenet5、AlexNet8在测试过程中效果并不好,欢迎评论。另外Lenet5的全连接层有很大改变,原始结构还需自行搜索。

文章代码很大程度来源于:深度学习案例1:验证码识别_验证码数据集_技术人Howzit的博客-CSDN博客

还请支持原创,文章发布仅为学习交流,若有侵权还请联系删除。

数据集来源于:Kaggle

import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Activation, BatchNormalization, Flatten
from tensorflow.keras.layers import Dense, Conv2D
# FIX: import from the public tensorflow.keras API instead of the private
# tensorflow_core.python package — that path is an internal implementation
# detail of TF 2.0/2.1 and is gone in later releases.
from tensorflow.keras.layers import MaxPool2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Dataset directory (PNG captcha images whose file name is the label).
data_dir = Path("./dataset2/")

# Collect every PNG image path, sorted so ordering is reproducible.
images = sorted(list(map(str, list(data_dir.glob("*.png")))))
# The label is the file name without the ".png" extension
# (e.g. "2b827.png" -> "2b827").
labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in images]
# FIX: sort the character set so the char<->index mapping is deterministic
# across runs. A plain set's iteration order depends on PYTHONHASHSEED, so
# a model checkpoint saved in one run could not be decoded in another.
characters = sorted(set(char for label in labels for char in label))

num = len(characters)  # number of distinct characters (classes per position)
channels = 1           # images are decoded as single-channel (grayscale)

print("Number of unique characters: ", len(characters))
print("Characters present: ", characters)

# NOTE(review): the two dict names below are swapped relative to what they
# hold, but the rest of the script uses them consistently, so the names are
# kept to avoid breaking other code:
#   char_to_num maps index -> character (used by vec2text)
#   num_to_char maps character -> index (used by encode_single_sample)
char_to_num = dict((c, i) for c, i in enumerate(characters))
num_to_char = dict((i, c) for c, i in enumerate(characters))

# 将数据集分为测试集和验证集
def split_data(images, labels, train_size=0.9, shuffle=True):
    # 1. 获得总共数据集的大小
    size = len(images)
    # 2. 打乱数组的索引
    indices = np.arange(size)
    if shuffle:
        np.random.shuffle(indices)
        # 3. 获取训练集的大小
        train_samples = int(size * train_size)
        # 4. 将数据集分为测试集和验证集
        x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
        x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
    return x_train, x_valid, y_train, y_valid


# Split the file paths and labels into training and validation sets
# (split_data defaults to a shuffled 90/10 split).
x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))

# print("x_train",x_train)
# print("x_valid",x_valid)
# print("y_train",y_train)
# print(len(images)*0.9)
# print("y_valid",y_valid)
# Target dimensions every image is resized to before entering the network.

img_width = 200
img_height = 50


# 将训练集和验证集中的路径转成图片数组以及将标签转成字符数组
def encode_single_sample(img_path, label):
    # 1. 读图片文件
    img = tf.io.read_file(img_path)
    # 2. 编码
    img = tf.io.decode_png(img, channels=channels)
    # 3. 正则化在[0-1]之间
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. 转成我们想要的图片尺寸。
    img = tf.image.resize(img, [img_height, img_width])
    # 5. 将字符转成整型数组

    label = [num_to_char[i] for i in label]

    return img, label


# 把预测出来的整型数组转成字符串
def vec2text(vec):
    """
 还原标签(向量->字符串)
 """
    text = []
    for c in vec:
        text.append(char_to_num[c.numpy()])
    return "".join(text)


# 获取训练集
def get_train_dataset(dataset_size):
    data_x = []
    data_y = []
    for index in range(dataset_size):
        x, y = encode_single_sample(x_train[index], y_train[index])
        data_x.append(x)
        data_y.append(y)

    data_x = numpy.asarray(data_x)
    data_y = to_categorical(data_y)
    data_x = data_x.reshape(-1, img_height, img_width, channels)  # normalize
    return data_x, data_y


# 获取验证集
def get_valid_dataset(dataset_size):
    data_x = []
    data_y = []
    for index in range(dataset_size):
        x, y = encode_single_sample(x_valid[index], y_valid[index])
        data_x.append(x)
        data_y.append(y)

    data_x = numpy.asarray(data_x)

    data_y = to_categorical(data_y)
    data_x = data_x.reshape(-1, img_height, img_width, channels)  # normalize
    return data_x, data_y


# data_x, data_y = get_train_dataset(len(x_train))
# data_x_test, data_y_test = get_valid_dataset(len(x_valid))
# 90% of all samples are used for training; the remainder for testing.
train_num = int(len(images) * 0.90)
test_num = len(images) - train_num

# Materialize the training arrays (images + one-hot labels).
data_x, data_y = get_train_dataset(train_num)

# Materialize the validation/test arrays.
data_x_test, data_y_test = get_valid_dataset(test_num)
# print("data_x",data_x)
# print("data_y",data_y)
# print("data_y_test", data_y_test)
# print(type(data_y_test))
# Empty Sequential model; one of the architecture builders below
# (vgg_16 / self_model / lenet / AlexNet8) fills it with layers.
model = Sequential()


def vgg_16(models):
    """Append a VGG16-style network to `models` and return it.

    Five convolutional stages (two same-padded 3x3 ReLU convs followed by a
    2x2 max pool each), then a classifier head of two 4096-unit ReLU layers
    with dropout and a softmax over 5 positions x num classes, reshaped to
    (5, num).
    """
    # Convolutional stages; only the very first conv declares input_shape.
    stage_filters = [64, 128, 256, 512, 512]
    for stage, n_filters in enumerate(stage_filters):
        for conv in range(2):
            conv_kwargs = dict(filters=n_filters,
                               kernel_size=(3, 3),
                               padding='same',
                               activation='relu')
            if stage == 0 and conv == 0:
                conv_kwargs['input_shape'] = (img_height, img_width, channels)
            models.add(layers.Conv2D(**conv_kwargs))
        models.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Classifier head.
    models.add(layers.Flatten())
    for _ in range(2):
        models.add(layers.Dense(4096, activation='relu'))
        models.add(layers.Dropout(0.5))

    # 5 captcha positions, `num` character classes each.
    models.add(layers.Dense(5 * num, activation='softmax'))
    models.add(layers.Reshape([5, num]))
    models.summary()
    return models


def self_model(models):
    """Append a small 3-conv CNN to `models` and return it.

    Three conv+pool stages (32/64/128 filters), a 512-unit dense layer with
    dropout, and a sigmoid output over 5 positions x num classes reshaped
    to (5, num). This is the architecture that performed best in practice.
    """
    shared_conv = dict(kernel_size=(3, 3), padding='same', activation='relu')

    models.add(layers.Conv2D(input_shape=(img_height, img_width, channels),
                             filters=32, **shared_conv))
    models.add(layers.MaxPooling2D(padding='same'))

    models.add(layers.Conv2D(filters=64, **shared_conv))
    models.add(layers.MaxPooling2D(padding='same'))

    models.add(layers.Conv2D(filters=128, **shared_conv))
    models.add(layers.MaxPooling2D(padding='same'))

    models.add(layers.Flatten())
    models.add(layers.Dense(512, activation='relu'))
    # NOTE(review): two stacked Dropout(0.5) layers give an effective drop
    # rate of 0.75; kept as-is to preserve the original behavior, but this
    # may be unintentional — confirm.
    models.add(layers.Dropout(0.5))
    models.add(layers.Dropout(0.5))
    models.add(layers.Dense(5 * num, activation='sigmoid'))

    models.add(layers.Reshape([5, num]))
    models.summary()
    return models


def lenet(models):
    """Append a LeNet-5 variant to `models` and return it.

    Two classic 5x5 valid-padded conv + pool stages (6 and 16 filters), but
    with a much wider fully connected head (4096/2048/512 tanh layers) than
    the original LeNet-5, ending in a softmax over 5 * num classes reshaped
    to (5, num).
    """
    tanh = tf.keras.activations.tanh

    # Stage 1: 6 filters, 5x5, valid padding.
    models.add(layers.Conv2D(input_shape=(img_height, img_width, channels),
                             filters=6, kernel_size=(5, 5),
                             padding='valid', activation='relu'))
    models.add(layers.MaxPooling2D(pool_size=(2, 2), padding='same'))

    # Stage 2: 16 filters, 5x5, valid padding.
    models.add(layers.Conv2D(filters=16, kernel_size=(5, 5),
                             padding='valid', activation='relu'))
    models.add(layers.MaxPooling2D(pool_size=(2, 2), padding='same'))

    # Classifier head (heavily enlarged versus the original LeNet-5).
    models.add(layers.Flatten())
    models.add(layers.Dense(4096, activation=tanh))
    models.add(layers.Dropout(0.1))
    models.add(layers.Dense(2048, activation=tanh))
    models.add(layers.Dense(512, activation=tanh))
    models.add(layers.Dense(5 * num, activation='softmax'))
    models.add(layers.Reshape([5, num]))
    models.summary()
    return models


def AlexNet8(models):
    """Append an AlexNet-8-style network to `models` and return it.

    Fixes versus the original:
      * the first MaxPooling2D passed data_format='channels_first' even
        though the input is (height, width, channels), i.e. channels_last,
        so pooling ran over the wrong axes;
      * the first conv applied ReLU twice (activation='relu' plus a
        following Activation('relu')); the redundant layer is removed;
      * the hidden 1000-unit layer used softmax as its activation, which
        squashes the features feeding the real output layer — relu is used
        instead.
    """
    # Layer 1: conv (with built-in ReLU) -> batch norm -> max pool.
    models.add(layers.Conv2D(input_shape=(img_height, img_width, channels),
                             filters=96, kernel_size=11, strides=4,
                             activation='relu'))
    models.add(layers.BatchNormalization())
    models.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

    # Layer 2: conv -> ReLU -> batch norm -> max pool.
    models.add(layers.Conv2D(filters=256, kernel_size=5, strides=1,
                             padding='same'))
    models.add(layers.Activation('relu'))
    models.add(layers.BatchNormalization())
    models.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

    # Layers 3 and 4: conv -> ReLU (identical shape).
    for _ in range(2):
        models.add(layers.Conv2D(filters=384, kernel_size=3, strides=1,
                                 padding='same'))
        models.add(layers.Activation('relu'))

    # Layer 5: conv -> ReLU -> max pool.
    models.add(layers.Conv2D(filters=256, kernel_size=3, strides=1,
                             padding='same'))
    models.add(layers.Activation('relu'))
    models.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

    # Layers 6 and 7: dense -> dropout.
    models.add(layers.Flatten())
    for _ in range(2):
        models.add(layers.Dense(4096, activation='relu'))
        models.add(layers.Dropout(0.5))

    # Layer 8: hidden 1000-unit layer, then the captcha output head —
    # 5 positions x num classes, reshaped to (5, num).
    models.add(layers.Dense(1000, activation='relu'))
    models.add(layers.Dense(num * 5, activation='softmax'))
    models.add(layers.Reshape([5, num]))
    models.summary()
    return models


# model = vgg_16(model)
# model = lenet(model)
# Build the network; self_model is the architecture used here.
model=self_model(model)
# model=AlexNet8(model)

# Compile.
# NOTE(review): this decay schedule is defined but never used — the Adam
# optimizer below is constructed with a fixed learning rate instead.
natural_exp_decay = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=20, decay_rate=0.96, staircase=True)

model.compile(optimizer=tf.keras.optimizers.Adam(0.00002),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

# TensorBoard
# tbCallBack = TensorBoard(log_dir='samples')
checkpoint_path = "./dataset2/model"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Save a checkpoint every 50 epochs.
# NOTE(review): `period` is deprecated in TF2 in favour of `save_freq`.
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, verbose=2, period=50)

# Train, validating on the held-out set every epoch.
history = model.fit(data_x, data_y, batch_size=32, epochs=500, verbose=2, callbacks=[cp_callback],
                    validation_data=(data_x_test, data_y_test))

# Plot training accuracy and loss curves.
plt.plot(history.history["accuracy"], label='accuracy')
plt.plot(history.history['loss'], label='loss')
plt.legend()
plt.title('train_history')
plt.show()

# Plot validation accuracy and loss curves.
plt.plot(history.history["val_accuracy"], label='val_accuracy')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.title('test_history')
plt.show()

# new_model = tf.keras.models.load_model('./samples/model/')
# new_model.summary()
# Predict on the validation set; argmax over the class axis yields the
# predicted (result1) and true (result2) index sequences per sample.
result = model.predict(data_x_test)
result1 = tf.argmax(result, axis=2)
result2 = tf.argmax(data_y_test, axis=2)
#
# def test():
#     print("训练集数量为", train_num)
#     print("测试集数量为", test_num)
#     sum = 0
#     for i in range(test_num):
#         a = vec2text(result1[i])
#         b = vec2text(result2[i])
#         if (a == b):
#             sum = sum + 1
#         print("测试集:\t预测结果:", a + "\t真实结果", b)
#     print("测试集准确率为:", sum / test_num)

# Visualize the first 16 predictions on a 4x4 grid of grayscale images,
# titled with the decoded predicted label.
_, ax = plt.subplots(4, 4, figsize=(10, 5))
for i in range(16):
    img = (data_x_test[i] * 255)
    # label = [char_to_num[c] for c in labels]
    ax[i // 4, i % 4].imshow(img[:, :, 0], cmap='gray')
    ax[i // 4, i % 4].set_title(vec2text(result1[i]))
    ax[i // 4, i % 4].axis("off")
plt.show()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值