OCR 1.1: A Simple LeNet-5 Classifier


Overview

The previous post (1.0) covered the basic preprocessing of the dataset; this one continues with an introductory recognition step (TensorFlow, CPU).
The raw dataset contains 500 Chinese characters. The test set has 400-odd images per character and the training set about 1,700, with sizes ranging from roughly 50×50 to 100×100 pixels. If you need the dataset, leave a comment; I'm not sure where it originally came from, but I can try to find a way to share it.
In this post we use the earlier image preprocessing to normalize every image to a 28×28 black-and-white picture. First, as a tribute to the 1998 LeNet-5, we build a simple LeNet-5 and see how it performs.
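For context, here is a rough sketch of the kind of preprocessing step described above (the actual code is in the 1.0 post; the function name and the Otsu thresholding here are illustrative assumptions):

import cv2

def to_28x28_bw(src_path, dst_path):
    """Illustrative only: grayscale -> resize to 28x28 -> binarize (Otsu)."""
    img = cv2.imread(src_path, 0)                      # read as grayscale
    img = cv2.resize(img, (28, 28))                    # normalize the size
    _, img = cv2.threshold(img, 0, 255,
                           cv2.THRESH_BINARY | cv2.THRESH_OTSU)  # black/white
    cv2.imwrite(dst_path, img)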

Code walkthrough

Data loading:

def readimg(path):
    dir_counter = 0          # label: index of the character's folder
    img_list = []
    label_list = []
    for child_dir in tqdm(os.listdir(path)):            # one folder per character
        child_path = os.path.join(path, child_dir)
        for dir_image in os.listdir(child_path)[0:500]:  # cap images per class to save memory
            img = cv2.imread(child_path + "/" + dir_image, 0)  # flag 0: read as grayscale
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_list.append(img)
            label_list.append(dir_counter)
        dir_counter += 1
    return img_list, label_list, dir_counter

The slice [0:500] is there because the full dataset can easily blow up memory. If you haven't added enough RAM, adjust it so each run only uses part of the data: [0:500] means only the first 500 images of each character are used for training and testing. (A memory-friendlier loading approach is sketched after these notes.)
Wrapping the loop in tqdm displays a progress bar.
The outer loop walks every folder inside the training or test directory.
The inner loop reads the images inside each folder.
(Windows users should watch out for errors caused by Chinese characters in the path.)
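If memory is still tight, one option (a sketch, not the original code; the tf.data usage and the 28×28 resize are assumptions carried over from this post) is to collect only file paths and labels up front and let tf.data decode images lazily in batches:

import os
import tensorflow as tf

def list_samples(path):
    samples = []
    for label, child_dir in enumerate(sorted(os.listdir(path))):
        child_path = os.path.join(path, child_dir)
        for name in os.listdir(child_path):
            samples.append((os.path.join(child_path, name), label))
    return samples

def decode(file_path, label):
    img = tf.io.read_file(file_path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img = tf.image.resize(img, [28, 28]) / 255.0       # grayscale, scaled to [0, 1]
    return img, label

paths, labels = zip(*list_samples("/home/jojo/jason/jason_opencv/char2/train/"))
train_ds = (tf.data.Dataset.from_tensor_slices((list(paths), list(labels)))
            .shuffle(10000)
            .map(decode)
            .batch(50))
# model.fit(train_ds, epochs=5) would then stream images from disk instead of
# holding the whole dataset in memory.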

np.set_printoptions(threshold=np.inf)

path="/home/jojo/jason/jason_opencv/char2/train/"
(x_traina, y_traina,tagg)=readimg(path)
x_train=np.array(x_traina)
y_train=np.array(y_traina)
path="/home/jojo/jason/jason_opencv/char2/test/"
(x_testa, y_testa,taggg)=readimg(path)
x_test=np.array(x_testa)
y_test=np.array(y_testa)

x_train, x_test = x_train/255.0, x_test /255.0

This part loads the data into NumPy arrays. Dividing by 255 converts the pixel values to floats in [0, 1], which is more convenient for the network.
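A quick sanity check (illustrative only) of what the division does:

# imread with flag 0 yields uint8 arrays in [0, 255]; after dividing by 255.0
# NumPy promotes them to floating point values in [0, 1].
print(x_train.dtype, x_train.min(), x_train.max())   # e.g. float64 0.0 1.0
print(x_train.shape)                                  # (num_images, 28, 28)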

Network training

x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)  # add a channel dimension so the data matches the network input
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
print("x_train.shape", x_train.shape)


class LeNet5(Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.c2 = Conv2D(filters=16, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.flatten = Flatten()
        #self.f1 = Dense(120, activation='sigmoid')
        #self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(500, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.p2(x)

        x = self.flatten(x)
        #x = self.f1(x)
        #x = self.f2(x)
        y = self.f3(x)
        return y


model = LeNet5()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint_js/LeNet5.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=50, epochs=5, validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])

acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

This is the classic LeNet-5: two convolutional layers, two pooling layers and three fully connected layers, so there is not much to add, except that here I replaced the three fully connected layers with a single 500-unit softmax layer, which runs well enough for a first pass.
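If you want the classic three-layer fully connected tail instead, re-enabling the commented lines gives something like the sketch below (the 500-way softmax stays, since this dataset has 500 classes):

# In __init__:
self.f1 = Dense(120, activation='sigmoid')
self.f2 = Dense(84, activation='sigmoid')
self.f3 = Dense(500, activation='softmax')

# In call():
x = self.flatten(x)
x = self.f1(x)
x = self.f2(x)
y = self.f3(x)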

Results

The model had previously been trained on the test set, so what you see now is an obvious overfit state.
[Figure: training/validation accuracy and loss curves]
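One way to avoid that leakage (a suggestion, not part of the original run) is to carve a validation split out of the training data instead of validating on a test set that was previously trained on:

# Shuffle first, since readimg returns the data ordered class by class and
# Keras's validation_split simply takes the tail of the arrays.
idx = np.random.permutation(len(x_train))
x_train, y_train = x_train[idx], y_train[idx]

history = model.fit(x_train, y_train, batch_size=50, epochs=5,
                    validation_split=0.1,      # hold out 10% of x_train for validation
                    callbacks=[cp_callback])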

Complete code

import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model
import cv2
from tqdm import tqdm
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def readimg(path):
    dir_counter = 0          # label: index of the character's folder
    img_list = []
    label_list = []
    for child_dir in tqdm(os.listdir(path)):            # one folder per character
        child_path = os.path.join(path, child_dir)
        for dir_image in os.listdir(child_path)[0:500]:  # cap images per class to save memory
            img = cv2.imread(child_path + "/" + dir_image, 0)  # flag 0: read as grayscale
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_list.append(img)
            label_list.append(dir_counter)
        dir_counter += 1
    return img_list, label_list, dir_counter

np.set_printoptions(threshold=np.inf)

path="/home/jojo/jason/jason_opencv/char2/train/"
(x_traina, y_traina,tagg)=readimg(path)
x_train=np.array(x_traina)
y_train=np.array(y_traina)
path="/home/jojo/jason/jason_opencv/char2/test/"
(x_testa, y_testa,taggg)=readimg(path)
x_test=np.array(x_testa)
y_test=np.array(y_testa)

x_train, x_test = x_train/255.0, x_test /255.0
print("x_train.shape", x_train.shape)
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)  # add a channel dimension so the data matches the network input
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
print("x_train.shape", x_train.shape)


class LeNet5(Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.c2 = Conv2D(filters=16, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.flatten = Flatten()
        #self.f1 = Dense(120, activation='sigmoid')
        #self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(500, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.p2(x)

        x = self.flatten(x)
        #x = self.f1(x)
        #x = self.f2(x)
        y = self.f3(x)
        return y


model = LeNet5()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint_js/LeNet5.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=50, epochs=5, validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
#model.summary()

# print(model.trainable_variables)
#file = open('./weights.txt', 'w')
#for v in model.trainable_variables:
#    file.write(str(v.name) + '\n')
#    file.write(str(v.shape) + '\n')
#    file.write(str(v.numpy()) + '\n')
#file.close()

###############################################    show   ###############################################
# plot the training and validation accuracy and loss curves
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

PS: Corrections and suggestions in the comments are welcome.
