手写字体识别模型LeNet5诞生于1994年,是最早的卷积神经网络之一。LeNet5通过巧妙的设计,利用卷积、参数共享、池化等操作提取特征,避免了大量的计算成本,最后再使用全连接神经网络进行分类识别,这个网络也是后来大量神经网络架构的起点。
LeNet架构如下:
LeNet_5
import cv2,os
import numpy as np
import tensorflow as tf
from random import shuffle
from tensorflow.keras.models import load_model
from tensorflow.keras import Sequential,layers,optimizers,losses,metrics
labels_num = 2  # number of output classes
# Dataset loading: reads images from class sub-directories and splits them
# into train/test sets (the function below produces BOTH splits, not just test).
def load_image(path, shape):
    """Load images from class sub-directories and build a 70/30 train/test split.

    Each immediate sub-directory of *path* is treated as one class; its index
    (0, 1, ...) in ``os.listdir`` order becomes the integer label.

    Args:
        path: Root directory containing one sub-directory per class.
        shape: ``(width, height)`` target size passed to ``cv2.resize``.

    Returns:
        ``(train_data, train_label, test_data, test_label)`` numpy arrays;
        pixel values are scaled to ``[0, 1]``.
    """
    img_list = []
    label_list = []
    dir_counter = 0
    # Read every image file from every class sub-directory.
    for child_dir in os.listdir(path):
        child_path = os.path.join(path, child_dir)
        if not os.path.isdir(child_path):
            continue  # ignore stray files at the top level
        for dir_image in os.listdir(child_path):
            img = cv2.imread(os.path.join(child_path, dir_image))
            if img is None:
                continue  # cv2.imread returns None for unreadable/non-image files
            # Resize first, then normalize (resizing the smaller uint8 image is cheaper).
            img = cv2.resize(img, (shape[0], shape[1]))
            img_list.append(img / 255.0)
            label_list.append(dir_counter)
        dir_counter += 1
    length = len(img_list)
    # Shuffle images and labels with the SAME permutation so pairs stay aligned.
    index = list(range(length))
    shuffle(index)
    img_np = np.array(img_list)[index]
    label_np = np.array(label_list)[index]
    # 70% train / 30% test split.
    train_l = int(0.7 * length)
    train_data = img_np[:train_l]
    train_label = label_np[:train_l]
    test_data = img_np[train_l:]
    test_label = label_np[train_l:]
    return train_data, train_label, test_data, test_label
def model(label_num=labels_num):
    """Build a LeNet-5 style classifier emitting *label_num* raw logits."""
    net = Sequential()
    # Feature extractor: two conv + max-pool stages (LeNet-5 layout).
    net.add(layers.Conv2D(6, kernel_size=3, strides=1, activation='relu'))
    net.add(layers.MaxPooling2D(pool_size=2, strides=2))
    net.add(layers.Conv2D(16, kernel_size=3, strides=1, activation='relu'))
    net.add(layers.MaxPooling2D(pool_size=2, strides=2))
    # Classifier head; the final layer has NO activation (logits output).
    net.add(layers.Flatten())
    net.add(layers.Dense(120, activation='relu'))
    net.add(layers.Dense(84, activation='relu'))
    net.add(layers.Dense(label_num))
    return net
def train(net, train_data, train_label):
    """Train *net* for 5 epochs with Adam and save it to ``model/lenet.h5``.

    Args:
        net: An un-built Keras model (logits output, see ``model()``).
        train_data: Float image batch array of shape ``(N, H, W, C)``.
        train_label: Integer class labels of shape ``(N,)``.
    """
    def get_batch(batch_size, i):
        # i-th contiguous mini-batch of data and labels.
        start = batch_size * i
        return train_data[start:start + batch_size], train_label[start:start + batch_size]

    epoch = 5        # number of passes over the training set
    batch_size = 32  # images per optimization step
    shape_t = train_data.shape
    net.build(input_shape=(batch_size, shape_t[1], shape_t[2], shape_t[3]))
    num_train_data = shape_t[0]
    batch_num = int(num_train_data // batch_size)  # whole batches only; the tail is dropped
    # Constant learning rate; a schedule could be plugged in here instead.
    optimizer = optimizers.Adam(learning_rate=0.001)
    # Loss object is loop-invariant -- create it ONCE instead of per batch.
    loss_object = losses.CategoricalCrossentropy(from_logits=True)
    for n in range(epoch):
        for i in range(batch_num):
            x, y = get_batch(batch_size, i)
            with tf.GradientTape() as tape:  # record ops for differentiation
                out = net(x)
                # integer labels -> one-hot, e.g. 1 -> (0, 1, 0, ...)
                y_onehot = tf.one_hot(y, depth=labels_num)
                loss = loss_object(y_onehot, out)
            # Logging happens OUTSIDE the tape so it is not recorded.
            print('epoch:%d batch:%d loss:%f' % (n, i, loss.numpy()))
            grad = tape.gradient(loss, net.trainable_variables)
            optimizer.apply_gradients(zip(grad, net.trainable_variables))
    # save() fails if the target directory is missing -- create it first.
    os.makedirs('model', exist_ok=True)
    net.save('model/lenet.h5')
def test(test_data, test_label, model_path='model/lenet.h5'):
    """Evaluate the saved model and print its accuracy on the test set.

    Args:
        test_data: Float image array of shape ``(N, H, W, C)``.
        test_label: Integer class labels of shape ``(N,)``.
        model_path: Path of the saved Keras model (default matches ``train``).
    """
    net = load_model(model_path)
    batch_size = 32
    s_c_a = metrics.SparseCategoricalAccuracy()  # accumulates accuracy across batches
    num_samples = test_data.shape[0]
    # Walk the test set in batches INCLUDING the trailing partial batch;
    # the floor-division version silently skipped up to batch_size-1 samples.
    for start in range(0, num_samples, batch_size):
        end = min(start + batch_size, num_samples)
        y_predict = net.predict(test_data[start:end])
        s_c_a.update_state(y_true=test_label[start:end], y_pred=y_predict)
    print('test accuracy:%f' % s_c_a.result())
if __name__ == '__main__':
    # Dataset root: one sub-directory per class (here: horse vs. human).
    path = "E:/project_file/dataset/horse-or-human/valid"
    # Load images resized to 244x244 and split 70/30 into train/test.
    train_data, train_label, test_data, test_label = load_image(path, (244, 244))
    net = model()
    train(net, train_data, train_label)
    print('------------------------------')
    test(test_data, test_label)
模型架构(注:下面的摘要对应 28×28 输入、10 类输出的一次运行结果(如 MNIST),与上文代码中 244×244 输入、2 类输出的设置并不一致,各层输出尺寸和参数量仅供参考)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (32, 26, 26, 6) 60
_________________________________________________________________
max_pooling2d (MaxPooling2D) (32, 13, 13, 6) 0
_________________________________________________________________
conv2d_1 (Conv2D) (32, 11, 11, 16) 880
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (32, 5, 5, 16) 0
_________________________________________________________________
flatten (Flatten) (32, 400) 0
_________________________________________________________________
dense (Dense) (32, 120) 48120
_________________________________________________________________
dense_1 (Dense) (32, 84) 10164
_________________________________________________________________
dense_2 (Dense) (32, 10) 850
=================================================================
Total params: 60,074
Trainable params: 60,074
Non-trainable params: 0
测试精度
test accuracy:0.988281