[Study Notes] A simple implementation of MNIST handwritten digit recognition

Kaggle ranking: Digit Recognizer | Kaggle

Reference video: 手把手完成mnist手写数字识别_哔哩哔哩_bilibili (a step-by-step MNIST walkthrough on Bilibili)

Usage notes for some of the functions: The Functional API | TensorFlow Core (google.cn)

One hidden layer

from keras.utils import to_categorical
from keras import models, layers, regularizers
from keras.optimizers import RMSprop
from keras.datasets import mnist
import matplotlib.pyplot as plt

# Load the dataset over the network (a proxy may be needed in some regions)
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Load the dataset from a local file
# path = "../mnist.npz"  # change to the actual path of the data
# (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data(path)
# https://blog.csdn.net/guotianqing/article/details/109229950
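# Alternative sketch (an addition, not from the original post): the npz file
# can also be read directly with numpy; the standard mnist.npz uses the keys
# 'x_train', 'y_train', 'x_test', 'y_test'.
# import numpy as np
# with np.load("../mnist.npz") as data:
#     train_images, train_labels = data['x_train'], data['y_train']
#     test_images, test_labels = data['x_test'], data['y_test']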

# Take a quick look at the dataset
# print(train_images.shape, test_images.shape)
# print(train_images[0])
# print(train_labels[0])
# plt.imshow(train_images[0])
# plt.show()

# Flatten each 28x28 image into a row vector and cast to float
# (pixel values stay in [0, 255] here; the LeNet version below scales them to [0, 1])
train_images = train_images.reshape((60000, 28*28)).astype('float')
test_images = test_images.reshape((10000, 28*28)).astype('float')

# One-hot encode the labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
# Quick check
# print(train_labels[0]) 
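# For example, to_categorical([3], num_classes=10) returns
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]] -- each label becomes a length-10 vector
# with a 1 at the class index.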

# Build the network
# https://blog.csdn.net/xuechanba/article/details/124952150
network = models.Sequential()
# Sequential is a Keras model type, best thought of as a container that wraps
# the structure of the network. A Sequential model has exactly one input and
# one output; layers are stacked in order, and each layer's output feeds the
# next layer's input. network starts out empty; adding layers to it is the
# core operation when building a Sequential model.
network.add(layers.Dense(units=15, activation='relu', input_shape=(28*28, )))
# network.add() appends a layer.
# layers provides many common built-in layers, e.g. fully connected,
# convolutional, and pooling layers. Dense is the fully connected layer.
# units sets the number of neurons in the layer.
# activation names the activation function as a string: 'relu', 'softmax',
# 'sigmoid', 'tanh', etc.
# input_shape is the shape of the input data. The first layer receives the
# raw input and must declare its shape; later layers infer their input shape
# from the previous layer's output.
network.add(layers.Dense(units=10, activation='softmax'))
# Configure training
network.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# loss is the loss function, optimizer the optimizer, metrics the evaluation
# metrics. (lr= is deprecated in newer Keras; learning_rate= is the current name.)
# Train the model
network.fit(train_images, train_labels, epochs=20, batch_size=128, verbose=2)
# Arguments: the training inputs and labels; verbose controls progress output:
# 0 = silent, 1 = progress bar, 2 = one line per epoch (default is 1).

# print(network.summary())  pic1
# network.summary() shows the network's structure and parameter counts.

y_pre = network.predict(test_images[:5])
# predict runs the trained model on new inputs and returns its outputs
print(y_pre, test_labels[:5])
test_loss, test_accuracy = network.evaluate(test_images, test_labels)
# evaluate measures the model's loss and metrics on a dataset
print('test_loss', test_loss, 'test_accuracy', test_accuracy)
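The softmax output above is a probability vector per sample; to turn it into digit predictions, take the argmax over the class axis (a small added sketch, not part of the original code):

import numpy as np
print(np.argmax(y_pre, axis=1))            # predicted digits for the first 5 test images
print(np.argmax(test_labels[:5], axis=1))  # ground-truth digits for comparison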

Network structure and parameter counts:

Training results:
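matplotlib is imported above but only used in the commented-out preview; as a minimal sketch (an addition, assuming the fit call above is changed to keep its return value), the training curves can be plotted from the History object that fit() returns:

history = network.fit(train_images, train_labels, epochs=20, batch_size=128, verbose=2)
plt.plot(history.history['loss'], label='loss')          # training loss per epoch
plt.plot(history.history['accuracy'], label='accuracy')  # training accuracy per epoch
plt.xlabel('epoch')
plt.legend()
plt.show()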

One hidden layer (with regularization and dropout)

from keras.utils import to_categorical
from keras import models, layers, regularizers
from keras.optimizers import RMSprop
from keras.datasets import mnist
import matplotlib.pyplot as plt

# Load the dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# print(train_images.shape, test_images.shape)
# print(train_images[0])
# print(train_labels[0])
# plt.imshow(train_images[0])
# plt.show()
# Flatten each image into a row vector
train_images = train_images.reshape((60000, 28*28)).astype('float')
test_images = test_images.reshape((10000, 28*28)).astype('float')
# One-hot encode the labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
# print(train_labels[0])
# network = models.Sequential()
# network.add(layers.Dense(units=15, activation='relu', input_shape=(28*28, ), ))
# network.add(layers.Dense(units=10, activation='softmax'))
# network.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# network.fit(train_images, train_labels, epochs=20, batch_size=128, verbose=2)

network = models.Sequential()
network.add(layers.Dense(units=128, activation='relu', input_shape=(28*28, ),
                         kernel_regularizer=regularizers.l1(0.0001)))  # L1 regularization
network.add(layers.Dropout(0.01))  # dropout: randomly deactivate units during training
network.add(layers.Dense(units=32, activation='relu'))
network.add(layers.Dropout(0.01))
network.add(layers.Dense(units=10, activation='softmax'))
network.compile(optimizer=RMSprop(learning_rate=0.005), loss='categorical_crossentropy', metrics=['accuracy'])
network.fit(train_images, train_labels, epochs=30, batch_size=128, verbose=2)

# print(network.summary())  pic1

y_pre = network.predict(test_images[:5])
print(y_pre, test_labels[:5])
test_loss, test_accuracy = network.evaluate(test_images, test_labels)
print('test_loss', test_loss, 'test_accuracy', test_accuracy)

Network structure:

Training results:
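For comparison (a sketch of alternatives, not used in the run above), Keras also ships L2 and combined L1+L2 regularizers; either line below could replace the first Dense layer above:

network.add(layers.Dense(units=128, activation='relu', input_shape=(28*28, ),
                         kernel_regularizer=regularizers.l2(0.0001)))  # L2 (weight decay) penalty
network.add(layers.Dense(units=128, activation='relu', input_shape=(28*28, ),
                         kernel_regularizer=regularizers.l1_l2(l1=0.0001, l2=0.0001)))  # both penalties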

Using LeNet

from keras.utils import to_categorical
from keras import models, layers, regularizers
from keras.optimizers import RMSprop
from keras.datasets import mnist
import matplotlib.pyplot as plt

# Load the dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Build the LeNet network
def LeNet():
    network = models.Sequential()
    network.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    network.add(layers.AveragePooling2D(2, 2))
    network.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    network.add(layers.AveragePooling2D(2, 2))
    network.add(layers.Conv2D(filters=120, kernel_size=(3, 3), activation='relu'))
    network.add(layers.Flatten())
    network.add(layers.Dense(84, activation='relu'))
    network.add(layers.Dense(10, activation='softmax'))
    return network
network = LeNet()
network.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
print(network.summary())  # pic1
# Add a channel dimension and scale pixel values to [0, 1]
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float')/255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float')/255
# One-hot encode the labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

network.fit(train_images, train_labels, epochs=10, batch_size=128, verbose=2)
y_pre = network.predict(test_images[:5])
print(y_pre, test_labels[:5])
test_loss, test_accuracy = network.evaluate(test_images, test_labels)
print('test_loss', test_loss, 'test_accuracy', test_accuracy)

Training log

2023-12-12 10:54:27.494559: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
WARNING:absl:`lr` is deprecated in Keras optimizer, please use `learning_rate` or use the legacy optimizer, e.g.,tf.keras.optimizers.legacy.RMSprop.
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d (Average  (None, 13, 13, 6)         0         
 Pooling2D)                                                      
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 16)        880       
                                                                 
 average_pooling2d_1 (Avera  (None, 5, 5, 16)          0         
 gePooling2D)                                                    
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 120)         17400     
                                                                 
 flatten (Flatten)           (None, 1080)              0         
                                                                 
 dense (Dense)               (None, 84)                90804     
                                                                 
 dense_1 (Dense)             (None, 10)                850       
                                                                 
=================================================================
Total params: 109994 (429.66 KB)
Trainable params: 109994 (429.66 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/10
469/469 - 4s - loss: 0.3474 - accuracy: 0.8942 - 4s/epoch - 8ms/step
Epoch 2/10
469/469 - 3s - loss: 0.0971 - accuracy: 0.9703 - 3s/epoch - 7ms/step
Epoch 3/10
469/469 - 3s - loss: 0.0644 - accuracy: 0.9804 - 3s/epoch - 7ms/step
Epoch 4/10
469/469 - 3s - loss: 0.0474 - accuracy: 0.9859 - 3s/epoch - 7ms/step
Epoch 5/10
469/469 - 3s - loss: 0.0390 - accuracy: 0.9879 - 3s/epoch - 7ms/step
Epoch 6/10
469/469 - 3s - loss: 0.0324 - accuracy: 0.9900 - 3s/epoch - 7ms/step
Epoch 7/10
469/469 - 3s - loss: 0.0262 - accuracy: 0.9918 - 3s/epoch - 7ms/step
Epoch 8/10
469/469 - 3s - loss: 0.0225 - accuracy: 0.9929 - 3s/epoch - 7ms/step
Epoch 9/10
469/469 - 3s - loss: 0.0203 - accuracy: 0.9937 - 3s/epoch - 7ms/step
Epoch 10/10
469/469 - 3s - loss: 0.0168 - accuracy: 0.9949 - 3s/epoch - 7ms/step
1/1 [==============================] - 0s 55ms/step
[[3.01864367e-10 9.17240936e-11 1.53738000e-09 1.44232235e-08
  3.60069049e-12 3.10055481e-10 1.11714185e-18 1.00000000e+00
  2.56001245e-11 2.30264074e-08]
 [8.97978802e-10 3.14025465e-06 9.99996901e-01 1.20213154e-11
  4.27789411e-16 3.62771418e-13 2.09481651e-12 7.15738883e-13
  1.35102707e-09 1.07217621e-17]
 [3.56921930e-07 9.99976277e-01 4.39466731e-07 1.81662774e-08
  2.06501545e-06 2.51831779e-07 5.92849210e-06 2.30238652e-06
  1.23167438e-05 1.24050842e-07]
 [9.99999762e-01 1.20103237e-15 1.28844851e-11 2.22669279e-12
  2.31126975e-11 5.59162605e-10 6.03151662e-08 8.46737749e-11
  2.34630501e-12 7.35331440e-08]
 [2.53502641e-09 4.85985197e-10 5.43723757e-13 1.01204440e-12
  9.99987006e-01 3.27354699e-10 2.28736224e-10 1.06788411e-09
  1.14983822e-09 1.30280223e-05]] [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
313/313 [==============================] - 0s 1ms/step - loss: 0.0339 - accuracy: 0.9906
test_loss 0.033884115517139435 test_accuracy 0.9905999898910522

Process finished with exit code 0
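Since the post targets the Kaggle Digit Recognizer leaderboard, here is a hedged sketch of producing a submission with the trained LeNet (it assumes Kaggle's test.csv layout of 784 pixel columns per row and the ImageId,Label submission format; file names are the competition defaults):

import numpy as np
import pandas as pd

# Kaggle's test set: one flattened 28x28 image per row, no label column
kaggle_test = pd.read_csv('test.csv').values.reshape((-1, 28, 28, 1)).astype('float') / 255
pred = np.argmax(network.predict(kaggle_test), axis=1)  # softmax -> digit
submission = pd.DataFrame({'ImageId': np.arange(1, len(pred) + 1), 'Label': pred})
submission.to_csv('submission.csv', index=False)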
