MNIST机器学习入门
环境介绍:
Python版本:Python 3.8.16
TensorFlow版本:2.6.0
MNIST数据集
简介
MNIST数据集主要由一些手写数字的图片和相应的标签组成,图片一共有10类,分别对应0~9这10个阿拉伯数字。如下图所示:
下载MNIST数据集
要下载MNIST数据集大致有两个方向:
从网站下载MNIST数据集
网站下载链接:http://yann.lecun.com/exdb/mnist/
用TensorFlow自带的工具去下载MNIST数据集(推荐这个)
import tensorflow as tf
# Load MNIST through the Keras dataset helper; the result is unpacked as a
# (train, test) pair, each of which is an (images, labels) pair.
train_split, test_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
加载MNIST数据集
从网上下载的四个压缩包的加载数据方法:
import gzip
import numpy as np
import os
def load_data(path, kind='train'):
    """Load one MNIST split from gzip-compressed IDX files.

    Args:
        path: Directory containing ``{kind}-labels-idx1-ubyte.gz`` and
            ``{kind}-images-idx3-ubyte.gz``.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays. ``images`` has one
        flattened image per row, shape ``(len(labels), rows * cols)``;
        ``labels`` is one-dimensional.

    Raises:
        ValueError: If a file does not start with the expected IDX magic
            number, is too short to hold its header, or the image payload
            cannot be split into ``len(labels)`` images.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        label_bytes = lbpath.read()
    # The label file starts with big-endian uint32 header fields; the first
    # one is the magic number identifying an IDX1 (label) file.
    label_magic = int(np.frombuffer(label_bytes, dtype='>u4', count=1)[0])
    if label_magic != 2049:
        raise ValueError(
            f'{labels_path}: unexpected magic number {label_magic}, expected 2049'
        )
    # Label payload starts after the 8-byte header.
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        image_bytes = imgpath.read()
    # Image header: magic, image count, rows, cols (four big-endian uint32).
    image_magic, _, rows, cols = (
        int(v) for v in np.frombuffer(image_bytes, dtype='>u4', count=4)
    )
    if image_magic != 2051:
        raise ValueError(
            f'{images_path}: unexpected magic number {image_magic}, expected 2051'
        )
    # Pixel payload starts after the 16-byte header. The row width comes from
    # the header rather than a hard-coded 784, so other image sizes also work.
    images = np.frombuffer(image_bytes, dtype=np.uint8, offset=16).reshape(
        len(labels), rows * cols
    )
    return images, labels
# Load both splits from the gzip-compressed IDX files under ./MNIST_data.
x_train, y_train = load_data('./MNIST_data', kind='train')
x_test, y_test = load_data('./MNIST_data', kind='t10k')

# load_data returns one flattened image per row; restore the 28x28 grid.
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape((-1, 28, 28))
x_test = x_test.reshape((-1, 28, 28))

# Scale the uint8 pixel values to float32 in [0.0, 1.0].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

print("Training data shape:", x_train.shape)
print("Training labels shape:", y_train.shape)
print("Test data shape:", x_test.shape)
print("Test labels shape:", y_test.shape)
用TensorFlow自带的工具加载MNIST数据集:(推荐这个)
import tensorflow as tf
# Load MNIST through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixels to [0.0, 1.0]. Casting to float32 first keeps NumPy from
# promoting the division result to float64, which would double the memory
# used by the image arrays; it also matches the gzip-based loading snippet.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

print("Training data shape:", x_train.shape)
print("Training labels shape:", y_train.shape)
print("Test data shape:", x_test.shape)
print("Test labels shape:", y_test.shape)
将MNIST数据集保存为图片
在原始的MNIST数据集中,每张图片都由一个28x28的矩阵表示。
将四个压缩包的数据集保存为图片
import gzip
import numpy as np
import os
from PIL import Image
def load_data(path, kind='train'):
    """Load one MNIST split from gzip-compressed IDX files.

    Args:
        path: Directory containing ``{kind}-labels-idx1-ubyte.gz`` and
            ``{kind}-images-idx3-ubyte.gz``.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays. ``images`` has one
        flattened image per row, shape ``(len(labels), rows * cols)``;
        ``labels`` is one-dimensional.

    Raises:
        ValueError: If a file does not start with the expected IDX magic
            number, is too short to hold its header, or the image payload
            cannot be split into ``len(labels)`` images.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        label_bytes = lbpath.read()
    # The label file starts with big-endian uint32 header fields; the first
    # one is the magic number identifying an IDX1 (label) file.
    label_magic = int(np.frombuffer(label_bytes, dtype='>u4', count=1)[0])
    if label_magic != 2049:
        raise ValueError(
            f'{labels_path}: unexpected magic number {label_magic}, expected 2049'
        )
    # Label payload starts after the 8-byte header.
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        image_bytes = imgpath.read()
    # Image header: magic, image count, rows, cols (four big-endian uint32).
    image_magic, _, rows, cols = (
        int(v) for v in np.frombuffer(image_bytes, dtype='>u4', count=4)
    )
    if image_magic != 2051:
        raise ValueError(
            f'{images_path}: unexpected magic number {image_magic}, expected 2051'
        )
    # Pixel payload starts after the 16-byte header. The row width comes from
    # the header rather than a hard-coded 784, so other image sizes also work.
    images = np.frombuffer(image_bytes, dtype=np.uint8, offset=16).reshape(
        len(labels), rows * cols
    )
    return images, labels
# Load both splits from the gzip-compressed IDX files under ./MNIST_data.
x_train, y_train = load_data('./MNIST_data', kind='train')
x_test, y_test = load_data('./MNIST_data', kind='t10k')

# load_data returns one flattened image per row; restore the 28x28 grid.
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape((-1, 28, 28))
x_test = x_test.reshape((-1, 28, 28))

# exist_ok=True creates the directory only when missing, without a separate
# (race-prone) existence check.
os.makedirs('mnist_images', exist_ok=True)

# Save the first 10 training images as grayscale ('L' mode) PNG files named
# by their index. Slicing keeps the loop safe if fewer than 10 images exist.
for i, pixels in enumerate(x_train[:10]):
    img = Image.fromarray(pixels, mode='L')
    img.save(f'mnist_images/{i}.png')
TensorFlow自带工具加载数据集并保存为图片
import tensorflow as tf
from PIL import Image
import os
# Load MNIST through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# exist_ok=True creates the directory only when missing, without a separate
# (race-prone) existence check.
os.makedirs('mnist_images', exist_ok=True)

# Save the first 10 training images as grayscale ('L' mode) PNG files named
# by their index. Slicing keeps the loop safe if fewer than 10 images exist.
for i, pixels in enumerate(x_train[:10]):
    img = Image.fromarray(pixels, mode='L')
    img.save(f'mnist_images/{i}.png')
正式编程
从这里开始,以TensorFlow为工具,写一个手写体数字识别程序。
利用Softmax回归
Softmax回归是一个线性的多分类模型,它是由Logistic回归模型推广而来的,Logistic回归模型为二分类模型。
在这个代码中,模型主要有两个层:一个是展平层,一个是全连接层。展平层将图像数据从二维矩阵变成一维向量,该向量长度为 28x28 = 784。在全连接层中,每个神经元都与前一层的每个输入连接,因此该层包含 784x10 = 7840 个连接权重和 10 个偏置项。最后,输出向量通过 softmax 函数进行激活,转换为概率分布,表示输入图像属于 0 到 9 这 10 个数字的概率。
读取四个MNIST压缩文件编写
import gzip
import numpy as np
import os
import tensorflow as tf
def load_data(path, kind='train'):
    """Load one MNIST split from gzip-compressed IDX files.

    Args:
        path: Directory containing ``{kind}-labels-idx1-ubyte.gz`` and
            ``{kind}-images-idx3-ubyte.gz``.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays. ``images`` has one
        flattened image per row, shape ``(len(labels), rows * cols)``;
        ``labels`` is one-dimensional.

    Raises:
        ValueError: If a file does not start with the expected IDX magic
            number, is too short to hold its header, or the image payload
            cannot be split into ``len(labels)`` images.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    with gzip.open(labels_path, 'rb') as lbpath:
        label_bytes = lbpath.read()
    # The label file starts with big-endian uint32 header fields; the first
    # one is the magic number identifying an IDX1 (label) file.
    label_magic = int(np.frombuffer(label_bytes, dtype='>u4', count=1)[0])
    if label_magic != 2049:
        raise ValueError(
            f'{labels_path}: unexpected magic number {label_magic}, expected 2049'
        )
    # Label payload starts after the 8-byte header.
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        image_bytes = imgpath.read()
    # Image header: magic, image count, rows, cols (four big-endian uint32).
    image_magic, _, rows, cols = (
        int(v) for v in np.frombuffer(image_bytes, dtype='>u4', count=4)
    )
    if image_magic != 2051:
        raise ValueError(
            f'{images_path}: unexpected magic number {image_magic}, expected 2051'
        )
    # Pixel payload starts after the 16-byte header. The row width comes from
    # the header rather than a hard-coded 784, so other image sizes also work.
    images = np.frombuffer(image_bytes, dtype=np.uint8, offset=16).reshape(
        len(labels), rows * cols
    )
    return images, labels
# Load both splits from the gzip-compressed IDX files under ./MNIST_data.
x_train, y_train = load_data('./MNIST_data', kind='train')
x_test, y_test = load_data('./MNIST_data', kind='t10k')

# load_data returns one flattened image per row; restore the 28x28 grid.
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape((-1, 28, 28))
x_test = x_test.reshape((-1, 28, 28))

# Scale the uint8 pixel values to float32 in [0.0, 1.0].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Softmax regression: flatten each 28x28 image, then a single dense layer
# with 10 softmax-activated outputs (one per digit class).
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)

test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

# Compare the predicted class of the first test image with its label.
y_pred = model.predict(x_test)
print("Prediction:", np.argmax(y_pred[0]))
print("Label:", y_test[0])
利用TensorFlow自带的工具加载MNIST编写
import tensorflow as tf
import numpy as np
# Load MNIST through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixels to [0.0, 1.0]. Casting to float32 first keeps NumPy from
# promoting the division result to float64, which would double the memory
# used by the image arrays; it also matches the gzip-based variant.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Softmax regression: flatten each 28x28 image, then a single dense layer
# with 10 softmax-activated outputs (one per digit class).
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)

test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

# Compare the predicted class of the first test image with its label.
y_pred = model.predict(x_test)
print("Prediction:", np.argmax(y_pred[0]))
print("Label:", y_test[0])
运行结果:
利用两层卷积网络分类
两个卷积层,每个卷积层后面跟着一个最大池化层,然后是一个展平层和两个全连接层
通过卷积和池化操作将输入的图像数据进行特征提取,并通过全连接层将提取的特征映射到对应的分类得分
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
# Load MNIST through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixels to [0.0, 1.0]; casting to float32 first avoids a float64 copy.
train_images = x_train.astype('float32') / 255.0
test_images = x_test.astype('float32') / 255.0

# Conv2D expects a trailing channel axis; add it once and reuse the result
# for training, evaluation and prediction.
train_inputs = train_images.reshape(-1, 28, 28, 1)
test_inputs = test_images.reshape(-1, 28, 28, 1)

# Two conv + max-pool stages, then flatten and two dense layers. The last
# layer has no activation, so the model outputs raw logits.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))

# from_logits=True matches the activation-free output layer above.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_inputs, y_train, epochs=5,
          validation_data=(test_inputs, y_test))

test_loss, test_acc = model.evaluate(test_inputs, y_test, verbose=2)
print('Test accuracy:', test_acc)

# Predict on the normalized first test image. The model was trained on inputs
# scaled to [0, 1], so feeding the raw 0-255 pixels of x_test (as the original
# snippet did) would give it out-of-distribution input.
y_pred = model.predict(test_inputs[:1])
print("Prediction:", np.argmax(y_pred[0]))
print("Label:", y_test[0])
运行结果