Mnist 手写数字识别
文章目录
前情提要:需要 conda 环境,或 Python 3.9 及以上版本
conda实现。
计算既可以使用 GPU(显卡),也可以使用 CPU
由python-> TensorFlow 实现
TensorFlow 是由Google开发的。
安装
% Windows / Linux 用户
pip install tensorflow
%Mac用户
pip install tensorflow-macos
pip install tensorflow-metal
代码的实现
获取Mnist数据集
#官网地址下载
#http://yann.lecun.com/exdb/mnist/
#使用TF下载
#先导入包
import tensorflow as tf
# mac用户
import keras
import matplotlib.pyplot as plt
# windows用户 Linux
import tensorflow.keras as keras
# Keras ships a loader for MNIST; the argument is the .npz file location handed to load_data.
mnist = keras.datasets.mnist
train_split, test_split = mnist.load_data("Absolute_path/data/mnist.npz")
x_train, y_train = train_split
x_test, y_test = test_split
展示部分数据集
def showImages(x_train, y_train):
    """Plot nine randomly chosen samples in a 3x3 grid, each titled with its label.

    Args:
        x_train: Indexable collection of images that ``imshow`` can draw.
        y_train: Labels aligned index-for-index with ``x_train``.
    """
    import random

    from matplotlib import pyplot as plt

    plt.figure(figsize=(8, 8))
    for i in range(9):
        # randrange excludes the upper bound. randint(0, len(x_train)) is
        # inclusive and could return len(x_train), raising IndexError.
        x = random.randrange(len(x_train))
        img, label = x_train[x], y_train[x]
        ax = plt.subplot(3, 3, i + 1)
        ax.imshow(img)
        ax.set_title(f"{label}")
        # Hide the axis ticks; they carry no information for an image.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
操作数据集
import tensorflow as tf
# mac用户
import keras
import matplotlib.pyplot as plt
# windows用户 Linux
import tensorflow.keras as keras
# Keras ships a loader for MNIST; pass an absolute path to the .npz file.
mnist = keras.datasets.mnist
train_split, test_split = mnist.load_data("Absolute_Path/data/mnist.npz")
x_train, y_train = train_split
x_test, y_test = test_split
def showImages(x_train, y_train):
    """Plot nine randomly chosen samples in a 3x3 grid, each titled with its label.

    Args:
        x_train: Indexable collection of images that ``imshow`` can draw.
        y_train: Labels aligned index-for-index with ``x_train``.
    """
    import random

    from matplotlib import pyplot as plt

    plt.figure(figsize=(8, 8))
    for i in range(9):
        # randrange excludes the upper bound. randint(0, len(x_train)) is
        # inclusive and could return len(x_train), raising IndexError.
        x = random.randrange(len(x_train))
        img, label = x_train[x], y_train[x]
        ax = plt.subplot(3, 3, i + 1)
        ax.imshow(img)
        ax.set_title(f"{label}")
        # Hide the axis ticks; they carry no information for an image.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
# Divide every pixel by 255 (presumably mapping 8-bit values into [0, 1]).
x_train = x_train / 255.
x_test = x_test / 255.
showImages(x_train, y_train)
# Append a trailing channel axis and store the arrays as float32.
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")
# Build a tf.data pipeline over the (image, label) pairs:
# shuffle with a 10000-element buffer, then group into batches of 32.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(10000).batch(32)
该片段代码是对训练集做相应的操作
创建网络模型
# Network architecture
class MnistNet(keras.Model):
    """Small convolutional classifier: Conv2D -> Flatten -> Dense(128) -> Dense(10)."""

    def __init__(self):
        super().__init__()
        layers = keras.layers
        stack = [
            layers.Conv2D(32, 3, activation="relu"),  # 32 filters, kernel size 3, ReLU
            layers.Flatten(),
            layers.Dense(128, activation="relu"),  # fully connected hidden layer
            layers.Dense(10),  # fully connected output layer, no activation
        ]
        # The attribute name is kept as `conv1` even though it holds the whole stack.
        self.conv1 = keras.Sequential(stack)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass on ``inputs`` (the counterpart of PyTorch's ``forward``)."""
        outputs = self.conv1(inputs)
        return outputs
实例化网络
# Build and compile the network
def createNet(path=None):
    """Create a compiled ``MnistNet``, optionally restoring saved weights.

    Args:
        path: Weights/checkpoint path handed to ``load_weights``. ``None``
            (the default) keeps the freshly initialised weights.

    Returns:
        The compiled model.
    """
    model = MnistNet()
    # Loss: the network's last Dense layer has no activation, so it emits
    # raw scores and the loss must be told to expect logits.
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # Optimizer: the original snippet hard-coded the legacy Adam (recommended
    # for macOS). Fall back to the regular Adam when the legacy namespace is
    # missing or refuses to instantiate, so the same code runs elsewhere.
    try:
        optimizer = keras.optimizers.legacy.Adam(learning_rate=0.001)
    except (AttributeError, ImportError):
        optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
    if path is not None:  # restore previously trained weights
        model.load_weights(path)
    return model
训练网络
# Train for 10 epochs, using the test split as validation data.
model_1 = createNet()
validation_pair = (x_test, y_test)
model_1.fit(x_train, y_train, epochs=10, validation_data=validation_pair)
查看网络结构
# Show the layer-by-layer structure. `summary` is a method and prints the
# table itself; print(model_1.summary) only showed the bound-method repr.
model_1.summary()
验证
# Evaluate the trained model on the test split.
model_1.evaluate(x=x_test, y=y_test, verbose=2)
保存
# Save weights automatically through a checkpoint callback.
import os  # needed for os.path.dirname below; the original snippet never imported it

model = createNet()
# Checkpoint file pattern; {epoch:04d} is filled in with the epoch number.
checkpoints_path = "Mnist/mnist--{epoch:04d}.cpkt"
checkpoints_dir = os.path.dirname(checkpoints_path)
# Callback that writes weights only.
# NOTE(review): `period` is deprecated in favour of `save_freq` in recent
# tf.keras releases; kept unchanged here to preserve behaviour — confirm
# against the installed version.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoints_path,
    save_weights_only=True,
    period=2,
    verbose=1,
)
# Train with the callback attached, then check against the test split.
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10, callbacks=[cp_callback])
model.evaluate(x_test, y_test, verbose=2)
加载
# Locate the most recent checkpoint in the checkpoint directory.
latest = tf.train.latest_checkpoint(checkpoints_dir)
if latest is None:
    # latest_checkpoint returns None when nothing has been saved yet.
    raise FileNotFoundError(f"no checkpoint found in {checkpoints_dir!r}")
# Fixed typo: the method is `load_weights`, not `load_weigths`.
# expect_partial() silences warnings about checkpoint values that are not restored.
model.load_weights(latest).expect_partial()
单张图片测试
需要把图片整理成 BHWC(即 NHWC:批次、高、宽、通道)格式,即 (1, 28, 28, 1)
# Single-image test
import numpy  # used for argmax/squeeze below; the original snippet never imported it

# Prepend an axis so the single image becomes a batch of one.
x = x_test[0][tf.newaxis, ...].astype('float32')
print((x.shape))
predictions = model_1.predict(x)  # one row of class scores
print(predictions)
# Drop any size-1 axis before plotting; not every matplotlib version
# accepts an (H, W, 1) array in imshow.
plt.imshow(numpy.squeeze(x_test[0]))
plt.title(numpy.argmax(predictions))  # index of the highest score = predicted digit
plt.show()
由python-> Pytorch实现
安装
# mac
pip install torch torchvision torchaudio
# windows
# 需要查看你的cuda版本去下载相应的pytorch版本
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu版本
# 若是不需要使用gpu计算
pip install torch torchvision torchaudio
代码的实现
下载数据集
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# One preprocessing pipeline shared by both splits: convert to a tensor, then normalise.
# NOTE(review): the std usually quoted for MNIST is 0.3081; 0.3001 is kept
# exactly as in the original — confirm which value is intended.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3001,)),
])
train_set = datasets.MNIST('./data', train=True, download=True, transform=mnist_transform)
test_set = datasets.MNIST('./data', train=False, download=True, transform=mnist_transform)
# No batch_size is passed, so each DataLoader uses its default batch size.
train_loader = DataLoader(train_set)
test_loader = DataLoader(test_set)
创建网络模型
class Net(nn.Module):
    """Fully connected MNIST classifier: 784 -> 256 -> 64 -> 10 with a softmax output."""

    def __init__(self):
        super().__init__()
        # The attribute name and layer order are kept as-is: the state_dict
        # keys ("sequential.0.weight", ...) depend on both.
        self.sequential = nn.Sequential(
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return class probabilities for a batch of images.

        Args:
            x: Batch of images, either already flattened to ``(N, 784)`` or in
                any ``(N, ...)`` layout holding 784 values per sample.

        Returns:
            Tensor of shape ``(N, 10)`` whose rows each sum to 1.
        """
        # Flatten everything after the batch axis; a no-op for (N, 784) input,
        # so existing callers that flatten themselves are unaffected.
        x = x.flatten(start_dim=1)
        return self.sequential(x)
实例化网络
# Instantiate the network, the optimiser and the training length.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")  # prefer the GPU when one is available
net = Net().to(device=device)
epochs = 10
optimzer = optim.Adam(net.parameters(), lr=0.0001)
训练
# Training
net.train()
for epoch in range(1, epochs + 1):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device=device), target.to(device=device)
        optimzer.zero_grad()  # clear gradients left over from the previous step
        data = data.view(data.size(0), 28 * 28)  # flatten each image to a 784-vector
        output = net(data)  # forward pass
        # One-hot float targets for the MSE loss. `.float()` converts on the
        # tensor's current device; `.type(torch.FloatTensor)` would first move
        # a CUDA tensor to the CPU and need a second transfer back.
        target_onehot = F.one_hot(target, num_classes=10).float()
        loss = F.mse_loss(output, target_onehot)
        loss.backward()  # back-propagate to compute gradients
        optimzer.step()  # apply the parameter update
        if (batch_idx + 1) % 30 == 0:
            # Progress report every 30 batches.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
测试
# Evaluation
net.eval()
test_loss = 0.0
correct = 0
# No gradients are needed for evaluation; disabling them avoids building a
# graph for every batch and keeps the running loss a plain float.
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device=device), target.to(device=device)
        data = data.view(data.size(0), 28 * 28)  # flatten each image to a 784-vector
        output = net(data)
        target_onehot = F.one_hot(target, num_classes=10).float()
        # mse_loss averages over the batch; weight it by the batch size so the
        # division by the dataset size below is right for any batch size.
        test_loss += F.mse_loss(output, target_onehot).item() * data.size(0)
        pred = output.max(1, keepdim=True)[1]  # index of the largest output per sample
        correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))
保存模型
# Persist only the learned parameters (the state_dict) of the instantiated network `net`.
weights = net.state_dict()
torch.save(weights, 'mnist.pt')
加载模型
# Restore the saved parameters into `net`, the instantiated PyTorch network.
# The original called this on `model`, which is not the network's name here.
# map_location lets weights saved on a GPU load on a CPU-only machine.
net.load_state_dict(torch.load('mnist.pt', map_location=device))
测试单张图片
安装 OpenCV
pip install opencv-python
图片处理
import cv2 as cv
img = cv.imread("3.png")
if img is None:
    # cv.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("3.png could not be read")
print(img.shape)
# imread returns channels in BGR order, so the matching conversion is BGR2GRAY.
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # grayscale
# Inverse binarisation: pixels above 90 become 0, the rest 255.
res, img = cv.threshold(img, 90, 255, cv.THRESH_BINARY_INV)
cv.imshow("gray", img)
cv.waitKey(1)  # give the GUI a chance to draw the window
img = cv.resize(img, (28, 28))  # shrink to the 28x28 input size
print(img.shape)
# Apply the same scaling and normalisation as the training transform
# (ToTensor scales to [0, 1]; Normalize uses mean 0.1307 and std 0.3001).
# Feeding raw 0-255 values would not match what the network was trained on.
img = torch.from_numpy(img).float().div(255.0)
img = (img - 0.1307) / 0.3001
img = img.to(device).view(1, 28 * 28)  # one flattened sample: shape (1, 784)
print(img.shape)
图片预测
# Run the network on the prepared image and report the index of the largest output.
pre = net(img)
best_index = pre.argmax()
print(best_index)