Pytorch手写数字实验(自己手写识别)

一、概述

该文是基于PyTorch框架,采用CNN卷积神经网络实现手写数字识别,共采用了2个卷积层、两个池化层和三个线性层。(仅使用GPU进行训练,用的ide是pycharm)

本人也是初学者,如果有不对的地方,欢迎各位大佬提出意见和改进。

导入相关的包:

import time
import torch
from torch.utils.data import DataLoader
from torch import nn
import torchvision

二、构建网络模型

首先需要了解cnn网络模型框架

比如传入的数字是‘5’,它的维度为(1,28,28)

首先通过一个5x5的卷积核(卷积层),其通道从1变为10,尺寸从28*28变为24*24.

再通过一个2x2的池化窗口(最大池化层),其通道没有变,仍为10,尺寸从24*24变为12*12

然后通过一个5x5的卷积核(卷积层),其通道从10变为20,尺寸12*12变为8*8

再通过一个2x2的池化窗口(最大池化层),其通道没有变,仍为20,尺寸从8*8变为4*4

最后通过三个线性层,特征数从320(即20*4*4展平后的结果)依次变为128、64,最终输出10个类别

填充边界----padding 和 步长----stride 通过下面这个公式进行计算

构建网络的代码为:

class Mnist(nn.Module):
    """CNN classifier for MNIST digits.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) raw logits (no softmax; pair with nn.CrossEntropyLoss).

    Layer-by-layer shapes:
        Conv(1->10, 5x5): 28x28 -> 24x24, then 2x2 max-pool -> 12x12
        Conv(10->20, 5x5): 12x12 -> 8x8, then 2x2 max-pool -> 4x4
        Flatten: 20*4*4 = 320 features -> 128 -> 64 -> 10 logits
    """

    def __init__(self):
        super(Mnist, self).__init__()
        # BUG FIX: the original network had NO hidden non-linearities — its only
        # ReLU came AFTER the final Linear layer, which clamps logits to >= 0 and
        # degrades CrossEntropyLoss training. ReLUs now follow each hidden layer
        # and the output layer emits raw logits.
        # NOTE: layer indices change, so checkpoints saved with the old
        # architecture must be retrained.
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Conv2d(10, 20, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Flatten(),  # -> (N, 320)
            nn.Linear(320, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),  # raw logits
        )

    def forward(self, x):
        """Run the network; returns (N, 10) logits."""
        return self.model(x)

三、数据集的导入

通过torchvision导入MNIST数据集,代码如下:

def loader_data(test_data_train):
    """Return a DataLoader over MNIST.

    test_data_train: True loads the 60k training split, False the 10k test split.
    Images are converted to tensors; the dataset is downloaded on first use.
    """
    dataset = torchvision.datasets.MNIST(
        '../dataset',
        train=test_data_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    # Batches of 32 samples, reshuffled every epoch.
    return DataLoader(dataset, batch_size=32, shuffle=True)

为了简化代码和方便,将导入数据集封装成一个函数。

四、训练

将模型实例化和GPU设定

为了节省训练的时间,采用GPU进行训练。

# BUG FIX: the original hard-coded 'cuda', which raises at runtime on
# CPU-only machines; fall back to CPU when no CUDA device is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mnist = Mnist()
mnist.to(device)  # move all parameters/buffers to the chosen device

采用GPU训练前,首先需要确定是否下载了CUDA驱动,如果没有安装,提前安装好。安装教程请参考:https://blog.csdn.net/qq_35831906/article/details/134349866?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522172423115416800178555617%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=172423115416800178555617&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_positive~default-1-134349866-null-null.142^v100^pc_search_result_base1&utm_term=cuda%20pytorch&spm=1018.2226.3001.4187

训练代码如下:

def train(mnist, epoch):  # epoch = number of training epochs
    """Train `mnist` for `epoch` epochs on MNIST and save the best checkpoint.

    After each epoch the model is evaluated on the test split; the weights of
    the best-accuracy epoch are saved to "Mnist.pth".
    Relies on module-level `device`, `loader_data` and `estimate`.
    """
    best_acc = 0
    best_model = None  # BUG FIX: was unbound if no epoch improved (e.g. epoch=0)
    train_data = loader_data(test_data_train=True)   # training split
    test_data = loader_data(test_data_train=False)   # test split
    start_time = time.time()  # for wall-clock timing of training
    print("initial_accuracyL:{}".format(estimate(test_data, mnist)[1]))
    optim = torch.optim.Adam(mnist.parameters(), lr=0.001)  # Adam, lr = 0.001
    # Hoisted out of the batch loop: the original rebuilt CrossEntropyLoss
    # for every single batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    for i in range(epoch):
        print('-----第{}轮训练开始-----'.format(i + 1))
        for images, targets in train_data:
            images = images.to(device)
            targets = targets.to(device)
            optim.zero_grad()  # clear gradients from the previous step
            outputs = mnist(images.view(-1, 1, 28, 28))
            loss = loss_(outputs, targets)
            loss.backward()  # backpropagate
            optim.step()     # update parameters
        total_time = time.time() - start_time
        # Single evaluation per epoch — the original called estimate() three
        # times per epoch, tripling the (expensive) test-set pass.
        epoch_loss, epoch_acc = estimate(test_data, mnist)
        print("accuracy:{}, 用时:{}min{:.2f}s, Loss{}".format(epoch_acc
                                                        , total_time // 60
                                                        , total_time % 60
                                                        , epoch_loss
                                                        ))
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            # BUG FIX: state_dict() returns references to the live parameter
            # tensors, so later epochs would overwrite the "best" snapshot.
            # Clone each tensor to freeze the best weights.
            best_model = {k: v.detach().clone() for k, v in mnist.state_dict().items()}

    if best_model is not None:
        torch.save(best_model, "Mnist.pth")  # persist the best checkpoint

Adam(Adaptive Moment Estimation)是一种常用的优化算法,特别适用于训练神经网络和深度学习模型。它是一种自适应学习率的优化算法,可以根据不同参数的梯度信息来动态调整学习率,以提高训练的效率和稳定性。

训练整体代码

"""
author: XiaoShu
date: 2024-08-21
"""

import time
import torch
from torch.utils.data import DataLoader
from torch import nn
import torchvision

# BUG FIX: the original hard-coded 'cuda', which raises at runtime on
# CPU-only machines; fall back to CPU when no CUDA device is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


class Mnist(nn.Module):
    """CNN classifier for MNIST digits.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) raw logits (no softmax; pair with nn.CrossEntropyLoss).

    Layer-by-layer shapes:
        Conv(1->10, 5x5): 28x28 -> 24x24, then 2x2 max-pool -> 12x12
        Conv(10->20, 5x5): 12x12 -> 8x8, then 2x2 max-pool -> 4x4
        Flatten: 20*4*4 = 320 features -> 128 -> 64 -> 10 logits
    """

    def __init__(self):
        super(Mnist, self).__init__()
        # BUG FIX: the original network had NO hidden non-linearities — its only
        # ReLU came AFTER the final Linear layer, which clamps logits to >= 0 and
        # degrades CrossEntropyLoss training. ReLUs now follow each hidden layer
        # and the output layer emits raw logits.
        # NOTE: layer indices change, so checkpoints saved with the old
        # architecture must be retrained.
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Conv2d(10, 20, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Flatten(),  # -> (N, 320)
            nn.Linear(320, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),  # raw logits
        )

    def forward(self, x):
        """Run the network; returns (N, 10) logits."""
        return self.model(x)


# nn.Module.to() moves parameters in place and returns the module itself,
# so construction and device placement can be chained.
mnist = Mnist().to(device)


def loader_data(test_data_train):
    """Return a DataLoader over MNIST.

    test_data_train: True loads the 60k training split, False the 10k test split.
    Images are converted to tensors; the dataset is downloaded on first use.
    """
    dataset = torchvision.datasets.MNIST(
        '../dataset',
        train=test_data_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    # Batches of 32 samples, reshuffled every epoch.
    return DataLoader(dataset, batch_size=32, shuffle=True)


# Evaluate model performance on a held-out set.
def estimate(test_data, mnist):
    """Return (mean loss, accuracy) of `mnist` over `test_data`.

    test_data: iterable of (images, targets) batches.
    Relies on module-level `device`. Runs under torch.no_grad().
    """
    # Hoisted: the original rebuilt CrossEntropyLoss for every batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for images, targets in test_data:
            images = images.to(device)
            targets = targets.to(device)
            # BUG FIX: the original ran the forward pass TWICE per batch
            # (once for the loss, once for the predictions).
            outputs = mnist(images.view(-1, 1, 28, 28))
            # BUG FIX: the original returned only the LAST batch's loss;
            # accumulate a sample-weighted sum and average over the whole set.
            total_loss += loss_(outputs, targets).item() * targets.size(0)
            # Vectorized accuracy count instead of a Python per-sample loop.
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    if total == 0:  # guard against an empty loader (ZeroDivisionError before)
        return 0.0, 0.0
    return total_loss / total, correct / total


def train(mnist, epoch):
    """Train `mnist` for `epoch` epochs on MNIST and save the best checkpoint.

    After each epoch the model is evaluated on the test split; the weights of
    the best-accuracy epoch are saved to "Mnist1.pth".
    Relies on module-level `device`, `loader_data` and `estimate`.
    """
    best_acc = 0
    best_model = None  # BUG FIX: was unbound if no epoch improved (e.g. epoch=0)
    train_data = loader_data(test_data_train=True)
    test_data = loader_data(test_data_train=False)
    start_time = time.time()
    print("initial_accuracyL:{}".format(estimate(test_data, mnist)[1]))
    optim = torch.optim.Adam(mnist.parameters(), lr=0.001)  # Adam, lr = 0.001
    # Hoisted out of the batch loop: the original rebuilt CrossEntropyLoss
    # for every single batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    for i in range(epoch):
        print('-----第{}轮训练开始-----'.format(i + 1))
        for images, targets in train_data:
            images = images.to(device)
            targets = targets.to(device)
            optim.zero_grad()  # clear gradients from the previous step
            outputs = mnist(images.view(-1, 1, 28, 28))
            loss = loss_(outputs, targets)
            loss.backward()
            optim.step()
        total_time = time.time() - start_time
        # Single evaluation per epoch — the original called estimate() three
        # times per epoch, tripling the (expensive) test-set pass.
        epoch_loss, epoch_acc = estimate(test_data, mnist)
        print("accuracy:{}, 用时:{}min{:.2f}s, Loss{}".format(epoch_acc
                                                        , total_time // 60
                                                        , total_time % 60
                                                        , epoch_loss
                                                        ))
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            # BUG FIX: state_dict() returns references to the live parameter
            # tensors, so later epochs would overwrite the "best" snapshot.
            # Clone each tensor to freeze the best weights.
            best_model = {k: v.detach().clone() for k, v in mnist.state_dict().items()}

    if best_model is not None:
        torch.save(best_model, "Mnist1.pth")  # persist the best checkpoint


# Entry point: only train when run as a script (not when imported by the
# inference scripts below, which do `import train`).
if __name__ == '__main__':
    train(mnist, 100)  # train for 100 epochs

五、模型性能评估

代码:

def estimate(test_data, mnist):
    """Return (mean loss, accuracy) of `mnist` over `test_data`.

    test_data: iterable of (images, targets) batches.
    Relies on module-level `device`. Runs under torch.no_grad().
    """
    # Hoisted: the original rebuilt CrossEntropyLoss for every batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for images, targets in test_data:
            images = images.to(device)
            targets = targets.to(device)
            # BUG FIX: the original ran the forward pass TWICE per batch
            # (once for the loss, once for the predictions).
            outputs = mnist(images.view(-1, 1, 28, 28))
            # BUG FIX: the original returned only the LAST batch's loss;
            # accumulate a sample-weighted sum and average over the whole set.
            total_loss += loss_(outputs, targets).item() * targets.size(0)
            # Vectorized accuracy count instead of a Python per-sample loop.
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    if total == 0:  # guard against an empty loader (ZeroDivisionError before)
        return 0.0, 0.0
    return total_loss / total, correct / total

六、训练后效果

本文训练了100轮

模型训练好后可以开始测试了,新建一个测试的py文件

测试代码(可视化):

"""
author: XiaoShu
date: 2024-08-21
"""
import torchvision

import train
import torch
from PIL import Image
import matplotlib.pyplot as plt

img_path = '../test_two.png'
image = Image.open(img_path)
image1 = Image.open(img_path)  # untouched copy kept for display
# BUG FIX: convert to a single grayscale channel before ToTensor. A colour PNG
# has 3-4 channels, so reshape(-1, 1, 28, 28) would silently split it into a
# batch of 3-4 wrong "images" instead of one digit.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((28, 28)),
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])

image = transform(image)
image = torch.reshape(image, (-1, 1, 28, 28))  # (1, 1, 28, 28) model input
model = train.Mnist()
# map_location='cpu' so GPU-trained weights load on a CPU-only machine.
model.load_state_dict(torch.load('Mnist1.pth', map_location=torch.device('cpu')))
model.eval()  # inference mode
with torch.no_grad():
    outPut = model(image)
    prediction = outPut.argmax().item()  # index of the max logit = digit class
print('识别结果:{}'.format(prediction))

# Visualization: show the original image with the predicted label as title.
f, a = plt.subplots(1, 1, figsize=(7, 7))
a.set_title('predict:{}'.format(prediction))
a.axis('off')
a.imshow(image1)
plt.show()

我导入图片是4

结果预测也是4

可视化效果如下

七、自己手写的数字

由于自己手写的数字尺寸大小和通道与我们想要输入的图片不同,所以我们需要先对图片进行预处理。

新建一个py文件

预处理代码如下:

preprocess = transforms.Compose([
    transforms.Resize((28, 28)),                  # resize to the MNIST input size
    transforms.Grayscale(num_output_channels=1),  # convert to a single grey channel
    transforms.ToTensor(),                        # PIL image -> float tensor in [0, 1]
    # BUG FIX: the trailing comment originally wrapped onto its own source line,
    # which was a syntax error. The mean/std below are the MNIST dataset statistics.
    transforms.Normalize((0.1307,), (0.3081,)),
])

整体代码加上可视化

"""
author: XiaoShu
date: 2024-08-21
"""
from torchvision import transforms
import train
import torch
from PIL import Image
import matplotlib.pyplot as plt

preprocess = transforms.Compose([
    transforms.Resize((28, 28)),                  # resize to the MNIST input size
    transforms.Grayscale(num_output_channels=1),  # convert to a single grey channel
    transforms.ToTensor(),                        # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,)),   # MNIST dataset mean/std
])

# The original copy-pasted the load/preprocess/predict/plot code four times
# (image1..image4, image_1..image_4, image__1..image__4); lists + loops do
# the same work once.
img_paths = ['0.png', '3.png', '5.png', '8.png']
raw_images = [Image.open(p) for p in img_paths]           # originals for display
# Preprocess and add the batch dimension the model expects: (1, 1, 28, 28).
batches = [torch.reshape(preprocess(im), (-1, 1, 28, 28)) for im in raw_images]

model = train.Mnist()
# map_location='cpu' so GPU-trained weights load on a CPU-only machine.
model.load_state_dict(torch.load('Mnist1.pth', map_location=torch.device('cpu')))
model.eval()  # inference mode
with torch.no_grad():
    # Predicted digit = index of the max logit for each image.
    predictions = [model(batch).argmax().item() for batch in batches]

# Visualization: 2x2 grid, each cell shows an original image with its prediction.
f, a = plt.subplots(2, 2, figsize=(7, 7))
for ax, im, pred in zip(a.flat, raw_images, predictions):
    ax.set_title('predict:{}'.format(pred))
    ax.axis('off')
    ax.imshow(im)

plt.show()

  • 16
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
实验目的: 使用PyTorch框架实现手写数字识别模型,并对模型进行优化,提高识别准确率。 实验步骤: 1. 数据集准备 使用MNIST手写数字数据集,该数据集包含60000个训练样本和10000个测试样本,每个样本都是28x28像素的灰度图像。可以使用PyTorch自带的torchvision.datasets.MNIST类进行数据集的加载。 2. 模型设计与训练 使用PyTorch搭建卷积神经网络模型,对手写数字图像进行分类。具体网络结构如下: ```python class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) self.dropout1 = nn.Dropout2d(0.25) self.dropout2 = nn.Dropout2d(0.5) self.fc1 = nn.Linear(9216, 128) self.fc2 = nn.Linear(128, 10) def forward(self, x): x = self.conv1(x) x = F.relu(x) x = self.conv2(x) x = F.relu(x) x = F.max_pool2d(x, 2) x = self.dropout1(x) x = torch.flatten(x, 1) x = self.fc1(x) x = F.relu(x) x = self.dropout2(x) x = self.fc2(x) output = F.log_softmax(x, dim=1) return output ``` 模型训练过程: ```python model = Net().to(device) optimizer = optim.Adadelta(model.parameters(), lr=0.1) def train(model, device, train_loader, optimizer, epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) loss.backward() optimizer.step() if batch_idx % 10 == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item())) def test(model, device, test_loader): model.eval() test_loss = 0 correct = 0 with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) test_loss += F.nll_loss(output, target, reduction='sum').item() pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( test_loss, correct, len(test_loader.dataset), 100. 
* correct / len(test_loader.dataset))) epochs = 10 for epoch in range(1, epochs + 1): train(model, device, train_loader, optimizer, epoch) test(model, device, test_loader) ``` 3. 模型优化 对模型进行优化,提高模型的准确率。可以尝试以下优化方法: - 改变学习率,使用更好的优化器(如Adam等); - 对数据集进行增强,如旋转、平移、缩放等; - 改变网络结构,尝试添加BatchNormalization层、使用更多的卷积层和全连接层等; - 加入正则化,如L1、L2正则化等。 实验结果: 使用上述模型,在MNIST数据集上进行训练,最终得到的准确率为98.89%。可以看出使用PyTorch框架实现手写数字识别是非常方便的。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值