Pytorch手写数字实验(自己手写识别)

一、概述

该文是基于PyTorch框架,采用CNN卷积神经网络实现手写数字识别,共采用了2个卷积层、两个池化层和三个线性层。(仅使用GPU进行训练,用的ide是pycharm)

本人也是初学者,如果有不对的地方,欢迎各位大佬提出意见和改进。

导入相关的包:

import time
import torch
from torch.utils.data import DataLoader
from torch import nn
import torchvision

二、构建网络模型

首先需要了解cnn网络模型框架

比如传入的数字是‘5’,它的维度为(1,28,28)

首先通过一个5x5的卷积核(卷积层),其通道从1变为10,尺寸从28*28变为24*24.

再通过一个2x2的池化窗口(最大池化层),其通道没有变,仍为10,尺寸从24*24变为12*12

然后通过一个5x5的卷积核(卷积层),其通道从10变为20,尺寸12*12变为8*8

再通过一个2x2的池化窗口(最大池化层),其通道没有变,仍为20,尺寸从8*8变为4*4

最后通过三个线性层,特征数从320(即20*4*4展平后的结果)依次变为128、64,最终输出10个类别

填充边界----padding 和 步长----stride 通过下面这个公式进行计算

构建网络的代码为:

class Mnist(nn.Module):
    """CNN classifier for MNIST digits.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) raw logits (no softmax; pair with nn.CrossEntropyLoss).

    Layer-by-layer shapes:
        Conv(1->10, 5x5): 28x28 -> 24x24, then 2x2 max-pool -> 12x12
        Conv(10->20, 5x5): 12x12 -> 8x8, then 2x2 max-pool -> 4x4
        Flatten: 20*4*4 = 320 features -> 128 -> 64 -> 10 logits
    """

    def __init__(self):
        super(Mnist, self).__init__()
        # BUG FIX: the original network had NO hidden non-linearities — its only
        # ReLU came AFTER the final Linear layer, which clamps logits to >= 0 and
        # degrades CrossEntropyLoss training. ReLUs now follow each hidden layer
        # and the output layer emits raw logits.
        # NOTE: layer indices change, so checkpoints saved with the old
        # architecture must be retrained.
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Conv2d(10, 20, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Flatten(),  # -> (N, 320)
            nn.Linear(320, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),  # raw logits
        )

    def forward(self, x):
        """Run the network; returns (N, 10) logits."""
        return self.model(x)

三、数据集的导入

通过torchvision导入MNIST数据集,代码如下:

def loader_data(test_data_train):
    """Return a DataLoader over MNIST.

    test_data_train: True loads the 60k training split, False the 10k test split.
    Images are converted to tensors; the dataset is downloaded on first use.
    """
    dataset = torchvision.datasets.MNIST(
        '../dataset',
        train=test_data_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    # Batches of 32 samples, reshuffled every epoch.
    return DataLoader(dataset, batch_size=32, shuffle=True)

为了简化代码和方便,将导入数据集封装成一个函数。

四、训练

将模型实例化和GPU设定

为了节省训练的时间,采用GPU进行训练。

# BUG FIX: the original hard-coded 'cuda', which raises at runtime on
# CPU-only machines; fall back to CPU when no CUDA device is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mnist = Mnist()
mnist.to(device)  # move all parameters/buffers to the chosen device

采用GPU训练前,首先需要确定是否下载了CUDA驱动,如果没有安装,提前安装好。安装教程请参考:https://blog.csdn.net/qq_35831906/article/details/134349866?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522172423115416800178555617%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=172423115416800178555617&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_positive~default-1-134349866-null-null.142^v100^pc_search_result_base1&utm_term=cuda%20pytorch&spm=1018.2226.3001.4187

训练代码如下:

def train(mnist, epoch):  # epoch = number of training epochs
    """Train `mnist` for `epoch` epochs on MNIST and save the best checkpoint.

    After each epoch the model is evaluated on the test split; the weights of
    the best-accuracy epoch are saved to "Mnist.pth".
    Relies on module-level `device`, `loader_data` and `estimate`.
    """
    best_acc = 0
    best_model = None  # BUG FIX: was unbound if no epoch improved (e.g. epoch=0)
    train_data = loader_data(test_data_train=True)   # training split
    test_data = loader_data(test_data_train=False)   # test split
    start_time = time.time()  # for wall-clock timing of training
    print("initial_accuracyL:{}".format(estimate(test_data, mnist)[1]))
    optim = torch.optim.Adam(mnist.parameters(), lr=0.001)  # Adam, lr = 0.001
    # Hoisted out of the batch loop: the original rebuilt CrossEntropyLoss
    # for every single batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    for i in range(epoch):
        print('-----第{}轮训练开始-----'.format(i + 1))
        for images, targets in train_data:
            images = images.to(device)
            targets = targets.to(device)
            optim.zero_grad()  # clear gradients from the previous step
            outputs = mnist(images.view(-1, 1, 28, 28))
            loss = loss_(outputs, targets)
            loss.backward()  # backpropagate
            optim.step()     # update parameters
        total_time = time.time() - start_time
        # Single evaluation per epoch — the original called estimate() three
        # times per epoch, tripling the (expensive) test-set pass.
        epoch_loss, epoch_acc = estimate(test_data, mnist)
        print("accuracy:{}, 用时:{}min{:.2f}s, Loss{}".format(epoch_acc
                                                        , total_time // 60
                                                        , total_time % 60
                                                        , epoch_loss
                                                        ))
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            # BUG FIX: state_dict() returns references to the live parameter
            # tensors, so later epochs would overwrite the "best" snapshot.
            # Clone each tensor to freeze the best weights.
            best_model = {k: v.detach().clone() for k, v in mnist.state_dict().items()}

    if best_model is not None:
        torch.save(best_model, "Mnist.pth")  # persist the best checkpoint

Adam(Adaptive Moment Estimation)是一种常用的优化算法,特别适用于训练神经网络和深度学习模型。它是一种自适应学习率的优化算法,可以根据不同参数的梯度信息来动态调整学习率,以提高训练的效率和稳定性。

训练整体代码

"""
author: XiaoShu
date: 2024-08-21
"""

import time
import torch
from torch.utils.data import DataLoader
from torch import nn
import torchvision

# BUG FIX: the original hard-coded 'cuda', which raises at runtime on
# CPU-only machines; fall back to CPU when no CUDA device is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


class Mnist(nn.Module):
    """CNN classifier for MNIST digits.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) raw logits (no softmax; pair with nn.CrossEntropyLoss).

    Layer-by-layer shapes:
        Conv(1->10, 5x5): 28x28 -> 24x24, then 2x2 max-pool -> 12x12
        Conv(10->20, 5x5): 12x12 -> 8x8, then 2x2 max-pool -> 4x4
        Flatten: 20*4*4 = 320 features -> 128 -> 64 -> 10 logits
    """

    def __init__(self):
        super(Mnist, self).__init__()
        # BUG FIX: the original network had NO hidden non-linearities — its only
        # ReLU came AFTER the final Linear layer, which clamps logits to >= 0 and
        # degrades CrossEntropyLoss training. ReLUs now follow each hidden layer
        # and the output layer emits raw logits.
        # NOTE: layer indices change, so checkpoints saved with the old
        # architecture must be retrained.
        self.model = nn.Sequential(
            nn.Conv2d(1, 10, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Conv2d(10, 20, 5, 1, 0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), padding=0, stride=2),
            nn.Flatten(),  # -> (N, 320)
            nn.Linear(320, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),  # raw logits
        )

    def forward(self, x):
        """Run the network; returns (N, 10) logits."""
        return self.model(x)


# nn.Module.to() moves parameters in place and returns the module itself,
# so construction and device placement can be chained.
mnist = Mnist().to(device)


def loader_data(test_data_train):
    """Return a DataLoader over MNIST.

    test_data_train: True loads the 60k training split, False the 10k test split.
    Images are converted to tensors; the dataset is downloaded on first use.
    """
    dataset = torchvision.datasets.MNIST(
        '../dataset',
        train=test_data_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    # Batches of 32 samples, reshuffled every epoch.
    return DataLoader(dataset, batch_size=32, shuffle=True)


# Evaluate model performance on a held-out set.
def estimate(test_data, mnist):
    """Return (mean loss, accuracy) of `mnist` over `test_data`.

    test_data: iterable of (images, targets) batches.
    Relies on module-level `device`. Runs under torch.no_grad().
    """
    # Hoisted: the original rebuilt CrossEntropyLoss for every batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for images, targets in test_data:
            images = images.to(device)
            targets = targets.to(device)
            # BUG FIX: the original ran the forward pass TWICE per batch
            # (once for the loss, once for the predictions).
            outputs = mnist(images.view(-1, 1, 28, 28))
            # BUG FIX: the original returned only the LAST batch's loss;
            # accumulate a sample-weighted sum and average over the whole set.
            total_loss += loss_(outputs, targets).item() * targets.size(0)
            # Vectorized accuracy count instead of a Python per-sample loop.
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    if total == 0:  # guard against an empty loader (ZeroDivisionError before)
        return 0.0, 0.0
    return total_loss / total, correct / total


def train(mnist, epoch):
    """Train `mnist` for `epoch` epochs on MNIST and save the best checkpoint.

    After each epoch the model is evaluated on the test split; the weights of
    the best-accuracy epoch are saved to "Mnist1.pth".
    Relies on module-level `device`, `loader_data` and `estimate`.
    """
    best_acc = 0
    best_model = None  # BUG FIX: was unbound if no epoch improved (e.g. epoch=0)
    train_data = loader_data(test_data_train=True)
    test_data = loader_data(test_data_train=False)
    start_time = time.time()
    print("initial_accuracyL:{}".format(estimate(test_data, mnist)[1]))
    optim = torch.optim.Adam(mnist.parameters(), lr=0.001)  # Adam, lr = 0.001
    # Hoisted out of the batch loop: the original rebuilt CrossEntropyLoss
    # for every single batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    for i in range(epoch):
        print('-----第{}轮训练开始-----'.format(i + 1))
        for images, targets in train_data:
            images = images.to(device)
            targets = targets.to(device)
            optim.zero_grad()  # clear gradients from the previous step
            outputs = mnist(images.view(-1, 1, 28, 28))
            loss = loss_(outputs, targets)
            loss.backward()
            optim.step()
        total_time = time.time() - start_time
        # Single evaluation per epoch — the original called estimate() three
        # times per epoch, tripling the (expensive) test-set pass.
        epoch_loss, epoch_acc = estimate(test_data, mnist)
        print("accuracy:{}, 用时:{}min{:.2f}s, Loss{}".format(epoch_acc
                                                        , total_time // 60
                                                        , total_time % 60
                                                        , epoch_loss
                                                        ))
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            # BUG FIX: state_dict() returns references to the live parameter
            # tensors, so later epochs would overwrite the "best" snapshot.
            # Clone each tensor to freeze the best weights.
            best_model = {k: v.detach().clone() for k, v in mnist.state_dict().items()}

    if best_model is not None:
        torch.save(best_model, "Mnist1.pth")  # persist the best checkpoint


# Entry point: only train when run as a script (not when imported by the
# inference scripts below, which do `import train`).
if __name__ == '__main__':
    train(mnist, 100)  # train for 100 epochs

五、模型性能评估

代码:

def estimate(test_data, mnist):
    """Return (mean loss, accuracy) of `mnist` over `test_data`.

    test_data: iterable of (images, targets) batches.
    Relies on module-level `device`. Runs under torch.no_grad().
    """
    # Hoisted: the original rebuilt CrossEntropyLoss for every batch.
    loss_ = nn.CrossEntropyLoss().to(device)
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for images, targets in test_data:
            images = images.to(device)
            targets = targets.to(device)
            # BUG FIX: the original ran the forward pass TWICE per batch
            # (once for the loss, once for the predictions).
            outputs = mnist(images.view(-1, 1, 28, 28))
            # BUG FIX: the original returned only the LAST batch's loss;
            # accumulate a sample-weighted sum and average over the whole set.
            total_loss += loss_(outputs, targets).item() * targets.size(0)
            # Vectorized accuracy count instead of a Python per-sample loop.
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    if total == 0:  # guard against an empty loader (ZeroDivisionError before)
        return 0.0, 0.0
    return total_loss / total, correct / total

六、训练后效果

本文训练了100轮

模型训练好后可以开始测试了,新建一个测试的py文件

测试代码(可视化):

"""
author: XiaoShu
date: 2024-08-21
"""
import torchvision

import train
import torch
from PIL import Image
import matplotlib.pyplot as plt

img_path = '../test_two.png'
image = Image.open(img_path)
image1 = Image.open(img_path)  # untouched copy kept for display
# BUG FIX: convert to a single grayscale channel before ToTensor. A colour PNG
# has 3-4 channels, so reshape(-1, 1, 28, 28) would silently split it into a
# batch of 3-4 wrong "images" instead of one digit.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((28, 28)),
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])

image = transform(image)
image = torch.reshape(image, (-1, 1, 28, 28))  # (1, 1, 28, 28) model input
model = train.Mnist()
# map_location='cpu' so GPU-trained weights load on a CPU-only machine.
model.load_state_dict(torch.load('Mnist1.pth', map_location=torch.device('cpu')))
model.eval()  # inference mode
with torch.no_grad():
    outPut = model(image)
    prediction = outPut.argmax().item()  # index of the max logit = digit class
print('识别结果:{}'.format(prediction))

# Visualization: show the original image with the predicted label as title.
f, a = plt.subplots(1, 1, figsize=(7, 7))
a.set_title('predict:{}'.format(prediction))
a.axis('off')
a.imshow(image1)
plt.show()

我导入图片是4

结果预测也是4

可视化效果如下

七、自己手写的数字

由于自己手写的数字尺寸大小和通道与我们想要输入的图片不同,所以我们需要先对图片进行预处理。

新建一个py文件

预处理代码如下:

preprocess = transforms.Compose([
    transforms.Resize((28, 28)),                  # resize to the MNIST input size
    transforms.Grayscale(num_output_channels=1),  # convert to a single grey channel
    transforms.ToTensor(),                        # PIL image -> float tensor in [0, 1]
    # BUG FIX: the trailing comment originally wrapped onto its own source line,
    # which was a syntax error. The mean/std below are the MNIST dataset statistics.
    transforms.Normalize((0.1307,), (0.3081,)),
])

整体代码加上可视化

"""
author: XiaoShu
date: 2024-08-21
"""
from torchvision import transforms
import train
import torch
from PIL import Image
import matplotlib.pyplot as plt

preprocess = transforms.Compose([
    transforms.Resize((28, 28)),                  # resize to the MNIST input size
    transforms.Grayscale(num_output_channels=1),  # convert to a single grey channel
    transforms.ToTensor(),                        # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.1307,), (0.3081,)),   # MNIST dataset mean/std
])

# The original copy-pasted the load/preprocess/predict/plot code four times
# (image1..image4, image_1..image_4, image__1..image__4); lists + loops do
# the same work once.
img_paths = ['0.png', '3.png', '5.png', '8.png']
raw_images = [Image.open(p) for p in img_paths]           # originals for display
# Preprocess and add the batch dimension the model expects: (1, 1, 28, 28).
batches = [torch.reshape(preprocess(im), (-1, 1, 28, 28)) for im in raw_images]

model = train.Mnist()
# map_location='cpu' so GPU-trained weights load on a CPU-only machine.
model.load_state_dict(torch.load('Mnist1.pth', map_location=torch.device('cpu')))
model.eval()  # inference mode
with torch.no_grad():
    # Predicted digit = index of the max logit for each image.
    predictions = [model(batch).argmax().item() for batch in batches]

# Visualization: 2x2 grid, each cell shows an original image with its prediction.
f, a = plt.subplots(2, 2, figsize=(7, 7))
for ax, im, pred in zip(a.flat, raw_images, predictions):
    ax.set_title('predict:{}'.format(pred))
    ax.axis('off')
    ax.imshow(im)

plt.show()

  • 16
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
实验目的: 使用PyTorch框架实现手写数字识别模型,并对模型进行优化,提高识别准确率。 实验步骤: 1. 数据集准备 使用MNIST手写数字数据集,该数据集包含60000个训练样本和10000个测试样本,每个样本都是28x28像素的灰度图像。可以使用PyTorch自带的torchvision.datasets.MNIST类进行数据集的加载。 2. 模型设计与训练 使用PyTorch搭建卷积神经网络模型,对手写数字图像进行分类。具体网络结构如下: ```python class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) self.dropout1 = nn.Dropout2d(0.25) self.dropout2 = nn.Dropout2d(0.5) self.fc1 = nn.Linear(9216, 128) self.fc2 = nn.Linear(128, 10) def forward(self, x): x = self.conv1(x) x = F.relu(x) x = self.conv2(x) x = F.relu(x) x = F.max_pool2d(x, 2) x = self.dropout1(x) x = torch.flatten(x, 1) x = self.fc1(x) x = F.relu(x) x = self.dropout2(x) x = self.fc2(x) output = F.log_softmax(x, dim=1) return output ``` 模型训练过程: ```python model = Net().to(device) optimizer = optim.Adadelta(model.parameters(), lr=0.1) def train(model, device, train_loader, optimizer, epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) loss.backward() optimizer.step() if batch_idx % 10 == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item())) def test(model, device, test_loader): model.eval() test_loss = 0 correct = 0 with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) test_loss += F.nll_loss(output, target, reduction='sum').item() pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( test_loss, correct, len(test_loader.dataset), 100. 
* correct / len(test_loader.dataset))) epochs = 10 for epoch in range(1, epochs + 1): train(model, device, train_loader, optimizer, epoch) test(model, device, test_loader) ``` 3. 模型优化 对模型进行优化,提高模型的准确率。可以尝试以下优化方法: - 改变学习率,使用更好的优化器(如Adam等); - 对数据集进行增强,如旋转、平移、缩放等; - 改变网络结构,尝试添加BatchNormalization层、使用更多的卷积层和全连接层等; - 加入正则化,如L1、L2正则化等。 实验结果: 使用上述模型,在MNIST数据集上进行训练,最终得到的准确率为98.89%。可以看出使用PyTorch框架实现手写数字识别是非常方便的。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值