使用PyTorch实现MNIST手写数字识别

标题:使用PyTorch实现MNIST手写数字识别

1.在这篇文章中,我们将使用PyTorch框架实现一个简单的神经网络模型,用于识别MNIST数据集中的手写数字。我们将按以下几个步骤进行:

  1. 导入所需的库和模块
  2. 定义数据预处理和加载器
  3. 创建神经网络模型
  4. 定义损失函数和优化器
  5. 训练模型并测试准确率
  6. 使用训练好的模型识别图像
  7. 将图像转换为NumPy数组并显示

首先,我们需要导入所需的库和模块:

import torch.nn
import numpy as np
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import torch.nn.functional as F
import matplotlib.pyplot as plt

接下来,我们定义数据预处理和加载器:

# Number of samples per mini-batch, shared by both loaders below.
batch_size = 64

# Preprocessing applied to every image: convert it to a tensor, then
# normalise the single channel with mean 0.1307 and std 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ],
)

# Both splits live under the same root directory and use the same
# transform; download=True fetches the files when they are missing.
train_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='./dataset/mnist',
    train=False,
    transform=transform,
    download=True,
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

然后,我们创建神经网络模型:

class Net(torch.nn.Module):
    """Fully connected classifier mapping 784 input values to 10 scores.

    Four ReLU-activated hidden layers (512, 256, 128 and 64 units) feed a
    final linear layer whose 10 outputs are returned without activation.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 output scores for every sample in ``x``.

        ``x`` is first reshaped to ``(-1, 784)``, so each sample must
        contribute exactly 784 values.
        """
        activations = x.view(-1, 784)
        hidden_layers = (self.linear1, self.linear2, self.linear3, self.linear4)
        for hidden_layer in hidden_layers:
            activations = F.relu(hidden_layer(activations))
        # The last layer is left without an activation.
        return self.linear5(activations)


# Module-level model instance used by the rest of the script.
model = Net()

接下来,我们定义损失函数和优化器:

# Cross-entropy loss, applied to the scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.5,
)

现在,我们可以开始训练模型并测试准确率了:

# Test accuracy returned by test() after each call to train().
accuracies = []


def train(epoch):
    """Run one pass over ``train_loader``, then record the test accuracy.

    Args:
        epoch: Zero-based epoch index; only used in the progress message.
    """
    window_loss = 0.0
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        # Every 300 mini-batches, print their mean loss and start over.
        if step % 300 == 0:
            print('[%d,%5d loss:%.3f]' % (epoch + 1, step, window_loss / 300))
            window_loss = 0.0

    accuracies.append(test())

def test():
    """Evaluate ``model`` on ``test_loader`` and return its accuracy.

    Prints the accuracy (truncated to an integer percentage) as a side
    effect.

    Returns:
        Percentage (0-100) of correctly classified samples, or 0.0 when
        the loader yields no samples.
    """
    correct = 0
    total = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # The predicted class is the index of the largest score.
            predicted = torch.argmax(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Computed once after the loop. The guard avoids a division by zero
    # (and an unbound result) when the loader is empty.
    accuracy = 100 * correct / total if total else 0.0
    print('Accuracy on test set: %d %%' % accuracy)
    return accuracy

if __name__ == '__main__':
    # Single source of truth for the number of training epochs.
    num_epochs = 10
    for epoch in range(num_epochs):
        train(epoch)

    # train() appends one accuracy value per epoch, so the x-axis is
    # derived from the recorded values rather than a second constant.
    plt.figure()
    plt.plot(range(1, len(accuracies) + 1), accuracies)
    plt.title('Accuracy over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.show()

画出Accuracy变化图像:
在这里插入图片描述

最后,我们使用训练好的模型识别图像,然后将图像转换为NumPy数组并显示:

# Classify a single image file with the trained model.
image_path = './image/img_1.png'
# Close the file handle as soon as the greyscale ('L') copy is made.
with Image.open(image_path) as source_image:
    image = source_image.convert('L')

# Apply the same normalisation as the training transform so the model
# receives inputs on the scale it was trained on.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
image = transform(image).unsqueeze(0)  # add the batch dimension

# Inference only: switch to eval mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    output = model(image)
predicted = torch.argmax(output, dim=1)
print('Predicted:', predicted, 'GroundTruth:', 8)

# Convert the tensor to a 2-D NumPy array and show it. imshow rescales
# the values to the colormap range, so the normalised data still renders
# as a greyscale picture.
image_array = np.array(image)
image_array = np.squeeze(image_array)
plt.imshow(image_array, cmap='gray')
plt.show()

我们给的图片是手写数字8,模型的预测结果也是8:
在这里插入图片描述
展示出用于预测的灰度图片:
在这里插入图片描述

通过以上代码,我们成功地实现了一个简单的神经网络模型,用于识别MNIST数据集中的手写数字。希望这篇文章能帮助你更好地理解如何使用PyTorch完成深度学习任务。

2.完整代码如下:

import torch.nn
import numpy as np
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Number of samples per mini-batch, shared by both loaders below.
batch_size = 64

# Preprocessing applied to every image: convert it to a tensor, then
# normalise the single channel with mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST(root='./dataset/mnist', train=True, transform=transform, download=True)
# train=False selects the held-out test split. Loading the training split
# here would make the reported "test" accuracy a training accuracy.
test_dataset = datasets.MNIST(root='./dataset/mnist', train=False, transform=transform, download=True)

# Only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


class Net(torch.nn.Module):
    """Five-layer fully connected network: 784 inputs to 10 scores."""

    def __init__(self):
        super().__init__()
        # Layer widths shrink 784 -> 512 -> 256 -> 128 -> 64 -> 10.
        make_layer = torch.nn.Linear
        self.linear1 = make_layer(784, 512)
        self.linear2 = make_layer(512, 256)
        self.linear3 = make_layer(256, 128)
        self.linear4 = make_layer(128, 64)
        self.linear5 = make_layer(64, 10)
        # NOTE: a convolutional variant (Conv2d 1->10 and 10->20 with 5x5
        # kernels, each followed by 2x2 max pooling and ReLU, then
        # Linear(320, 10)) is a possible alternative; it is not used here.

    def forward(self, x):
        """Return the 10 output scores for every sample in ``x``.

        ``x`` is reshaped to ``(-1, 784)`` before the first layer.
        """
        flat = x.view(-1, 784)
        h1 = F.relu(self.linear1(flat))
        h2 = F.relu(self.linear2(h1))
        h3 = F.relu(self.linear3(h2))
        h4 = F.relu(self.linear4(h3))
        # The output layer is left without an activation.
        return self.linear5(h4)


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Module.to() moves the model's parameters and buffers to `device` and
# returns the module itself, so creation and transfer can be chained.
model = Net().to(device)

# Cross-entropy loss, applied to the scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the model.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.5,
)

# Test accuracy returned by test() after each call to train().
accuracies = []


def train(epoch):
    """Run one pass over ``train_loader``, then record the test accuracy.

    Args:
        epoch: Zero-based epoch index; only used in the progress message.
    """
    window_loss = 0.0
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        # Move the mini-batch to the device the model lives on.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        # Every 300 mini-batches, print their mean loss and start over.
        if step % 300 == 0:
            print('[%d,%5d loss:%.3f]' % (epoch + 1, step, window_loss / 300))
            window_loss = 0.0

    accuracies.append(test())


def test():
    """Evaluate ``model`` on ``test_loader`` and return its accuracy.

    Prints the accuracy (truncated to an integer percentage) as a side
    effect.

    Returns:
        Percentage (0-100) of correctly classified samples, or 0.0 when
        the loader yields no samples.
    """
    correct = 0
    total = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, labels in test_loader:
            # Move the mini-batch to the device the model lives on.
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest score.
            predicted = torch.argmax(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Computed once after the loop. The guard avoids a division by zero
    # (and an unbound result) when the loader is empty.
    accuracy = 100 * correct / total if total else 0.0
    print('Accuracy on test set: %d %%' % accuracy)
    return accuracy


if __name__ == '__main__':
    # Single source of truth for the number of training epochs.
    num_epochs = 10
    for epoch in range(num_epochs):
        train(epoch)

    # train() appends one accuracy value per epoch, so the x-axis is
    # derived from the recorded values rather than a second constant.
    plt.figure()
    plt.plot(range(1, len(accuracies) + 1), accuracies)
    plt.title('Accuracy over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.show()
# Use the trained model to recognise the digit in ./image/img_1.png.
model.eval()
image_path = './image/img_1.png'
# Close the file handle as soon as the greyscale ('L') copy is made.
with Image.open(image_path) as source_image:
    image = source_image.convert('L')

# Apply the same normalisation as the training transform so the model
# receives inputs on the scale it was trained on.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
# Add the batch dimension and move the input to the model's device.
image = transform(image).unsqueeze(0).to(device)

# Inference only, so gradient tracking is skipped. The output is already
# on `device` because both the model and the input are.
with torch.no_grad():
    output = model(image)
predicted = torch.argmax(output, dim=1)
print('Predicted:', predicted, 'GroundTruth:', 8)

# Convert the image tensor to a NumPy array (via the CPU).
image_array = np.array(image.cpu())
# squeeze() drops the size-1 dimensions, leaving a 2-D array.
image_array = np.squeeze(image_array)
# Show the picture used for the prediction. imshow rescales the values
# to the colormap range, so the normalised data still renders as a
# greyscale picture.
plt.imshow(image_array, cmap='gray')
plt.show()

运行代码结果:
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述

  • 12
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值