PyTorch:基于CNN的数字识别(MNIST)

背景

之前尝试了直接使用全连接网络进行数字识别,效果虽然也说得过去,但是一旦使用我自己写的数字,预测的效果瞬间就不太行了。所以本次又尝试了一下使用卷积神经网络进行数字识别,需要注意的是本博客中的网络结构是随便设置的。

代码链接:https://github.com/Yannnnnnnnnnnn/learnPyTorch/tree/master/trainMNIST/conv

一、训练
train_gpu.py

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms

# Network architecture: three strided conv layers and three fully connected layers.
class myModel(nn.Module):
    """CNN for MNIST digit classification.

    Expects input of shape (N, 1, 28, 28). The three stride-2 convolutions
    reduce the spatial size 28x28 -> 13x13 -> 7x7 -> 4x4, so the flattened
    feature size is 384 * 4 * 4 = 6144. Output is per-class log-probabilities
    of shape (N, 10), suitable for nn.NLLLoss.
    """
    def __init__(self):
        super(myModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, stride=2, padding=1)
        self.fc1   = nn.Linear( 6144,4096)
        self.fc2   = nn.Linear( 4096,4096)
        self.fc3   = nn.Linear( 4096,10)
        self.dropout = nn.Dropout(0.25)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # Convolutional feature extraction.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # Flatten to (N, 6144), keeping the batch dimension.
        x = x.view(x.size(0), 6144 )
        # BUG FIX: the original wrote bare `nn.Dropout()` statements, which
        # construct a new module and immediately discard it — dropout was
        # never actually applied. Use the registered self.dropout (p=0.25).
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # BUG FIX: the original applied ReLU to the final logits, which
        # zeroes every negative logit and distorts the LogSoftmax output.
        # The output layer must feed raw logits into LogSoftmax.
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Preprocessing: convert images to tensors and map pixel values
# from [0, 1] to roughly [-1, 1].
_preprocess = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(_preprocess)

# Training split (downloaded on first run).
trainset = datasets.MNIST('./MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

# Test split, used as a validation set after every epoch.
testset = datasets.MNIST('./MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=True)


# Build the network and move its parameters to the GPU
# (nn.Module.cuda() returns the module itself).
model = myModel().cuda()

# NLLLoss pairs with the model's LogSoftmax output; plain SGD optimizer.
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 100
train_losses, test_losses = [], []

for e in range(epochs):
    # ---- training pass ----
    running_loss = 0
    model.train()  # enable dropout during training
    for images, labels in trainloader:
        # Move the batch to the GPU; the conv net consumes (N, 1, 28, 28)
        # images directly — no flattening needed here.
        images = images.cuda()
        labels = labels.cuda()

        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- validation pass, once per epoch ----
    # (The original used a `for/else`, whose else-branch always runs when
    # the loop has no `break`; written as straight-line code for clarity.)
    test_loss = 0
    accuracy = 0
    model.eval()  # disable dropout for evaluation

    # Turn off gradients for validation, saves memory and computations.
    with torch.no_grad():
        for images, labels in testloader:
            images = images.cuda()
            labels = labels.cuda()

            log_ps = model(images)
            # BUG FIX: accumulate the Python float via .item(), not the
            # tensor itself, so test_losses holds plain numbers.
            test_loss += criterion(log_ps, labels).item()

            # Accuracy: compare the top-1 predicted class against labels.
            ps = torch.exp(log_ps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor))

    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    # Checkpoint after every epoch, named by epoch number.
    torch.save(model.state_dict(), str(e+1) +'.pth')

    print("Epoch: {}/{}.. ".format( e+1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),
          "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
      
        
import matplotlib.pyplot as plt

# Plot the training and validation loss curves collected per epoch.
for series, lbl in ((train_losses, 'Training loss'),
                    (test_losses, 'Validation loss')):
    plt.plot(series, label=lbl)
plt.legend(frameon=False)

最后训练时的精度图如下,validation的最终精度为97.2%,比全连接网络高了5%左右。
在这里插入图片描述


二、预测

预测的代码和之前差不多。

predict.py

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms

# define the CNN architecture (must match the architecture used in training
# so the saved state_dict loads correctly)
class myModel(nn.Module):
    """CNN for MNIST digit classification.

    Expects input of shape (N, 1, 28, 28). The three stride-2 convolutions
    reduce the spatial size 28x28 -> 13x13 -> 7x7 -> 4x4, so the flattened
    feature size is 384 * 4 * 4 = 6144. Output is per-class log-probabilities
    of shape (N, 10).
    """
    def __init__(self):
        super(myModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, stride=2, padding=1)
        self.fc1   = nn.Linear( 6144,4096)
        self.fc2   = nn.Linear( 4096,4096)
        self.fc3   = nn.Linear( 4096,10)
        self.dropout = nn.Dropout(0.25)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # Convolutional feature extraction.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # Flatten to (N, 6144), keeping the batch dimension.
        x = x.view(x.size(0), 6144 )
        # BUG FIX: bare `nn.Dropout()` statements create a module and throw
        # it away — dropout was never applied. Use the registered
        # self.dropout (a no-op automatically under model.eval()).
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # BUG FIX: no ReLU on the output layer — raw logits must feed
        # LogSoftmax, otherwise negative logits are zeroed out.
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Same preprocessing as training: tensor conversion followed by
# normalization of pixel values from [0, 1] to roughly [-1, 1].
_preprocess_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(_preprocess_steps)


model = myModel()


# load model
# BUG FIX: map_location='cpu' lets a checkpoint saved from a CUDA model
# load on a CPU-only machine; this script runs inference on CPU anyway.
state_dict = torch.load('53.pth', map_location='cpu')
model.load_state_dict(state_dict)
# BUG FIX: switch to inference mode so dropout is disabled during prediction.
model.eval()


import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Open the hand-written digit image and convert it to grayscale,
# matching MNIST's single-channel input format.
image = Image.open('9_M.png')
gray=image.convert('L')

# Display the image being classified.
plt.figure("predict")
plt.imshow(gray)
plt.show()


# Convert to a normalized tensor and add the batch dimension: (1, 1, H, W).
tensor = transform(gray)
tensor = tensor.unsqueeze(0)
# torch.autograd.Variable is deprecated; torch.no_grad() is the modern way
# to disable gradient tracking for inference.
with torch.no_grad():
    outputdata = model(tensor)
# Convert log-probabilities back to probabilities.
ps = torch.exp(outputdata)

# Top-1 predicted class and its probability.
top_p, top_class = ps.topk(1, dim=1)

# Print the prediction result.
print(top_p)
print(top_class)

最后在我手写的数字上的预测结果是:

在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述

1(0.7305) 3(0.9704) 4(0.8309) 3(0.4391) 8(0.9913)

与仅使用全连接网络的结果相比,使用卷积神经网络还是有明显的效果提升。但是不知为何数字7总是不能正确识别,可能是我写的数字7和MNIST数据集中的差异较大吧。

©️2020 CSDN 皮肤主题: 精致技术 设计师:CSDN官方博客 返回首页