PyTorch Handwritten Digit Recognition

This article shows how to train a LeNet5 convolutional neural network with PyTorch to recognize handwritten digits. The dataset contains roughly 1,000 images per digit, which are preprocessed and fed to the model for training. Training uses a DataLoader, the SGD optimizer, and the CrossEntropyLoss loss function, and the model is saved once the accuracy reaches 95%. Prediction functions are also provided to evaluate the model and to classify a single image.

1. Dataset


Each class directory contains roughly 1,000 images of the corresponding digit.
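
For reference, torchvision's ImageFolder expects one subdirectory per class under the root (e.g. data/0/, data/1/, ..., data/9/). A minimal sketch, assuming that layout, to verify what gets picked up before training:

from torchvision import datasets, transforms

# Point ImageFolder at the same 'data' root used by the training script below.
dataset = datasets.ImageFolder('data', transforms.ToTensor())
print(dataset.classes)        # subdirectory names, e.g. ['0', '1', ..., '9']
print(dataset.class_to_idx)   # mapping from folder name to label index
print(len(dataset))           # total number of images found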

2. Training

#coding=utf-8
import torchvision
from torchvision import datasets, transforms 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import cv2

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)  # see the torch.nn.Conv2d docs for the parameter definitions
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # the fully connected layers are linear layers
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.tanh(self.conv1(x)), (2, 2))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = F.max_pool2d(F.tanh(self.conv2(x)), (2, 2))
        x = F.dropout(x, p = 0.3, training=self.training)
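        # Shape note, assuming 28x28 inputs (which Resize((28, 28)) in train() produces):
        # conv1 with padding=2 keeps 28x28 -> pool -> 14x14; conv2 (5x5) -> 10x10 -> pool -> 5x5,
        # so the flattened size is 16 * 5 * 5 = 400, matching fc1's input dimension.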
        x = x.view(-1, self.num_flat_features(x))  # view reshapes the tensor; -1 lets PyTorch infer the batch dimension

        x = F.tanh(self.fc1(x))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = F.tanh(self.fc2(x))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = self.fc3(x)
        return x

    # num_flat_features returns the number of features per sample after flattening
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

def train(epoch):

    batch_size = 20
    dataset = datasets.ImageFolder(
       'data',
       transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            #transforms.RandomResizedCrop(28),
            transforms.Resize((28, 28)),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
       ])
    )
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    images, labels = next(iter(train_loader))
    img = torchvision.utils.make_grid(images)

    model = LeNet5()
    optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    model.train()   
    device = torch.device('cuda')
    model.to(device)
    for epoch in range(epoch):
        sum_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device),labels.to(device)
            optimizer.zero_grad()  # zero the gradients
            outputs = model(inputs)  # forward pass through the network
            loss = criterion(outputs, labels)  # compute the loss
            loss.backward()  # backpropagation
            optimizer.step()  # one parameter update step using the gradients
            sum_loss += loss.item()
            test_acc = 0
            if i % 100 == 0:
                for images, labels in iter(test_loader):
                    images = Variable(images).cuda()
                    labels = Variable(labels).cuda()
                    output = model(images)
                    predicted = output.argmax(1)
                    test_acc += (predicted == labels).sum()

                ave_test_acc = test_acc.item()/len(dataset)
                #print("---------------",test_acc.item(),len(dataset),"---------------")
                print('[%d,%d] loss:%.03f acc:%.04f' %
                      (epoch + 1, i + 1, sum_loss / 100,ave_test_acc))
                if ave_test_acc >= 0.95:
                    torch.save(model.state_dict(), "model_.pth")
    torch.save(model.state_dict(), "model_.pth")
               
def main():
   train(10)


if __name__ == "__main__":
    main()

Approach:
Put one subdirectory per class under the data directory, with each subdirectory holding that class's images;
Load the dataset through a DataLoader iterator to save memory;
Choose a convolutional neural network (LeNet5) for training;
Compute the accuracy and the loss.
Save the model once the accuracy reaches 95%, and save it again when training finishes, so a model is still kept even if 95% was never reached.
The training set and test set here are the same data: about 10,000 handwritten digit images of size 40x40; a proper train/test split is sketched below.
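
Since the script above reuses the same ImageFolder for both loaders, the reported accuracy is effectively a training accuracy rather than a held-out one. A minimal sketch of a train/test split using torch.utils.data.random_split (the 80/20 ratio and the fixed seed are just assumptions):

import torch
from torchvision import datasets, transforms

dataset = datasets.ImageFolder(
    'data',
    transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((28, 28)),
        transforms.ToTensor()
    ])
)

# 80/20 split; the fixed generator seed only makes the split reproducible.
n_train = int(0.8 * len(dataset))
n_test = len(dataset) - n_train
train_set, test_set = torch.utils.data.random_split(
    dataset, [n_train, n_test], generator=torch.Generator().manual_seed(0))

train_loader = torch.utils.data.DataLoader(train_set, batch_size=20, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=20, shuffle=False)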

3. Prediction

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import random
from torch.autograd import Variable



class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding=2)  # see the torch.nn.Conv2d docs for the parameter definitions
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # the fully connected layers are linear layers
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.tanh(self.conv1(x)), (2, 2))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = F.max_pool2d(F.tanh(self.conv2(x)), (2, 2))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = x.view(-1, self.num_flat_features(x))  # view reshapes the tensor; -1 lets PyTorch infer the batch dimension

        x = F.tanh(self.fc1(x))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = F.tanh(self.fc2(x))
        x = F.dropout(x, p = 0.3, training=self.training)
        x = self.fc3(x)
        return x

    # num_flat_features returns the number of features per sample after flattening
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features



def predict_image():
    batch_size = 20
    dataset = datasets.ImageFolder(
       'data',
       transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            #transforms.RandomResizedCrop(28),
            transforms.Resize((28, 28)),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
       ])
    )
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=True)

    images, labels = next(iter(train_loader))
    img = torchvision.utils.make_grid(images)

    model = LeNet5()
    model.load_state_dict(torch.load('model_.pth'))
    model.eval()  # switch to evaluation mode so dropout is disabled during testing
    device = torch.device('cuda')
    model.to(device)
    correct = 0
    total = 0
    for data_test in test_loader:
        images, labels = data_test
        images, labels = Variable(images).cuda(), Variable(labels).cuda()
        output_test = model(images)
        _, predicted = torch.max(output_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
        #print(labels,"  ",predicted)
    print("correct1: ", correct)
    print("Test acc: {0}".format(correct.item() /
                                 len(dataset)))


def predict_image_singer():
    batch_size = 20
    dataset = datasets.ImageFolder(
       'data',
       transforms.Compose([
            transforms.Grayscale(num_output_channels=3),
            #transforms.RandomResizedCrop(28),
            transforms.Resize((28, 28)),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
       ])
    )
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=True)

    net = LeNet5()
    net.load_state_dict(torch.load('model_.pth'))
    device = torch.device('cuda')
    net = net.to(device)
    net.eval()  # switch the model to evaluation mode
    correct = 0
    total = 0
    test_images = []
    for data_test in test_loader:
        images, labels = data_test
        images, labels = Variable(images).cuda(), Variable(labels).cuda()  
        test_images.append([images,labels])

        
    
    file = random.choice(test_images)
    print(file[0].shape)
    print (file[1])

    predicted = net(file[0]).argmax(1)

    #_, predicted = torch.max(net(file[0]), 1)
    print(predicted)

def predict_image_singer_v1(filepath="data/3/3_10.png"):
    
    image = Image.open(filepath) 

    data_dataset = transforms.Compose([
           transforms.Grayscale(num_output_channels=3),
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
       ])
    
    my_tensor = data_dataset(image)
    #print (my_tensor.shape)
    my_tensor = my_tensor.resize_(1,3,28,28)
    #my_tensor= my_tensor.cuda()
    my_tensor = Variable(my_tensor).cuda()

    device = torch.device('cuda')
    
    #print(my_tensor)


    net = LeNet5()
    net.load_state_dict(torch.load('model_.pth'))
    net.eval()
    net = net.to(device)
    predicted = net(my_tensor).argmax(1)
    print("filepath={},result={}".format(filepath,predicted))


predict_image()  # overall accuracy
predict_image_singer()  # predict one batch pulled from the loader
predict_image_singer_v1(filepath="data/4/4_12.png")  # predict a single image file

Approach:
predict_image() computes the overall accuracy;
predict_image_singer() inspects predictions, taking one batch of 20 samples at a time from the loader;
predict_image_singer_v1() tests a single image; a more current inference sketch follows below.
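
The helper functions above still go through the deprecated Variable API and run the forward pass with gradients enabled. A minimal single-image inference sketch in current PyTorch style, assuming the LeNet5 class and the model_.pth checkpoint from the scripts above (the softmax step is only added here to expose class probabilities):

import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

def predict_single(filepath, model, device):
    model.eval()                                                   # disable dropout at inference time
    x = preprocess(Image.open(filepath)).unsqueeze(0).to(device)   # add the batch dimension
    with torch.no_grad():                                          # no gradients needed for inference
        probs = F.softmax(model(x), dim=1)
    return probs.argmax(1).item(), probs.squeeze(0).tolist()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5()
model.load_state_dict(torch.load('model_.pth', map_location=device))
model.to(device)
print(predict_single("data/3/3_10.png", model, device))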
