PyTorch 深度学习 手写数字分类(亲测)

该文详细介绍了如何使用PyTorch构建一个简单的卷积神经网络(CNN)模型,包括定义CNN结构、创建自定义数据集、模型训练过程以及预测单张图片的步骤。文章还涉及数据预处理、损失函数和优化器的选择,以及模型保存和加载。
摘要由CSDN通过智能技术生成

先简单记录下,后期有空再补充。

1. 定义 CNN 模型

import torch.nn as nn
import torch
from torchvision import transforms,models

class CNN(torch.nn.Module):
    """Small CNN for 28x28 single-channel digit images: one conv stage plus a linear head."""

    def __init__(self):
        super(CNN, self).__init__()
        # Single convolution stage: 1 -> 32 channels. A 5x5 kernel with
        # padding=2 keeps the spatial size at 28x28; the 2x2 max-pool then
        # halves it to 14x14.
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=5, padding=2),
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        )
        # Flattened feature size: 32 channels * 14 * 14; 10 digit classes out.
        self.fc = torch.nn.Linear(14 * 14 * 32, 10)

    def forward(self, x):
        """Map a (N, 1, 28, 28) batch to (N, 10) class logits."""
        features = self.conv(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

2.制作自己的数据集

import os
import torch
from torch.utils import data
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset

# Directory name -> class index for the ten digit classes '0'..'9'.
species = {str(digit): digit for digit in range(10)}

class MyDataset(Dataset):
    """Labelled dataset over a directory tree ``root/<digit>/<image>``.

    ``root`` is e.g. ``mnist/train`` or ``mnist/test``; each immediate
    sub-directory name ('0'..'9') is the class label, mapped to an int via
    the module-level ``species`` dict. ``__getitem__`` returns
    ``(image, label)`` where the image has ``transform`` applied if given.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.data = []
        # Sub-directories are the class names: '0', '1', ..., '9'.
        for sub_root in os.listdir(self.root):
            sub_dir = os.path.join(self.root, sub_root)
            # Take the label straight from the directory name instead of
            # re-splitting the joined path on '\\' (which only works on
            # Windows and broke the dataset on any other OS).
            label = species[sub_root]
            for sub_image_name in os.listdir(sub_dir):
                image_path = os.path.join(sub_dir, sub_image_name)
                # Store (image path, label); images are loaded lazily.
                self.data.append((image_path, label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path, label = self.data[index]
        image = Image.open(image_path).convert('RGB')
        # transform defaults to None; only apply it when one was provided
        # (the original called self.transform unconditionally and crashed).
        if self.transform is not None:
            image = self.transform(image)
        return image, label
class MyDataset_pre(Dataset):
    """Unlabelled prediction dataset: every file directly under ``root`` is a sample.

    ``__getitem__`` returns just the image (no label), with ``transform``
    applied when one was provided.
    """

    def __init__(self, root, transform=None):
        # root: e.g. 'test_images'
        self.root = root
        self.transform = transform
        # Full paths of all images found directly in the directory.
        self.data = [os.path.join(self.root, name) for name in os.listdir(self.root)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image = Image.open(self.data[index]).convert('RGB')
        # transform defaults to None; only apply it when one was provided
        # (the original called self.transform unconditionally and crashed).
        if self.transform is not None:
            image = self.transform(image)
        return image

 3.模型训练

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader

from preprocess_dataset import MyDataset
from model import CNN

BATCH_SIZE = 32
EPOCHS = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ToTensor first, then collapse RGB to a single grayscale channel so the
# input matches the Conv2d(1, ...) layer of the CNN model.
trans = transforms.Compose([transforms.ToTensor(), transforms.Grayscale()])
train_dataset = MyDataset('mnist1_new\\train', transform=trans)
test_dataset = MyDataset('mnist1_new\\test', transform=trans)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = CNN()
net = model.to(DEVICE)

# Cross-entropy loss for 10-class digit classification.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

# Track the best test accuracy across ALL epochs so the checkpoint is only
# overwritten when the model actually improves. (Previously this was reset
# to 0 inside the epoch loop, so the model was saved unconditionally every
# epoch, clobbering better checkpoints with worse ones.)
best_acc = 0.0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        # BUG FIX: move the batch to the same device as the model; the
        # original fed CPU tensors to a (possibly) CUDA model.
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = net(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("epoch is {}, batch is{}/{}, loss is {}".format(
        epoch + 1, i, len(train_dataset) / BATCH_SIZE, loss.item()))

    # Evaluate on the test set; no_grad avoids building autograd graphs
    # (the original accumulated graph-attached loss tensors here).
    model.eval()
    loss_test = 0.0
    accuracy = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            loss_test += criterion(outputs, labels).item()
            _, pred = outputs.max(1)
            # Count correct predictions for the accuracy computation.
            accuracy += (pred == labels).sum().item()
    accuracy = accuracy / len(test_dataset)
    loss_test = loss_test / (len(test_dataset) / BATCH_SIZE)
    print("epoch is {}, accuracy is {}, loss test is {}".format(epoch + 1, accuracy, loss_test))
    if accuracy > best_acc:
        best_acc = accuracy
        # Save the full model object; reload later with torch.load.
        torch.save(net, "model/mnist_model_nn.pkl")
        print('accuracy', best_acc)

3.5 预处理(第 4 步之前):对图片进行变换,将一串数字按照从左到右切成单个数字图片。(仅供参考)

import cv2
import numpy as np
import os,torch
from torchvision.transforms import transforms
import numpy
from PIL import Image

def sort_contours(cnts, method='left-to-right'):
    """Sort contours by bounding-box position.

    method is one of 'left-to-right' (default), 'right-to-left',
    'top-to-bottom', 'bottom-to-top'. Returns (contours, boundingBoxes)
    as parallel tuples in sorted order.
    """
    # Descending order for the two "reverse" directions.
    reverse = method in ('right-to-left', 'bottom-to-top')
    # Sort on the y coordinate (index 1) for vertical methods, else on x.
    axis = 1 if method in ('bottom-to-top', 'top-to-bottom') else 0
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    ordered = sorted(zip(cnts, boundingBoxes),
                     key=lambda pair: pair[1][axis], reverse=reverse)
    cnts, boundingBoxes = zip(*ordered)
    return (cnts, boundingBoxes)

def cut_image_sign():
    '''
    Cut each digit-strip image in ``output_me`` into square tiles
    (white background, black digits), one output file per digit contour.

    NOTE(review): in the original, everything after the resize was dedented
    out of the per-image loop, so only the LAST image in ``output_me`` was
    actually processed — presumably an indentation/paste error. The whole
    pipeline now runs inside the loop; ``count1`` is initialised once
    before the loop so output filenames stay unique across input images.
    '''
    root_dir = 'output_me\\'
    # Running index for output tiles across all input images.
    count1 = 0
    for im_name in os.listdir(root_dir):
        image_writer_recongnize = cv2.imread(os.path.join(root_dir, im_name))
        # Normalise every strip to a fixed 256x138 size.
        SIZE = 138
        w_size = 256
        image_writer_recongnize = cv2.resize(image_writer_recongnize, (w_size, SIZE))

        gray_new = cv2.cvtColor(image_writer_recongnize, cv2.COLOR_BGR2GRAY)
        # Global binary threshold at 100, then dilate to join digit strokes.
        threshold, adaptive_image_1 = cv2.threshold(gray_new, 100, 255, cv2.THRESH_BINARY)
        adaptive_image_1 = cv2.dilate(adaptive_image_1, (15, 15))
        cnts_1, h = cv2.findContours(adaptive_image_1, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        # Order digit contours left-to-right so tiles come out in reading order.
        (cnts_new, boundingboxes) = sort_contours(cnts_1)

        for c in cnts_new:
            # Filter by area to drop noise specks and the full-frame contour.
            if 300 < cv2.contourArea(c) < 4000:
                count1 += 1
                x, y, w, h = cv2.boundingRect(c)
                image_result = image_writer_recongnize[y:y + h, x:x + w]
                image_path = os.path.join('output_me_cut', 'me_cut{}.png'.format(count1))
                cv2.imwrite(image_path, image_result)

def image_address():
    '''
    Resize each tile in ``output_me_cut`` to 28x28 and invert it to white
    digits on a black background (the polarity an MNIST-trained model
    expects), writing the results to ``output_me_cut_black``.
    '''
    root_dir = 'output_me_cut\\'
    count3 = 0
    for im_name in os.listdir(root_dir):
        count3 += 1
        image_pil = Image.open(os.path.join(root_dir, im_name))
        resized = transforms.Resize((28, 28))(image_pil)
        as_array = np.array(resized)
        gray = cv2.cvtColor(as_array, cv2.COLOR_BGR2GRAY)
        # Otsu picks the threshold automatically (the 0 argument is ignored).
        ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Invert: white background / black digit -> black background / white digit.
        inverted = cv2.bitwise_not(binary)
        # NOTE(review): the original also computed cv2.erode(inverted, (15, 15))
        # into an unused variable and then wrote the UN-eroded image; the dead
        # erode call was removed, output is unchanged.
        cv2.imwrite(os.path.join('output_me_cut_black\\', '0_{}.png'.format(count3)), inverted)

 4.预测单张图片

import os
import torch
from PIL import Image
from torch import nn
from torchvision import transforms, models
from torch.utils.data import DataLoader
from preprocess_dataset import MyDataset_pre
from torchvision.transforms import ToPILImage
import cv2
species = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Load the full serialized model saved during training; map_location='cpu'
# lets the script run on machines without CUDA.
model = torch.load("model\\mnist_model_nn.pkl", map_location=torch.device("cpu"))
# Preprocess exactly like training: resize to 28x28, tensorize, grayscale.
# Input images should already be white digits on a black background.
trans = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Grayscale()])
predict_dataset = MyDataset_pre('output_me_cut_black', transform=trans)
predict_loader = DataLoader(predict_dataset, batch_size=32)
model.eval()

# Collect predicted class names across ALL batches. (The original
# re-initialised this list inside the batch loop, so every batch except
# the last was silently discarded from the final output.)
predict = []
with torch.no_grad():
    for images in predict_loader:
        output = model(images)
        _, pred = torch.max(output, 1)
        # One predicted digit per image in the batch, in loader order.
        for j in range(len(images)):
            predict.append(species[int(pred[j].item())])

# Concatenate the per-image digits into one string and print it.
print(''.join(predict))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值