使用AlexNet进行手写数字识别:项目结构与代码

一、文件目录结构

abc.ttf是字体文件,用于在图片上标注预测标签

model.py是AlexNet模型代码

train.py用于训练

test.py用于单张图片测试

MNIST文件夹是数据集文件

二、各部分代码

将train.py调整一下结构后,三部分代码内容为:

train.py

'''
复现AlexNet
MNIST 数据集 大小28x28
'''
import torch
import torch.nn as nn
from torchvision.datasets import MNIST
import torchvision.transforms as transform
from torch.utils.data import DataLoader
from model import AlexNet

# MNIST download and preparation.
# Both splits share one preprocessing pipeline: resize every image to 32x32
# and convert it to a tensor.
mnist_transform = transform.Compose([
    transform.Resize((32, 32)),
    transform.ToTensor(),
])

data_train = MNIST(
    './MNIST/data',
    train=True,
    download=True,
    transform=mnist_transform,
)

# `train` must be the boolean False. The original passed the string 'False',
# which is truthy, so the "test" dataset was in fact the training split.
data_test = MNIST(
    './MNIST/data',
    train=False,
    download=True,
    transform=mnist_transform,
)

data_train_loader = DataLoader(data_train, batch_size=32, shuffle=True)
data_test_loader = DataLoader(data_test, batch_size=32, shuffle=True)


# Build the network and put it in training mode.
alexNet = AlexNet()
alexNet.train()

# Training hyper-parameters, loss function and optimizer.
lr = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    alexNet.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=5e-4,
)

# Running statistics accumulated across batches.
train_loss = 0
correct = 0
total = 0
index = 0  # checkpoint counter; only used by the optional snippet below

# One pass over the training loader.
for batch_idx, (inputs, targets) in enumerate(data_train_loader):
    optimizer.zero_grad()
    outputs = alexNet(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    train_loss += loss.item()
    predicted = outputs.max(1)[1]  # per-row index of the largest output
    total += targets.size(0)
    correct += predicted.eq(targets).sum().item()

    mean_loss = train_loss / (batch_idx + 1)
    accuracy = 100 * correct / total
    print(batch_idx, 'Loss: %.3f | Acc: %.3f' % (mean_loss, accuracy))

    # Optional periodic checkpoint (disabled):
    # if batch_idx % 300 == 0:
    #     index += 1
    #     torch.save(alexNet, './MINIST_AlexNet_{}.pt'.format(index))

# Save the model as it stands after the last batch.
torch.save(alexNet, './MINIST_AlexNet_last.pt')

model.py

import torch
import torch.nn as nn

# NOTE: the original file also contained `from model import AlexNet`.
# This file *is* model.py, so that line made the module import itself before
# the class existed and raised ImportError. It has been removed.


class AlexNet(nn.Module):
    """Small AlexNet-style CNN for single-channel digit images.

    The layer sizes assume a (N, 1, 32, 32) input batch:

        conv1      5x5, 1 -> 64 channels    -> (N, 64, 28, 28)
        max_pool1  3x3, stride 2            -> (N, 64, 13, 13)
        conv2      5x5, 64 -> 64 channels   -> (N, 64, 9, 9)
        max_pool2  3x3, stride 2            -> (N, 64, 4, 4)
        flatten                             -> (N, 1024)
        fc1 -> fc2 -> fc3                   -> (N, 10)

    Any other spatial size changes the flattened length and no longer
    matches the 1024 inputs of ``fc1``.
    """

    def __init__(self):
        super(AlexNet, self).__init__()

        # 1 input channel, 64 output channels, 5x5 kernel.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 64, 5),
            nn.ReLU(True),
        )
        # 3x3 pooling window, stride 2.
        self.max_pool1 = nn.MaxPool2d(3, 2)

        # 64 input channels, 64 output channels, 5x5 kernel.
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, 5),
            nn.ReLU(True),
        )
        # 3x3 pooling window, stride 2.
        self.max_pool2 = nn.MaxPool2d(3, 2)

        # Classifier head: 1024 -> 384 -> 192 -> 10.
        self.fc1 = nn.Sequential(
            nn.Linear(1024, 384),
            nn.ReLU(True),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(384, 192),
            nn.ReLU(True),
        )
        self.fc3 = nn.Linear(192, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in the batch ``x``."""
        x = self.conv1(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        x = self.max_pool2(x)

        # Flatten every sample's feature maps into one vector.
        x = x.view(x.shape[0], -1)

        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        return x

test.py : 测试单张图片

import torch
import torch.nn as nn
from model import AlexNet
from PIL import Image
import numpy as np
import torchvision.transforms as transform

model_path = './MINIST_AlexNet_last.pt'
# SECURITY: torch.load unpickles the whole saved model object, so only load
# checkpoint files from a trusted source.
# NOTE(review): newer PyTorch releases presumably need weights_only=False to
# load a fully pickled model like this one; confirm for the installed version.
alexnet = torch.load(model_path)
# This script only runs inference, so select evaluation mode explicitly.
alexnet.eval()

def readImage(path='3.jpg'):
    """Read an image file and convert it to the format the network accepts.

    The image is resized to 32x32, reduced to a single grayscale channel
    and converted to a tensor.

    Args:
        path: Path of the image file to read.

    Returns:
        The transformed image tensor, without a batch dimension.
    """
    transform1 = transform.Compose([
        transform.Resize((32, 32)),
        transform.Grayscale(1),
        transform.ToTensor(),
    ])
    # The context manager closes the image file once the tensor has been
    # built; the original left the handle open.
    with Image.open(path) as mode:
        return transform1(mode)

img = readImage()
# Add a leading batch dimension: the network is called on batches.
img = img.unsqueeze(0)

# Predict: take the index of the largest output as the digit.
# Gradients are not needed for inference, so autograd tracking is disabled.
with torch.no_grad():
    _, pre = alexnet(img).max(1)
print(pre.item())

更新后的test.py代码

import torch
import torch.nn as nn
from model import AlexNet
from PIL import Image,ImageDraw,ImageFont
import numpy as np
import torchvision.transforms as transform

model_path = './MINIST_AlexNet_last.pt'
# SECURITY: torch.load unpickles the whole saved model object, so only load
# checkpoint files from a trusted source.
# NOTE(review): newer PyTorch releases presumably need weights_only=False to
# load a fully pickled model like this one; confirm for the installed version.
alexnet = torch.load(model_path)
# This script only runs inference, so select evaluation mode explicitly.
alexnet.eval()

def readImage(path='2.jpg'):
    """Read an image file and convert it to the format the network accepts.

    The image is resized to 32x32, reduced to a single grayscale channel
    and converted to a tensor.

    Args:
        path: Path of the image file to read.

    Returns:
        The transformed image tensor, without a batch dimension.
    """
    transform1 = transform.Compose([
        transform.Resize((32, 32)),
        transform.Grayscale(1),
        transform.ToTensor(),
    ])
    # The context manager closes the image file once the tensor has been
    # built; the original left the handle open.
    with Image.open(path) as mode:
        return transform1(mode)

def DrawImageTxt(imageFile, targetImageFile, txtnum):
    """Draw the predicted label onto an image and save the result.

    Args:
        imageFile: Path of the image to annotate.
        targetImageFile: Path the annotated image is written to.
        txtnum: Text to draw in the top-left corner (the digit as a string).
    """
    # Font file and size used for the label.
    font = ImageFont.truetype('abc.ttf', 100)

    # The context manager closes the source file even if drawing or saving
    # raises; the original only closed it on the success path.
    with Image.open(imageFile) as im:
        # The label colour is an RGB triple, so draw on an RGB canvas;
        # this keeps grayscale or palette inputs from failing on the fill.
        canvas = im if im.mode == 'RGB' else im.convert('RGB')

        draw = ImageDraw.Draw(canvas)
        draw.text((0, 0), txtnum, (255, 255, 0), font=font)

        canvas.save(targetImageFile)

if __name__ == "__main__":
    # Name the input path once so the image that is classified and the
    # image that is annotated cannot drift apart.
    imageFile = './5.jpg'
    targetImageFile = './5_pre.jpg'

    img = readImage(imageFile)
    # Add a leading batch dimension: the network is called on batches.
    img = img.unsqueeze(0)

    # Predict: take the index of the largest output as the digit.
    # Gradients are not needed for inference, so autograd tracking is disabled.
    with torch.no_grad():
        _, pre = alexnet(img).max(1)

    txtnum = str(pre.item())
    DrawImageTxt(imageFile, targetImageFile, txtnum)

测试图片

预测结果:

所有文件我已经上传到CSDN,欢迎下载!

下载链接:https://download.csdn.net/download/qq_41964545/15600435

如果您觉得有用的话,不妨帮我点个关注和赞哦!

  • 6
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论
AlexNet是一个深度卷积神经网络,由Alex Krizhevsky、Geoffrey Hinton和Ilya Sutskever在2012年ImageNet大规模视觉识别竞赛中首次提出,可以用于图像分类、目标检测和语义分割等任务。在本文中,我们将介绍如何使用AlexNet网络实现手写数字识别。 1. 数据集准备 我们将使用MNIST手写数字数据集来训练我们的网络。MNIST数据集包含由手写数字组成的图像,每个图像都是28*28像素的灰度图像。数据集共有60000个训练图像和10000个测试图像,每个图像都有一个对应的标签,表示该图像中的数字。 2. 网络结构 AlexNet网络由5个卷积层和3个全连接层组成。每个卷积层后面跟着一个池化层。最后一个全连接层输出数据集中数字的概率分布。 3. 网络训练 我们使用PyTorch框架来实现AlexNet网络。首先,我们需要定义网络结构: ``` import torch.nn as nn class AlexNet(nn.Module): def __init__(self, num_classes=10): super(AlexNet, self).__init__() self.features = nn.Sequential( nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(192, 384, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), ) self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) self.classifier = nn.Sequential( nn.Dropout(), nn.Linear(256 * 6 * 6, 4096), nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, 4096), nn.ReLU(inplace=True), nn.Linear(4096, num_classes), ) def forward(self, x): x = self.features(x) x = self.avgpool(x) x = x.view(x.size(0), 256 * 6 * 6) x = self.classifier(x) return x ``` 接下来,我们需要定义数据加载器和优化器: ``` import torch.optim as optim import torchvision.datasets as datasets import torchvision.transforms as transforms train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True) test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor()) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = 
AlexNet(num_classes=10).to(device) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) ``` 最后,我们可以开始训练网络: ``` for epoch in range(10): train_loss = 0 train_acc = 0 model.train() for data, target in train_loader: data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = criterion(output, target) loss.backward() optimizer.step() train_loss += loss.item() * data.size(0) pred = output.argmax(dim=1, keepdim=True) train_acc += pred.eq(target.view_as(pred)).sum().item() train_loss /= len(train_loader.dataset) train_acc /= len(train_loader.dataset) test_loss = 0 test_acc = 0 model.eval() with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) loss = criterion(output, target) test_loss += loss.item() * data.size(0) pred = output.argmax(dim=1, keepdim=True) test_acc += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) test_acc /= len(test_loader.dataset) print('Epoch: {} \tTraining Loss: {:.6f} \tTraining Accuracy: {:.6f} \tTesting Loss: {:.6f} \tTesting Accuracy: {:.6f}'.format( epoch+1, train_loss, train_acc, test_loss, test_acc)) ``` 4. 测试网络 训练完成后,我们可以使用测试集来测试网络的性能: ``` model.eval() with torch.no_grad(): test_loss = 0 test_acc = 0 for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) test_loss += criterion(output, target).item() * data.size(0) pred = output.argmax(dim=1, keepdim=True) test_acc += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) test_acc /= len(test_loader.dataset) print('Test Loss: {:.6f} \tTest Accuracy: {:.6f}'.format(test_loss, test_acc)) ``` 通过以上步骤,我们就可以使用AlexNet网络实现手写数字识别

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

开始学AI

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值