Implementing LeNet in PyTorch

LeNet architecture diagram
These are notes for my own learning, so I don't forget later.

model.py

Define the network model.

import torch
import torch.nn as nn
import torch.nn.functional as F 

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32*5*5, 120)   # the input to a fully connected layer must be flattened into a 1-D vector per sample
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)    # 10 = number of classes; change it to match your own dataset

    # Output size formula: (W - F + 2P) / S + 1   W: input width (assuming H = W)  F: kernel size  P: padding  S: stride
    def forward(self, x):
        x = F.relu(self.conv1(x))   # [batch, 3, 32, 32] -> [batch, 16, 28, 28]   (32-5+0)/1 + 1 = 28
        x = self.maxpool1(x)        # [batch, 16, 28, 28] -> [batch, 16, 14, 14]
        x = F.relu(self.conv2(x))           # [batch, 16, 14, 14] -> [batch, 32, 10, 10]  (14-5+0)/1+1=10
        x = self.maxpool2(x)        # [batch, 32, 10, 10] -> [batch, 32, 5, 5] 
        x = x.view(-1,32*5*5)   # [batch,32*5*5]
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        out = self.fc3(x)   # no softmax here: nn.CrossEntropyLoss applies (log-)softmax internally
        return out

Note: the output size after a convolution is (W - F + 2P) / S + 1, where W is the input width (assuming height equals width), F is the kernel size, P is the padding, and S is the stride.
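
To sanity-check these shape calculations, a quick forward pass with a dummy input works well (a minimal sketch; the 32x32 input matches the CIFAR-10 images used below):

import torch
from model import LeNet

net = LeNet()
x = torch.randn(1, 3, 32, 32)   # one dummy RGB image of size 32x32
print(net(x).shape)             # expected: torch.Size([1, 10])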

train.py

import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))
])

batch_size = 4   # only used by the (commented-out) image display demo at the bottom

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=False)   # set download=True on the first run to fetch CIFAR-10
trainloader = data.DataLoader(trainset, batch_size=36, shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=False)
testloader = data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)   # load the whole test set in a single batch
test_data_iter = iter(testloader)
test_image, test_label = next(test_data_iter)   # use next(); iterator objects have no .next() method in Python 3

classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.001)

# start training
for epoch in range(5):
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(trainloader, start=1):   # unpack directly, avoiding shadowing the `data` module

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss and report every 500 training steps
        running_loss += loss.item()
        if step % 500 == 0:
            with torch.no_grad():
                outputs = net(test_image)   # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]    # dim=1: max over the 10 class scores; torch.max returns (values, indices)
                accuracy = (predict_y==test_label).sum().item() / test_label.size(0)

                print('[%d, %5d] train_loss:%.3f test_accuracy:%.3f'%(epoch+1, step, running_loss/500, accuracy))
                running_loss = 0.0

print("Finished trainning!")

save_path = './LeNet.pth'
torch.save(net.state_dict(), save_path)

# # View the images (code from the official PyTorch tutorial)

# def imshow(img):
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()     # convert the tensor to a numpy array
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))  # Tensor(channel,height, width) -> numpy(height, width, channel)
#     plt.show()
# # get some random training images
# dataiter = iter(trainloader)
# images, labels = next(dataiter)

# # show images
# imshow(torchvision.utils.make_grid(images))
# # print labels
# print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))
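
The accuracy check in the training loop above pushes all 10,000 test images through the network in a single batch, which can be memory-hungry. Below is a minimal sketch of a batched evaluation instead, reusing the testset and net defined above (the batch size of 128 is an arbitrary choice):

eval_loader = data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=0)
correct, total = 0, 0
with torch.no_grad():
    for images, labels in eval_loader:
        outputs = net(images)                        # [batch, 10] class scores
        predicted = torch.max(outputs, dim=1)[1]     # index of the highest score per image
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
print('test accuracy: %.3f' % (correct / total))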

predict.py

import torch
import torchvision.transforms as transforms
from PIL import Image
from model import LeNet

transform = transforms.Compose([
    transforms.ToTensor(),  # placed first so later transforms work on tensors (recent torchvision lets Resize accept tensors; with older versions put Resize before ToTensor)
    transforms.Resize((32, 32)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
net.load_state_dict(torch.load('LeNet.pth'))

im = Image.open('1.jpeg').convert('RGB')    # ensure exactly 3 channels, in case of grayscale or RGBA input
im = transform(im)  # result is (C, H, W); the batch dimension is still missing
im = torch.unsqueeze(im, dim=0) # unsqueeze at dim=0 adds the batch dimension: (1, C, H, W)

with torch.no_grad():
    outputs = net(im)   # outputs:(batch, 10)
    predict = torch.max(outputs, dim=1)[1].data.numpy()
    # predict = torch.softmax(outputs, dim=1)
    # print(predict)    # this would print the predicted probability for each of the 10 classes
print(classes[int(predict)])
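
If per-class probabilities are wanted instead of only the top label (as the commented-out lines above hint), softmax can be applied to the logits. A minimal sketch reusing net, im, and classes from above:

with torch.no_grad():
    probs = torch.softmax(net(im), dim=1)[0]    # convert the 10 logits into probabilities
for name, p in zip(classes, probs):
    print('%-12s %.3f' % (name, p.item()))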

Code source
