Lenet5结构
Lenet5是最简单版本的卷积神经网络。
- 卷积层
- Pooling
- 卷积层
- Pooling
- 全连接层
实现Lenet结构
'''最简单版本的卷积神经网络'''
import torch
from torch import nn, optim
from torch.nn import functional as F
class Lenet5(nn.Module):
    """LeNet-5 style network for classifying the CIFAR-10 dataset.

    Architecture: conv -> max-pool -> conv -> max-pool -> three
    fully-connected layers producing 10 class logits.
    """

    def __init__(self):
        super(Lenet5, self).__init__()
        # Convolutional feature extractor.
        # NOTE(review): there is no nonlinearity between the conv layers —
        # unusual for LeNet-5, but kept exactly as the original defined it.
        self.conv_unit = nn.Sequential(
            # x: [b, 3, 32, 32] => [b, 6, 28, 28] (3 in-channels, 6 out-channels)
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            # x: [b, 6, 28, 28] => [b, 6, 14, 14] (halves height and width)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            # x: [b, 6, 14, 14] => [b, 16, 10, 10] (6 in-channels, 16 out-channels)
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            # x: [b, 16, 10, 10] => [b, 16, 5, 5] (halves height and width)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )
        # Fully-connected classifier head; input is the flattened 16*5*5 feature map.
        self.fc_unit = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        :param x: input tensor of shape [b, 3, 32, 32]
        :return: logits tensor of shape [b, 10]
        """
        batchsz = x.size(0)
        # [b, 3, 32, 32] => [b, 16, 5, 5]
        x = self.conv_unit(x)
        # [b, 16, 5, 5] => [b, 16*5*5]; x.view(batchsz, -1) would also work
        x = x.view(batchsz, 16 * 5 * 5)
        # [b, 16*5*5] => [b, 10]
        logits = self.fc_unit(x)
        return logits
使用CIFAR10数据集测试Lenet5
CIFAR10数据集介绍
CIFAR10一共包含 10 个类别的 RGB 彩色图片:飞机( airplane )、汽车( automobile )、鸟类( bird )、猫( cat )、鹿( deer )、狗( dog )、蛙类( frog )、马( horse )、船( ship )和卡车( truck )。图片的尺寸为 32×32 。
- data – 10000x3072 的uint8s格式numpy数组。数组的每一行存储一个32x32的彩色图像,按顺序包含红色、绿色和蓝色三个通道的值,因此每行的长度为32x32x3=3072。图像按行进行存储,如数组的前32个值是图像第一行的红色通道值。
- labels – 取值为0-9的包含10000个数字的list。索引i处的数字表示数组data中第i个图像的标签。
实现Lenet对CIFAR10数据集的分类
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/7/29 22:13
# @Author : Liu Lihao
# @File : origin_lenet_main.py
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5
def main():
    """Train Lenet5 on CIFAR-10 and print the test accuracy after every epoch."""
    batchsz = 32

    # Training set: PIL images -> 32x32 float tensors in [0, 1].
    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ]), download=True)
    # Batch the dataset; shuffle so each epoch sees a different ordering.
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Test set: same preprocessing as training. No shuffling needed for
    # evaluation (the original shuffled it, which only wastes work).
    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ]), download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=False)

    # Model, loss and optimizer. Fall back to CPU when CUDA is unavailable —
    # the original hard-coded 'cuda' and crashed on CPU-only machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Lenet5().to(device)
    # CrossEntropyLoss applies log-softmax + NLL internally, so the model
    # outputs raw logits.
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)  # optimizer needs no .to(device)
    print(model)

    for epoch in range(1000):
        # --- train ---
        model.train()  # enable Dropout; BatchNorm statistics update
        for batchidx, (x, label) in enumerate(cifar_train):
            # x: [b, 3, 32, 32], label: [b]
            x, label = x.to(device), label.to(device)
            logits = model(x)  # [b, 10]
            loss = criterion(logits, label)  # scalar tensor
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Loss of the last batch of the epoch (matches original behavior).
        print(epoch, 'loss:', loss.item())

        # --- evaluate ---
        model.eval()  # disable Dropout; BatchNorm uses running statistics
        with torch.no_grad():  # no autograd graph needed during evaluation
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                # x: [b, 3, 32, 32], label: [b]
                x, label = x.to(device), label.to(device)
                logits = model(x)  # [b, 10]
                pred = logits.argmax(dim=1)  # [b]
                # [b] vs [b] => number of correct predictions in this batch
                total_correct += torch.eq(pred, label).float().sum().item()
                total_num += x.size(0)
            acc = total_correct / total_num
            print(epoch, 'test acc:', acc)


if __name__ == '__main__':
    main()