2.2pytorch官方demo(Lenet)
霹雳吧啦Wz 2.2pytorch官方demo(Lenet)
霹雳吧啦Wz GitHub deep-learning-for-image-processing
model.py: Lenet模型搭建
#CNN(Convolutional Neural Network)
#雏形:LeCun的LeNet(1998)网络结构
#Pytorch Tensor的通道顺序:[batch,channel,height,width]
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 inputs (e.g. CIFAR-10).

    PyTorch tensors use the channel order [batch, channel, height, width].

    The spatial size after a convolution or pooling layer follows
    N = (W - F + 2P) / S + 1, where W is the input size, F the filter size,
    P the padding and S the stride.

    Args:
        num_classes: Number of output logits. Defaults to 10 because
            CIFAR-10 has 10 classes; change it to match the training set.
    """

    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        # Conv2d(3, 16, 5): 3 input channels, 16 kernels, 5x5 kernel size.
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Shape comments below are (channel, height, width) for a 32x32 input.
        """
        # input(3, 32, 32) -> output(16, 28, 28): W=32 F=5 P=0 S=1 gives N=28
        x = F.relu(self.conv1(x))
        x = self.pool1(x)  # output(16, 14, 14): height and width halved
        # output(32, 10, 10): W=14 F=5 P=0 S=1 gives N=10
        x = F.relu(self.conv2(x))
        x = self.pool2(x)  # output(32, 5, 5): height and width halved
        # Flatten to (batch, 32*5*5); the size is read from fc1 so the two
        # cannot drift apart.
        x = x.view(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))  # output(120)
        x = F.relu(self.fc2(x))  # output(84)
        x = self.fc3(x)  # output(num_classes)
        return x
# import torch
# input1 = torch.rand([32,3,32,32])
# model = LeNet()
# print(model)
# output = model(input1)
实验结果
# print结果
LeNet(
(conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
(pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=800, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
train.py: 训练脚本
import numpy as np
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
#def main():
# Convert images to tensors and normalize every channel.
#   normalize:   output = (input - 0.5) / 0.5
#   unnormalize: input = output * 0.5 + 0.5 = output / 2 + 0.5
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# 50000 training images.
# Set download=True on the first run so the dataset is fetched automatically.
train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=False, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                           shuffle=True, num_workers=0)

# 10000 validation images.
# Set download=True on the first run so the dataset is fetched automatically.
val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=False, transform=transform)
# batch_size=10000 so a single batch is requested for validation.
val_loader = torch.utils.data.DataLoader(val_set, batch_size=10000,
                                         shuffle=False, num_workers=0)
val_data_iter = iter(val_loader)
# Use the built-in next(): the loader iterator has no .next() method in
# recent PyTorch releases.
val_image, val_label = next(val_data_iter)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Optional helper for eyeballing a few validation images (use a small
# validation batch size, e.g. 4, before enabling it):
# def imshow(img):
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# # print labels
# # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
# print(' '.join(f'{classes[val_label[j]]:5s}' for j in range(4)))  # batch_size=4
# # show images
# imshow(torchvision.utils.make_grid(val_image))

net = LeNet()
# CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss in one class, so the
# network outputs raw logits.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(5):  # loop over the dataset five times
    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        # data is a list of [inputs, labels]
        inputs, labels = data

        # Gradients accumulate across backward() calls, so they must be
        # cleared for every batch. (Skipping the reset on purpose is a way
        # to emulate a larger batch size.)
        # https://www.zhihu.com/question/303070254
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics every 500 mini-batches
        running_loss += loss.item()
        if step % 500 == 499:
            with torch.no_grad():  # no gradients needed for validation
                outputs = net(val_image)  # [batch, 10]
                predict_y = outputs.argmax(dim=1)
                accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)

            print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                  (epoch + 1, step + 1, running_loss / 500, accuracy))
            running_loss = 0.0

print('Finished Training')

# Save only the learned parameters (state dict), not the whole module.
save_path = './Lenet.pth'
torch.save(net.state_dict(), save_path)
# if __name__ == '__main__':
# main()
实验结果
[1, 500] train_loss: 1.734 test_accuracy: 0.454
[1, 1000] train_loss: 1.444 test_accuracy: 0.508
[2, 500] train_loss: 1.242 test_accuracy: 0.534
[2, 1000] train_loss: 1.184 test_accuracy: 0.582
[3, 500] train_loss: 1.053 test_accuracy: 0.620
[3, 1000] train_loss: 1.052 test_accuracy: 0.629
[4, 500] train_loss: 0.935 test_accuracy: 0.650
[4, 1000] train_loss: 0.944 test_accuracy: 0.655
[5, 500] train_loss: 0.847 test_accuracy: 0.658
[5, 1000] train_loss: 0.854 test_accuracy: 0.663
Finished Training
predict.py: 单张图像预测脚本
import torch
import torchvision.transforms as transforms
from PIL import Image
from model import LeNet
def main(image_path='plane.jpg', weights_path='Lenet.pth'):
    """Classify a single image with a trained LeNet and print its class name.

    Args:
        image_path: Path of the image to classify.
        weights_path: Path of the state dict to load into LeNet.
    """
    # Same preprocessing as used for training, plus a resize to the 32x32
    # input the network expects.
    transform = transforms.Compose(
        [transforms.Resize((32, 32)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    # map_location='cpu' lets weights saved on another device load here.
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    net.eval()

    # Close the file after use, and force 3 channels so grayscale or RGBA
    # images do not break the 3-channel Normalize / first conv layer.
    with Image.open(image_path) as im:
        im = transform(im.convert('RGB'))  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    with torch.no_grad():
        outputs = net(im)
        # .item() yields a plain Python int; int() on a 1-element NumPy array
        # is deprecated scalar conversion.
        predict = outputs.argmax(dim=1).item()
        # To inspect the per-class probabilities instead:
        # predict = torch.softmax(outputs, dim=1)
    print(classes[predict])
    # print(predict)


if __name__ == '__main__':
    main()
实验结果
# 打印预测类别名称的结果:plane
# 将 predict 改为 torch.softmax(outputs, dim=1) 后,print(predict) 的结果(10个类别各自的概率):
tensor([[9.9333e-01, 8.2619e-04, 5.4407e-04, 3.6783e-05, 4.9352e-05, 1.1255e-06,
6.9997e-06, 2.6429e-06, 2.6157e-03, 2.5905e-03]])
认真是一种态度更是一种责任