为自己的学习做个笔记,以免以后遗忘。
model.py
定义网络模型
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """LeNet-5-style CNN for 10-class classification of 3x32x32 images (e.g. CIFAR-10).

    Size of a conv/pool output: (W - F + 2P) / S + 1, where W is the input
    width (assuming H == W), F the kernel size, P the padding, S the stride.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        # Fully connected layers consume one flattened 1-D vector per sample.
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # change 10 to match the number of classes

    def forward(self, x):
        """Return raw (un-softmaxed) class logits of shape [batch, 10]."""
        x = F.relu(self.conv1(x))   # [B, 3, 32, 32] -> [B, 16, 28, 28]: (32-5+0)/1 + 1 = 28
        x = self.maxpool1(x)        # -> [B, 16, 14, 14]
        x = F.relu(self.conv2(x))   # -> [B, 32, 10, 10]: (14-5+0)/1 + 1 = 10
        x = self.maxpool2(x)        # -> [B, 32, 5, 5]
        # Flatten keeping the batch dim explicit: unlike view(-1, 32*5*5), this
        # raises a clear error on a wrong spatial size instead of silently
        # merging samples across the batch dimension.
        x = x.view(x.size(0), -1)   # [B, 32*5*5]
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation here: nn.CrossEntropyLoss applies log-softmax internally.
        return self.fc3(x)
注意:卷积后尺寸的计算公式为 (W - F + 2P) / S + 1,其中 W 是图像宽度(假设长度和宽度相同),F 是卷积核的大小,P 是 padding 的值,S 是步长。
train.py
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as data
# Preprocessing: PIL -> tensor in [0, 1], then normalize each channel to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

batch_size = 4  # only used by the (commented-out) visualization snippet below

# CIFAR-10 under ./data; set download=True on the first run to fetch it.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=False)
trainloader = data.DataLoader(trainset, batch_size=36, shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=False)
# One batch covering the whole test set, so accuracy is computed in a single pass.
testloader = data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)

test_data_iter = iter(testloader)
# Python 3 iterators have no .next() method — use the builtin next().
test_image, test_label = next(test_data_iter)

classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
loss_function = nn.CrossEntropyLoss()  # applies log-softmax itself, so the net emits raw logits
optimizer = optim.Adam(net.parameters(), lr=0.001)
# Training loop: 5 epochs, evaluating on the held-out test batch every 500 steps.
for epoch in range(5):
    running_loss = 0.0
    # start=1 so that `step % 500` means "every 500 batches".
    # NOTE: the loop variable must not be called `data` — that would shadow the
    # imported torch.utils.data module.
    for step, batch in enumerate(trainloader, start=1):
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 500 == 0:
            # Evaluate without building a graph to save memory and time.
            with torch.no_grad():
                outputs = net(test_image)                 # [batch, 10] logits
                # dim=1: argmax over classes; torch.max returns (values, indices).
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = (predict_y == test_label).sum().item() / test_label.size(0)
                print('[%d, %5d] train_loss:%.3f test_accuracy:%.3f' % (epoch + 1, step, running_loss / 500, accuracy))
                running_loss = 0.0

print("Finished training!")  # fixed typo: "trainning" -> "training"
save_path = './LeNet.pth'
torch.save(net.state_dict(), save_path)
# # 查看图片(官方代码)
# def imshow(img):
# img = img / 2 + 0.5 # 反标准化
# npimg = img.numpy() # 将图片装换成numpy形式
# plt.imshow(np.transpose(npimg, (1, 2, 0))) # Tensor(channel,height, width) -> numpy(height, width, channel)
# plt.show()
# # get some random training images
# dataiter = iter(trainloader)
# images, labels = dataiter.next()
# # show images
# imshow(torchvision.utils.make_grid(images))
# # print labels
# print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))
predict.py
import torch
import torchvision.transforms as transforms
from PIL import Image
from model import LeNet
# Preprocessing must match training: same target size and Normalize statistics.
transform = transforms.Compose([
    transforms.ToTensor(),        # PIL -> float tensor in [0, 1], shape (C, H, W)
    transforms.Resize((32, 32)),  # Resize accepts tensors in torchvision >= 0.8
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
# map_location='cpu' lets CPU-only machines load weights saved on any device.
net.load_state_dict(torch.load('LeNet.pth', map_location='cpu'))

# convert('RGB') guards against grayscale/RGBA/CMYK files, which would otherwise
# yield the wrong number of channels for conv1 (it expects 3).
im = Image.open('1.jpeg').convert('RGB')
im = transform(im)                  # (C, H, W) — still missing the batch dimension
im = torch.unsqueeze(im, dim=0)     # (1, C, H, W): dim=0 prepends the batch axis

with torch.no_grad():
    outputs = net(im)                                    # (1, 10) logits
    predict = torch.max(outputs, dim=1)[1].data.numpy()  # index of the top class
    # torch.softmax(outputs, dim=1) would give per-class probabilities instead.
print(classes[int(predict)])