以Inception Module实现手写数字识别为例:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets # 放置了许多常用数据集,包括手写数字识别
import torch.nn.functional as F
数据预处理
# Preprocessing applied to every MNIST image.
transform = transforms.Compose(
    [
        # Convert the image to a tensor with values scaled to [0, 1].
        transforms.ToTensor(),
        # Standardize each pixel as (x - mean) / std. The first tuple is the
        # per-channel mean and the second is the standard deviation (not the
        # variance).
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)
# Load both MNIST splits (downloaded into `root` if not already present) and
# wrap each one in a DataLoader that yields mini-batches of 100 samples.
train_dataset = datasets.MNIST(
    root="E:/MNIST/mnist",
    train=True,  # training split
    transform=transform,  # apply the module-level preprocessing pipeline
    download=True,
)
test_dataset = datasets.MNIST(
    root="E:/MNIST/mnist",
    train=False,  # test split
    transform=transform,  # same preprocessing as the training split
    download=True,
)

# Only the training data is shuffled; the test set is read in a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)
Inception Block
class InceptionA(nn.Module):
    """Inception-style block: four parallel branches concatenated on channels.

    Every branch keeps the spatial size of its input (1x1 convolutions, or
    3x3 / 5x5 convolutions and 3x3 average pooling with matching padding and
    stride 1), so the branch outputs can be concatenated along dim 1.
    The output always has 16 + 24 + 24 + 24 = 88 channels, whatever
    ``in_channels`` is.

    Args:
        in_channels: Number of channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        # 1x1 branch -> 16 channels.
        self.branch1_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        # 5x5 branch: 1x1 reduction to 16 channels, then 5x5 -> 24 channels.
        self.branch5_5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5_5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)
        # Double-3x3 branch: 1x1 reduction, then two 3x3 convs -> 24 channels.
        self.branch3_3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3_3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3_3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)
        # Pooling branch: the 1x1 conv applied after average pooling
        # -> 24 channels.
        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate their outputs.

        Args:
            x: Input tensor of shape (batch, in_channels, H, W).

        Returns:
            Tensor of shape (batch, 88, H, W).
        """
        # The branches are parallel: each one must read the original input.
        # Writing `x = self.branch1_1(x)` followed by `x = self.branch5_5_1(x)`
        # would be wrong because it chains the branches in series.
        branch1_1 = self.branch1_1(x)

        branch5_5 = self.branch5_5_2(self.branch5_5_1(x))

        branch3_3 = self.branch3_3_1(x)
        branch3_3 = self.branch3_3_2(branch3_3)
        branch3_3 = self.branch3_3_3(branch3_3)

        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(pooled)

        # Concatenate along dim=1, the channel dimension of (b, c, h, w).
        return torch.cat([branch1_1, branch5_5, branch3_3, branch_pool], dim=1)
搭建模型
class Net(nn.Module):
    """Digit classifier built from two conv layers and two InceptionA blocks.

    The hard-coded sizes assume single-channel 28x28 inputs (MNIST):

        input                 (N,  1, 28, 28)
        conv1 5x5 + max-pool  (N, 10, 12, 12)
        incep1                (N, 88, 12, 12)
        conv2 5x5 + max-pool  (N, 20,  4,  4)
        incep2                (N, 88,  4,  4)
        flatten               (N, 1408)        # 88 * 4 * 4
        fc                    (N, 10)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels: conv2 consumes the concatenated output of incep1.
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)

        self.mp = nn.MaxPool2d(2)
        # 1408 = 88 channels * 4 * 4 spatial positions after incep2.
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``.

        Args:
            x: Input batch; the first dimension is the batch size.

        Returns:
            Tensor of shape (batch, 10) taken directly from the final linear
            layer (no softmax is applied here).
        """
        in_size = x.size(0)
        # Convolution -> max-pool -> ReLU; the output has 10 channels.
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)  # 10 -> 88 channels
        x = F.relu(self.mp(self.conv2(x)))  # 88 -> 20 channels
        x = self.incep2(x)  # 20 -> 88 channels
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(in_size, -1)
        return self.fc(x)
# Model, loss function and optimizer shared by train() and test().
model = Net()
# Cross-entropy is computed directly on the scores returned by the model.
loss_fn = nn.CrossEntropyLoss()
# Plain SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.5,
)
训练函数
def train(epoch):
    """Run one training epoch over ``train_loader`` and log the running loss.

    Uses the module-level ``model``, ``loss_fn`` and ``optimizer``. After
    every 300 mini-batches the mean loss over those 300 batches is printed
    and the accumulator is reset.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
    """
    # Put the model in training mode explicitly so that any mode-dependent
    # layers behave correctly.
    model.train()

    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = loss_fn(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d, %5d] loss: %.3f" % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
测试函数
def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and print accuracy.

    The accuracy is printed as a percentage of correctly classified samples
    over all samples seen in ``test_loader``.
    """
    # Evaluate in eval mode, then restore whatever mode the model was in so
    # that calling test() between training epochs has no lasting effect.
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            # torch.max over dim=1 returns (max values, indices of the max);
            # the index of the largest score is the predicted class.
            _, predicted = torch.max(outputs, dim=1)
            total += labels.size(0)  # number of samples in this batch
            correct += (predicted == labels).sum().item()  # correct predictions

    model.train(was_training)
    print("Accuracy on the test set %s %%" % (100 * correct / total))
网络启动
if __name__ == "__main__":
    # Train for 10 epochs. The test set is evaluated after every even
    # zero-based epoch (0, 2, 4, 6, 8), i.e. after the 1st, 3rd, 5th, 7th and
    # 9th epoch as numbered in the training log.
    for epoch in range(10):
        train(epoch)
        if epoch % 2 == 0:
            test()
[1,   300] loss: 0.806
[1,   600] loss: 0.189
Accuracy on the test set 96.0 %
[2,   300] loss: 0.125