Building a Neural Network with torch.nn
- An nn.Module contains layers and a forward(input) method that returns the output.
- Network parameters are updated with: weight = weight - learning_rate * gradient
1. Define the network
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels,
        # 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square, you can specify only a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))  # 16x5x5 => 400
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)
Net(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
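As an aside, the per-sample flattening handled by num_flat_features can also be written with torch.flatten. The forward below is a minimal equivalent sketch (my own variant, not part of this tutorial's model); it produces the same 400-dimensional vector before fc1:

def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    x = torch.flatten(x, 1)  # flatten all dimensions except the batch dimension
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)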
You only need to define the forward function in the model; the backward function is created automatically by autograd.
# net.parameters() returns a list of the learnable parameters and their values
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's weight
print(net.conv1.weight) # print(params[0])
10
torch.Size([6, 1, 5, 5])
Parameter containing:
tensor([[[[ 0.0920, -0.1502, -0.1146, -0.0823, 0.1165],
[-0.1942, 0.0238, -0.0423, -0.0805, 0.0507],
[ 0.0569, 0.0641, 0.0955, -0.1030, 0.0068],
[-0.0708, -0.1233, -0.0266, 0.1495, -0.1303],
[-0.1718, -0.0681, 0.1756, 0.0646, -0.0773]]],
[[[ 0.0478, 0.0839, 0.0128, 0.1381, -0.0549],
[ 0.1063, -0.0660, -0.0870, -0.1063, -0.1631],
[-0.0737, -0.1829, -0.1275, -0.0730, 0.1965],
[-0.1875, 0.0265, -0.0916, 0.1861, 0.1647],
[ 0.0039, 0.1785, -0.0257, -0.1394, 0.1163]]],
[[[-0.1633, -0.1982, 0.0732, -0.1768, -0.0351],
[ 0.1836, 0.0577, -0.1532, 0.0189, -0.1283],
[ 0.1046, 0.0998, -0.1137, 0.1240, -0.1994],
[-0.0218, -0.1059, -0.0555, 0.0113, 0.0569],
[ 0.0434, 0.0684, 0.0628, -0.1646, -0.0417]]],
[[[ 0.1068, -0.0242, 0.1280, 0.1933, 0.0967],
[ 0.0882, 0.0430, -0.0364, 0.0477, -0.0285],
[-0.0868, -0.0659, 0.0984, 0.0146, 0.1582],
[-0.1694, -0.0822, 0.1173, -0.0462, -0.1102],
[ 0.1505, -0.0821, -0.1327, -0.1426, 0.1711]]],
[[[-0.1637, -0.0669, -0.1259, -0.1409, 0.0151],
[ 0.1652, -0.1291, 0.1375, -0.0178, -0.0214],
[ 0.1341, 0.1496, 0.0894, 0.1005, -0.0573],
[-0.0171, -0.0105, 0.1748, -0.1021, 0.1058],
[-0.1521, 0.0841, -0.1838, -0.1268, -0.0858]]],
[[[ 0.0160, 0.1429, 0.1350, 0.0637, 0.1490],
[ 0.1036, 0.1720, -0.1720, -0.1303, 0.1165],
[ 0.0377, -0.0726, -0.1716, -0.0395, 0.0793],
[-0.1122, -0.1481, 0.0029, 0.0744, -0.0566],
[-0.0909, -0.1701, -0.0342, -0.0336, 0.0530]]]], requires_grad=True)
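Besides indexing into the parameter list, you can iterate over net.named_parameters() to see each parameter's name and shape, which is often more readable (a small sketch; the shapes follow from the layer definitions above):

for name, param in net.named_parameters():
    print(name, param.size())
# conv1.weight torch.Size([6, 1, 5, 5])
# conv1.bias   torch.Size([6])
# ... and so on for conv2, fc1, fc2 and fc3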
# a random 32x32 input
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)  # out has 10 dimensions
tensor([[-0.0520, -0.0715, 0.0821, -0.1048, 0.0430, -0.0734, 0.0284, 0.0908,
-0.0979, -0.0390]], grad_fn=<AddmmBackward>)
# zero the gradient buffers of all parameters and backpropagate with random gradients
net.zero_grad()
out.backward(torch.randn(1, 10))
print(out)
tensor([[-0.0520, -0.0715, 0.0821, -0.1048, 0.0430, -0.0734, 0.0284, 0.0908,
-0.0979, -0.0390]], grad_fn=<AddmmBackward>)
Note
torch.nn only supports mini-batches; it does not support a single sample as input. For example, nn.Conv2d expects a 4D tensor with dimensions nSamples x nChannels x Height x Width (number of samples x number of channels x height x width).
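If you only have a single sample, a common workaround is to add a fake batch dimension with unsqueeze; a minimal sketch, assuming a single-channel 32x32 image:

single = torch.randn(1, 32, 32)    # one image, no batch dimension: (1, 32, 32)
batched = single.unsqueeze(0)      # fake batch of size 1: (1, 1, 32, 32)
out = net(batched)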
2. Loss function
A loss function takes a pair (output, target) as input and computes a value that estimates how far the network's output is from the target value.
The nn package contains several different loss functions; a simple one is nn.MSELoss, which computes the mean squared error between the output and the target.
output = net(input)
target = torch.randn(10)     # a dummy target
target = target.view(1, -1)  # make target the same shape as output
loss_func = nn.MSELoss()
loss = loss_func(output, target)
print(loss)
tensor(1.2870, grad_fn=<MseLossBackward>)
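The mean squared error is just the mean of the squared element-wise differences, so as a sanity check (a quick sketch, reusing output and target from above) the same value can be computed by hand:

manual = ((output - target) ** 2).mean()
print(manual)  # should match the value reported by nn.MSELoss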
The whole computation graph looks like this:
input => { conv1 => relu => maxpool2d } => { conv2 => relu => maxpool2d }
=> view => { fc1 => relu } => { fc2 => relu } => fc3 => MSELoss
=> loss
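To inspect a few steps of this graph, you can follow the grad_fn attribute backwards from loss (a short sketch; the exact names printed can vary between PyTorch versions):

print(loss.grad_fn)                                             # MSELoss
print(loss.grad_fn.next_functions[0][0])                        # Linear (fc3)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])   # ReLU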
3. Backpropagation
To backpropagate the error, call loss.backward(). You need to clear the existing gradients first, otherwise the new gradients will be accumulated into them.
net.zero_grad()  # zero the gradient buffers
print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)  # gradient of conv1's bias term
loss.backward()
print("conv1.bias.grad after backward")
print(net.conv1.bias.grad)
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0028, 0.0242, -0.0007, -0.0056, -0.0173, -0.0310])
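The reason net.zero_grad() is needed is that PyTorch accumulates gradients across backward calls instead of overwriting them. A standalone sketch of that behaviour (toy tensor, unrelated to the network above):

x = torch.ones(3, requires_grad=True)
(3 * x).sum().backward()
print(x.grad)     # tensor([3., 3., 3.])
(3 * x).sum().backward()
print(x.grad)     # tensor([6., 6., 6.]) -- the gradients were accumulated
x.grad.zero_()    # clear them before the next backward pass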
4. Update the weights
The simplest update rule is stochastic gradient descent (SGD):
weight = weight - learning_rate * gradient
The torch.optim package in PyTorch implements many different update rules, such as SGD and Adam.
import torch.optim as optim
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
# in your training loop:
optimizer.zero_grad()  # zero the gradient buffers
print("conv1's bias =", net.conv1.bias)
print("loss =", loss, "\n")
output = net(input)
loss = loss_func(output, target)
loss.backward()
optimizer.step()  # does the update
print("conv1's bias =", net.conv1.bias)
print("loss =", loss)
conv1's bias = Parameter containing:
tensor([ 0.1526, 0.1589, -0.0955, 0.1759, 0.0411, 0.1898],
requires_grad=True)
loss = tensor(1.1480, grad_fn=<MseLossBackward>)
conv1's bias = Parameter containing:
tensor([ 0.1528, 0.1592, -0.0954, 0.1761, 0.0417, 0.1903],
requires_grad=True)
loss = tensor(1.1115, grad_fn=<MseLossBackward>)
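Putting the four steps together, a typical training loop looks roughly like the sketch below; data_loader and num_epochs are placeholders for whatever provides your (input, target) batches and epoch count:

optimizer = optim.SGD(net.parameters(), lr=0.01)
for epoch in range(num_epochs):            # num_epochs: hypothetical epoch count
    for input, target in data_loader:      # data_loader: hypothetical batch iterator
        optimizer.zero_grad()              # clear old gradients
        output = net(input)                # forward pass
        loss = loss_func(output, target)   # compute the loss
        loss.backward()                    # backpropagate
        optimizer.step()                   # update the weights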