1 2D Convolutional Layers
1.1 The 2D Cross-Correlation Operation
import torch
from torch import nn
def corr2d(X, K):
    # Compute the 2D cross-correlation of input X with kernel K
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i+h, j:j+w] * K).sum()
    return Y
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = torch.tensor([[0, 1], [2, 3]])
Y = corr2d(X, K)
print(Y)
tensor([[19., 25.],
        [37., 43.]])
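The top-left output element is computed as 0×0 + 1×1 + 3×2 + 4×3 = 19; the kernel window then slides right and down across the input to fill in the rest of Y.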
1.2 2D Convolutional Layers
# The parameters of a convolutional layer are the kernel and a scalar bias.
# When training the model, we usually first initialize the kernel randomly,
# then iteratively update the kernel and the bias.
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias
1.3 Edge Detection in Images
X = torch.ones(6, 8)
X[:, 2:6] = 0
X
tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])
K = torch.tensor([[1, -1]])
Y = corr2d(X, K)
Cross-correlating X with this kernel detects the vertical edges: Y is 1 where the input changes from 1 to 0 horizontally, -1 where it changes from 0 to 1, and 0 everywhere else.
1.4 Learning the Kernel from Data
# Construct a 2D convolutional layer whose kernel array has shape (1, 2)
conv2d = Conv2D(kernel_size=(1, 2))

step = 20
lr = 0.01
for i in range(step):
    Y_hat = conv2d(X)
    l = ((Y_hat - Y) ** 2).sum()
    l.backward()

    # Gradient descent on the kernel and the bias
    conv2d.weight.data -= lr * conv2d.weight.grad
    conv2d.bias.data -= lr * conv2d.bias.grad

    # Zero the gradients
    conv2d.weight.grad.fill_(0)
    conv2d.bias.grad.fill_(0)

    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i + 1, l.item()))
Step 5, loss 5.737
Step 10, loss 1.403
Step 15, loss 0.369
Step 20, loss 0.100
print("weight", conv2d.weight.data)
print("bias", conv2d.bias.data)
weight tensor([[ 0.9259, -0.9145]])
bias tensor([-0.0064])
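The learned kernel is close to the [1, -1] kernel that generated Y, and the bias is close to 0.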
1.5 Convolution vs. Cross-Correlation
To obtain the output of a convolution, we only need to flip the kernel array left-right and up-down and then cross-correlate it with the input array. So although convolution and cross-correlation are similar operations, given the same kernel array and the same input they generally produce different outputs.

Why can a convolutional layer use cross-correlation in place of convolution? Because in deep learning the kernel array is learned from data: whichever of the two operations the layer uses, the model's predictions are unaffected, since the kernels learned under one operation are simply the flipped versions of those learned under the other.
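As a quick sanity check, here is a minimal sketch (reusing corr2d from section 1.1; torch.flip reverses a tensor along the given dimensions) showing that cross-correlation with the doubly flipped kernel equals convolution with the original kernel:

# Flip the kernel up-down and left-right, then cross-correlate:
# this yields the true convolution of Xc with Kc
Kc = torch.tensor([[0., 1.], [2., 3.]])
Kc_flipped = torch.flip(Kc, dims=[0, 1])
Xc = torch.tensor([[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]])
print(corr2d(Xc, Kc_flipped))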
1.6 Feature Maps and Receptive Fields
The 2D array output by a 2D convolutional layer can be regarded as a representation of the input at one level of the spatial dimensions (width and height), and is also called a feature map.
All the possible input regions that affect the forward computation of an element x (which may be larger than the input's actual size) are called the receptive field of x.
We can use a deeper convolutional neural network to widen the receptive field of a single feature-map element, letting it capture larger-scale features of the input, as the sketch below illustrates.
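For intuition, a small sketch (assuming stride-1 convolutions with no padding, matching corr2d above) of how the receptive field of one output element grows as 3×3 convolutional layers are stacked:

# Each extra stride-1 k x k convolutional layer widens the receptive field by k - 1
k = 3
rf = 1
for layer in range(1, 4):
    rf += k - 1
    print('after layer %d: receptive field %d x %d' % (layer, rf, rf))
# after layer 1: 3 x 3; after layer 2: 5 x 5; after layer 3: 7 x 7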
2 Padding and Stride
2.1 Padding
# Define a function to compute a convolutional layer. It adds and removes
# the batch and channel dimensions on the input and output as needed.
def comp_conv2D(conv2D, X):
    # (1, 1) means batch size and number of channels (introduced in the
    # "multiple input and output channels" section) are both 1
    X = X.view((1, 1) + X.shape)
    Y = conv2D(X)
    return Y.view(Y.shape[2:])
conv2D = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
comp_conv2D(conv2D, X).shape
torch.Size([8, 8])
# When the kernel's height and width differ, we can use different padding
# amounts along the height and width to keep the output the same size as the input.
conv2D = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2D(conv2D, X).shape
torch.Size([8, 8])
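In general (this is standard convolution arithmetic, not specific to this code), an n_h × n_w input with a k_h × k_w kernel and nn.Conv2d-style padding of p_h and p_w on each side gives, at stride 1, an output of shape (n_h - k_h + 2p_h + 1) × (n_w - k_w + 2p_w + 1). A quick sketch verifying this:

# Output size at stride 1: n - k + 2p + 1 along each spatial dimension
n, k, p = 8, 5, 2
conv = nn.Conv2d(1, 1, kernel_size=k, padding=p)
assert comp_conv2D(conv, torch.rand(n, n)).shape == (n - k + 2 * p + 1,) * 2  # 8 x 8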
2.2 Stride
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2D(conv2d, X)
tensor([[-0.3517, -0.3616, -0.3252, -0.4718],
        [-0.4032, -0.1917, -0.5097, -0.5917],
        [-0.5383, -0.6397, -0.6420, -0.5808],
        [-0.4204, -0.6860, -0.5132, -0.5171]], grad_fn=<ViewBackward>)
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2D(conv2d, X)
tensor([[ 0.0853,  0.2237],
        [-0.0703,  0.2072]], grad_fn=<ViewBackward>)
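With a stride of s, the output size along each dimension becomes ⌊(n + 2p - k) / s⌋ + 1 (again, standard convolution arithmetic). A minimal sketch checking the two examples above:

# Output size with stride: floor((n + 2p - k) / s) + 1 per spatial dimension
def conv_out_size(n, k, p, s):
    return (n + 2 * p - k) // s + 1

print(conv_out_size(8, 3, 1, 2))                             # 4 (first example)
print(conv_out_size(8, 3, 0, 3), conv_out_size(8, 5, 1, 4))  # 2 2 (second example)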
3 Multiple Input and Output Channels
3.1 Multiple Input Channels
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l  # not used below; we reuse corr2d from section 1.1

def corr2d_multi_in(X, K):
    # Cross-correlate each input channel with its own kernel,
    # then sum the per-channel results
    res = corr2d(X[0, :, :], K[0, :, :])
    for i in range(1, X.shape[0]):
        res += corr2d(X[i, :, :], K[i, :, :])
    return res
X = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                  [[0, 1, 2], [3, 4, 5], [6, 7, 8]]])
K = torch.tensor([[[1, 2], [3, 4]], [[0, 1], [2, 3]]])
corr2d_multi_in(X, K)
tensor([[ 56.,  72.],
        [104., 120.]])
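The top-left output element sums the per-channel cross-correlations: (1×1 + 2×2 + 4×3 + 5×4) + (0×0 + 1×1 + 3×2 + 4×3) = 37 + 19 = 56.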
3.2 Multiple Output Channels
def corr2d_multi_in_out(X, K):
    # Iterate over the 0th dimension of K, computing the multi-input
    # cross-correlation with X each time, then stack the results together
    return torch.stack([corr2d_multi_in(X, k) for k in K])

K = torch.stack([K, K + 1, K + 2])
print(K.shape)
torch.Size([3, 2, 2, 2])
corr2d_multi_in_out(X, K)
tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])
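Each output channel is the multi-input cross-correlation of X with one of the kernels K, K + 1, and K + 2; the first channel reproduces the single-output result above.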
3.3 1×1 Convolutional Layers
1×1 convolutional layers are typically used to adjust the number of channels between network layers and to control model complexity. The computation is equivalent to a fully connected layer applied independently at every pixel position, with the channels as input and output features:
def corr2d_multi_in_out_1x1(X, K):
    ci, h, w = X.shape
    co = K.shape[0]
    X = X.view(ci, h * w)   # flatten spatial dims: each pixel is one column
    K = K.view(co, ci)      # 1x1 kernels reduce to a co x ci matrix
    Y = torch.mm(K, X)      # matrix multiplication over the channel dimension
    return Y.view(co, h, w)
X = torch.rand(3, 3, 3)
K = torch.rand(2, 3, 1, 1)
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
(Y1 - Y2).norm().item() < 1e-6
True
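In practice this is done with nn.Conv2d and kernel_size=1 (the channel counts below are just illustrative). A minimal sketch reducing 64 channels to 16 while leaving the spatial dimensions untouched:

# A 1x1 convolution mixes channels at each pixel without looking at neighbors
reduce_channels = nn.Conv2d(in_channels=64, out_channels=16, kernel_size=1)
x = torch.rand(1, 64, 28, 28)
print(reduce_channels(x).shape)  # torch.Size([1, 16, 28, 28])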
4 Pooling
4.1 2D Max Pooling and Average Pooling
Pooling layers are used to alleviate the excessive sensitivity of convolutional layers to position.
def pool2d(X, pool_size, mode='max'):
    X = X.float()
    ph, pw = pool_size
    Y = torch.zeros(X.shape[0] - ph + 1, X.shape[1] - pw + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i:i+ph, j:j+pw].max()
            elif mode == 'avg':
                Y[i, j] = X[i:i+ph, j:j+pw].mean()
    return Y
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
pool2d(X, (2, 2), mode='max')
tensor([[4., 5.],
        [7., 8.]])
pool2d(X, (2, 2), mode='avg')
tensor([[2., 3.],
        [5., 6.]])
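Each output element is the maximum (or mean) of its 2×2 window: for example 4 = max(0, 1, 3, 4) and 2 = mean(0, 1, 3, 4). To connect this to the motivation above, a small sketch reusing corr2d and the edge-detection setup from section 1.3: after 2×2 max pooling, every window that contains the edge reports it, so a one-pixel shift of the pattern no longer changes those pooled outputs.

# Max pooling makes the section 1.3 edge detector tolerant of one-pixel shifts:
# a pooling window containing the edge outputs 1 whether the edge falls in
# its left or its right column
Xe = torch.ones(6, 8)
Xe[:, 2:6] = 0
edges = corr2d(Xe, torch.tensor([[1., -1.]]))  # 1 / -1 at the edges, 0 elsewhere
print(pool2d(edges.abs(), (2, 2), mode='max'))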
4.2 Padding and Stride
X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))
X
tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])
pool2d = nn.MaxPool2d(3)
pool2d(X)
tensor([[[[10.]]]])
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)
tensor([[[[ 5.,  7.],
          [13., 15.]]]])
pool2d = nn.MaxPool2d((2, 4), padding=(1, 2), stride=(2, 3))
pool2d(X)
tensor([[[[ 1.,  3.],
          [ 9., 11.],
          [13., 15.]]]])
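Pooling output sizes follow the same arithmetic as convolution, ⌊(n + 2p - k) / s⌋ + 1 per dimension. Note that nn.MaxPool2d uses a stride equal to the kernel size by default, which is why nn.MaxPool2d(3) reduced the 4×4 input to 1×1 above.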
4.3 Multiple Channels
When the input has multiple channels, the pooling layer pools each channel separately, rather than summing over channels the way a convolutional layer does, so the number of output channels equals the number of input channels.
# Stack a second example along dim 0; MaxPool2d pools every 2D plane
# independently, so batch and channel dimensions pass through unchanged
X = torch.cat((X, X + 1), dim=0)
X
tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]],

        [[[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])
X.shape
torch.Size([2, 1, 4, 4])
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)
tensor([[[[ 5.,  7.],
          [13., 15.]]],

        [[[ 6.,  8.],
          [14., 16.]]]])
pool2d(X).shape
torch.Size([2, 1, 2, 2])