1.卷积运算行为
2.卷积的超参数之一:stride(步幅)、padding(填充,通常以0填充)
3.基于1和2的原理手动实现最普通的卷积行为
def corr2d(X, K, stride=1, padding=0):
    """Single-channel 2D cross-correlation (the core op of a conv layer).

    :param X: input tensor of shape (H, W)
    :param K: kernel tensor of shape (k_h, k_w)
    :param stride: step between successive kernel placements
    :param padding: number of zero rows/columns added on every side of X
    :return: tensor of shape (Y_H, Y_W) where
             Y_H = (H - k_h + 2*padding) // stride + 1, and analogously Y_W
    """
    H, W = X.shape
    k_h, k_w = K.shape
    # Standard conv output-size formula; valid for odd and even H/W alike.
    Y_H = (H - k_h + 2 * padding) // stride + 1
    Y_W = (W - k_w + 2 * padding) // stride + 1
    if padding > 0:
        # Embed X inside a zero border of width `padding`.
        padded = torch.zeros((H + 2 * padding, W + 2 * padding))
        padded[padding:-padding, padding:-padding] = X
    else:
        padded = X
    # BUG FIX: the original trimmed the last row/column of the padded tensor
    # whenever a padded side was even. That diverges from nn.Conv2d and made
    # e.g. H=4, k=3, stride=1, padding=1 slice past the end of the tensor
    # (window [3:6] on a 5-row tensor). The full padded tensor is correct.
    Y = torch.zeros(Y_H, Y_W)
    for i in range(Y_H):
        for j in range(Y_W):
            row = stride * i
            col = stride * j
            Y[i, j] = (padded[row:row + k_h, col:col + k_w] * K).sum()
    return Y
需要注意如下情况。
0.在被卷积的张量X的分辨率H×W中,H或W为奇数时,以H为例,卷积之后的分辨率遵循如下计算公式:$H' = \lfloor (H - K + 2P)/S \rfloor + 1$,其中K为卷积核边长,S为stride,P为padding。
1.H或W为偶数时,以H为例,卷积之后的分辨率遵循同一公式:$H' = \lfloor (H - K + 2P)/S \rfloor + 1$,其中$\lfloor\cdot\rfloor$(floor)表示向下取整。
2.padding为P时其实表示原来被卷积的张量X,所属的H和W,变为了H+2P和W+2P
3.举例:H=W=5,S=1,P=0 --> H'=W'=3;H=W=5,S=2,P=0 --> H'=W'=2;
H=W=5,S=1,P=1 --> H'=W'=5;H=W=5,S=2,P=1 --> H'=W'=3;
H=W=4,S=1,P=0 --> H'=W'=2;H=W=4,S=2,P=0 --> H'=W'=1;
H=W=4,S=1,P=1 --> H'=W'=4;H=W=4,S=2,P=1 --> H'=W'=2;
上述指的是K=3的情况
以上各组示例在上面代码中已经相应给出,可自行debug理解
4.多输入通道
应注意卷积核的通道数与输入通道数一致
代码手动实现如下。
def corr2d_muti_in(X, K, stride=1, padding=0):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    Each input channel is correlated with its matching kernel channel and
    the per-channel maps are summed into a single output map.

    :param X: input tensor of shape (C_in, H, W)
    :param K: kernel tensor of shape (C_in, k_h, k_w)
    :return: tensor of shape (Y_H, Y_W)
    """
    total = corr2d(X[0], K[0], stride, padding)
    for channel, kernel in zip(X[1:], K[1:]):
        total = total + corr2d(channel, kernel, stride, padding)
    return total
5.多输出通道
代码实现如下段所示。
def corr2d_muti_in_out(X, K, stride=1, padding=0):
    """Apply a bank of multi-channel kernels to X, one output map per kernel.

    :param X: input tensor of shape (C_in, H, W)
    :param K: kernel tensor of shape (C_out, C_in, k_h, k_w)
    :return: tensor of shape (C_out, Y_H, Y_W)
    """
    feature_maps = []
    for kernel in K:
        feature_maps.append(corr2d_muti_in(X, kernel, stride, padding))
    return torch.stack(feature_maps)
6.Convd2D的部分功能等价手动实现
直接是代码哦。
class MyConv2D(nn.Module):
    """Hand-rolled partial equivalent of nn.Conv2d.

    Operates on a single unbatched input of shape (C_in, H, W); supports
    stride and zero padding but no dilation, groups, or batching.
    Parameter count: C_out * C_in * k_h * k_w weights + C_out biases.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2D, self).__init__()
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        elif not isinstance(kernel_size, tuple):
            # BUG FIX: previously an unsupported kernel_size fell through and
            # left `size` undefined, raising a confusing NameError below.
            raise TypeError("kernel_size must be an int or a tuple, got %r" % (kernel_size,))
        size = (out_channels, in_channels, kernel_size[0], kernel_size[1])
        self.weight = nn.Parameter(torch.randn(size))
        # One bias per output channel, shaped to broadcast over (C_out, Y_H, Y_W).
        self.bias = nn.Parameter(torch.randn(out_channels, 1, 1))
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        """
        :param x: input tensor of shape (C_in, H, W)
        :return: tensor of shape (C_out, Y_H, Y_W)
        """
        return corr2d_muti_in_out(x, self.weight, self.stride, self.padding) + self.bias
可以自行debug尝试。由此可以看出,一个卷积层的参数数量为 $C_{out}\times C_{in}\times k_h\times k_w + C_{out}$(权重加偏置)。即一个卷积层为MyConv2D(3, 16, 3, 1, 0)或者为MyConv2D(3, 16, (3, 3), 1, 0)时(in_channels=3,out_channels=16),这个卷积层的参数为:16*3*3*3+16=448。
7.总体代码
# -*- coding: utf-8 -*-
"""
@Time : 2022/8/13 9:00
@Auth : Fanteng Meng
@File :conv_manual.py
@IDE :PyCharm
@Github : https://github.com/FT115
"""
import torch
from torch import nn
import math
def corr2d(X, K, stride=1, padding=0):
    """Single-channel 2D cross-correlation (the core op of a conv layer).

    :param X: input tensor of shape (H, W)
    :param K: kernel tensor of shape (k_h, k_w)
    :param stride: step between successive kernel placements
    :param padding: number of zero rows/columns added on every side of X
    :return: tensor of shape (Y_H, Y_W) where
             Y_H = (H - k_h + 2*padding) // stride + 1, and analogously Y_W
    """
    H, W = X.shape
    k_h, k_w = K.shape
    # Standard conv output-size formula; valid for odd and even H/W alike.
    Y_H = (H - k_h + 2 * padding) // stride + 1
    Y_W = (W - k_w + 2 * padding) // stride + 1
    if padding > 0:
        # Embed X inside a zero border of width `padding`.
        padded = torch.zeros((H + 2 * padding, W + 2 * padding))
        padded[padding:-padding, padding:-padding] = X
    else:
        padded = X
    # BUG FIX: the original trimmed the last row/column of the padded tensor
    # whenever a padded side was even. That diverges from nn.Conv2d and made
    # e.g. H=4, k=3, stride=1, padding=1 slice past the end of the tensor
    # (window [3:6] on a 5-row tensor). The full padded tensor is correct.
    Y = torch.zeros(Y_H, Y_W)
    for i in range(Y_H):
        for j in range(Y_W):
            row = stride * i
            col = stride * j
            Y[i, j] = (padded[row:row + k_h, col:col + k_w] * K).sum()
    return Y
def corr2d_muti_in(X, K, stride=1, padding=0):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    Each input channel is correlated with its matching kernel channel and
    the per-channel maps are summed into a single output map.

    :param X: input tensor of shape (C_in, H, W)
    :param K: kernel tensor of shape (C_in, k_h, k_w)
    :return: tensor of shape (Y_H, Y_W)
    """
    total = corr2d(X[0], K[0], stride, padding)
    for channel, kernel in zip(X[1:], K[1:]):
        total = total + corr2d(channel, kernel, stride, padding)
    return total
def corr2d_muti_in_out(X, K, stride=1, padding=0):
    """Apply a bank of multi-channel kernels to X, one output map per kernel.

    :param X: input tensor of shape (C_in, H, W)
    :param K: kernel tensor of shape (C_out, C_in, k_h, k_w)
    :return: tensor of shape (C_out, Y_H, Y_W)
    """
    feature_maps = []
    for kernel in K:
        feature_maps.append(corr2d_muti_in(X, kernel, stride, padding))
    return torch.stack(feature_maps)
class MyConv2D(nn.Module):
    """Hand-rolled partial equivalent of nn.Conv2d.

    Operates on a single unbatched input of shape (C_in, H, W); supports
    stride and zero padding but no dilation, groups, or batching.
    Parameter count: C_out * C_in * k_h * k_w weights + C_out biases.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2D, self).__init__()
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        elif not isinstance(kernel_size, tuple):
            # BUG FIX: previously an unsupported kernel_size fell through and
            # left `size` undefined, raising a confusing NameError below.
            raise TypeError("kernel_size must be an int or a tuple, got %r" % (kernel_size,))
        size = (out_channels, in_channels, kernel_size[0], kernel_size[1])
        self.weight = nn.Parameter(torch.randn(size))
        # One bias per output channel, shaped to broadcast over (C_out, Y_H, Y_W).
        self.bias = nn.Parameter(torch.randn(out_channels, 1, 1))
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        """
        :param x: input tensor of shape (C_in, H, W)
        :return: tensor of shape (C_out, Y_H, Y_W)
        """
        return corr2d_muti_in_out(x, self.weight, self.stride, self.padding) + self.bias
记录自己学习到的知识的同时,希望对你有所帮助!