1. Convolution operator: torch.nn.Conv2d
(1) Principle:
Assume an input image input of size 4x4 and a convolution kernel kernel of size 3x3.
With stride strides = 1 and padding padding = 0, i.e. i = 4, k = 3, s = 1, p = 0, the convolution output-size formula o = floor((i + 2p - k) / s) + 1 = floor((4 + 0 - 3) / 1) + 1 = 2 gives an output image output of size 2x2.
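This can be checked numerically; a minimal sketch with an arbitrary 4x4 input and 3x3 kernel:
import torch
import torch.nn.functional as F

x = torch.randn(1, 1, 4, 4)   # arbitrary 4x4 input (batch=1, channel=1)
w = torch.randn(1, 1, 3, 3)   # arbitrary 3x3 kernel
out = F.conv2d(x, w, stride=1, padding=0)
print(out.shape)  # torch.Size([1, 1, 2, 2]): floor((4 + 0 - 3)/1) + 1 = 2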
(2) Code
import torch
import numpy as np

# conv: 3 input channels, 3 output channels, 2x2 kernel, stride 2, padding 1
def conv():
    randed_data = np.random.randn(1, 3, 8, 8)                  # double
    inputs = torch.from_numpy(randed_data.astype(np.float32))  # float
    outputs = torch.nn.Conv2d(3, 3, 2, stride=2, padding=1)(inputs)
    print("inputs:", inputs)
    print("outputs:", outputs.shape)
    print("outputs:", outputs)

if __name__ == "__main__":
    conv()
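With i = 8, k = 2, s = 2, p = 1 the output size is o = floor((8 + 2 - 2) / 2) + 1 = 5, so the printed shape is torch.Size([1, 3, 5, 5]).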
(3) Source-level analysis
When the weight parameters are initialized, Kaiming uniform sampling is used to assign their values.
def reset_parameters(self) -> None:
    # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
    # uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size)
    # For more details see: https://github.com/pytorch/pytorch/issues/15314#issuecomment-477448573
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
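As the comment states, this is equivalent to sampling from uniform(-1/sqrt(fan_in), 1/sqrt(fan_in)); a quick sanity-check sketch:
import math
import torch

conv = torch.nn.Conv2d(3, 6, kernel_size=3)
fan_in = 3 * 3 * 3  # in_channels * kernel_height * kernel_width
bound = 1 / math.sqrt(fan_in)
print(conv.weight.abs().max().item() <= bound)  # True
print(conv.bias.abs().max().item() <= bound)    # True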
2. Transposed convolution operator: torch.nn.ConvTranspose2d
(1) Principle
Flatten the input's element matrix into a column vector X, and flatten the output image output's element matrix into a column vector Y. For the input vector X and output vector Y, the convolution can then be described as a matrix operation: Y = C X, where C is a sparse matrix whose nonzero entries are the kernel weights (for the 4x4 input and 3x3 kernel above, C has size 4x16). The deconvolution operation is the reverse of this matrix operation: obtaining X from C and Y, which by the matrix sizes amounts to X' = C^T Y, as the sketch below shows.
Note that deconvolution only restores the size of the matrix X; it cannot recover the individual element values of X.
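A minimal sketch of this derivation (C is built densely here for clarity; all names are illustrative):
import torch
import torch.nn.functional as F

i, k = 4, 3
o = (i - k) // 1 + 1                 # output size: 2
w = torch.randn(k, k)                # example 3x3 kernel
C = torch.zeros(o * o, i * i)        # the 4x16 matrix C
for r in range(o):
    for c in range(o):
        for u in range(k):
            for v in range(k):
                C[r * o + c, (r + u) * i + (c + v)] = w[u, v]

x = torch.randn(i * i)               # input flattened to a column vector X
y = C @ x                            # convolution as a matrix product: Y = C X
print(torch.allclose(y, F.conv2d(x.view(1, 1, i, i), w.view(1, 1, k, k)).flatten()))  # True
x_rec = C.t() @ y                    # "deconvolution": X' = C^T Y
print(x_rec.shape)                   # torch.Size([16]): the size of X is restored
print(torch.allclose(x_rec, x))      # False: the element values are not recovered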
(2) Code
import torch
import numpy as np

# conv transpose: 3 input channels, 3 output channels, 2x2 kernel, stride 2, padding 1
def deconvolution():
    randed_data = np.random.randn(1, 3, 8, 8)                  # double
    inputs = torch.from_numpy(randed_data.astype(np.float32))  # float
    outputs = torch.nn.ConvTranspose2d(3, 3, 2, stride=2, padding=1)(inputs)
    print("inputs:", inputs)
    print("outputs:", outputs.shape)
    print("outputs:", outputs)

if __name__ == "__main__":
    deconvolution()
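For a transposed convolution the output size is o = (i - 1)s - 2p + k = (8 - 1)·2 - 2 + 2 = 14, so the printed shape is torch.Size([1, 3, 14, 14]).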
(3) Source-level analysis
The weight parameters are again initialized with Kaiming uniform sampling: ConvTranspose2d and Conv2d both inherit from _ConvNd and therefore share the same initialization procedure.
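One practical difference worth noting is the weight layout; a small sketch:
import torch

conv = torch.nn.Conv2d(3, 6, 3)
deconv = torch.nn.ConvTranspose2d(3, 6, 3)
print(conv.weight.shape)    # torch.Size([6, 3, 3, 3]): (out_channels, in_channels, kH, kW)
print(deconv.weight.shape)  # torch.Size([3, 6, 3, 3]): (in_channels, out_channels, kH, kW)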
3. Max pooling: torch.nn.MaxPool2d
Output-shape computation for the max-pooling layer: H_out = floor((H_in + 2·padding - dilation·(kernel_size - 1) - 1) / stride + 1), and likewise for W_out.
class _MaxPoolNd(Module):
    __constants__ = ['kernel_size', 'stride', 'padding', 'dilation',
                     'return_indices', 'ceil_mode']
    return_indices: bool
    ceil_mode: bool

    def __init__(self, kernel_size: _size_any_t, stride: Optional[_size_any_t] = None,
                 padding: _size_any_t = 0, dilation: _size_any_t = 1,
                 return_indices: bool = False, ceil_mode: bool = False) -> None:
        super(_MaxPoolNd, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride if (stride is not None) else kernel_size
        self.padding = padding
        self.dilation = dilation
        self.return_indices = return_indices
        self.ceil_mode = ceil_mode

    def extra_repr(self) -> str:
        return 'kernel_size={kernel_size}, stride={stride}, padding={padding}' \
            ', dilation={dilation}, ceil_mode={ceil_mode}'.format(**self.__dict__)

class MaxPool2d(_MaxPoolNd):
    kernel_size: _size_2_t
    stride: _size_2_t
    padding: _size_2_t
    dilation: _size_2_t

    def forward(self, input: Tensor) -> Tensor:
        return F.max_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)
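A quick usage sketch; note in __init__ above that stride defaults to kernel_size when not given:
import torch

m = torch.nn.MaxPool2d(kernel_size=2)  # stride defaults to kernel_size, i.e. 2
x = torch.randn(1, 3, 8, 8)
print(m(x).shape)  # torch.Size([1, 3, 4, 4]): floor((8 + 0 - 1·(2 - 1) - 1)/2 + 1) = 4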
4. Fully connected layer: torch.nn.Linear
import torch
import numpy as np

def linear():
    randed_data = np.random.randn(128, 20)
    inputs = torch.from_numpy(randed_data.astype(np.float32))
    m = torch.nn.Linear(20, 30)
    outputs = m(inputs)
    print("m.weight.shape:", m.weight.shape)  # torch.Size([30, 20])
    print("m.bias.shape:", m.bias.shape)      # torch.Size([30])
    print("outputs.shape:", outputs.shape)    # torch.Size([128, 30])
    # equivalent to
    ans = torch.mm(inputs, m.weight.t()) + m.bias
    print("ans.shape:", ans.shape)            # torch.Size([128, 30])
    print(torch.equal(ans, outputs))          # True

if __name__ == "__main__":
    linear()
The linear transformation formula is y = x A^T + b: a weight of shape (30, 20) is created first and transposed during the actual computation, so it can be matrix-multiplied with x.
Source-level analysis:
class Linear(Module):
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
        # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
        # https://github.com/pytorch/pytorch/issues/57109
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self) -> str:
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )
5. torch.nn.BatchNorm2d
Each feature channel of the input batch is standardized using the following formula:
y = (x - E[x]) / sqrt(Var[x] + ε) · γ + β
where y is the output, x the input, E[x] the mean of the input, Var[x] the variance of x, ε defaults to 1e-5, γ is initialized to 1, and β to 0.
torch.nn.BatchNorm2d normalizes each feature channel, so it computes the mean and variance of each channel over all samples. Taking the first channel as an example:
import torch
import numpy as np

def batchNormTest():
    data = [
        [[[141,138,143,144,145],[138,139,143,143,145],[135,137,138,137,146],[133,137,130,135,143]],
         [[138,135,139,140,144],[135,136,139,139,144],[132,134,135,134,143],[130,134,127,132,140]],
         [[109,106,113,114,141],[106,107,113,113,138],[111,113,116,115,135],[109,113,108,113,133]]],
        [[[141,138,140,138,139],[141,140,141,142,141],[140,141,141,144,142],[141,143,143,144,144]],
         [[138,135,137,134,135],[137,135,136,138,136],[137,136,136,139,137],[138,138,138,139,139]],
         [[115,112,114,111,112],[116,115,116,117,115],[116,116,116,119,117],[117,118,118,119,119]]]
    ]
    inputs = torch.from_numpy(np.array(data).astype(np.float32))  # shape (2, 3, 4, 5)
    bn = torch.nn.BatchNorm2d(3)
    out = bn(inputs)
    print(out)
    print(out[:, 0, :, :])
    # Recompute channel 0 by hand: mean/variance over all samples of that channel.
    # BatchNorm uses the biased variance for normalization, hence unbiased=False.
    mean_channel1 = torch.mean(inputs[:, 0, :, :])
    var_channel1 = torch.var(inputs[:, 0, :, :], unbiased=False)
    normed_result_channel1 = (inputs[:, 0, :, :] - mean_channel1) / ((var_channel1 + 1e-5) ** 0.5)
    print(normed_result_channel1)

if __name__ == "__main__":
    batchNormTest()
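Since γ = 1 and β = 0 at initialization, the manually computed normed_result_channel1 matches the out[:, 0, :, :] slice printed by BatchNorm2d.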