一、Pytorch版本
- 定义有如下参数:
- 输入:batch_size = 1, input_channel = 3, input_h = 10, input_w = 10
- 卷积层定义:input_channel = 3,output_channel = 5, kernel_size = 3x3,stride=2, dilation=2
- 代码如下:
import torch
import torch.nn as nn
# Random input laid out as (batch_size, input_channel, input_h, input_w).
x_torch = torch.randn((1, 3, 10, 10))
# m.weight.shape is (5, 3, 3, 3): output_channel, input_channel, kernel_h, kernel_w
m = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, stride=2, dilation=2)
# Forward pass through the reference PyTorch layer.
y_torch = m(x_torch)
print('torch output shape: ', y_torch.shape)
print('torch output: ', y_torch)
二、numpy版本
import numpy as np
class Conv2d:
    """Forward-only 2-D convolution implemented with NumPy.

    Parameters are stored on the instance so they can be overwritten after
    construction (e.g. with weights copied from another framework):

    * ``weight`` has shape
      ``(output_channel, input_channel, kernel_h, kernel_w)``.
    * ``bias`` has shape ``(output_channel,)`` or is ``None``.

    ``kernel_size``, ``stride``, ``padding`` and ``dilation`` may each be a
    single int (applied to both spatial axes) or an ``(h, w)`` pair.
    """

    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=True, dilation=1):
        """Store the hyper-parameters and draw random weight/bias values.

        Args:
            input_channel: Number of channels of the input.
            output_channel: Number of channels produced by the convolution.
            kernel_size: Int or ``(h, w)`` pair, size of the kernel.
            stride: Int or ``(h, w)`` pair, step between two windows.
            padding: Int or ``(h, w)`` pair, zero padding added on both
                sides of each spatial axis.
            bias: When true, a random bias vector is created.
            dilation: Int or ``(h, w)`` pair, spacing between kernel taps.
        """
        self.input_channel = input_channel
        self.output_channel = output_channel
        # stride/padding are stored exactly as given; infer() expands them.
        self.stride = stride
        self.padding = padding
        self.dilation = self._pair(dilation)
        self.kernel_size = self._pair(kernel_size)
        self.weight = np.random.randn(output_channel, input_channel, self.kernel_size[0], self.kernel_size[1])
        self.bias = None
        if bias:
            self.bias = np.random.randn(output_channel)

    @staticmethod
    def _pair(value):
        """Return ``value`` as an ``(h, w)`` pair; a single int is duplicated."""
        return (value, value) if isinstance(value, int) else value

    def __call__(self, inputs):
        """Apply the convolution; shorthand for :meth:`infer`."""
        return self.infer(inputs)

    def infer(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Array of shape ``(batch_size, input_channel, height, width)``.

        Returns:
            Array of shape ``(batch_size, output_channel, output_h, output_w)``.
        """
        batch_size, input_channel, height, width = inputs.shape
        stride_h, stride_w = self._pair(self.stride)
        pad_h, pad_w = self._pair(self.padding)
        dilation_h, dilation_w = self.dilation
        kernel_h, kernel_w = self.kernel_size

        # Spatial extent covered by the kernel once the dilation gaps are included.
        dilation_shape = dilation_h * (kernel_h - 1) + 1, dilation_w * (kernel_w - 1) + 1

        # Output shape derived from the layer parameters.
        output_h = (height + 2 * pad_h - dilation_shape[0]) // stride_h + 1
        output_w = (width + 2 * pad_w - dilation_shape[1]) // stride_w + 1
        outputs = np.zeros([batch_size, self.output_channel, output_h, output_w])

        # Zero-padded copy of the input.
        inputs_padding = np.zeros([batch_size, input_channel, height + 2 * pad_h, width + 2 * pad_w])
        inputs_padding[:, :, pad_h:pad_h + height, pad_w:pad_w + width] = inputs

        # Scatter the weights into a zero-filled kernel of the dilated size
        # (self.weight itself is left untouched). Doing this unconditionally
        # handles every axis correctly, including mixed dilations such as
        # (1, 2); with dilation 1 it is simply a copy of the weights.
        kernel = np.zeros((self.output_channel, input_channel, dilation_shape[0], dilation_shape[1]))
        kernel[:, :, ::dilation_h, ::dilation_w] = self.weight

        # kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
        # It does not depend on the window position, so build it once.
        kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)

        # Slide the window over the padded input.
        for h in range(output_h):
            top = h * stride_h
            for w in range(output_w):
                left = w * stride_w
                input_ = inputs_padding[:, :, top:top + dilation_shape[0], left:left + dilation_shape[1]]
                # input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
                # Reduce over input_channel and both kernel axes
                # -> shape (batch_size, output_channel).
                outputs[:, :, h, w] = np.sum(input_ * kernel_, axis=(-1, -2, -3))

        if self.bias is not None:
            # One bias value per output channel, broadcast over batch and space.
            outputs += self.bias.reshape(1, -1, 1, 1)
        return outputs
三、验证结果
为了验证结果是否正确,把pytorch版本的Conv2d的参数传给numpy版本的Conv2d,完整代码如下:
import torch
import torch.nn as nn
import numpy as np
class Conv2d:
    """Forward-only 2-D convolution implemented with NumPy.

    Parameters are stored on the instance so they can be overwritten after
    construction (e.g. with weights copied from another framework):

    * ``weight`` has shape
      ``(output_channel, input_channel, kernel_h, kernel_w)``.
    * ``bias`` has shape ``(output_channel,)`` or is ``None``.

    ``kernel_size``, ``stride``, ``padding`` and ``dilation`` may each be a
    single int (applied to both spatial axes) or an ``(h, w)`` pair.
    """

    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=True, dilation=1):
        """Store the hyper-parameters and draw random weight/bias values.

        Args:
            input_channel: Number of channels of the input.
            output_channel: Number of channels produced by the convolution.
            kernel_size: Int or ``(h, w)`` pair, size of the kernel.
            stride: Int or ``(h, w)`` pair, step between two windows.
            padding: Int or ``(h, w)`` pair, zero padding added on both
                sides of each spatial axis.
            bias: When true, a random bias vector is created.
            dilation: Int or ``(h, w)`` pair, spacing between kernel taps.
        """
        self.input_channel = input_channel
        self.output_channel = output_channel
        # stride/padding are stored exactly as given; infer() expands them.
        self.stride = stride
        self.padding = padding
        self.dilation = self._pair(dilation)
        self.kernel_size = self._pair(kernel_size)
        self.weight = np.random.randn(output_channel, input_channel, self.kernel_size[0], self.kernel_size[1])
        self.bias = None
        if bias:
            self.bias = np.random.randn(output_channel)

    @staticmethod
    def _pair(value):
        """Return ``value`` as an ``(h, w)`` pair; a single int is duplicated."""
        return (value, value) if isinstance(value, int) else value

    def __call__(self, inputs):
        """Apply the convolution; shorthand for :meth:`infer`."""
        return self.infer(inputs)

    def infer(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Array of shape ``(batch_size, input_channel, height, width)``.

        Returns:
            Array of shape ``(batch_size, output_channel, output_h, output_w)``.
        """
        batch_size, input_channel, height, width = inputs.shape
        stride_h, stride_w = self._pair(self.stride)
        pad_h, pad_w = self._pair(self.padding)
        dilation_h, dilation_w = self.dilation
        kernel_h, kernel_w = self.kernel_size

        # Spatial extent covered by the kernel once the dilation gaps are included.
        dilation_shape = dilation_h * (kernel_h - 1) + 1, dilation_w * (kernel_w - 1) + 1

        # Output shape derived from the layer parameters.
        output_h = (height + 2 * pad_h - dilation_shape[0]) // stride_h + 1
        output_w = (width + 2 * pad_w - dilation_shape[1]) // stride_w + 1
        outputs = np.zeros([batch_size, self.output_channel, output_h, output_w])

        # Zero-padded copy of the input.
        inputs_padding = np.zeros([batch_size, input_channel, height + 2 * pad_h, width + 2 * pad_w])
        inputs_padding[:, :, pad_h:pad_h + height, pad_w:pad_w + width] = inputs

        # Scatter the weights into a zero-filled kernel of the dilated size
        # (self.weight itself is left untouched). Doing this unconditionally
        # handles every axis correctly, including mixed dilations such as
        # (1, 2); with dilation 1 it is simply a copy of the weights.
        kernel = np.zeros((self.output_channel, input_channel, dilation_shape[0], dilation_shape[1]))
        kernel[:, :, ::dilation_h, ::dilation_w] = self.weight

        # kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
        # It does not depend on the window position, so build it once.
        kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)

        # Slide the window over the padded input.
        for h in range(output_h):
            top = h * stride_h
            for w in range(output_w):
                left = w * stride_w
                input_ = inputs_padding[:, :, top:top + dilation_shape[0], left:left + dilation_shape[1]]
                # input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
                input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
                # Reduce over input_channel and both kernel axes
                # -> shape (batch_size, output_channel).
                outputs[:, :, h, w] = np.sum(input_ * kernel_, axis=(-1, -2, -3))

        if self.bias is not None:
            # One bias value per output channel, broadcast over batch and space.
            outputs += self.bias.reshape(1, -1, 1, 1)
        return outputs
if __name__ == '__main__':
    # Reference result from PyTorch.
    x_torch = torch.randn(1, 3, 10, 10)
    # m.weight.shape is (5, 3, 3, 3): output_channel, input_channel, kernel_h, kernel_w
    m = nn.Conv2d(3, 5, 3, stride=2, dilation=2)
    y_torch = m(x_torch)
    print('torch output shape: ', y_torch.shape)
    print('torch output: ', y_torch)

    # Convert the torch input to NumPy the same way the parameters are
    # converted below.
    x_np = x_torch.detach().numpy()
    m_np = Conv2d(3, 5, 3, stride=2, dilation=2)
    # Copy the PyTorch layer's parameters into the NumPy layer so both
    # layers compute with identical weights.
    m_np.weight = m.weight.detach().numpy()
    m_np.bias = m.bias.detach().numpy()
    y_np = m_np(x_np)
    print('numpy output shape: ', y_np.shape)
    print('numpy output: ', y_np)

    # Check the agreement numerically instead of comparing printed tensors
    # by eye; the tolerance absorbs float32 rounding on the PyTorch side.
    print('outputs match: ', np.allclose(y_np, y_torch.detach().numpy(), atol=1e-5))
结果如下:
可以看到,两者的结果是一致的,验证成功。
总结
- 在 infer 的前向计算循环中,下面这两句 np.repeat 起到类似广播的作用:把 input_ 沿 output_channel 维度复制、把 kernel_ 沿 batch_size 维度复制,使两者形状一致,这样就不用显式遍历 batch_size 和 output_channel 这两个维度。
# Insert a new axis at position 1 and repeat the window output_channel times along it.
# input_ shape : batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
input_ = np.repeat(input_[:, np.newaxis, :, :, :], self.output_channel, axis=1)
# Insert a new axis at position 0 and repeat the kernel batch_size times along it.
# kernel_ shape: batch_size, output_channel, input_channel, dilation_shape[0], dilation_shape[1]
kernel_ = np.repeat(kernel[np.newaxis, :, :, :, :], batch_size, axis=0)
- pytorch的Conv2d中如果有空洞卷积(dilation > 1),并不会往原来的kernel插入0,例如原来的kernel_size = 3x3,dilation=2,卷积核大小还是3x3,不会变成5x5,只是计算的时候会从输入间隔取值。这里numpy的做法是根据原来weight,生成另一份用于计算的插零kernel,否则无法将torch版本的权重参数传递给numpy版本。
- 另外,PyTorch 的 Conv2d 还有一个参数 groups(分组卷积),这里尚未实现,后续再补充。
(ps:代码应该还有可优化的地方)
结束。