实验十卷积神经网络（1）

最新推荐文章于 2024-10-05 19:19:55 发布

Simon•﹏•

最新推荐文章于 2024-10-05 19:19:55 发布

阅读量74

点赞数 2

文章标签： cnn 深度学习 pytorch

本文链接：https://blog.csdn.net/m0_62581697/article/details/134290065

版权

1、自定义二维卷积算子

import torch
import torch.nn as nn
class Conv2D(nn.Module):
    """Minimal 2-D cross-correlation operator with one learnable kernel.

    Args:
        kernel_size: Not used by this implementation; the kernel shape is
            taken from ``weight_attr``.
        weight_attr: 2-D tensor holding the initial kernel values. The values
            are copied into the parameter.
    """

    def __init__(self, kernel_size, weight_attr=torch.tensor([[0., 1.], [2., 3.]])):
        super(Conv2D, self).__init__()
        # The default tensor is created once, when the class body is executed.
        # Wrapping it directly would make every default-constructed instance
        # share (and update) the same storage, so take a private copy.
        self.weight = torch.nn.Parameter(weight_attr.clone().detach())

    def forward(self, X):
        """Apply the kernel to every sample with stride 1 and no padding.

        Args:
            X: Tensor indexed as ``[batch, rows, cols]``.

        Returns:
            Tensor of shape ``[batch, rows - u + 1, cols - v + 1]`` where
            ``(u, v)`` is the kernel shape.
        """
        u, v = self.weight.shape
        output = torch.zeros([X.shape[0], X.shape[1] - u + 1, X.shape[2] - v + 1])
        for i in range(output.shape[1]):
            for j in range(output.shape[2]):
                # Element-wise product of the current window with the kernel,
                # summed over both spatial axes (one value per sample).
                output[:, i, j] = torch.sum(X[:, i:i + u, j:j + v] * self.weight, dim=[1, 2])
        return output

# Seed the RNG (kept from the original script; the input below is a constant).
torch.manual_seed(100)
# One sample containing a 3x3 matrix with the values 1..9.
inputs = torch.arange(1., 10.).reshape(1, 3, 3)

# Run the custom operator with its default kernel and show input and output.
conv2d = Conv2D(kernel_size=2)
outputs = conv2d(inputs)
print(
    "input: {}, \noutput: {}".format(inputs, outputs)
)

2、自定义带步长和零填充的二维卷积算子

import torch
import torch.nn as nn
class Conv2D(nn.Module):
    """2-D cross-correlation operator with configurable stride and zero padding.

    Args:
        kernel_size: Side length of the square all-ones kernel that is created
            when no explicit weights are supplied.
        stride: Step between consecutive windows, used for both spatial axes.
        padding: Number of zero rows/columns added on every side of the input.
        weight_attr: 2-D tensor with the initial kernel. ``False`` (default),
            ``True`` or ``None`` selects the all-ones kernel instead.
    """

    def __init__(self, kernel_size, stride=1, padding=0, weight_attr=False):
        super(Conv2D, self).__init__()
        # A bool (or None) is only a "no weights given" marker, not a kernel.
        if weight_attr is None or isinstance(weight_attr, bool):
            weight_attr = torch.ones(size=(kernel_size, kernel_size))
        self.weight = torch.nn.Parameter(weight_attr)
        self.stride = stride
        self.padding = padding

    def forward(self, X):
        """Slide the kernel over the zero-padded input.

        Args:
            X: Tensor indexed as ``[batch, rows, cols]``.

        Returns:
            Tensor of shape ``[batch, out_rows, out_cols]``; each spatial size
            is ``(size + 2 * padding - kernel) // stride + 1``.
        """
        pad, step = self.padding, self.stride
        k_rows, k_cols = self.weight.shape
        # Allocate the work buffers with the dtype the arithmetic produces and
        # on the input's device, so float64 inputs are not silently downcast
        # and non-CPU inputs do not end up mixed with CPU buffers.
        dtype = torch.result_type(X, self.weight)
        padded = torch.zeros(
            [X.shape[0], X.shape[1] + 2 * pad, X.shape[2] + 2 * pad],
            dtype=dtype, device=X.device)
        padded[:, pad:X.shape[1] + pad, pad:X.shape[2] + pad] = X
        out_rows = (padded.shape[1] - k_rows) // step + 1
        out_cols = (padded.shape[2] - k_cols) // step + 1
        output = torch.zeros([X.shape[0], out_rows, out_cols], dtype=dtype, device=X.device)
        for i in range(out_rows):
            for j in range(out_cols):
                window = padded[:, step * i:step * i + k_rows, step * j:step * j + k_cols]
                output[:, i, j] = torch.sum(window * self.weight, dim=[1, 2])
        return output
# Two random 8x8 samples; only the resulting shapes are reported below.
inputs = torch.randn(2, 8, 8)

# Case 1: 3x3 kernel, padding 1, default stride 1.
conv2d_padding = Conv2D(kernel_size=3, padding=1, weight_attr=torch.zeros((3, 3)))
outputs = conv2d_padding(inputs)
print(
    "When kernel_size=3, padding=1 stride=1, input's shape: {}, output's shape: {}".format(
        inputs.shape, outputs.shape))

# Case 2: 3x3 all-ones kernel, padding 1, stride 2.
conv2d_stride = Conv2D(kernel_size=3, stride=2, padding=1)
outputs = conv2d_stride(inputs)
print(
    "When kernel_size=3, padding=1 stride=2, input's shape: {}, output's shape: {}".format(
        inputs.shape, outputs.shape))

3、实现图像边缘检测

import torch
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import torch.nn as nn

class Conv2d(nn.Module):
    """Sliding-window filter with a fixed 3x3 edge-detection kernel.

    Args:
        kernel_size: Not used; the kernel is always the 3x3 matrix below.
        stride: Step between consecutive windows on both spatial axes.
        padding: Number of zero rows/columns added on every side.
    """

    def __init__(self, kernel_size, stride=1, padding=1):
        super(Conv2d, self).__init__()
        # 8 at the centre and -1 on the eight neighbours.
        kernel = torch.tensor(
            [[-1., -1., -1.],
             [-1., 8., -1.],
             [-1., -1., -1.]],
            dtype=torch.float32)
        self.weight = torch.nn.Parameter(kernel, requires_grad=True)
        self.stride = stride
        self.padding = padding

    def forward(self, X):
        """Filter a ``[batch, rows, cols]`` tensor and return the feature map."""
        pad = self.padding
        step = self.stride
        # Zero-pad the last two axes by `pad` on each side.
        padded = torch.nn.functional.pad(X, [pad, pad, pad, pad], mode='constant', value=0)
        k_rows, k_cols = self.weight.shape
        n_rows = (padded.shape[1] - k_rows) // step + 1
        n_cols = (padded.shape[2] - k_cols) // step + 1
        result = torch.zeros([X.shape[0], n_rows, n_cols])
        for r in range(n_rows):
            top = step * r
            for c in range(n_cols):
                left = step * c
                window = padded[:, top:top + k_rows, left:left + k_cols]
                result[:, r, c] = torch.sum(window * self.weight, dim=[1, 2])
        return result

# Load the image and convert it to grayscale ('L' mode).
# NOTE(review): the path is an absolute Windows path; adjust it before running elsewhere.
img = Image.open('C:\\Users\\hp\\Desktop\\Screenshot_2022_0603_085623.png').convert('L')
inputs = np.array(img, dtype='float32')

# Create the filter operator (the 3x3 kernel values are fixed inside Conv2d).
conv = Conv2d(kernel_size=3, stride=1, padding=1)  # padding set to 1

# Convert the image to a tensor and add a leading batch axis.
inputs = torch.tensor(inputs)
inputs = torch.unsqueeze(inputs, dim=0)

# First filtering pass (padding=1).
# NOTE(review): this result is overwritten by the second pass below before it is used.
outputs = conv(inputs)
# NOTE(review): `nputs` is never read again; it looks like a typo for `inputs` - confirm.
nputs = np.array(img).astype('float32')
# At this point `inputs` is already the batched tensor, despite the "bf to_tensor" label.
print("bf to_tensor, inputs:",inputs)
# Start over from the raw image array.
inputs = np.array(img, dtype='float32')
# Re-create the operator, this time without padding.
conv = Conv2d(kernel_size=3, stride=1, padding=0)
print("bf to_tensor, inputs:", inputs)
# Convert the array to a tensor.
inputs = torch.tensor(inputs)
print("bf unsqueeze, inputs:", inputs)
# Add the leading batch axis expected by the operator.
inputs = torch.unsqueeze(inputs, dim=0)
print("af unsqueeze, inputs:", inputs)
# Second filtering pass (padding=0); this is the result that gets plotted.
outputs = conv(inputs)
print(outputs)
# Show the input image and the filtered feature map side by side.
plt.figure(figsize=(8, 4))
f = plt.subplot(121)
f.set_title('input image', fontsize=15)
plt.imshow(img, cmap='gray')  # show the grayscale image
f = plt.subplot(122)
f.set_title('output feature map', fontsize=15)
plt.imshow(outputs.squeeze().detach().numpy(), cmap='gray')
plt.show()

4 自定义卷积层算子和汇聚层算子

自定义卷积层算子：

import torch
import numpy as np
import torch.nn as nn


class Conv2D(nn.Module):
    """Multi-channel 2-D convolution layer built from explicit window loops.

    The layer owns ``out_channels`` kernel stacks of shape
    ``[in_channels, kernel_size, kernel_size]`` (initialised to ones) and one
    bias per output channel (initialised to zero, stored with shape
    ``[out_channels, 1]``).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(Conv2D, self).__init__()
        # Kernels: every entry starts at 1.0.
        kernel_shape = [out_channels, in_channels, kernel_size, kernel_size]
        self.weight = nn.Parameter(torch.ones(kernel_shape, dtype=torch.float32))

        # Biases: every entry starts at 0.0.
        self.bias = nn.Parameter(torch.zeros([out_channels, 1], dtype=torch.float32))

        self.stride = stride              # step between windows
        self.padding = padding            # zero padding on every side
        self.in_channels = in_channels    # number of input channels
        self.out_channels = out_channels  # number of output channels

    def single_forward(self, X, weight):
        """Apply one 2-D kernel to one input channel.

        Args:
            X: Tensor indexed as ``[batch, rows, cols]``.
            weight: 2-D kernel.

        Returns:
            Tensor of shape ``[batch, out_rows, out_cols]``.
        """
        pad = self.padding
        step = self.stride
        batch, rows, cols = X.shape[0], X.shape[1], X.shape[2]
        # Copy the data into the centre of a zero canvas enlarged by the padding.
        canvas = torch.zeros([batch, rows + 2 * pad, cols + 2 * pad])
        canvas[:, pad:rows + pad, pad:cols + pad] = X
        k_rows, k_cols = weight.shape
        n_rows = (canvas.shape[1] - k_rows) // step + 1
        n_cols = (canvas.shape[2] - k_cols) // step + 1
        result = torch.zeros([batch, n_rows, n_cols])
        for r in range(n_rows):
            top = step * r
            for c in range(n_cols):
                left = step * c
                window = canvas[:, top:top + k_rows, left:left + k_cols]
                result[:, r, c] = torch.sum(window * weight, dim=[1, 2])
        return result

    def forward(self, inputs):
        """Compute every output feature map.

        Args:
            inputs: Tensor indexed as ``[batch, channel, rows, cols]``.

        Returns:
            Tensor with the feature maps stacked along axis 1.
        """
        feature_maps = []
        # One (kernel stack, bias) pair per output channel.
        for kernels, offset in zip(self.weight, self.bias):
            # Filter each input channel with its own kernel ...
            per_channel = [
                self.single_forward(inputs[:, d, :, :], kernels[d])
                for d in range(self.in_channels)
            ]
            # ... then add the per-channel results together and apply the bias.
            feature_maps.append(torch.sum(torch.stack(per_channel), dim=0) + offset)
        return torch.stack(feature_maps, 1)
# One sample with two 3x3 input channels.
inputs = torch.tensor([[[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]])
# Use the custom Conv2D defined above (all-ones kernels, zero bias). The
# original line built a randomly initialised nn.Conv2d here, so the value
# printed as "Conv2D outputs" was not the custom operator's result.
conv2d = Conv2D(in_channels=2, out_channels=3, kernel_size=2)
print("inputs shape:",inputs.shape)
outputs = conv2d(inputs)
print("Conv2D outputs shape:",outputs.shape)

# Reference layer from the torch API.
conv2d_torch = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=2)

# Give the reference layer the same all-ones kernels and zero bias.
conv2d_torch.weight = torch.nn.Parameter(torch.ones(3, 2, 2, 2))
conv2d_torch.bias = torch.nn.Parameter(torch.zeros(3))
outputs_torch = conv2d_torch(inputs)
# Result of the custom operator.
print('Conv2D outputs:', outputs)
# Result of the torch API.
print('nn.Conv2d outputs:', outputs_torch)

汇聚层算子

汇聚层的作用是进行特征选择，降低特征数量，从而减少参数数量。由于汇聚之后特征图会变得更小，如果后面连接的是全连接层，可以有效地减小神经元的个数，节省存储空间并提高计算效率。

常用的汇聚方法有两种，分别是：平均汇聚和最大汇聚。

平均汇聚：将输入特征图划分为 $2\times 2$ 大小的区域，对每个区域内的神经元活性值取平均值作为这个区域的表示；
最大汇聚：使用输入特征图的每个子区域内所有神经元的最大活性值作为这个区域的表示。

图5.11 给出了两种汇聚层的示例。

图5.11：汇聚层

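汇聚层输出的计算尺寸与卷积层一致。对于一个输入矩阵 $X\in\mathbb{R}^{M\times N}$ 和一个运算区域大小为 $U\times V$ 的汇聚层，步长为 $S$，对输入矩阵进行零填充（每侧填充 $P$），那么最终输出矩阵大小则为 $M' = \left\lfloor\frac{M + 2P - U}{S}\right\rfloor + 1$，$N' = \left\lfloor\frac{N + 2P - V}{S}\right\rfloor + 1$。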

由于过大的采样区域会急剧减少神经元的数量，也会造成过多的信息丢失。目前，在卷积神经网络中比较典型的汇聚层是将每个输入特征图划分为 $2\times 2$ 大小的不重叠区域，然后使用最大汇聚的方式进行下采样。

由于汇聚是使用某一位置的相邻输出的总体统计特征代替网络在该位置的输出，所以其好处是当输入数据做出少量平移时，经过汇聚运算后的大多数输出还能保持不变。比如：当识别一张图像是否是人脸时，我们需要知道人脸左边有一只眼睛，右边也有一只眼睛，而不需要知道眼睛的精确位置，这时候通过汇聚某一片区域的像素点来得到总体统计特征会显得很有用。这也就体现了汇聚层的平移不变特性。

汇聚层的参数量和计算量

由于汇聚层中没有参数，所以参数量为 0；最大汇聚中，没有乘加运算，所以计算量为 0，而平均汇聚中，输出特征图上每个点都对应了一次求平均运算。

import torch
import numpy as np
import torch.nn as nn


class Pool2D(nn.Module):
    """Max / average pooling over sliding windows of a 4-D input.

    Args:
        size: Window size as ``(height, width)``, the same order as the
            ``kernel_size`` tuple of ``nn.MaxPool2d`` / ``nn.AvgPool2d``.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.
        stride: Step between consecutive windows on both spatial axes.
    """

    def __init__(self, size=(2, 2), mode='max', stride=1):
        super(Pool2D, self).__init__()
        self.mode = mode
        self.h, self.w = size
        self.stride = stride

    def forward(self, x):
        """Pool every channel of every sample independently.

        Args:
            x: Tensor indexed as ``[batch, channel, rows, cols]``.

        Returns:
            Tensor of shape ``[batch, channel, out_rows, out_cols]``.

        Raises:
            ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
        """
        if self.mode not in ('max', 'avg'):
            # Previously an unknown mode silently produced an all-zero output.
            raise ValueError("mode must be 'max' or 'avg', got {!r}".format(self.mode))
        # Axis 2 holds rows (height) and axis 3 holds columns (width).
        out_rows = (x.shape[2] - self.h) // self.stride + 1
        out_cols = (x.shape[3] - self.w) // self.stride + 1
        output = torch.zeros([x.shape[0], x.shape[1], out_rows, out_cols])
        for i in range(out_rows):
            top = self.stride * i
            for j in range(out_cols):
                left = self.stride * j
                window = x[:, :, top:top + self.h, left:left + self.w]
                if self.mode == 'max':
                    # Reduce over the two spatial axes only, so each
                    # (sample, channel) pair keeps its own maximum.
                    output[:, :, i, j] = torch.amax(window, dim=(2, 3))
                else:
                    output[:, :, i, j] = torch.mean(window, dim=[2, 3])

        return output


# One single-channel 4x4 sample holding the values 1..16.
inputs = torch.arange(1., 17.).reshape(1, 1, 4, 4)

# Custom max pooling (default 2x2 window) with stride 2.
pool2d = Pool2D(stride=2)
outputs = pool2d(inputs)
print("input: {}, \noutput: {}".format(inputs.shape, outputs.shape))

# Max pooling: custom operator versus the torch API.
maxpool2d_torch = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
outputs_torch = maxpool2d_torch(inputs)
print('Maxpool2D outputs:', outputs)            # custom operator
print('nn.Maxpool2D outputs:', outputs_torch)   # torch API

# Average pooling: custom operator versus the torch API.
pool2d = Pool2D(mode='avg', stride=2)
outputs = pool2d(inputs)
avgpool2d_torch = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
outputs_torch = avgpool2d_torch(inputs)
print('Avgpool2D outputs:', outputs)            # custom operator
print('nn.Avgpool2D outputs:', outputs_torch)   # torch API

5、学习torch.nn.Conv2d()、torch.nn.MaxPool2d()、torch.nn.AvgPool2d()，简要介绍使用方法。

1. torch.nn.Conv2d

torch.nn.Conv2d()是PyTorch中用于创建二维卷积层的函数，常用于图像处理任务。其主要参数包括输入通道数（in_channels）、输出通道数（out_channels）、卷积核大小（kernel_size）、步长（stride）、填充（padding）等。
通过实例化torch.nn.Conv2d()类并传入相应的参数，可以创建一个二维卷积层对象，然后可以通过调用该对象对输入数据进行卷积操作。

官方文档：torch.nn — PyTorch 2.1 documentation
CLASS torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
主要参数：

in_channels（int）：输入图像中的通道数
out_channels（int）：卷积产生的通道数
kernel_size（int或tuple）：卷积内核的大小
默认参数：

stride（int或tuple，可选）：卷积的步幅。默认值：1
padding（int，tuple或str，可选）：添加到输入的所有四边的填充。默认值：0
padding_mode（str，可选）：‘zeros’ 、 ‘reflect’ 、 ‘replicate’ 或 ‘circular’ 。默认值： ‘zeros’
dilation（int或tuple，可选）：内核元素之间的间距。默认值：1
groups（int，可选）：从输入通道到输出通道的阻塞连接数。默认值：1
bias（bool，可选）：如果 True ，则向输出添加可学习的偏置。默认值： True

2. torch.nn.MaxPool2d

torch.nn.MaxPool2d()用于创建二维最大池化层，常用于减小特征图尺寸和提取关键特征。主要参数包括池化核大小（kernel_size）、步长（stride）、填充（padding）等。
通过实例化torch.nn.MaxPool2d()类并传入相应的参数，可以创建一个二维最大池化层对象，然后可以通过调用该对象对输入数据进行最大池化操作。

官方文档：MaxPool2d — PyTorch 2.1 documentation
CLASS torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
参数 kernel_size 、 stride 、 padding 、 dilation 可以是：

单个 int-在这种情况下，高度和宽度尺寸使用相同的值
两个int的 tuple -在这种情况下，第一个int用于高度维度，第二个int用于宽度维度
主要参数：

kernel_size（Union[int，Tuple[int，int]]）-取最大值的窗口大小
默认参数：

stride（Union[int，Tuple[int，int]]）-窗口的步幅。默认值为 kernel_size
padding（Union[int，Tuple[int，int]]）-要在两边添加的隐式负无穷大填充
dilation（Union[int，Tuple[int，int]]）-控制窗口中元素步幅的参数
return_indices（bool）-如果为 True ，将最大值的索引与输出一并返回，供之后的 torch.nn.MaxUnpool2d 使用
ceil_mode（bool）-当为True时，将使用ceil而不是floor来计算输出形状

3. torch.nn.AvgPool2d

官方文档：AvgPool2d — PyTorch 2.1 documentation

torch.nn.AvgPool2d()用于创建二维平均池化层，同样常用于减小特征图尺寸。其参数与torch.nn.MaxPool2d()类似，包括池化核大小（kernel_size）、步长（stride）、填充（padding）等。
通过实例化torch.nn.AvgPool2d()类并传入相应的参数，可以创建一个二维平均池化层对象，然后可以通过调用该对象对输入数据进行平均池化操作。

torch.nn.AvgPool2d 是一个用于二维平均池化的层。它的输入和输出都是四维张量，四个维度分别表示batch_size、通道数、高度和宽度。它的参数包括池化核大小、步长、填充等。

举个例子

import torch
from torch import nn


class Pool2D(nn.Module):
    """Max / average pooling over sliding windows of a 4-D input.

    Args:
        size: Window size as ``(height, width)``, the same order as the
            ``kernel_size`` tuple of ``nn.MaxPool2d`` / ``nn.AvgPool2d``.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.
        stride: Step between consecutive windows on both spatial axes.
    """

    def __init__(self, size=(2, 2), mode='max', stride=1):
        super(Pool2D, self).__init__()
        self.mode = mode
        self.h, self.w = size
        self.stride = stride

    def forward(self, x):
        """Pool every channel of every sample independently.

        Args:
            x: Tensor indexed as ``[batch, channel, rows, cols]``.

        Returns:
            Tensor of shape ``[batch, channel, out_rows, out_cols]``.

        Raises:
            ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
        """
        if self.mode not in ('max', 'avg'):
            # Previously an unknown mode silently produced an all-zero output.
            raise ValueError("mode must be 'max' or 'avg', got {!r}".format(self.mode))
        # Axis 2 holds rows (height) and axis 3 holds columns (width).
        out_rows = (x.shape[2] - self.h) // self.stride + 1
        out_cols = (x.shape[3] - self.w) // self.stride + 1
        output = torch.zeros([x.shape[0], x.shape[1], out_rows, out_cols])
        for i in range(out_rows):
            top = self.stride * i
            for j in range(out_cols):
                left = self.stride * j
                window = x[:, :, top:top + self.h, left:left + self.w]
                if self.mode == 'max':
                    # Reduce over the two spatial axes only, so each
                    # (sample, channel) pair keeps its own maximum.
                    output[:, :, i, j] = torch.amax(window, dim=(2, 3))
                else:
                    # Mean over the whole window; the earlier code returned
                    # the largest row mean, which is not the window average.
                    output[:, :, i, j] = torch.mean(window, dim=[2, 3])

        return output


# Build a single-channel 4x4 sample and run the custom max-pooling layer on it.
inputs = torch.tensor([[[
    [1., 2., 3., 4.],
    [5., 6., 7., 8.],
    [9., 10., 11., 12.],
    [13., 14., 15., 16.],
]]])
pool2d = Pool2D(stride=2)
outputs = pool2d(inputs)
print("input: {}, \noutput: {}".format(inputs.shape, outputs.shape))

# Max pooling: compare the custom result above with the torch API.
maxpool2d_torch = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
outputs_torch = maxpool2d_torch(inputs)
print('Maxpool2D outputs:\n', outputs)            # custom operator
print('nn.Maxpool2D outputs:\n', outputs_torch)   # torch API

# Average pooling: compare the custom operator with the torch API.
pool2d = Pool2D(mode='avg', stride=2)
outputs = pool2d(inputs)
avgpool2d_torch = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
outputs_torch = avgpool2d_torch(inputs)
print('Avgpool2D outputs:\n', outputs)            # custom operator
print('nn.Avgpool2D outputs:\n', outputs_torch)   # torch API

6、分别用自定义卷积算子和torch.nn.Conv2d()编程实现下面的卷积运算

torch.nn.Conv2d()

import torch
import torch.nn as nn

# Reference computation with the torch API: 3 input channels, 2 filters,
# 3x3 kernels, zero padding of 1 and stride 2.
conv2d = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, padding=1, stride=2)

# Filter bank: 2 filters x 3 channels x 3 x 3.
x = torch.tensor([
    [[[-1, 1, 0], [0, 1, 0], [0, 1, 1]],
     [[-1, -1, 0], [0, 0, 0], [0, -1, 0]],
     [[0, 0, -1], [0, 1, 0], [1, -1, -1]]],
    [[[1, 1, -1], [-1, -1, 1], [0, -1, 1]],
     [[0, 1, 0], [-1, 0, -1], [-1, 1, 0]],
     [[-1, 0, 0], [-1, 0, 1], [-1, 0, 0]]],
], dtype=torch.float32)

# Input: 3 channels of 5x5 values (no batch axis).
inputs = torch.tensor([
    [[0, 1, 1, 0, 2], [2, 2, 2, 2, 1], [1, 0, 0, 2, 0], [0, 1, 1, 0, 0], [1, 2, 0, 0, 2]],
    [[1, 0, 2, 2, 0], [0, 0, 0, 2, 0], [1, 2, 1, 2, 1], [1, 0, 0, 0, 0], [1, 2, 1, 1, 1]],
    [[2, 1, 2, 0, 0], [1, 0, 0, 1, 0], [0, 2, 1, 0, 1], [0, 1, 2, 2, 2], [2, 1, 0, 0, 1]],
], dtype=torch.float32)

# Replace the randomly initialised parameters with the fixed filters and biases.
conv2d.weight = nn.Parameter(x)
conv2d.bias = nn.Parameter(torch.tensor([1.0, 0.0]))

outputs = conv2d(inputs)
print('nn.Conv2D outputs:', outputs)

自定义

import torch.nn as nn
import torch


class Conv2D(nn.Module):
    """Multi-channel 2-D convolution layer with caller-supplied kernels.

    Args:
        in_channels: Number of input channels read from the input tensor.
        Kernel: Kernel tensor indexed as ``[out_channel, in_channel, rows, cols]``.
        out_channels: Number of output channels (stored for reference).
        kernel_size: Not used; the kernel shape is taken from ``Kernel``.
        stride: Step between consecutive windows on both spatial axes.
        padding: Number of zero rows/columns added on every side.
        bias: Optional bias values, one per output channel. When omitted the
            original fixed bias ``[1, 0]`` is used; since kernels and biases
            are paired with ``zip``, that default yields at most two feature maps.

    Raises:
        ValueError: If an explicit ``bias`` does not provide exactly one entry
            per kernel stack in ``Kernel``.
    """

    def __init__(self, in_channels, Kernel, out_channels, kernel_size, stride=1, padding=0, bias=None):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(Kernel)

        if bias is None:
            # Historical default of this layer.
            bias = torch.tensor([1, 0], dtype=torch.float32)
        else:
            bias = torch.as_tensor(bias, dtype=torch.float32)
            if bias.shape[0] != Kernel.shape[0]:
                raise ValueError(
                    "bias has {} entries but Kernel defines {} output channels".format(
                        bias.shape[0], Kernel.shape[0]))
        self.bias = nn.Parameter(bias)

        self.stride = stride              # step between windows
        self.padding = padding            # zero padding on every side
        self.in_channels = in_channels    # number of input channels
        self.out_channels = out_channels  # number of output channels

    def single_forward(self, X, weight):
        """Apply one 2-D kernel to one input channel.

        Args:
            X: Tensor indexed as ``[batch, rows, cols]``.
            weight: 2-D kernel.

        Returns:
            Tensor of shape ``[batch, out_rows, out_cols]``.
        """
        pad = self.padding
        step = self.stride
        # Copy the data into the centre of a zero canvas enlarged by the padding.
        new_X = torch.zeros([X.shape[0], X.shape[1] + 2 * pad, X.shape[2] + 2 * pad])
        new_X[:, pad:X.shape[1] + pad, pad:X.shape[2] + pad] = X
        u, v = weight.shape
        out_rows = (new_X.shape[1] - u) // step + 1
        out_cols = (new_X.shape[2] - v) // step + 1
        output = torch.zeros([X.shape[0], out_rows, out_cols])
        for i in range(out_rows):
            for j in range(out_cols):
                window = new_X[:, step * i:step * i + u, step * j:step * j + v]
                output[:, i, j] = torch.sum(window * weight, dim=[1, 2])
        return output

    def forward(self, inputs):
        """Compute every output feature map.

        Args:
            inputs: Tensor indexed as ``[batch, channel, rows, cols]``.

        Returns:
            Tensor with the feature maps stacked along axis 1.
        """
        feature_maps = []
        # One (kernel stack, bias) pair per output channel.
        for w, b in zip(self.weight, self.bias):
            # Filter each input channel with its own kernel, then add the
            # per-channel results together and apply the bias.
            multi_outs = [
                self.single_forward(inputs[:, i, :, :], w[i])
                for i in range(self.in_channels)
            ]
            feature_maps.append(torch.sum(torch.stack(multi_outs), dim=0) + b)
        out = torch.stack(feature_maps, 1)

        return out


# Input: one sample with 3 channels of 5x5 values.
x = torch.tensor([
    [[0, 1, 1, 0, 2], [2, 2, 2, 2, 1], [1, 0, 0, 2, 0], [0, 1, 1, 0, 0], [1, 2, 0, 0, 2]],
    [[1, 0, 2, 2, 0], [0, 0, 0, 2, 0], [1, 2, 1, 2, 1], [1, 0, 0, 0, 0], [1, 2, 1, 1, 1]],
    [[2, 1, 2, 0, 0], [1, 0, 0, 1, 0], [0, 2, 1, 0, 1], [0, 1, 2, 2, 2], [2, 1, 0, 0, 1]],
], dtype=torch.float32).reshape([1, 3, 5, 5])

# Filter bank: 2 filters x 3 channels x 3 x 3.
Kernel = torch.tensor([
    [[[-1, 1, 0], [0, 1, 0], [0, 1, 1]],
     [[-1, -1, 0], [0, 0, 0], [0, -1, 0]],
     [[0, 0, -1], [0, 1, 0], [1, -1, -1]]],
    [[[1, 1, -1], [-1, -1, 1], [0, -1, 1]],
     [[0, 1, 0], [-1, 0, -1], [-1, 1, 0]],
     [[-1, 0, 0], [-1, 0, 1], [-1, 0, 0]]],
], dtype=torch.float32).reshape([2, 3, 3, 3])

# Run the custom layer with zero padding of 1 and stride 2, then show the result.
conv2d = Conv2D(
    in_channels=3,
    Kernel=Kernel,
    out_channels=2,
    kernel_size=3,
    padding=1,
    stride=2,
)
outputs = conv2d(x)
print(outputs)

总结：

1、本次实验比之前的难，主要是学习卷积层的一些知识，像自定义卷积算子、自定义步长和零填充，还完成了边缘检测任务，并比较了torch.nn.Conv2d和自定义算子的区别。

2、卷积核是卷积操作的核心，它决定了卷积操作提取特征的方式。在实际应用中，选择合适的卷积核非常重要。一般来说，根据任务的特点和数据集的特性来选择卷积核是比较好的实践。就拿我做边缘检测的实验来说，整了好久，因为我用彩色图片做的第一遍，然后原图变成绿色了，难受，我就先转成灰度值，没改参数就一直报错，后来我又尝试改变参数才解决。所以，卷积核的参数设置特别重要

3、torch.nn.Conv2d里的一些新学的

dilation（int或tuple，可选）：内核元素之间的间距。默认值：1
groups（int，可选）：从输入通道到输出通道的阻塞连接数。默认值：1
bias（bool，可选）：如果 True ，则向输出添加可学习的偏置。默认值： True

4、MaxPool2d里的新学的

dilation（Union[int，Tuple[int，int]]）-控制窗口中元素步幅的参数
return_indices（bool）-如果为 True ，将最大值的索引与输出一并返回，供之后的 torch.nn.MaxUnpool2d 使用
ceil_mode（bool）-当为True时，将使用ceil而不是floor来计算输出形状

参考文章

AvgPool2d — PyTorch 2.1 documentation

Pytorch学习：卷积神经网络—nn.Conv2d、nn.MaxPool2d、nn.ReLU、nn.Linear和nn.Dropout_緈福的街口的博客-CSDN博客

HBU-NNDL 实验六卷积神经网络（1）卷积_np.array(img).astype('float32')_不是蒋承翰的博客-CSDN博客