【22-23春】AI作业7-卷积-CSDN博客

本文链接：https://blog.csdn.net/winter_poi1120/article/details/130520904

一、简单描述卷积、卷积核、多通道、特征图、特征选择概念。

卷积：卷积是指在滑动中提取特征的过程，是两个变量在某范围内相乘之后求和的结果，可以视为状态的叠加。
卷积核：图像处理时，给定输入图像，输入图像中一个小区域中像素加权平均后成为输出图像中每个对应像素，权值由一个函数定义，这个函数就是卷积核。又称为滤波器。
多通道：输入数据在通道维度上包含多个分量（例如彩色图像的 R、G、B 三个通道）；每个卷积核对所有输入通道分别卷积后求和，得到一个输出通道，使用多个卷积核时输出特征图也就具有多个通道。
特征图：Feature Map是输入图像经过神经网络卷积层运算后产生的结果，表征的是输入在某一种特征上的响应，其分辨率的大小取决于输入尺寸以及先前卷积核的大小、步长和填充方式。
特征选择：特征选择是特征工程里的一个重要问题，其目标是寻找最优特征子集。特征选择能剔除掉不相关的或冗余的特征，从而达到减少特征个数、提高模型精确度，减少运行时间的目的。

二、探究不同卷积核的作用，研究背后的原理。

除标准卷积外，常见的卷积变体还有：转置卷积、可分离卷积、扩张卷积/空洞卷积、可变形卷积。
不同大小的卷积核可以用于提取不同大小的特征。例如，较小的卷积核可以用于提取图像中的细节和局部特征，而较大的卷积核则可以用于提取图像中的整体特征和全局特征。
不同形状的卷积核也可以用于提取不同类型的特征。例如，卷积核可以是垂直的、水平的或对角线的，可以用于提取不同方向的特征。

三、编程实现：灰度图的边缘检测、锐化、模糊。

边缘检测：

import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from PIL import Image
import matplotlib.pyplot as plt
 
# 3x3 Laplacian-style (8-neighbour) edge kernel. The weights sum to 0, so flat
# regions map to 0 and only intensity changes produce a response. Note that
# this is not a Sobel operator.
EDGE_KERNEL = np.array([[-1, -1, -1],
                        [-1, 8, -1],
                        [-1, -1, -1]], dtype='float32')


def apply_kernel(image, kernel):
    """Filter a 2-D grayscale image with a 2-D kernel.

    The filter is applied with stride 1 and no padding, so the result is
    smaller than the input by ``kernel_size - 1`` along each axis. As with
    ``nn.Conv2d``, the kernel is applied as a cross-correlation (not flipped).

    Args:
        image: 2-D array-like of pixel intensities.
        kernel: 2-D array-like of filter weights.

    Returns:
        A 2-D float32 NumPy array holding the raw (unclipped) filter response.
    """
    image = np.ascontiguousarray(image, dtype='float32')
    kernel = np.ascontiguousarray(kernel, dtype='float32')
    # conv2d expects (batch, channel, H, W) input and
    # (out_channels, in_channels, kH, kW) weights.
    batch = torch.from_numpy(image).reshape(1, 1, *image.shape)
    weight = torch.from_numpy(kernel).reshape(1, 1, *kernel.shape)
    # The kernel is fixed, so no autograd graph is needed.
    with torch.no_grad():
        response = nn.functional.conv2d(batch, weight)
    return response[0, 0].numpy()


def main():
    """Load 'deer.jpg', show it, then show its edge-detection response."""
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese title
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font
    # https://blog.csdn.net/weixin_40123108/article/details/83510592
    file_path = 'deer.jpg'
    # Read the picture as a single-channel grayscale matrix; the context
    # manager closes the underlying file once the pixels are copied out.
    with Image.open(file_path) as picture:
        im = np.array(picture.convert('L'), dtype='float32')
    print(im.shape[0], im.shape[1])
    plt.imshow(im.astype('uint8'), cmap='gray')  # show the original picture
    plt.title('原图')
    plt.show()

    x = apply_kernel(im, EDGE_KERNEL)
    print(x.shape)  # output size (2 pixels smaller per axis)

    plt.imshow(x, cmap='gray')
    plt.show()


if __name__ == '__main__':
    main()

锐化：

import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from PIL import Image
import matplotlib.pyplot as plt
 
# 3x3 sharpening kernel: the identity kernel plus an 8-neighbour edge
# (Laplacian-style) kernel. The weights sum to 1, so flat regions keep their
# brightness while intensity changes are amplified. With a centre weight of 8
# (sum 0) the filter would only output edges instead of a sharpened picture.
SHARPEN_KERNEL = np.array([[-1, -1, -1],
                           [-1, 9, -1],
                           [-1, -1, -1]], dtype='float32')


def apply_kernel(image, kernel):
    """Filter a 2-D grayscale image with a 2-D kernel.

    The filter is applied with stride 1 and no padding, so the result is
    smaller than the input by ``kernel_size - 1`` along each axis. As with
    ``nn.Conv2d``, the kernel is applied as a cross-correlation (not flipped).

    Args:
        image: 2-D array-like of pixel intensities.
        kernel: 2-D array-like of filter weights.

    Returns:
        A 2-D float32 NumPy array holding the raw (unclipped) filter response.
    """
    image = np.ascontiguousarray(image, dtype='float32')
    kernel = np.ascontiguousarray(kernel, dtype='float32')
    # conv2d expects (batch, channel, H, W) input and
    # (out_channels, in_channels, kH, kW) weights.
    batch = torch.from_numpy(image).reshape(1, 1, *image.shape)
    weight = torch.from_numpy(kernel).reshape(1, 1, *kernel.shape)
    # The kernel is fixed, so no autograd graph is needed.
    with torch.no_grad():
        response = nn.functional.conv2d(batch, weight)
    return response[0, 0].numpy()


def sharpen(image):
    """Return the sharpened image, clamped to the valid 0-255 gray range.

    Args:
        image: 2-D array-like of gray levels (expected in 0-255).

    Returns:
        A 2-D float32 NumPy array, 2 pixels smaller than ``image`` per axis.
    """
    # Sharpening overshoots around edges; clamp in one vectorised step
    # instead of visiting every pixel in a Python loop.
    return np.clip(apply_kernel(image, SHARPEN_KERNEL), 0, 255)


def main():
    """Load 'deer.jpg', show it, then show the sharpened result."""
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese title
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font
    # https://blog.csdn.net/weixin_40123108/article/details/83510592
    file_path = 'deer.jpg'
    # Read the picture as a single-channel grayscale matrix; the context
    # manager closes the underlying file once the pixels are copied out.
    with Image.open(file_path) as picture:
        im = np.array(picture.convert('L'), dtype='float32')
    print(im.shape[0], im.shape[1])
    plt.imshow(im.astype('uint8'), cmap='gray')  # show the original picture
    plt.title('原图')
    plt.show()

    x = sharpen(im)
    print(x.shape)  # output size (2 pixels smaller per axis)

    # Pin the display range so matplotlib does not re-stretch the contrast
    # of the float result to its own min/max.
    plt.imshow(x, cmap='gray', vmin=0, vmax=255)
    plt.show()


if __name__ == '__main__':
    main()

模糊：

import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from PIL import Image
import matplotlib.pyplot as plt
 
# 3x3 box (mean) kernel: every output pixel is the average of its 3x3
# neighbourhood, which smooths out fine detail. The weights sum to 1, so
# overall brightness is preserved. (An edge kernel with centre 8 and -1
# elsewhere would highlight edges instead of blurring.)
BLUR_KERNEL = np.full((3, 3), 1.0 / 9.0, dtype='float32')


def apply_kernel(image, kernel):
    """Filter a 2-D grayscale image with a 2-D kernel.

    The filter is applied with stride 1 and no padding, so the result is
    smaller than the input by ``kernel_size - 1`` along each axis. As with
    ``nn.Conv2d``, the kernel is applied as a cross-correlation (not flipped).

    Args:
        image: 2-D array-like of pixel intensities.
        kernel: 2-D array-like of filter weights.

    Returns:
        A 2-D float32 NumPy array holding the raw (unclipped) filter response.
    """
    image = np.ascontiguousarray(image, dtype='float32')
    kernel = np.ascontiguousarray(kernel, dtype='float32')
    # conv2d expects (batch, channel, H, W) input and
    # (out_channels, in_channels, kH, kW) weights.
    batch = torch.from_numpy(image).reshape(1, 1, *image.shape)
    weight = torch.from_numpy(kernel).reshape(1, 1, *kernel.shape)
    # The kernel is fixed, so no autograd graph is needed.
    with torch.no_grad():
        response = nn.functional.conv2d(batch, weight)
    return response[0, 0].numpy()


def blur(image):
    """Return the box-blurred image, clamped to the valid 0-255 gray range.

    Args:
        image: 2-D array-like of gray levels (expected in 0-255).

    Returns:
        A 2-D float32 NumPy array, 2 pixels smaller than ``image`` per axis.
    """
    # A mean of in-range pixels stays in range; the clamp only guards
    # against float rounding and is done in one vectorised step.
    return np.clip(apply_kernel(image, BLUR_KERNEL), 0, 255)


def main():
    """Load 'deer.jpg', show it, then show the blurred result."""
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese title
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font
    # https://blog.csdn.net/weixin_40123108/article/details/83510592
    file_path = 'deer.jpg'
    # Read the picture as a single-channel grayscale matrix; the context
    # manager closes the underlying file once the pixels are copied out.
    with Image.open(file_path) as picture:
        im = np.array(picture.convert('L'), dtype='float32')
    print(im.shape[0], im.shape[1])
    plt.imshow(im.astype('uint8'), cmap='gray')  # show the original picture
    plt.title('原图')
    plt.show()

    x = blur(im)
    print(x.shape)  # output size (2 pixels smaller per axis)

    # Pin the display range so matplotlib does not re-stretch the contrast
    # of the float result to its own min/max.
    plt.imshow(x, cmap='gray', vmin=0, vmax=255)
    plt.show()


if __name__ == '__main__':
    main()