NNDL 作业6：基于CNN的XO识别

最新推荐文章于 2023-03-21 21:15:42 发布

_Gypsophila___

最新推荐文章于 2023-03-21 21:15:42 发布

阅读量59

点赞数 1

文章标签： 1024程序员节

本文链接：https://blog.csdn.net/weixin_73486730/article/details/127487618

版权

一、实现卷积-池化-激活

1. Numpy版本：手工实现卷积-池化-激活

2. Pytorch版本：调用函数实现卷积-池化-激活

一、实现卷积-池化-激活

1. Numpy版本：手工实现卷积-池化-激活

自定义卷积算子、池化算子实现

import numpy as np

# 初始化一张X图片矩阵
x = np.array([[-1, -1, -1, -1, -1, -1, -1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, -1, -1, 1, -1, -1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, -1, -1, -1, -1, -1, -1, -1]])
# 输出原矩阵
print("x=\n", x)
# 初始化 三个 卷积核
Kernel = [[0 for i in range(0, 3)] for j in range(0, 3)]
# 参考图卷积核1
Kernel[0] = np.array([[1, -1, -1],
                      [-1, 1, -1],
                      [-1, -1, 1]])
# 参考图卷积核2
Kernel[1] = np.array([[1, -1, 1],
                      [-1, 1, -1],
                      [1, -1, 1]])
# 参考图卷积核3
Kernel[2] = np.array([[-1, -1, 1],
                      [-1, 1, -1],
                      [1, -1, -1]])

#卷积 
stride = 1  # 步长
feature_map_h = 7  # 特征图的高
feature_map_w = 7  # 特征图的宽
feature_map = [0 for i in range(0, 3)]  # 初始化3个特征图
for i in range(0, 3):
    feature_map[i] = np.zeros((feature_map_h, feature_map_w))  # 初始化特征图
for h in range(feature_map_h):  # 向下滑动，得到卷积后的固定行
    for w in range(feature_map_w):  # 向右滑动，得到卷积后的固定行的列
        v_start = h * stride  # 滑动窗口的起始行（高）
        v_end = v_start + 3  # 滑动窗口的结束行（高）
        h_start = w * stride  # 滑动窗口的起始列（宽）
        h_end = h_start + 3  # 滑动窗口的结束列（宽）
        window = x[v_start:v_end, h_start:h_end]  # 从图切出一个滑动窗口
        for i in range(0, 3):
            feature_map[i][h, w] = np.divide(np.sum(np.multiply(window, Kernel[i][:, :])), 9)
print("feature_map:\n", np.around(feature_map, decimals=2))

#池化
pooling_stride = 2  # 步长
pooling_h = 4  # 特征图的高
pooling_w = 4  # 特征图的宽
feature_map_pad_0 = [[0 for i in range(0, 8)] for j in range(0, 8)]
for i in range(0, 3):  # 特征图 补 0 ，行 列 都要加 1 (因为上一层是奇数，池化窗口用的偶数)
    feature_map_pad_0[i] = np.pad(feature_map[i], ((0, 1), (0, 1)), 'constant', constant_values=(0, 0))
# print("feature_map_pad_0 0:\n", np.around(feature_map_pad_0[0], decimals=2))

pooling = [0 for i in range(0, 3)]
for i in range(0, 3):
    pooling[i] = np.zeros((pooling_h, pooling_w))  # 初始化特征图
for h in range(pooling_h):  # 向下滑动，得到卷积后的固定行
    for w in range(pooling_w):  # 向右滑动，得到卷积后的固定行的列
        v_start = h * pooling_stride  # 滑动窗口的起始行（高）
        v_end = v_start + 2  # 滑动窗口的结束行（高）
        h_start = w * pooling_stride  # 滑动窗口的起始列（宽）
        h_end = h_start + 2  # 滑动窗口的结束列（宽）
        for i in range(0, 3):
            pooling[i][h, w] = np.max(feature_map_pad_0[i][v_start:v_end, h_start:h_end])
print("pooling:\n", np.around(pooling[0], decimals=2))
print("pooling:\n", np.around(pooling[1], decimals=2))
print("pooling:\n", np.around(pooling[2], decimals=2))


#激活
def relu(x):
    return (abs(x) + x) / 2


relu_map_h = 7  # 特征图的高
relu_map_w = 7  # 特征图的宽
relu_map = [0 for i in range(0, 3)]  # 初始化3个特征图
for i in range(0, 3):
    relu_map[i] = np.zeros((relu_map_h, relu_map_w))  # 初始化特征图

for i in range(0, 3):
    relu_map[i] = relu(feature_map[i])

print("relu map :\n", np.around(relu_map[0], decimals=2))
print("relu map :\n", np.around(relu_map[1], decimals=2))
print("relu map :\n", np.around(relu_map[2], decimals=2))

运行结果：

x=
 [[-1 -1 -1 -1 -1 -1 -1 -1 -1]
 [-1  1 -1 -1 -1 -1 -1  1 -1]
 [-1 -1  1 -1 -1 -1  1 -1 -1]
 [-1 -1 -1  1 -1  1 -1 -1 -1]
 [-1 -1 -1 -1  1 -1 -1 -1 -1]
 [-1 -1 -1  1 -1  1 -1 -1 -1]
 [-1 -1  1 -1 -1 -1  1 -1 -1]
 [-1  1 -1 -1 -1 -1 -1  1 -1]
 [-1 -1 -1 -1 -1 -1 -1 -1 -1]]
feature_map:
 [[[ 0.78 -0.11  0.11  0.33  0.56 -0.11  0.33]
  [-0.11  1.   -0.11  0.33 -0.11  0.11 -0.11]
  [ 0.11 -0.11  1.   -0.33  0.11 -0.11  0.56]
  [ 0.33  0.33 -0.33  0.56 -0.33  0.33  0.33]
  [ 0.56 -0.11  0.11 -0.33  1.   -0.11  0.11]
  [-0.11  0.11 -0.11  0.33 -0.11  1.   -0.11]
  [ 0.33 -0.11  0.56  0.33  0.11 -0.11  0.78]]

 [[ 0.33 -0.56  0.11 -0.11  0.11 -0.56  0.33]
  [-0.56  0.56 -0.56  0.33 -0.56  0.56 -0.56]
  [ 0.11 -0.56  0.56 -0.78  0.56 -0.56  0.11]
  [-0.11  0.33 -0.78  1.   -0.78  0.33 -0.11]
  [ 0.11 -0.56  0.56 -0.78  0.56 -0.56  0.11]
  [-0.56  0.56 -0.56  0.33 -0.56  0.56 -0.56]
  [ 0.33 -0.56  0.11 -0.11  0.11 -0.56  0.33]]

 [[ 0.33 -0.11  0.56  0.33  0.11 -0.11  0.78]
  [-0.11  0.11 -0.11  0.33 -0.11  1.   -0.11]
  [ 0.56 -0.11  0.11 -0.33  1.   -0.11  0.11]
  [ 0.33  0.33 -0.33  0.56 -0.33  0.33  0.33]
  [ 0.11 -0.11  1.   -0.33  0.11 -0.11  0.56]
  [-0.11  1.   -0.11  0.33 -0.11  0.11 -0.11]
  [ 0.78 -0.11  0.11  0.33  0.56 -0.11  0.33]]]
pooling:
 [[1.   0.33 0.56 0.33]
 [0.33 1.   0.33 0.56]
 [0.56 0.33 1.   0.11]
 [0.33 0.56 0.11 0.78]]
pooling:
 [[0.56 0.33 0.56 0.33]
 [0.33 1.   0.56 0.11]
 [0.56 0.56 0.56 0.11]
 [0.33 0.11 0.11 0.33]]
pooling:
 [[0.33 0.56 1.   0.78]
 [0.56 0.56 1.   0.33]
 [1.   1.   0.11 0.56]
 [0.78 0.33 0.56 0.33]]
relu map :
 [[0.78 0.   0.11 0.33 0.56 0.   0.33]
 [0.   1.   0.   0.33 0.   0.11 0.  ]
 [0.11 0.   1.   0.   0.11 0.   0.56]
 [0.33 0.33 0.   0.56 0.   0.33 0.33]
 [0.56 0.   0.11 0.   1.   0.   0.11]
 [0.   0.11 0.   0.33 0.   1.   0.  ]
 [0.33 0.   0.56 0.33 0.11 0.   0.78]]
relu map :
 [[0.33 0.   0.11 0.   0.11 0.   0.33]
 [0.   0.56 0.   0.33 0.   0.56 0.  ]
 [0.11 0.   0.56 0.   0.56 0.   0.11]
 [0.   0.33 0.   1.   0.   0.33 0.  ]
 [0.11 0.   0.56 0.   0.56 0.   0.11]
 [0.   0.56 0.   0.33 0.   0.56 0.  ]
 [0.33 0.   0.11 0.   0.11 0.   0.33]]
relu map :
 [[0.33 0.   0.56 0.33 0.11 0.   0.78]
 [0.   0.11 0.   0.33 0.   1.   0.  ]
 [0.56 0.   0.11 0.   1.   0.   0.11]
 [0.33 0.33 0.   0.56 0.   0.33 0.33]
 [0.11 0.   1.   0.   0.11 0.   0.56]
 [0.   1.   0.   0.33 0.   0.11 0.  ]
 [0.78 0.   0.11 0.33 0.56 0.   0.33]]

Process finished with exit code -1073741749 (0xC000004B)

2. Pytorch版本：调用函数实现卷积-池化-激活

调用框架自带算子实现，对比自定义算子

import torch
import torch.nn as nn

x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)

print("卷积")
conv1 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv1.weight.data = torch.Tensor([[[[1, -1, -1],
                                    [-1, 1, -1],
                                    [-1, -1, 1]]
                                   ]])
conv2 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv2.weight.data = torch.Tensor([[[[1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, 1]]
                                   ]])
conv3 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv3.weight.data = torch.Tensor([[[[-1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, -1]]
                                   ]])

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)

print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)

print("池化")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # Pooling
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))  # pad 0 , Left Right Up Down

feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)

print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)

print("激活")
activation_function = nn.ReLU()

feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)

torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
          [-1.,  1., -1., -1., -1., -1., -1.,  1., -1.],
          [-1., -1.,  1., -1., -1., -1.,  1., -1., -1.],
          [-1., -1., -1.,  1., -1.,  1., -1., -1., -1.],
          [-1., -1., -1., -1.,  1., -1., -1., -1., -1.],
          [-1., -1., -1.,  1., -1.,  1., -1., -1., -1.],
          [-1., -1.,  1., -1., -1., -1.,  1., -1., -1.],
          [-1.,  1., -1., -1., -1., -1., -1.,  1., -1.],
          [-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
卷积
tensor([[[[ 0.7787, -0.1102,  0.1120,  0.3343,  0.5565, -0.1102,  0.3343],
          [-0.1102,  1.0009, -0.1102,  0.3343, -0.1102,  0.1120, -0.1102],
          [ 0.1120, -0.1102,  1.0009, -0.3324,  0.1120, -0.1102,  0.5565],
          [ 0.3343,  0.3343, -0.3324,  0.5565, -0.3324,  0.3343,  0.3343],
          [ 0.5565, -0.1102,  0.1120, -0.3324,  1.0009, -0.1102,  0.1120],
          [-0.1102,  0.1120, -0.1102,  0.3343, -0.1102,  1.0009, -0.1102],
          [ 0.3343, -0.1102,  0.5565,  0.3343,  0.1120, -0.1102,  0.7787]]]],
       grad_fn=<DivBackward0>)
tensor([[[[ 0.3359, -0.5530,  0.1137, -0.1085,  0.1137, -0.5530,  0.3359],
          [-0.5530,  0.5581, -0.5530,  0.3359, -0.5530,  0.5581, -0.5530],
          [ 0.1137, -0.5530,  0.5581, -0.7752,  0.5581, -0.5530,  0.1137],
          [-0.1085,  0.3359, -0.7752,  1.0026, -0.7752,  0.3359, -0.1085],
          [ 0.1137, -0.5530,  0.5581, -0.7752,  0.5581, -0.5530,  0.1137],
          [-0.5530,  0.5581, -0.5530,  0.3359, -0.5530,  0.5581, -0.5530],
          [ 0.3359, -0.5530,  0.1137, -0.1085,  0.1137, -0.5530,  0.3359]]]],
       grad_fn=<DivBackward0>)
tensor([[[[ 0.2992, -0.1452,  0.5214,  0.2992,  0.0770, -0.1452,  0.7436],
          [-0.1452,  0.0770, -0.1452,  0.2992, -0.1452,  0.9659, -0.1452],
          [ 0.5214, -0.1452,  0.0770, -0.3675,  0.9659, -0.1452,  0.0770],
          [ 0.2992,  0.2992, -0.3675,  0.5214, -0.3675,  0.2992,  0.2992],
          [ 0.0770, -0.1452,  0.9659, -0.3675,  0.0770, -0.1452,  0.5214],
          [-0.1452,  0.9659, -0.1452,  0.2992, -0.1452,  0.0770, -0.1452],
          [ 0.7436, -0.1452,  0.0770,  0.2992,  0.5214, -0.1452,  0.2992]]]],
       grad_fn=<DivBackward0>)
池化
torch.Size([1, 1, 4, 4])
tensor([[[[1.0009, 0.3343, 0.5565, 0.3343],
          [0.3343, 1.0009, 0.3343, 0.5565],
          [0.5565, 0.3343, 1.0009, 0.1120],
          [0.3343, 0.5565, 0.1120, 0.7787]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5581, 0.3359, 0.5581, 0.3359],
          [0.3359, 1.0026, 0.5581, 0.1137],
          [0.5581, 0.5581, 0.5581, 0.1137],
          [0.3359, 0.1137, 0.1137, 0.3359]]]], grad_fn=<DivBackward0>)
tensor([[[[0.2992, 0.5214, 0.9659, 0.7436],
          [0.5214, 0.5214, 0.9659, 0.2992],
          [0.9659, 0.9659, 0.0770, 0.5214],
          [0.7436, 0.2992, 0.5214, 0.2992]]]], grad_fn=<DivBackward0>)
激活
tensor([[[[0.7787, 0.0000, 0.1120, 0.3343, 0.5565, 0.0000, 0.3343],
          [0.0000, 1.0009, 0.0000, 0.3343, 0.0000, 0.1120, 0.0000],
          [0.1120, 0.0000, 1.0009, 0.0000, 0.1120, 0.0000, 0.5565],
          [0.3343, 0.3343, 0.0000, 0.5565, 0.0000, 0.3343, 0.3343],
          [0.5565, 0.0000, 0.1120, 0.0000, 1.0009, 0.0000, 0.1120],
          [0.0000, 0.1120, 0.0000, 0.3343, 0.0000, 1.0009, 0.0000],
          [0.3343, 0.0000, 0.5565, 0.3343, 0.1120, 0.0000, 0.7787]]]],
       grad_fn=<DivBackward0>)
tensor([[[[0.3359, 0.0000, 0.1137, 0.0000, 0.1137, 0.0000, 0.3359],
          [0.0000, 0.5581, 0.0000, 0.3359, 0.0000, 0.5581, 0.0000],
          [0.1137, 0.0000, 0.5581, 0.0000, 0.5581, 0.0000, 0.1137],
          [0.0000, 0.3359, 0.0000, 1.0026, 0.0000, 0.3359, 0.0000],
          [0.1137, 0.0000, 0.5581, 0.0000, 0.5581, 0.0000, 0.1137],
          [0.0000, 0.5581, 0.0000, 0.3359, 0.0000, 0.5581, 0.0000],
          [0.3359, 0.0000, 0.1137, 0.0000, 0.1137, 0.0000, 0.3359]]]],
       grad_fn=<DivBackward0>)
tensor([[[[0.2992, 0.0000, 0.5214, 0.2992, 0.0770, 0.0000, 0.7436],
          [0.0000, 0.0770, 0.0000, 0.2992, 0.0000, 0.9659, 0.0000],
          [0.5214, 0.0000, 0.0770, 0.0000, 0.9659, 0.0000, 0.0770],
          [0.2992, 0.2992, 0.0000, 0.5214, 0.0000, 0.2992, 0.2992],
          [0.0770, 0.0000, 0.9659, 0.0000, 0.0770, 0.0000, 0.5214],
          [0.0000, 0.9659, 0.0000, 0.2992, 0.0000, 0.0770, 0.0000],
          [0.7436, 0.0000, 0.0770, 0.2992, 0.5214, 0.0000, 0.2992]]]],
       grad_fn=<DivBackward0>)

Process finished with exit code -1073741749 (0xC000004B)

3. 可视化：了解数字与图像之间的关系

可视化卷积核和特征图

import torch
import torch.nn as nn
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号 #有中文出现的情况，需要u'内容
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)
img = x.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.imshow(img, cmap='gray')
plt.title('原图')
plt.show()

print("卷积")
conv1 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv1.weight.data = torch.Tensor([[[[1, -1, -1],
                                    [-1, 1, -1],
                                    [-1, -1, 1]]
                                   ]])
img = conv1.weight.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 1)
plt.imshow(img, cmap='gray')
plt.title('Kernel 1')
conv2 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv2.weight.data = torch.Tensor([[[[1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, 1]]
                                   ]])
img = conv2.weight.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 5)
plt.imshow(img, cmap='gray')
plt.title('Kernel 2')
conv3 = nn.Conv2d(1, 1, (3, 3), 1)  # in_channel , out_channel , kennel_size , stride
conv3.weight.data = torch.Tensor([[[[-1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, -1]]
                                   ]])
img = conv3.weight.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 9)
plt.imshow(img, cmap='gray')
plt.title('Kernel 3')

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)

print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)

img = feature_map1.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 2)
plt.imshow(img, cmap='gray')
plt.title('卷积后的特征图1')
img2 = feature_map2.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 6)
plt.imshow(img, cmap='gray')
plt.title('卷积后的特征图2')
img3 = feature_map3.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 10)
plt.imshow(img, cmap='gray')
plt.title('卷积后的特征图3')

print("池化")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # Pooling
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))  # pad 0 , Left Right Up Down

feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)

print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)
img = feature_pool_1.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 3)
plt.imshow(img, cmap='gray')
plt.title('卷积池化后的特征图1')
img2 = feature_pool_2.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 7)
plt.imshow(img, cmap='gray')
plt.title('卷积池化后的特征图2')
img3 = feature_pool_3.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 11)
plt.imshow(img, cmap='gray')
plt.title('卷积池化后的特征图3')

print("激活")
activation_function = nn.ReLU()

feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
img = feature_relu1.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 4)
plt.imshow(img, cmap='gray')
plt.title('卷积 + relu 后的特征图1')
img2 = feature_relu2.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 8)
plt.imshow(img, cmap='gray')
plt.title('卷积 + relu 后的特征图2')
img3 = feature_relu3.data.squeeze().numpy()  # 将输出转换为图片的格式
plt.subplot(3, 4, 12)
plt.imshow(img, cmap='gray')
plt.title('卷积 + relu 后的特征图3')
plt.show()

torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
          [-1.,  1., -1., -1., -1., -1., -1.,  1., -1.],
          [-1., -1.,  1., -1., -1., -1.,  1., -1., -1.],
          [-1., -1., -1.,  1., -1.,  1., -1., -1., -1.],
          [-1., -1., -1., -1.,  1., -1., -1., -1., -1.],
          [-1., -1., -1.,  1., -1.,  1., -1., -1., -1.],
          [-1., -1.,  1., -1., -1., -1.,  1., -1., -1.],
          [-1.,  1., -1., -1., -1., -1., -1.,  1., -1.],
          [-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
卷积
tensor([[[[ 0.7974, -0.0915,  0.1308,  0.3530,  0.5752, -0.0915,  0.3530],
          [-0.0915,  1.0197, -0.0915,  0.3530, -0.0915,  0.1308, -0.0915],
          [ 0.1308, -0.0915,  1.0197, -0.3137,  0.1308, -0.0915,  0.5752],
          [ 0.3530,  0.3530, -0.3137,  0.5752, -0.3137,  0.3530,  0.3530],
          [ 0.5752, -0.0915,  0.1308, -0.3137,  1.0197, -0.0915,  0.1308],
          [-0.0915,  0.1308, -0.0915,  0.3530, -0.0915,  1.0197, -0.0915],
          [ 0.3530, -0.0915,  0.5752,  0.3530,  0.1308, -0.0915,  0.7974]]]],
       grad_fn=<DivBackward0>)
tensor([[[[ 0.3236, -0.5652,  0.1014, -0.1208,  0.1014, -0.5652,  0.3236],
          [-0.5652,  0.5459, -0.5652,  0.3236, -0.5652,  0.5459, -0.5652],
          [ 0.1014, -0.5652,  0.5459, -0.7875,  0.5459, -0.5652,  0.1014],
          [-0.1208,  0.3236, -0.7875,  0.9903, -0.7875,  0.3236, -0.1208],
          [ 0.1014, -0.5652,  0.5459, -0.7875,  0.5459, -0.5652,  0.1014],
          [-0.5652,  0.5459, -0.5652,  0.3236, -0.5652,  0.5459, -0.5652],
          [ 0.3236, -0.5652,  0.1014, -0.1208,  0.1014, -0.5652,  0.3236]]]],
       grad_fn=<DivBackward0>)
tensor([[[[ 0.3383, -0.1062,  0.5605,  0.3383,  0.1160, -0.1062,  0.7827],
          [-0.1062,  0.1160, -0.1062,  0.3383, -0.1062,  1.0049, -0.1062],
          [ 0.5605, -0.1062,  0.1160, -0.3284,  1.0049, -0.1062,  0.1160],
          [ 0.3383,  0.3383, -0.3284,  0.5605, -0.3284,  0.3383,  0.3383],
          [ 0.1160, -0.1062,  1.0049, -0.3284,  0.1160, -0.1062,  0.5605],
          [-0.1062,  1.0049, -0.1062,  0.3383, -0.1062,  0.1160, -0.1062],
          [ 0.7827, -0.1062,  0.1160,  0.3383,  0.5605, -0.1062,  0.3383]]]],
       grad_fn=<DivBackward0>)
池化
torch.Size([1, 1, 4, 4])
tensor([[[[1.0197, 0.3530, 0.5752, 0.3530],
          [0.3530, 1.0197, 0.3530, 0.5752],
          [0.5752, 0.3530, 1.0197, 0.1308],
          [0.3530, 0.5752, 0.1308, 0.7974]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5459, 0.3236, 0.5459, 0.3236],
          [0.3236, 0.9903, 0.5459, 0.1014],
          [0.5459, 0.5459, 0.5459, 0.1014],
          [0.3236, 0.1014, 0.1014, 0.3236]]]], grad_fn=<DivBackward0>)
tensor([[[[0.3383, 0.5605, 1.0049, 0.7827],
          [0.5605, 0.5605, 1.0049, 0.3383],
          [1.0049, 1.0049, 0.1160, 0.5605],
          [0.7827, 0.3383, 0.5605, 0.3383]]]], grad_fn=<DivBackward0>)
激活
tensor([[[[0.7974, 0.0000, 0.1308, 0.3530, 0.5752, 0.0000, 0.3530],
          [0.0000, 1.0197, 0.0000, 0.3530, 0.0000, 0.1308, 0.0000],
          [0.1308, 0.0000, 1.0197, 0.0000, 0.1308, 0.0000, 0.5752],
          [0.3530, 0.3530, 0.0000, 0.5752, 0.0000, 0.3530, 0.3530],
          [0.5752, 0.0000, 0.1308, 0.0000, 1.0197, 0.0000, 0.1308],
          [0.0000, 0.1308, 0.0000, 0.3530, 0.0000, 1.0197, 0.0000],
          [0.3530, 0.0000, 0.5752, 0.3530, 0.1308, 0.0000, 0.7974]]]],
       grad_fn=<DivBackward0>)
tensor([[[[0.3236, 0.0000, 0.1014, 0.0000, 0.1014, 0.0000, 0.3236],
          [0.0000, 0.5459, 0.0000, 0.3236, 0.0000, 0.5459, 0.0000],
          [0.1014, 0.0000, 0.5459, 0.0000, 0.5459, 0.0000, 0.1014],
          [0.0000, 0.3236, 0.0000, 0.9903, 0.0000, 0.3236, 0.0000],
          [0.1014, 0.0000, 0.5459, 0.0000, 0.5459, 0.0000, 0.1014],
          [0.0000, 0.5459, 0.0000, 0.3236, 0.0000, 0.5459, 0.0000],
          [0.3236, 0.0000, 0.1014, 0.0000, 0.1014, 0.0000, 0.3236]]]],
       grad_fn=<DivBackward0>)
tensor([[[[0.3383, 0.0000, 0.5605, 0.3383, 0.1160, 0.0000, 0.7827],
          [0.0000, 0.1160, 0.0000, 0.3383, 0.0000, 1.0049, 0.0000],
          [0.5605, 0.0000, 0.1160, 0.0000, 1.0049, 0.0000, 0.1160],
          [0.3383, 0.3383, 0.0000, 0.5605, 0.0000, 0.3383, 0.3383],
          [0.1160, 0.0000, 1.0049, 0.0000, 0.1160, 0.0000, 0.5605],
          [0.0000, 1.0049, 0.0000, 0.3383, 0.0000, 0.1160, 0.0000],
          [0.7827, 0.0000, 0.1160, 0.3383, 0.5605, 0.0000, 0.3383]]]],
       grad_fn=<DivBackward0>)

Process finished with exit code -1073741749 (0xC000004B)

二、基于CNN的XO识别

1. 数据集

QQ群内下载的数据集没有分测试集和训练集。

共2000张图片，X、O各1000张。

从X、O文件夹，分别取出150张作为测试集。

文件夹train_data：放置训练集 1700张图片

文件夹test_data：放置测试集 300张图片

2. 构建模型

import torch
import torch.nn as nn
 
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)
 
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)
 
    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 27 * 27 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

3. 训练模型

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim



transforms = transforms.Compose([
    transforms.ToTensor(),  # 把图片进行归一化，并把数据转换成Tensor类型
    transforms.Grayscale(1)  # 把图片 转为灰度图
])

path = r'C:\Users\栖\Desktop\data_sm\test_data'
path_test = r'C:\Users\栖\Desktop\data_sm\train_data'

data_train = datasets.ImageFolder(path, transform=transforms)
data_test = datasets.ImageFolder(path_test, transform=transforms)

print("size of train_data:", len(data_train))
print("size of test_data:", len(data_test))

data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)
model = Net()

criterion = torch.nn.CrossEntropyLoss()  # 损失函数 交叉熵损失函数
optimizer = optim.SGD(model.parameters(), lr=0.1)  # 优化函数：随机梯度下降

epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(data_loader):
        images, label = data
        out = model(images)
        loss = criterion(out, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 10 == 0:
            print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('finished train')

# 保存模型
torch.save(model, 'model_name.pth')  # 保存的是模型， 不止是w和b权重值

4. 测试训练好的模型

# 读取模型
model_load = torch.load('model_name.pth')
# 读取一张图片 images[0]，测试
print("label[0] truth:\t", label[0])
x = images[0]
x = x.reshape([1, 1, 116, 116])
predicted = torch.max(model_load(x), 1)
print("label[0] predict:\t", predicted.indices)

img = images[0].data.squeeze().numpy()  # 将输出转换为图片的格式
plt.imshow(img, cmap='gray')
plt.show()

5. 计算模型的准确率

# 读取模型
model_load = Net()
model_load.load_state_dict(torch.load('model_name1.pth'))
correct = 0
total = 0
with torch.no_grad():  # 进行评测的时候网络不更新梯度
    for data in data_loader_test:  # 读取测试集
        images, labels = data
        outputs = model_load(images)
        _, predicted = torch.max(outputs.data, 1)  # 取出 最大值的索引 作为 分类结果
        total += labels.size(0)  # labels 的长度
        correct += (predicted == labels).sum().item()  # 预测正确的数目
print('Accuracy of the network on the  test images: %f %%' % (100. * correct / total))

6. 查看训练好的模型的特征图

import torch
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader

#  定义图像预处理过程(要与网络模型训练过程中的预处理过程一致)
transforms = transforms.Compose([
    transforms.ToTensor(),  # 把图片进行归一化，并把数据转换成Tensor类型
    transforms.Grayscale(1)  # 把图片 转为灰度图
])
path = r'C:\Users\栖\Desktop\data_sm\train_data'
data_train = datasets.ImageFolder(path, transform=transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
for i, data in enumerate(data_loader):
    images, labels = data
    print(images.shape)
    print(labels.shape)
    break


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channel , out_channel , kennel_size , stride
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channel , out_channel , kennel_size , stride

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # full connect 1
        self.fc2 = nn.Linear(1200, 64)  # full connect 2
        self.fc3 = nn.Linear(64, 2)  # full connect 3

    def forward(self, x):
        outputs = []
        x = self.conv1(x)
        outputs.append(x)
        x = self.relu(x)
        outputs.append(x)
        x = self.maxpool(x)
        outputs.append(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(-1, 27 * 27 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return outputs


# create model
model1 = Net()

# load model weights加载预训练权重
model_weight_path = "model_name1.pth"
model1.load_state_dict(torch.load(model_weight_path))

# 打印出模型的结构
print(model1)
x = images[0]
x = x.reshape([1, x.shape[0], x.shape[1], x.shape[2]])
# forward正向传播过程
out_put = model1(x)
for feature_map in out_put:
    im = np.squeeze(feature_map.detach().numpy())
    im = np.transpose(im, [1, 2, 0])
    print(im.shape)

    plt.figure()
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(im[:, :, i], cmap='gray')
    plt.show()

7. 查看训练好的模型的卷积核

import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
 
plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号 #有中文出现的情况，需要u'内容
#  定义图像预处理过程(要与网络模型训练过程中的预处理过程一致)
transforms = transforms.Compose([
    transforms.ToTensor(),  # 把图片进行归一化，并把数据转换成Tensor类型
    transforms.Grayscale(1)  # 把图片 转为灰度图
])
path = r'training_data_sm'
data_train = datasets.ImageFolder(path, transform=transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
for i, data in enumerate(data_loader):
    images, labels = data
    # print(images.shape)
    # print(labels.shape)
    break
 
 
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channel , out_channel , kennel_size , stride
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channel , out_channel , kennel_size , stride
 
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # full connect 1
        self.fc2 = nn.Linear(1200, 64)  # full connect 2
        self.fc3 = nn.Linear(64, 2)  # full connect 3
 
    def forward(self, x):
        outputs = []
        x = self.maxpool(self.relu(self.conv1(x)))
        # outputs.append(x)
        x = self.maxpool(self.relu(self.conv2(x)))
        outputs.append(x)
        x = x.view(-1, 27 * 27 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return outputs
 
 
# create model
model1 = Net()
 
# load model weights加载预训练权重
model_weight_path = "model_name1.pth"
model1.load_state_dict(torch.load(model_weight_path))
 
x = images[0]
 
# forward正向传播过程
out_put = model1(x)
 
weights_keys = model1.state_dict().keys()
for key in weights_keys:
    print("key :", key)
    # 卷积核通道排列顺序 [kernel_number, kernel_channel, kernel_height, kernel_width]
    if key == "conv1.weight":
        weight_t = model1.state_dict()[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, 0, :, :]  # 获取第一个卷积核的信息参数
        # show 9 kernel ,1 channel
        plt.figure()
 
        for i in range(9):
            ax = plt.subplot(3, 3, i + 1)  # 参数意义：3：图片绘制行数，5：绘制图片列数，i+1：图的索引
            plt.imshow(k[i, :, :], cmap='gray')
            title_name = 'kernel' + str(i) + ',channel1'
            plt.title(title_name)
        plt.show()
 
    if key == "conv2.weight":
        weight_t = model1.state_dict()[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, :, :, :]  # 获取第一个卷积核的信息参数
        print(k.shape)
        print(k)
 
        plt.figure()
        for c in range(9):
            channel = k[:, c, :, :]
            for i in range(5):
                ax = plt.subplot(2, 3, i + 1)  # 参数意义：3：图片绘制行数，5：绘制图片列数，i+1：图的索引
                plt.imshow(channel[i, :, :], cmap='gray')
                title_name = 'kernel' + str(i) + ',channel' + str(c)
                plt.title(title_name)
            plt.show()

8. 训练模型源代码

import torch
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
 
transforms = transforms.Compose([
    transforms.ToTensor(),  # 把图片进行归一化，并把数据转换成Tensor类型
    transforms.Grayscale(1)  # 把图片 转为灰度图
])
 
path = r'train_data'
path_test = r'test_data'
 
data_train = datasets.ImageFolder(path, transform=transforms)
data_test = datasets.ImageFolder(path_test, transform=transforms)
 
print("size of train_data:",len(data_train))
print("size of test_data:",len(data_test))
 
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)
 
for i, data in enumerate(data_loader):
    images, labels = data
    print(images.shape)
    print(labels.shape)
    break
 
for i, data in enumerate(data_loader_test):
    images, labels = data
    print(images.shape)
    print(labels.shape)
    break
 
 
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channel , out_channel , kennel_size , stride
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channel , out_channel , kennel_size , stride
 
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # full connect 1
        self.fc2 = nn.Linear(1200, 64)  # full connect 2
        self.fc3 = nn.Linear(64, 2)  # full connect 3
 
    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 27 * 27 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
 
 
model = Net()
 
criterion = torch.nn.CrossEntropyLoss()  # 损失函数 交叉熵损失函数
optimizer = optim.SGD(model.parameters(), lr=0.1)  # 优化函数：随机梯度下降
 
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(data_loader):
        images, label = data
        out = model(images)
        loss = criterion(out, label)
 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
 
        running_loss += loss.item()
        if (i + 1) % 10 == 0:
            print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
 
print('finished train')
 
# 保存模型 torch.save(model.state_dict(), model_path)
torch.save(model.state_dict(), 'model_name1.pth')  # 保存的是模型， 不止是w和b权重值
 
# 读取模型
model = torch.load('model_name1.pth')

9. 测试模型源代码

# https://blog.csdn.net/qq_53345829/article/details/124308515
import torch
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
 
transforms = transforms.Compose([
    transforms.ToTensor(),  # 把图片进行归一化，并把数据转换成Tensor类型
    transforms.Grayscale(1)  # 把图片 转为灰度图
])
 
path = r'train_data'
path_test = r'test_data'
 
data_train = datasets.ImageFolder(path, transform=transforms)
data_test = datasets.ImageFolder(path_test, transform=transforms)
 
print("size of train_data:", len(data_train))
print("size of test_data:", len(data_test))
 
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)
print(len(data_loader))
print(len(data_loader_test))
 
 
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channel , out_channel , kennel_size , stride
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channel , out_channel , kennel_size , stride
 
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # full connect 1
        self.fc2 = nn.Linear(1200, 64)  # full connect 2
        self.fc3 = nn.Linear(64, 2)  # full connect 3
 
    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 27 * 27 * 5)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
 
# 读取模型
model = Net()
model.load_state_dict(torch.load('model_name1.pth', map_location='cpu')) # 导入网络的参数
 
# model_load = torch.load('model_name1.pth')
# https://blog.csdn.net/qq_41360787/article/details/104332706
 
correct = 0
total = 0
with torch.no_grad():  # 进行评测的时候网络不更新梯度
    for data in data_loader_test:  # 读取测试集
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # 取出 最大值的索引 作为 分类结果
        total += labels.size(0)  # labels 的长度
        correct += (predicted == labels).sum().item()  # 预测正确的数目
print('Accuracy of the network on the  test images: %f %%' % (100. * correct / total))
 
#  "_," 的解释 https://blog.csdn.net/weixin_48249563/article/details/111387501

总结：本次作业学会不同版本实现卷积-池化-激活和实现图片数据集的识别基于CNN的XO识别中间提取并可视化了一些训练过程更好地理解了图片卷积后的尺寸变化在CNN中神经网络的层次和结构都是不固定的可以调整的可根据训练数据集的不同，去修改全连接层或者参数。

ref:

https://blog.csdn.net/qq_38975453/article/details/127181162

卷积-池化-激活_HBU_David的博客-CSDN博客

CNN实现XO识别_HBU_David的博客-CSDN博客