NNDL 作业6：基于CNN的XO识别

不是蒋承翰

于 2022-10-22 23:30:06 发布

阅读量354

点赞数 4

本文链接：https://blog.csdn.net/m0_57215376/article/details/127435207

版权

一、实现卷积-池化-激活

1. Numpy版本：手工实现卷积-池化-激活

自定义卷积算子、池化算子实现

import numpy as np
import matplotlib.pyplot as plt

# 9x9 input image of an "X": +1 marks a stroke pixel, -1 the background.
x = np.array([
    [-1, -1, -1, -1, -1, -1, -1, -1, -1],
    [-1,  1, -1, -1, -1, -1, -1,  1, -1],
    [-1, -1,  1, -1, -1, -1,  1, -1, -1],
    [-1, -1, -1,  1, -1,  1, -1, -1, -1],
    [-1, -1, -1, -1,  1, -1, -1, -1, -1],
    [-1, -1, -1,  1, -1,  1, -1, -1, -1],
    [-1, -1,  1, -1, -1, -1,  1, -1, -1],
    [-1,  1, -1, -1, -1, -1, -1,  1, -1],
    [-1, -1, -1, -1, -1, -1, -1, -1, -1],
])
print("x=\n", x)
plt.imshow(x, cmap='gray')
plt.title('Original')
plt.show()

# The three 3x3 convolution kernels, in order: main diagonal, full "X",
# anti-diagonal.
Kernel = [
    np.array([[1, -1, -1],
              [-1, 1, -1],
              [-1, -1, 1]]),
    np.array([[1, -1, 1],
              [-1, 1, -1],
              [1, -1, 1]]),
    np.array([[-1, -1, 1],
              [-1, 1, -1],
              [1, -1, -1]]),
]

# --------------- Convolution ---------------
stride = 1  # step of the sliding window, in pixels
kernel_h, kernel_w = Kernel[0].shape
# Size of an unpadded ("valid") convolution output, derived from the input
# rather than hard-coded: (9 - 3) // 1 + 1 = 7 for the arrays defined above.
feature_map_h = (x.shape[0] - kernel_h) // stride + 1  # feature-map height
feature_map_w = (x.shape[1] - kernel_w) // stride + 1  # feature-map width
feature_map = [np.zeros((feature_map_h, feature_map_w)) for _ in Kernel]
for h in range(feature_map_h):  # slide down: row of the output cell
    for w in range(feature_map_w):  # slide right: column of the output cell
        v_start = h * stride  # first input row covered by the window
        h_start = w * stride  # first input column covered by the window
        window = x[v_start:v_start + kernel_h, h_start:h_start + kernel_w]
        for i, kernel in enumerate(Kernel):
            # Mean of the element-wise product, so a perfect match scores 1.
            feature_map[i][h, w] = np.sum(window * kernel) / kernel.size

# Round once and reuse the result for both the printout and the plots.
feature_map_rounded = np.around(feature_map, decimals=2)
print("feature_map:\n", feature_map_rounded)
for i, rounded in enumerate(feature_map_rounded):
    plt.subplot(1, len(Kernel), i + 1)
    plt.imshow(rounded, cmap='gray')
    plt.title('feature map-' + str(i + 1))
plt.show()


# --------------- Pooling ---------------
pooling_stride = 2  # step of the pooling window
pooling_size = 2  # side length of the (square) pooling window
# Append one row and one column of zeros to every feature map: the maps have
# an odd side length while the pooling window is even.
# NOTE: because the padding value is 0, a border window whose real values are
# all negative pools to 0 rather than to its largest real value.
feature_map_pad_0 = [
    np.pad(fm, ((0, 1), (0, 1)), 'constant', constant_values=0)
    for fm in feature_map
]

# Pooled size follows from the padded maps (8 // 2 = 4 for 7x7 feature maps).
pooling_h = feature_map_pad_0[0].shape[0] // pooling_stride  # pooled height
pooling_w = feature_map_pad_0[0].shape[1] // pooling_stride  # pooled width
pooling = [np.zeros((pooling_h, pooling_w)) for _ in feature_map_pad_0]
for h in range(pooling_h):  # slide down: row of the pooled cell
    for w in range(pooling_w):  # slide right: column of the pooled cell
        v_start = h * pooling_stride  # first padded row covered by the window
        h_start = w * pooling_stride  # first padded column covered by the window
        for i, padded in enumerate(feature_map_pad_0):
            pooling[i][h, w] = np.max(
                padded[v_start:v_start + pooling_size, h_start:h_start + pooling_size]
            )

for i, pooled in enumerate(pooling):
    print("pooling:\n", np.around(pooled, decimals=2))
    plt.subplot(1, len(pooling), i + 1)
    plt.imshow(np.around(pooled, decimals=2), cmap='gray')
    plt.title('pooling-' + str(i + 1))
plt.show()

# --------------- Activation ---------------
def relu(x):
    """Return the rectified linear unit of ``x``: ``x`` where positive, else 0.

    Relies on the identity ``max(x, 0) == (|x| + x) / 2``, so it works on
    anything that supports ``abs``, ``+`` and ``/`` (scalars as well as
    arrays). The true division means the result is floating point.
    """
    magnitude = abs(x)
    return (magnitude + x) / 2


relu_map_h = 7  # feature-map height (kept for reference; not read in this snippet)
relu_map_w = 7  # feature-map width (kept for reference; not read in this snippet)

# relu() is element-wise, so each activated map has the shape of its source
# feature map and no pre-allocation is needed.
relu_map = [relu(fm) for fm in feature_map]

plt.figure()
for i, activated in enumerate(relu_map):
    print("relu map :\n", np.around(activated, decimals=2))
    plt.subplot(1, len(relu_map), i + 1)
    plt.imshow(np.around(activated, decimals=2), cmap='gray')
    plt.title('relu map-' + str(i + 1))
plt.show()

x=
[[-1 -1 -1 -1 -1 -1 -1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 -1 -1 1 -1 -1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 -1 -1 -1 -1 -1 -1 -1]]
feature_map:
[[[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]]

[[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.11 0.33 -0.78 1. -0.78 0.33 -0.11]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]]

[[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]]]
pooling:
[[1. 0.33 0.56 0.33]
[0.33 1. 0.33 0.56]
[0.56 0.33 1. 0.11]
[0.33 0.56 0.11 0.78]]
pooling:
[[0.56 0.33 0.56 0.33]
[0.33 1. 0.56 0.11]
[0.56 0.56 0.56 0.11]
[0.33 0.11 0.11 0.33]]
pooling:
[[0.33 0.56 1. 0.78]
[0.56 0.56 1. 0.33]
[1. 1. 0.11 0.56]
[0.78 0.33 0.56 0.33]]
relu map :
[[0.78 0. 0.11 0.33 0.56 0. 0.33]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.33 0. 0.56 0.33 0.11 0. 0.78]]
relu map :
[[0.33 0. 0.11 0. 0.11 0. 0.33]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.33 0. 1. 0. 0.33 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.33 0. 0.11 0. 0.11 0. 0.33]]
relu map :
[[0.33 0. 0.56 0.33 0.11 0. 0.78]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.78 0. 0.11 0.33 0.56 0. 0.33]]

2. Pytorch版本：调用函数实现卷积-池化-激活

调用框架自带算子实现，对比自定义算子

import numpy as np
import torch
import torch.nn as nn

# 9x9 "X" image laid out as [batch, channel, height, width] for nn.Conv2d.
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)


def _fixed_conv(kernel):
    """Return a 1->1 channel, 3x3, stride-1 convolution with fixed weights.

    ``kernel`` is a 3x3 nested list. The bias is disabled on purpose:
    nn.Conv2d otherwise adds a randomly initialised bias to every output,
    which made the results drift away from the hand-written NumPy version
    (e.g. 0.7419 instead of 7/9 = 0.7778).
    """
    # in_channels, out_channels, kernel_size, stride
    conv = nn.Conv2d(1, 1, (3, 3), 1, bias=False)
    conv.weight.data = torch.Tensor([[kernel]])  # shape [1, 1, 3, 3]
    return conv


print("--------------- 卷积  ---------------")
conv1 = _fixed_conv([[1, -1, -1],
                     [-1, 1, -1],
                     [-1, -1, 1]])
conv2 = _fixed_conv([[1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, 1]])
conv3 = _fixed_conv([[-1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, -1]])

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)

# Divide by the 9 kernel elements so the values match the NumPy version,
# which averages the element-wise product.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)

print("--------------- 池化  ---------------")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # 2x2 window, stride 2
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))  # zero padding: left, right, top, bottom

feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)

print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)

print("--------------- 激活  ---------------")
activation_function = nn.ReLU()

feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)

torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., -1., -1., 1., -1., -1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
--------------- 卷积 ---------------
tensor([[[[ 0.7419, -0.1470, 0.0752, 0.2975, 0.5197, -0.1470, 0.2975],
[-0.1470, 0.9641, -0.1470, 0.2975, -0.1470, 0.0752, -0.1470],
[ 0.0752, -0.1470, 0.9641, -0.3692, 0.0752, -0.1470, 0.5197],
[ 0.2975, 0.2975, -0.3692, 0.5197, -0.3692, 0.2975, 0.2975],
[ 0.5197, -0.1470, 0.0752, -0.3692, 0.9641, -0.1470, 0.0752],
[-0.1470, 0.0752, -0.1470, 0.2975, -0.1470, 0.9641, -0.1470],
[ 0.2975, -0.1470, 0.5197, 0.2975, 0.0752, -0.1470, 0.7419]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3256, -0.5633, 0.1034, -0.1188, 0.1034, -0.5633, 0.3256],
[-0.5633, 0.5479, -0.5633, 0.3256, -0.5633, 0.5479, -0.5633],
[ 0.1034, -0.5633, 0.5479, -0.7855, 0.5479, -0.5633, 0.1034],
[-0.1188, 0.3256, -0.7855, 0.9923, -0.7855, 0.3256, -0.1188],
[ 0.1034, -0.5633, 0.5479, -0.7855, 0.5479, -0.5633, 0.1034],
[-0.5633, 0.5479, -0.5633, 0.3256, -0.5633, 0.5479, -0.5633],
[ 0.3256, -0.5633, 0.1034, -0.1188, 0.1034, -0.5633, 0.3256]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3637, -0.0808, 0.5859, 0.3637, 0.1414, -0.0808, 0.8081],
[-0.0808, 0.1414, -0.0808, 0.3637, -0.0808, 1.0303, -0.0808],
[ 0.5859, -0.0808, 0.1414, -0.3030, 1.0303, -0.0808, 0.1414],
[ 0.3637, 0.3637, -0.3030, 0.5859, -0.3030, 0.3637, 0.3637],
[ 0.1414, -0.0808, 1.0303, -0.3030, 0.1414, -0.0808, 0.5859],
[-0.0808, 1.0303, -0.0808, 0.3637, -0.0808, 0.1414, -0.0808],
[ 0.8081, -0.0808, 0.1414, 0.3637, 0.5859, -0.0808, 0.3637]]]],
grad_fn=<DivBackward0>)
--------------- 池化 ---------------
torch.Size([1, 1, 4, 4])
tensor([[[[0.9641, 0.2975, 0.5197, 0.2975],
[0.2975, 0.9641, 0.2975, 0.5197],
[0.5197, 0.2975, 0.9641, 0.0752],
[0.2975, 0.5197, 0.0752, 0.7419]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5479, 0.3256, 0.5479, 0.3256],
[0.3256, 0.9923, 0.5479, 0.1034],
[0.5479, 0.5479, 0.5479, 0.1034],
[0.3256, 0.1034, 0.1034, 0.3256]]]], grad_fn=<DivBackward0>)
tensor([[[[0.3637, 0.5859, 1.0303, 0.8081],
[0.5859, 0.5859, 1.0303, 0.3637],
[1.0303, 1.0303, 0.1414, 0.5859],
[0.8081, 0.3637, 0.5859, 0.3637]]]], grad_fn=<DivBackward0>)
--------------- 激活 ---------------
tensor([[[[0.7419, 0.0000, 0.0752, 0.2975, 0.5197, 0.0000, 0.2975],
[0.0000, 0.9641, 0.0000, 0.2975, 0.0000, 0.0752, 0.0000],
[0.0752, 0.0000, 0.9641, 0.0000, 0.0752, 0.0000, 0.5197],
[0.2975, 0.2975, 0.0000, 0.5197, 0.0000, 0.2975, 0.2975],
[0.5197, 0.0000, 0.0752, 0.0000, 0.9641, 0.0000, 0.0752],
[0.0000, 0.0752, 0.0000, 0.2975, 0.0000, 0.9641, 0.0000],
[0.2975, 0.0000, 0.5197, 0.2975, 0.0752, 0.0000, 0.7419]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3256, 0.0000, 0.1034, 0.0000, 0.1034, 0.0000, 0.3256],
[0.0000, 0.5479, 0.0000, 0.3256, 0.0000, 0.5479, 0.0000],
[0.1034, 0.0000, 0.5479, 0.0000, 0.5479, 0.0000, 0.1034],
[0.0000, 0.3256, 0.0000, 0.9923, 0.0000, 0.3256, 0.0000],
[0.1034, 0.0000, 0.5479, 0.0000, 0.5479, 0.0000, 0.1034],
[0.0000, 0.5479, 0.0000, 0.3256, 0.0000, 0.5479, 0.0000],
[0.3256, 0.0000, 0.1034, 0.0000, 0.1034, 0.0000, 0.3256]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3637, 0.0000, 0.5859, 0.3637, 0.1414, 0.0000, 0.8081],
[0.0000, 0.1414, 0.0000, 0.3637, 0.0000, 1.0303, 0.0000],
[0.5859, 0.0000, 0.1414, 0.0000, 1.0303, 0.0000, 0.1414],
[0.3637, 0.3637, 0.0000, 0.5859, 0.0000, 0.3637, 0.3637],
[0.1414, 0.0000, 1.0303, 0.0000, 0.1414, 0.0000, 0.5859],
[0.0000, 1.0303, 0.0000, 0.3637, 0.0000, 0.1414, 0.0000],
[0.8081, 0.0000, 0.1414, 0.3637, 0.5859, 0.0000, 0.3637]]]],
grad_fn=<DivBackward0>)

3. 可视化：了解数字与图像之间的关系

可视化卷积核和特征图

# https://blog.csdn.net/qq_26369907/article/details/88366147
# https://zhuanlan.zhihu.com/p/405242579
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese titles
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font


def _fixed_conv(kernel):
    """Return a 1->1 channel, 3x3, stride-1 convolution with fixed weights.

    ``kernel`` is a 3x3 nested list. The bias is disabled so the output is
    exactly the kernel response; nn.Conv2d would otherwise add a randomly
    initialised bias to every value.
    """
    # in_channels, out_channels, kernel_size, stride
    conv = nn.Conv2d(1, 1, (3, 3), 1, bias=False)
    conv.weight.data = torch.Tensor([[kernel]])  # shape [1, 1, 3, 3]
    return conv


def _show_row(tensors, titles):
    """Draw ``tensors`` side by side as grey-scale images in a new figure.

    Each tensor is squeezed to 2-D and converted to NumPy before plotting;
    ``titles`` supplies one caption per tensor.
    """
    plt.figure()
    for col, (tensor, title) in enumerate(zip(tensors, titles), start=1):
        plt.subplot(1, len(tensors), col)
        plt.imshow(tensor.data.squeeze().numpy(), cmap='gray')
        plt.title(title)
    plt.show()


# 9x9 "X" image laid out as [batch, channel, height, width] for nn.Conv2d.
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)
img = x.data.squeeze().numpy()  # drop the batch/channel axes to get a 2-D image
plt.imshow(img, cmap='gray')
plt.title('原图')
plt.show()

print("--------------- 卷积  ---------------")
conv1 = _fixed_conv([[1, -1, -1],
                     [-1, 1, -1],
                     [-1, -1, 1]])
conv2 = _fixed_conv([[1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, 1]])
conv3 = _fixed_conv([[-1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, -1]])
_show_row([conv1.weight, conv2.weight, conv3.weight],
          ['Kernel 1', 'Kernel 2', 'Kernel 3'])

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)

# Printed values are divided by the 9 kernel elements (mean response); the
# plots use the raw sums, which only changes the colour scale.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)
_show_row([feature_map1, feature_map2, feature_map3],
          ['卷积后的特征图1', '卷积后的特征图2', '卷积后的特征图3'])

print("--------------- 池化  ---------------")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # 2x2 window, stride 2
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))  # zero padding: left, right, top, bottom

feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)

print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)
_show_row([feature_pool_1, feature_pool_2, feature_pool_3],
          ['卷积池化后的特征图1', '卷积池化后的特征图2', '卷积池化后的特征图3'])

print("--------------- 激活  ---------------")
activation_function = nn.ReLU()

feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
_show_row([feature_relu1, feature_relu2, feature_relu3],
          ['卷积 + relu 后的特征图1', '卷积 + relu 后的特征图2', '卷积 + relu 后的特征图3'])

二、基于CNN的XO识别

1. 数据集

from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# Named `transform` (singular) so the imported `torchvision.transforms`
# module is not overwritten by the pipeline object built from it.
transform = transforms.Compose([
    transforms.ToTensor(),  # normalise the image and convert it to a Tensor
    transforms.Grayscale(1)  # reduce the image to a single grey channel
])

data_train = datasets.ImageFolder('train_data', transform)
data_test = datasets.ImageFolder('test_data', transform)

train_loader = DataLoader(data_train, batch_size=64, shuffle=True)
test_loader = DataLoader(data_test, batch_size=64, shuffle=True)

# Pull one batch from each loader to check the tensor shapes. After this loop
# `images` / `labels` hold the batch taken from `test_loader`.
for loader in (train_loader, test_loader):
    images, labels = next(iter(loader))
    print(images.shape)
    print(labels.shape)

torch.Size([64, 1, 116, 116])
torch.Size([64])
torch.Size([64, 1, 116, 116])
torch.Size([64])

可视化一下数据集

import matplotlib.pyplot as plt
# For each class label, show the first five matching images of the current
# batch in one row of a new figure.
for class_id, caption in ((0, 'circle '), (1, 'crosses ')):
    plt.figure()
    shown = 0
    for index, label in enumerate(labels):
        if label == class_id:
            plt.subplot(1, 5, shown + 1)
            plt.imshow(images[index].data.squeeze().numpy(), cmap='gray')
            plt.title(caption + str(shown + 1))
            shown += 1
            if shown == 5:
                break
    plt.show()

2. 构建模型

import torch.nn as nn


class CNN(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages and a three-layer classifier.

    For a 1x116x116 input the spatial size goes
    116 -> 114 (conv1) -> 57 (pool) -> 55 (conv2) -> 27 (pool),
    so ``fc1`` receives 27 * 27 * 5 features per sample. The network outputs
    two scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # 1 -> 9 channels, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 window, stride 2
        self.conv2 = nn.Conv2d(9, 5, 3)  # 9 -> 5 channels, 3x3 kernel
        self.relu = nn.ReLU()

        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the class scores, shape [N, 2], for a batch ``x`` of shape [N, 1, H, W].

        Raises:
            RuntimeError: if the input size does not yield 27 * 27 * 5
                features per sample.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample and keep the batch axis explicit. The previous
        # view(-1, 27 * 27 * 5) could silently merge several wrongly sized
        # samples into one row instead of failing.
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

关于全连接层的展平操作，第一次卷积后size应该为116-3+1=114，第一次池化后size为114/2=57，第二次卷积size=57-3+1=55，第二次池化55/2=27.5，这个27.5应该算27还是28呢？

老师在课上讲解的例子中，遇到奇数size时，应该向上取整，如下图所示

但要是在pytorch中，展平大小为28*28*5的话，就会报错：RuntimeError: shape '[-1, 3920]' is invalid for input of size 233280。这是因为 nn.MaxPool2d 默认 ceil_mode=False，输出尺寸向下取整（55/2=27.5 取 27），所以展平大小是27*27*5；如果想向上取整，需要显式设置 ceil_mode=True。

3. 训练模型

model = CNN()

loss = nn.CrossEntropyLoss()
opti = torch.optim.SGD(model.parameters(), lr=0.1)

epochs = 10
log_every = 10  # report the mean loss once per this many mini-batches

for epoch in range(epochs):
    total_loss = 0.0
    for i, data in enumerate(train_loader):
        images, labels = data
        out = model(images)
        one_loss = loss(out, labels)
        opti.zero_grad()
        one_loss.backward()
        opti.step()
        # .item() yields a plain float, so the running sum does not keep
        # loss tensors alive between steps.
        total_loss += one_loss.item()
        if (i + 1) % log_every == 0:
            # Average over the batches actually accumulated. Dividing by 100
            # here made the printed loss ten times too small.
            print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, total_loss / log_every))
            total_loss = 0.0

print('finished train')

# Save the whole model object (not only the weights and biases); the later
# snippets load it back with torch.load('model.pth').
torch.save(model, 'model.pth')

[1 10] loss: 0.069
[1 20] loss: 0.069
[2 10] loss: 0.069
[2 20] loss: 0.069
[3 10] loss: 0.068
[3 20] loss: 0.067
[4 10] loss: 0.057
[4 20] loss: 0.068
[5 10] loss: 0.057
[5 20] loss: 0.046
[6 10] loss: 0.020
[6 20] loss: 0.011
[7 10] loss: 0.004
[7 20] loss: 0.004
[8 10] loss: 0.002
[8 20] loss: 0.002
[9 10] loss: 0.002
[9 20] loss: 0.001
[10 10] loss: 0.001
[10 20] loss: 0.000
finished train

4. 模型测试

import matplotlib.pyplot as plt

# Load the full model object written by torch.save(model, 'model.pth').
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects a pickled nn.Module - confirm against the
# installed version.
model_load = torch.load('model.pth')
model_load.eval()  # inference mode for layers that behave differently in training
# Classify one image, images[0], and compare with its label.
print("labels[0] truth:\t", labels[0])
x = images[0].unsqueeze(0)  # add the batch axis: [C, H, W] -> [1, C, H, W]
with torch.no_grad():  # no gradients are needed for a prediction
    predicted = torch.max(model_load(x), 1)
print("labels[0] predict:\t", predicted.indices)

img = images[0].data.squeeze().numpy()  # 2-D array that imshow can draw
plt.imshow(img, cmap='gray')
plt.show()

labels[0] truth: tensor(0)
labels[0] predict: tensor([0])

老师给的代码 x = images[0] 会报错RuntimeError: Expected 4-dimensional input for 4-dimensional weight [9, 1, 3, 3], but got 3-dimensional input of size [1, 116, 116] instead，

原因是维度不匹配：nn.Conv2d 需要形如 [N, C, H, W] 的 4 维输入，而单张图片 images[0] 只有 [C, H, W] 这 3 维，所以要用 x = images[0].unsqueeze(0) 在最前面补上 batch 维。

5. 计算模型的准确率

# Load the saved model.
model_load = torch.load('model.pth')

correct = 0  # number of correctly classified test images
total = 0  # number of test images seen
with torch.no_grad():  # evaluation only: no gradient bookkeeping
    for images, labels in test_loader:
        outputs = model_load(images)
        # Index of the largest score in each row is the predicted class.
        predicted = torch.max(outputs.data, 1).indices
        total += len(labels)
        correct += (predicted == labels).sum().item()
accuracy = 100. * correct / total
print('Accuracy of the network on the  test images: %f %%' % accuracy)

Accuracy of the network on the test images: 99.333333 %

6. 查看训练好的模型特征图

import torch.optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# Named `transform` (singular) so the imported `torchvision.transforms`
# module is not overwritten by the pipeline object built from it.
transform = transforms.Compose([
    transforms.ToTensor(),  # normalise the image and convert it to a Tensor
    transforms.Grayscale(1)  # reduce the image to a single grey channel
])

data_train = datasets.ImageFolder('train_data', transform)
data_test = datasets.ImageFolder('test_data', transform)

train_loader = DataLoader(data_train, batch_size=64, shuffle=True)
test_loader = DataLoader(data_test, batch_size=64, shuffle=True)

# Pull one batch from each loader to check the tensor shapes. After this loop
# `images` / `labels` hold the batch taken from `test_loader`.
for loader in (train_loader, test_loader):
    images, labels = next(iter(loader))
    print(images.shape)
    print(labels.shape)

import torch.nn as nn


class CNN(nn.Module):
    """Feature-map probe with the same layers as the trained classifier.

    ``forward`` returns the intermediate results of the first stage instead
    of class scores. The second convolution and the fully connected layers
    are still declared, so the module has the same parameters as the
    classifier, but ``forward`` no longer runs them: their result was
    computed and then thrown away.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # 1 -> 9 channels, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 window, stride 2
        self.conv2 = nn.Conv2d(9, 5, 3)  # 9 -> 5 channels, 3x3 kernel
        self.relu = nn.ReLU()

        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return ``[conv1(x), relu(conv1(x)), pool(relu(conv1(x)))]``.

        ``x`` is a batch shaped [N, 1, H, W]. For a 116x116 input the three
        maps are [N, 9, 114, 114], [N, 9, 114, 114] and [N, 9, 57, 57].
        """
        after_conv = self.conv1(x)
        after_relu = self.relu(after_conv)
        after_pool = self.pool(after_relu)
        return [after_conv, after_relu, after_pool]





import matplotlib.pyplot as plt
import numpy as np
# Load the saved model.
model = torch.load('model.pth')
print(model)
x = images[0].unsqueeze(0)  # add the batch axis in front of the image

# Forward pass; the model returns a sequence of feature maps.
out_put = model(x)
for fmap in out_put:
    # [N, C, H, W] -> [C, H, W]: drop the batch axis
    im = fmap.detach().numpy().squeeze()
    print(im.shape)
    # [C, H, W] -> [H, W, C]: channels last
    im = im.transpose(1, 2, 0)
    print(im.shape)

    # Show the 9 channels of this feature map on a 3x3 grid.
    plt.figure()
    for channel in range(9):
        plt.subplot(3, 3, channel + 1)  # 3 rows, 3 columns, 1-based cell index
        # Each channel is one 2-D map, drawn like a grey-scale image.
        plt.imshow(im[..., channel], cmap='gray')
    plt.show()

CNN(
(conv1): Conv2d(1, 9, kernel_size=(3, 3), stride=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(9, 5, kernel_size=(3, 3), stride=(1, 1))
(relu): ReLU()
(fc1): Linear(in_features=3645, out_features=1200, bias=True)
(fc2): Linear(in_features=1200, out_features=64, bias=True)
(fc3): Linear(in_features=64, out_features=2, bias=True)
)
(9, 114, 114)
(114, 114, 9)
(9, 114, 114)
(114, 114, 9)
(9, 57, 57)
(57, 57, 9)

7. 查看训练好的模型的卷积核

import matplotlib.pyplot as plt

# Load the saved model. No forward pass is needed here: the kernels are read
# straight from the state dict.
model = torch.load('model.pth')
print(model)

state = model.state_dict()
for key in state:
    print("key :", key)
    # Conv weights are laid out as
    # [kernel_number, kernel_channel, kernel_height, kernel_width].
    if key == "conv1.weight":
        weight_t = state[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, 0, :, :]  # input channel 0 of every conv1 kernel
        # Show the 9 kernels on a 3x3 grid.
        plt.figure()
        for i in range(9):
            plt.subplot(3, 3, i + 1)  # 3 rows, 3 columns, 1-based cell index
            plt.imshow(k[i, :, :], cmap='gray')
            plt.title(f'kernel{i},channel1')
        plt.show()

    if key == "conv2.weight":
        weight_t = state[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, :, :, :]  # all kernels, all input channels
        print(k.shape)
        print(k)

        # One figure per input channel, each showing that channel's slice of
        # the 5 kernels on a 2x3 grid.
        for c in range(9):
            plt.figure()
            channel = k[:, c, :, :]
            for i in range(5):
                plt.subplot(2, 3, i + 1)  # 2 rows, 3 columns, 1-based cell index
                plt.imshow(channel[i, :, :], cmap='gray')
                plt.title(f'kernel{i},channel{c}')
            plt.show()