目录
一、实现卷积-池化-激活
1. Numpy版本:手工实现 卷积-池化-激活
import numpy as np
# 9x9 input image: the +1 pixels draw an "X" on a -1 background.
x = np.array([[-1, -1, -1, -1, -1, -1, -1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, -1, -1, 1, -1, -1, -1, -1],
              [-1, -1, -1, 1, -1, 1, -1, -1, -1],
              [-1, -1, 1, -1, -1, -1, 1, -1, -1],
              [-1, 1, -1, -1, -1, -1, -1, 1, -1],
              [-1, -1, -1, -1, -1, -1, -1, -1, -1]])
print("x=\n", x)

# The three 3x3 convolution kernels: main diagonal, "X" shape, anti-diagonal.
Kernel = [
    np.array([[1, -1, -1],
              [-1, 1, -1],
              [-1, -1, 1]]),
    np.array([[1, -1, 1],
              [-1, 1, -1],
              [1, -1, 1]]),
    np.array([[-1, -1, 1],
              [-1, 1, -1],
              [1, -1, -1]]),
]

# --------------- convolution ---------------
stride = 1  # step of the sliding window
kernel_h, kernel_w = Kernel[0].shape
# Output size of an un-padded convolution, derived from the input and kernel
# shapes instead of being hard-coded (9x9 input, 3x3 kernel, stride 1 -> 7x7).
feature_map_h = (x.shape[0] - kernel_h) // stride + 1
feature_map_w = (x.shape[1] - kernel_w) // stride + 1
feature_map = [np.zeros((feature_map_h, feature_map_w)) for _ in Kernel]

for h in range(feature_map_h):  # slide down: row of the output cell
    for w in range(feature_map_w):  # slide right: column of the output cell
        v_start = h * stride  # first row of the window
        v_end = v_start + kernel_h  # one past the last row of the window
        h_start = w * stride  # first column of the window
        h_end = h_start + kernel_w  # one past the last column of the window
        window = x[v_start:v_end, h_start:h_end]
        for i, kernel in enumerate(Kernel):
            # Mean of the element-wise product: 1.0 means the window matches
            # the kernel exactly.
            feature_map[i][h, w] = np.sum(window * kernel) / kernel.size
print("feature_map:\n", np.around(feature_map, decimals=2))
# --------------- pooling ---------------
pooling_stride = 2  # step of the pooling window
pooling_size = 2  # side length of the square pooling window
src_h, src_w = feature_map[0].shape
# Output size with ceiling semantics: a partial window at the border still
# produces one output cell (a 7x7 map gives 4x4).
pooling_h = -(-src_h // pooling_stride)
pooling_w = -(-src_w // pooling_stride)
# Zero rows/columns appended at the bottom/right so the last window fits.
# For an odd-sized map and an even window this is exactly one row and column.
pad_h = max((pooling_h - 1) * pooling_stride + pooling_size - src_h, 0)
pad_w = max((pooling_w - 1) * pooling_stride + pooling_size - src_w, 0)
# NOTE: padding with 0 means a border window containing only negative values
# pools to 0 rather than to its true maximum.
feature_map_pad_0 = [
    np.pad(fmap, ((0, pad_h), (0, pad_w)), 'constant', constant_values=(0, 0))
    for fmap in feature_map
]

pooling = [np.zeros((pooling_h, pooling_w)) for _ in feature_map_pad_0]
for h in range(pooling_h):  # slide down: row of the output cell
    for w in range(pooling_w):  # slide right: column of the output cell
        v_start = h * pooling_stride  # first row of the window
        v_end = v_start + pooling_size  # one past the last row of the window
        h_start = w * pooling_stride  # first column of the window
        h_end = h_start + pooling_size  # one past the last column of the window
        for i, padded in enumerate(feature_map_pad_0):
            pooling[i][h, w] = np.max(padded[v_start:v_end, h_start:h_end])
for pooled in pooling:
    print("pooling:\n", np.around(pooled, decimals=2))
# --------------- 激活 ---------------
def relu(x):
    """Rectified linear unit: negative values become 0, the rest are kept.

    Relies on the identity max(x, 0) == (|x| + x) / 2, so it works for
    scalars and NumPy arrays alike and always uses true division.
    """
    magnitude = abs(x)
    return (magnitude + x) / 2
relu_map_h = 7  # height of each activated feature map
relu_map_w = 7  # width of each activated feature map
# ReLU is applied to the raw (un-pooled) feature maps, one result per kernel.
relu_map = [relu(fmap) for fmap in feature_map]
for activated in relu_map:
    print("relu map :\n",np.around(activated, decimals=2))
运行结果:
x=
[[-1 -1 -1 -1 -1 -1 -1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 -1 -1 1 -1 -1 -1 -1]
[-1 -1 -1 1 -1 1 -1 -1 -1]
[-1 -1 1 -1 -1 -1 1 -1 -1]
[-1 1 -1 -1 -1 -1 -1 1 -1]
[-1 -1 -1 -1 -1 -1 -1 -1 -1]]
feature_map:
[[[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]]
[[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.11 0.33 -0.78 1. -0.78 0.33 -0.11]
[ 0.11 -0.56 0.56 -0.78 0.56 -0.56 0.11]
[-0.56 0.56 -0.56 0.33 -0.56 0.56 -0.56]
[ 0.33 -0.56 0.11 -0.11 0.11 -0.56 0.33]]
[[ 0.33 -0.11 0.56 0.33 0.11 -0.11 0.78]
[-0.11 0.11 -0.11 0.33 -0.11 1. -0.11]
[ 0.56 -0.11 0.11 -0.33 1. -0.11 0.11]
[ 0.33 0.33 -0.33 0.56 -0.33 0.33 0.33]
[ 0.11 -0.11 1. -0.33 0.11 -0.11 0.56]
[-0.11 1. -0.11 0.33 -0.11 0.11 -0.11]
[ 0.78 -0.11 0.11 0.33 0.56 -0.11 0.33]]]
pooling:
[[1. 0.33 0.56 0.33]
[0.33 1. 0.33 0.56]
[0.56 0.33 1. 0.11]
[0.33 0.56 0.11 0.78]]
pooling:
[[0.56 0.33 0.56 0.33]
[0.33 1. 0.56 0.11]
[0.56 0.56 0.56 0.11]
[0.33 0.11 0.11 0.33]]
pooling:
[[0.33 0.56 1. 0.78]
[0.56 0.56 1. 0.33]
[1. 1. 0.11 0.56]
[0.78 0.33 0.56 0.33]]
relu map :
[[0.78 0. 0.11 0.33 0.56 0. 0.33]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.33 0. 0.56 0.33 0.11 0. 0.78]]
relu map :
[[0.33 0. 0.11 0. 0.11 0. 0.33]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.33 0. 1. 0. 0.33 0. ]
[0.11 0. 0.56 0. 0.56 0. 0.11]
[0. 0.56 0. 0.33 0. 0.56 0. ]
[0.33 0. 0.11 0. 0.11 0. 0.33]]
relu map :
[[0.33 0. 0.56 0.33 0.11 0. 0.78]
[0. 0.11 0. 0.33 0. 1. 0. ]
[0.56 0. 0.11 0. 1. 0. 0.11]
[0.33 0.33 0. 0.56 0. 0.33 0.33]
[0.11 0. 1. 0. 0.11 0. 0.56]
[0. 1. 0. 0.33 0. 0.11 0. ]
[0.78 0. 0.11 0.33 0.56 0. 0.33]]
2.Pytorch版本:调用函数实现 卷积-池化-激活
import numpy as np
import torch
import torch.nn as nn
# Input laid out as (batch, channel, height, width) = (1, 1, 9, 9).
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)

print("--------------- 卷积 ---------------")
# bias=False is essential: nn.Conv2d otherwise adds a randomly initialised
# bias, which makes the output differ on every run and disagree with the
# hand-written NumPy convolution.
conv1 = nn.Conv2d(1, 1, (3, 3), 1, bias=False)  # in_channels, out_channels, kernel_size, stride
conv1.weight.data = torch.Tensor([[[[1, -1, -1],
                                    [-1, 1, -1],
                                    [-1, -1, 1]]
                                   ]])
conv2 = nn.Conv2d(1, 1, (3, 3), 1, bias=False)  # in_channels, out_channels, kernel_size, stride
conv2.weight.data = torch.Tensor([[[[1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, 1]]
                                   ]])
conv3 = nn.Conv2d(1, 1, (3, 3), 1, bias=False)  # in_channels, out_channels, kernel_size, stride
conv3.weight.data = torch.Tensor([[[[-1, -1, 1],
                                    [-1, 1, -1],
                                    [1, -1, -1]]
                                   ]])

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)

# Divide by the kernel size (9) only for display, so a perfect match prints 1.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)
print("--------------- 池化 ---------------")
# Append one zero column on the right and one zero row at the bottom
# (padding order: left, right, top, bottom) so the 2x2 window tiles the map.
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))
# 2x2 max pooling with stride 2 and no extra padding of its own.
max_pool = nn.MaxPool2d(2, padding=0, stride=2)

feature_map_pad_0_1 = zeroPad(feature_map1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_map_pad_0_3 = zeroPad(feature_map3)

feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_pool_3 = max_pool(feature_map_pad_0_3)

print(feature_pool_1.size())
# Scaled by 1/9 only for display.
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)
print("--------------- 激活 ---------------")
# ReLU is applied to the raw convolution outputs, not to the pooled maps.
activation_function = nn.ReLU()
feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)

# Scaled by 1/9 only for display.
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
运行结果(注意:nn.Conv2d 默认带有随机初始化的偏置 bias,下面的数值是在未关闭偏置的情况下得到的,因此与 Numpy 版本的结果略有偏差,并且每次运行都会不同):
torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., -1., -1., 1., -1., -1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
--------------- 卷积 ---------------
tensor([[[[ 0.7624, -0.1265, 0.0958, 0.3180, 0.5402, -0.1265, 0.3180],
[-0.1265, 0.9847, -0.1265, 0.3180, -0.1265, 0.0958, -0.1265],
[ 0.0958, -0.1265, 0.9847, -0.3487, 0.0958, -0.1265, 0.5402],
[ 0.3180, 0.3180, -0.3487, 0.5402, -0.3487, 0.3180, 0.3180],
[ 0.5402, -0.1265, 0.0958, -0.3487, 0.9847, -0.1265, 0.0958],
[-0.1265, 0.0958, -0.1265, 0.3180, -0.1265, 0.9847, -0.1265],
[ 0.3180, -0.1265, 0.5402, 0.3180, 0.0958, -0.1265, 0.7624]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3578, -0.5310, 0.1356, -0.0866, 0.1356, -0.5310, 0.3578],
[-0.5310, 0.5801, -0.5310, 0.3578, -0.5310, 0.5801, -0.5310],
[ 0.1356, -0.5310, 0.5801, -0.7533, 0.5801, -0.5310, 0.1356],
[-0.0866, 0.3578, -0.7533, 1.0245, -0.7533, 0.3578, -0.0866],
[ 0.1356, -0.5310, 0.5801, -0.7533, 0.5801, -0.5310, 0.1356],
[-0.5310, 0.5801, -0.5310, 0.3578, -0.5310, 0.5801, -0.5310],
[ 0.3578, -0.5310, 0.1356, -0.0866, 0.1356, -0.5310, 0.3578]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3411, -0.1033, 0.5633, 0.3411, 0.1189, -0.1033, 0.7856],
[-0.1033, 0.1189, -0.1033, 0.3411, -0.1033, 1.0078, -0.1033],
[ 0.5633, -0.1033, 0.1189, -0.3256, 1.0078, -0.1033, 0.1189],
[ 0.3411, 0.3411, -0.3256, 0.5633, -0.3256, 0.3411, 0.3411],
[ 0.1189, -0.1033, 1.0078, -0.3256, 0.1189, -0.1033, 0.5633],
[-0.1033, 1.0078, -0.1033, 0.3411, -0.1033, 0.1189, -0.1033],
[ 0.7856, -0.1033, 0.1189, 0.3411, 0.5633, -0.1033, 0.3411]]]],
grad_fn=<DivBackward0>)
--------------- 池化 ---------------
torch.Size([1, 1, 4, 4])
tensor([[[[0.9847, 0.3180, 0.5402, 0.3180],
[0.3180, 0.9847, 0.3180, 0.5402],
[0.5402, 0.3180, 0.9847, 0.0958],
[0.3180, 0.5402, 0.0958, 0.7624]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5801, 0.3578, 0.5801, 0.3578],
[0.3578, 1.0245, 0.5801, 0.1356],
[0.5801, 0.5801, 0.5801, 0.1356],
[0.3578, 0.1356, 0.1356, 0.3578]]]], grad_fn=<DivBackward0>)
tensor([[[[0.3411, 0.5633, 1.0078, 0.7856],
[0.5633, 0.5633, 1.0078, 0.3411],
[1.0078, 1.0078, 0.1189, 0.5633],
[0.7856, 0.3411, 0.5633, 0.3411]]]], grad_fn=<DivBackward0>)
--------------- 激活 ---------------
tensor([[[[0.7624, 0.0000, 0.0958, 0.3180, 0.5402, 0.0000, 0.3180],
[0.0000, 0.9847, 0.0000, 0.3180, 0.0000, 0.0958, 0.0000],
[0.0958, 0.0000, 0.9847, 0.0000, 0.0958, 0.0000, 0.5402],
[0.3180, 0.3180, 0.0000, 0.5402, 0.0000, 0.3180, 0.3180],
[0.5402, 0.0000, 0.0958, 0.0000, 0.9847, 0.0000, 0.0958],
[0.0000, 0.0958, 0.0000, 0.3180, 0.0000, 0.9847, 0.0000],
[0.3180, 0.0000, 0.5402, 0.3180, 0.0958, 0.0000, 0.7624]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3578, 0.0000, 0.1356, 0.0000, 0.1356, 0.0000, 0.3578],
[0.0000, 0.5801, 0.0000, 0.3578, 0.0000, 0.5801, 0.0000],
[0.1356, 0.0000, 0.5801, 0.0000, 0.5801, 0.0000, 0.1356],
[0.0000, 0.3578, 0.0000, 1.0245, 0.0000, 0.3578, 0.0000],
[0.1356, 0.0000, 0.5801, 0.0000, 0.5801, 0.0000, 0.1356],
[0.0000, 0.5801, 0.0000, 0.3578, 0.0000, 0.5801, 0.0000],
[0.3578, 0.0000, 0.1356, 0.0000, 0.1356, 0.0000, 0.3578]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3411, 0.0000, 0.5633, 0.3411, 0.1189, 0.0000, 0.7856],
[0.0000, 0.1189, 0.0000, 0.3411, 0.0000, 1.0078, 0.0000],
[0.5633, 0.0000, 0.1189, 0.0000, 1.0078, 0.0000, 0.1189],
[0.3411, 0.3411, 0.0000, 0.5633, 0.0000, 0.3411, 0.3411],
[0.1189, 0.0000, 1.0078, 0.0000, 0.1189, 0.0000, 0.5633],
[0.0000, 1.0078, 0.0000, 0.3411, 0.0000, 0.1189, 0.0000],
[0.7856, 0.0000, 0.1189, 0.3411, 0.5633, 0.0000, 0.3411]]]],
grad_fn=<DivBackward0>)
3. 可视化:了解数字与图像之间的关系
可视化卷积核和特征图
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import os
# Let the process continue when more than one OpenMP runtime gets loaded.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese titles
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font


def _show_gray(tensor, title):
    """Display a tensor as a gray-scale image under the given title."""
    # Drop the singleton batch/channel dimensions and leave the autograd graph.
    img = tensor.detach().squeeze().numpy()
    plt.imshow(img, cmap='gray')
    plt.title(title)
    plt.show()


def _fixed_conv(kernel_rows):
    """Build a 1->1 channel 3x3 convolution whose weights are `kernel_rows`."""
    # bias=False: the default random bias would shift every output value and
    # make the feature maps differ from run to run.
    conv = nn.Conv2d(1, 1, (3, 3), 1, bias=False)  # in_channels, out_channels, kernel_size, stride
    conv.weight.data = torch.Tensor([[kernel_rows]])
    return conv


# Input laid out as (batch, channel, height, width) = (1, 1, 9, 9).
x = torch.tensor([[[[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, -1, -1, 1, -1, -1, -1, -1],
                    [-1, -1, -1, 1, -1, 1, -1, -1, -1],
                    [-1, -1, 1, -1, -1, -1, 1, -1, -1],
                    [-1, 1, -1, -1, -1, -1, -1, 1, -1],
                    [-1, -1, -1, -1, -1, -1, -1, -1, -1]]]], dtype=torch.float)
print(x.shape)
print(x)
_show_gray(x, '原图')

print("--------------- 卷积 ---------------")
conv1 = _fixed_conv([[1, -1, -1],
                     [-1, 1, -1],
                     [-1, -1, 1]])
_show_gray(conv1.weight, 'Kernel 1')

conv2 = _fixed_conv([[1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, 1]])
_show_gray(conv2.weight, 'Kernel 2')

conv3 = _fixed_conv([[-1, -1, 1],
                     [-1, 1, -1],
                     [1, -1, -1]])
_show_gray(conv3.weight, 'Kernel 3')

feature_map1 = conv1(x)
feature_map2 = conv2(x)
feature_map3 = conv3(x)
# Divide by the kernel size (9) only for display.
print(feature_map1 / 9)
print(feature_map2 / 9)
print(feature_map3 / 9)
_show_gray(feature_map1, '卷积后的特征图1')

print("--------------- 池化 ---------------")
max_pool = nn.MaxPool2d(2, padding=0, stride=2)  # 2x2 max pooling, stride 2
zeroPad = nn.ZeroPad2d(padding=(0, 1, 0, 1))  # zero padding: left, right, top, bottom
feature_map_pad_0_1 = zeroPad(feature_map1)
feature_pool_1 = max_pool(feature_map_pad_0_1)
feature_map_pad_0_2 = zeroPad(feature_map2)
feature_pool_2 = max_pool(feature_map_pad_0_2)
feature_map_pad_0_3 = zeroPad(feature_map3)
feature_pool_3 = max_pool(feature_map_pad_0_3)
print(feature_pool_1.size())
print(feature_pool_1 / 9)
print(feature_pool_2 / 9)
print(feature_pool_3 / 9)
_show_gray(feature_pool_1, '卷积池化后的特征图1')

print("--------------- 激活 ---------------")
# ReLU is applied to the raw convolution outputs, not to the pooled maps.
activation_function = nn.ReLU()
feature_relu1 = activation_function(feature_map1)
feature_relu2 = activation_function(feature_map2)
feature_relu3 = activation_function(feature_map3)
print(feature_relu1 / 9)
print(feature_relu2 / 9)
print(feature_relu3 / 9)
_show_gray(feature_relu1, '卷积 + relu 后的特征图1')
运行结果:
卷积核
特征图:
torch.Size([1, 1, 9, 9])
tensor([[[[-1., -1., -1., -1., -1., -1., -1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., -1., -1., 1., -1., -1., -1., -1.],
[-1., -1., -1., 1., -1., 1., -1., -1., -1.],
[-1., -1., 1., -1., -1., -1., 1., -1., -1.],
[-1., 1., -1., -1., -1., -1., -1., 1., -1.],
[-1., -1., -1., -1., -1., -1., -1., -1., -1.]]]])
--------------- 卷积 ---------------
tensor([[[[ 0.7988, -0.0901, 0.1322, 0.3544, 0.5766, -0.0901, 0.3544],
[-0.0901, 1.0210, -0.0901, 0.3544, -0.0901, 0.1322, -0.0901],
[ 0.1322, -0.0901, 1.0210, -0.3123, 0.1322, -0.0901, 0.5766],
[ 0.3544, 0.3544, -0.3123, 0.5766, -0.3123, 0.3544, 0.3544],
[ 0.5766, -0.0901, 0.1322, -0.3123, 1.0210, -0.0901, 0.1322],
[-0.0901, 0.1322, -0.0901, 0.3544, -0.0901, 1.0210, -0.0901],
[ 0.3544, -0.0901, 0.5766, 0.3544, 0.1322, -0.0901, 0.7988]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.3513, -0.5376, 0.1291, -0.0931, 0.1291, -0.5376, 0.3513],
[-0.5376, 0.5735, -0.5376, 0.3513, -0.5376, 0.5735, -0.5376],
[ 0.1291, -0.5376, 0.5735, -0.7598, 0.5735, -0.5376, 0.1291],
[-0.0931, 0.3513, -0.7598, 1.0180, -0.7598, 0.3513, -0.0931],
[ 0.1291, -0.5376, 0.5735, -0.7598, 0.5735, -0.5376, 0.1291],
[-0.5376, 0.5735, -0.5376, 0.3513, -0.5376, 0.5735, -0.5376],
[ 0.3513, -0.5376, 0.1291, -0.0931, 0.1291, -0.5376, 0.3513]]]],
grad_fn=<DivBackward0>)
tensor([[[[ 0.2998, -0.1446, 0.5220, 0.2998, 0.0776, -0.1446, 0.7443],
[-0.1446, 0.0776, -0.1446, 0.2998, -0.1446, 0.9665, -0.1446],
[ 0.5220, -0.1446, 0.0776, -0.3669, 0.9665, -0.1446, 0.0776],
[ 0.2998, 0.2998, -0.3669, 0.5220, -0.3669, 0.2998, 0.2998],
[ 0.0776, -0.1446, 0.9665, -0.3669, 0.0776, -0.1446, 0.5220],
[-0.1446, 0.9665, -0.1446, 0.2998, -0.1446, 0.0776, -0.1446],
[ 0.7443, -0.1446, 0.0776, 0.2998, 0.5220, -0.1446, 0.2998]]]],
grad_fn=<DivBackward0>)
--------------- 池化 ---------------
torch.Size([1, 1, 4, 4])
tensor([[[[1.0210, 0.3544, 0.5766, 0.3544],
[0.3544, 1.0210, 0.3544, 0.5766],
[0.5766, 0.3544, 1.0210, 0.1322],
[0.3544, 0.5766, 0.1322, 0.7988]]]], grad_fn=<DivBackward0>)
tensor([[[[0.5735, 0.3513, 0.5735, 0.3513],
[0.3513, 1.0180, 0.5735, 0.1291],
[0.5735, 0.5735, 0.5735, 0.1291],
[0.3513, 0.1291, 0.1291, 0.3513]]]], grad_fn=<DivBackward0>)
tensor([[[[0.2998, 0.5220, 0.9665, 0.7443],
[0.5220, 0.5220, 0.9665, 0.2998],
[0.9665, 0.9665, 0.0776, 0.5220],
[0.7443, 0.2998, 0.5220, 0.2998]]]], grad_fn=<DivBackward0>)
--------------- 激活 ---------------
tensor([[[[0.7988, 0.0000, 0.1322, 0.3544, 0.5766, 0.0000, 0.3544],
[0.0000, 1.0210, 0.0000, 0.3544, 0.0000, 0.1322, 0.0000],
[0.1322, 0.0000, 1.0210, 0.0000, 0.1322, 0.0000, 0.5766],
[0.3544, 0.3544, 0.0000, 0.5766, 0.0000, 0.3544, 0.3544],
[0.5766, 0.0000, 0.1322, 0.0000, 1.0210, 0.0000, 0.1322],
[0.0000, 0.1322, 0.0000, 0.3544, 0.0000, 1.0210, 0.0000],
[0.3544, 0.0000, 0.5766, 0.3544, 0.1322, 0.0000, 0.7988]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.3513, 0.0000, 0.1291, 0.0000, 0.1291, 0.0000, 0.3513],
[0.0000, 0.5735, 0.0000, 0.3513, 0.0000, 0.5735, 0.0000],
[0.1291, 0.0000, 0.5735, 0.0000, 0.5735, 0.0000, 0.1291],
[0.0000, 0.3513, 0.0000, 1.0180, 0.0000, 0.3513, 0.0000],
[0.1291, 0.0000, 0.5735, 0.0000, 0.5735, 0.0000, 0.1291],
[0.0000, 0.5735, 0.0000, 0.3513, 0.0000, 0.5735, 0.0000],
[0.3513, 0.0000, 0.1291, 0.0000, 0.1291, 0.0000, 0.3513]]]],
grad_fn=<DivBackward0>)
tensor([[[[0.2998, 0.0000, 0.5220, 0.2998, 0.0776, 0.0000, 0.7443],
[0.0000, 0.0776, 0.0000, 0.2998, 0.0000, 0.9665, 0.0000],
[0.5220, 0.0000, 0.0776, 0.0000, 0.9665, 0.0000, 0.0776],
[0.2998, 0.2998, 0.0000, 0.5220, 0.0000, 0.2998, 0.2998],
[0.0776, 0.0000, 0.9665, 0.0000, 0.0776, 0.0000, 0.5220],
[0.0000, 0.9665, 0.0000, 0.2998, 0.0000, 0.0776, 0.0000],
[0.7443, 0.0000, 0.0776, 0.2998, 0.5220, 0.0000, 0.2998]]]],
grad_fn=<DivBackward0>)
二、 基于CNN的XO识别
1.数据集
共2000张图片,X、O各1000张。
从X、O文件夹,分别取出150张作为测试集。
文件夹train_data:放置训练集 1700张图片
文件夹test_data: 放置测试集 300张图片
2.构建模型
import torch
import torch.nn as nn
class Net(nn.Module):
    """Small CNN that classifies 1-channel images into 2 classes.

    Two conv(3x3) -> ReLU -> 2x2 max-pool stages are followed by three fully
    connected layers. fc1 expects 5 * 27 * 27 features, which is what e.g. a
    116 x 116 input produces (116 -> 114 -> 57 -> 55 -> 27).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)  # kernel_size, stride
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the raw class scores (logits), one row of 2 per sample."""
        stage1 = self.maxpool(self.relu(self.conv1(x)))
        stage2 = self.maxpool(self.relu(self.conv2(stage1)))
        # Flatten everything but the (inferred) batch dimension.
        flat = stage2.view(-1, 27 * 27 * 5)
        hidden1 = self.relu(self.fc1(flat))
        hidden2 = self.relu(self.fc2(hidden1))
        return self.fc3(hidden2)
3.训练模型
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim
# Preprocessing applied to every image.
# NOTE: this rebinding shadows the imported `transforms` module from here on.
transforms = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor scaled to [0, 1]
    transforms.Grayscale(1)  # reduce to a single gray channel
])

# Fixed: the two folders were swapped, so the model was trained on the test
# images and evaluated on the training images.
path = r'D:\Desktop\本学期(实验)作业\神经网络与深度学习\data\train_data'
path_test = r'D:\Desktop\本学期(实验)作业\神经网络与深度学习\data\test_data'

data_train = datasets.ImageFolder(path, transform=transforms)
data_test = datasets.ImageFolder(path_test, transform=transforms)

print("size of train_data:",len(data_train))
print("size of test_data:",len(data_test))

data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)

model = Net()
criterion = torch.nn.CrossEntropyLoss()  # loss function: cross entropy
optimizer = optim.SGD(model.parameters(), lr=0.1)  # optimiser: stochastic gradient descent

epochs = 10
log_every = 10  # report the mean loss once per this many mini-batches
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(data_loader):
        images, label = data
        out = model(images)
        loss = criterion(out, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % log_every == 0:
            # Fixed: average over the batches actually accumulated; dividing
            # by 100 under-reported the loss by a factor of ten.
            print('[%d %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('finished train')

# Save the whole model object (architecture and weights), not just a state_dict.
torch.save(model, 'model_name.pth')
运行结果:
4. 测试训练好的模型
# Restore the complete model object written by torch.save(model, ...).
model_load = torch.load('model_name.pth')

# Classify one sample. `images` and `label` are whatever batch was bound last
# by the code run before this snippet (presumably the final training batch;
# verify).
sample = images[0]
print("label[0] truth:\t", label[0])
x = sample.reshape([1, 1, 116, 116])  # add the batch dimension
scores = model_load(x)
predicted = torch.max(scores, 1)  # (values, indices) along the class axis
print("label[0] predict:\t", predicted.indices)

img = sample.data.squeeze().numpy()  # 2-D array suitable for imshow
plt.imshow(img, cmap='gray')
plt.show()
运行结果:
5.计算模型的准确率
# Rebuild the network and load its weights from a state_dict file.
# NOTE(review): 'model_name1.pth' is not produced by the training snippet
# shown in this document (that one saves 'model_name.pth' as a whole model);
# presumably a state_dict saved separately. Confirm.
model_load = Net()
model_load.load_state_dict(torch.load('model_name1.pth'))

correct = 0
total = 0
with torch.no_grad():  # evaluation only: no gradients are needed
    for images, labels in data_loader_test:
        outputs = model_load(images)
        # The index of the largest score is the predicted class.
        predicted = torch.max(outputs.data, 1).indices
        total += labels.size(0)  # number of samples in this batch
        correct += (predicted == labels).sum().item()  # correctly classified samples

print('Accuracy of the network on the test images: %f %%' % (100. * correct / total))
6.查看训练好的模型的特征图
# 看看每层的 卷积核 长相,特征图 长相
# 获取网络结构的特征矩阵并可视化
import torch
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
# Image preprocessing; it has to match the preprocessing used during training.
# NOTE: this rebinding shadows the imported `transforms` module from here on.
transforms = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor scaled to [0, 1]
    transforms.Grayscale(1)  # reduce to a single gray channel
])

path = r'D:\Desktop\本学期(实验)作业\神经网络与深度学习\data\train_data'
data_train = datasets.ImageFolder(path, transform=transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)

# Only the first shuffled batch is needed.
images, labels = next(iter(data_loader))
print(images.shape)
print(labels.shape)
class Net(nn.Module):
    """Same layers as the training network, but forward() returns feature maps.

    The layer attributes keep the names used at training time so that a
    trained state_dict can be loaded into this class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)  # kernel_size, stride
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected 1
        self.fc2 = nn.Linear(1200, 64)  # fully connected 2
        self.fc3 = nn.Linear(64, 2)  # fully connected 3

    def forward(self, x):
        """Return [conv1 output, after ReLU, after max-pool] for input x."""
        conv_out = self.conv1(x)
        relu_out = self.relu(conv_out)
        pool_out = self.maxpool(relu_out)
        outputs = [conv_out, relu_out, pool_out]

        # The rest of the network is still evaluated, but its result is not
        # part of the return value.
        tail = self.maxpool(self.relu(self.conv2(pool_out)))
        tail = tail.view(-1, 27 * 27 * 5)
        tail = self.relu(self.fc1(tail))
        tail = self.relu(self.fc2(tail))
        tail = self.fc3(tail)
        return outputs
# Build the network and load the trained weights into it.
model_weight_path = "model_name1.pth"
model1 = Net()
model1.load_state_dict(torch.load(model_weight_path))
# Print the model structure.
print(model1)

# Take one sample and add the leading batch dimension: [C, H, W] -> [1, C, H, W].
x = images[0].unsqueeze(0)

# Forward pass; this Net returns a list of intermediate feature maps.
out_put = model1(x)
for feature_map in out_put:
    # [1, C, H, W] -> [C, H, W] -> [H, W, C]
    im = feature_map.detach().numpy().squeeze().transpose(1, 2, 0)
    print(im.shape)

    # One figure per feature map, showing its 9 channels in a 3x3 grid.
    plt.figure()
    for channel in range(9):
        plt.subplot(3, 3, channel + 1)
        plt.imshow(im[:, :, channel], cmap='gray')
    plt.show()
运行结果:
7.查看训练好的模型的卷积核
# 看看每层的 卷积核 长相,特征图 长相
# 获取网络结构的特征矩阵并可视化
import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font

# Image preprocessing; it has to match the preprocessing used during training.
# NOTE: this rebinding shadows the imported `transforms` module from here on.
transforms = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a tensor scaled to [0, 1]
    transforms.Grayscale(1)  # reduce to a single gray channel
])

path = r'training_data_sm'
data_train = datasets.ImageFolder(path, transform=transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)

# Only the first shuffled batch is needed.
images, labels = next(iter(data_loader))
class Net(nn.Module):
    """Same layers as the training network; forward() returns one feature map.

    The layer attributes keep the names used at training time so that a
    trained state_dict can be loaded into this class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)  # kernel_size, stride
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected 1
        self.fc2 = nn.Linear(1200, 64)  # fully connected 2
        self.fc3 = nn.Linear(64, 2)  # fully connected 3

    def forward(self, x):
        """Return a one-element list: the output of the second conv stage."""
        stage1 = self.maxpool(self.relu(self.conv1(x)))
        stage2 = self.maxpool(self.relu(self.conv2(stage1)))
        outputs = [stage2]

        # The classifier head is still evaluated, but its result is not part
        # of the return value.
        tail = stage2.view(-1, 27 * 27 * 5)
        tail = self.relu(self.fc1(tail))
        tail = self.relu(self.fc2(tail))
        tail = self.fc3(tail)
        return outputs
# Build the network and load the trained weights into it.
model1 = Net()
model_weight_path = "model_name1.pth"
model1.load_state_dict(torch.load(model_weight_path))

# Forward pass on one sample. The batch dimension is added explicitly
# ([C, H, W] -> [1, C, H, W]) rather than relying on unbatched-input support.
# The result is not used by the kernel plots below.
x = images[0].unsqueeze(0)
out_put = model1(x)

weights_keys = model1.state_dict().keys()
for key in weights_keys:
    print("key :", key)
    # Conv weight layout: [kernel_number, kernel_channel, kernel_height, kernel_width]
    if key == "conv1.weight":
        weight_t = model1.state_dict()[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, 0, :, :]  # the single input channel of every conv1 kernel

        # All conv1 kernels in one figure, three per row.
        n_kernels = k.shape[0]
        n_rows = -(-n_kernels // 3)  # ceiling division
        plt.figure()
        for i in range(n_kernels):
            plt.subplot(n_rows, 3, i + 1)  # arguments: rows, columns, 1-based index
            plt.imshow(k[i, :, :], cmap='gray')
            title_name = 'kernel' + str(i) + ',channel1'
            plt.title(title_name)
        plt.show()

    if key == "conv2.weight":
        weight_t = model1.state_dict()[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t  # all kernels, all input channels
        print(k.shape)
        print(k)

        n_kernels, n_channels = k.shape[0], k.shape[1]
        n_rows = -(-n_kernels // 3)  # ceiling division
        for c in range(n_channels):
            channel = k[:, c, :, :]
            # A fresh figure per input channel, so one channel's plots never
            # overwrite another's.
            plt.figure()
            for i in range(n_kernels):
                plt.subplot(n_rows, 3, i + 1)  # arguments: rows, columns, 1-based index
                plt.imshow(channel[i, :, :], cmap='gray')
                title_name = 'kernel' + str(i) + ',channel' + str(c)
                plt.title(title_name)
            plt.show()
运行结果:
key : conv1.weight
weight_t.shape (9, 1, 3, 3)
key : conv1.bias
key : conv2.weight
weight_t.shape (5, 9, 3, 3)
(5, 9, 3, 3)
[[[[ 6.98458925e-02 5.90659641e-02 3.81753966e-02]
[-6.13637008e-02 1.34813562e-01 -7.33171105e-02]
[ 6.13309480e-02 -5.23449155e-04 4.80439179e-02]]
[[-4.00464647e-02 1.12880126e-01 -7.68649280e-02]
[ 7.87192211e-02 -2.36817487e-02 -5.60271740e-02]
[ 8.12683925e-02 2.73875427e-02 -9.82172340e-02]]
[[ 4.08881977e-02 5.25458008e-02 1.05747022e-03]
[ 1.61746703e-02 -1.88982282e-02 -8.98489915e-03]
[ 2.05815379e-02 6.91843778e-02 -5.07958978e-02]]
[[ 2.40698364e-02 1.23903854e-02 9.09792781e-02]
[-4.54008244e-02 1.94115769e-02 1.08469449e-01]
[-5.29113822e-02 5.41664548e-02 4.23667813e-03]]
[[-9.22499970e-02 -2.69414317e-02 5.68897910e-02]
[-9.98499840e-02 6.78706095e-02 -7.92549327e-02]
[-4.87312488e-02 -5.90884238e-02 7.44135678e-02]]
[[-8.56655538e-02 -8.28199610e-02 8.21587667e-02]
[ 2.01716665e-02 8.60504657e-02 -4.26697880e-02]
[-4.74872179e-02 1.12074219e-01 -5.09875007e-02]]
[[ 3.27787511e-02 -7.82772973e-02 1.08398296e-01]
[ 2.77021844e-02 3.72366756e-02 -6.03387356e-02]
[ 3.33355628e-02 -3.38297561e-02 1.04788266e-01]]
[[-5.93599454e-02 7.44077489e-02 1.03815481e-01]
[-1.66368708e-02 -9.41569544e-03 3.45407762e-02]
[ 3.65562551e-03 -4.04765494e-02 6.14924431e-02]]
[[ 1.00850463e-01 3.62970792e-02 1.14644319e-01]
[-9.65513289e-02 -2.99563049e-03 9.44290385e-02]
[-5.09380549e-02 8.94127786e-02 8.03329796e-02]]]
[[[ 2.16829151e-01 1.99093044e-01 5.70072345e-02]
[ 2.30329320e-01 4.84957658e-02 -2.16806829e-02]
[ 2.35382617e-01 1.77218691e-01 1.56592391e-02]]
[[-1.14416555e-01 -9.05001387e-02 3.45594026e-02]
[ 7.52960369e-02 4.63159829e-02 -1.27104968e-01]
[-6.35510609e-02 9.67403129e-02 -2.61216182e-02]]
[[ 5.76992482e-02 -4.30453755e-03 -4.52346914e-02]
[-9.95732378e-03 9.97230113e-02 -7.94924702e-03]
[ 2.94310488e-02 -6.90363571e-02 -6.99906722e-02]]
[[ 1.05409855e-02 1.29709765e-02 3.56001705e-02]
[-1.83131136e-02 1.66197456e-02 -1.48420064e-02]
[ 7.52180666e-02 -7.81482905e-02 -9.23441350e-03]]
[[-9.07110646e-02 -2.30383456e-01 -2.82495804e-02]
[-1.90353051e-01 -4.77160737e-02 5.00900671e-02]
[-1.54941514e-01 -1.49308849e-04 2.56049205e-02]]
[[ 4.10602428e-03 -9.44532547e-03 5.52874431e-02]
[ 3.63270789e-02 2.35939752e-02 -6.39206991e-02]
[ 1.41671211e-01 7.78410360e-02 -8.86475742e-02]]
[[-4.53491695e-02 1.07495286e-01 5.71262278e-02]
[-1.80877224e-02 1.15991592e-01 -1.03785552e-01]
[-6.88361079e-02 3.28751318e-02 3.43425907e-02]]
[[ 1.12147458e-01 8.87614563e-02 -6.72129467e-02]
[-3.03914417e-02 -5.64941065e-03 -8.04529339e-03]
[ 9.83864889e-02 1.11261480e-01 -9.43764746e-02]]
[[ 1.46605387e-01 1.30711570e-01 3.23899859e-03]
[ 8.96535888e-02 2.28065569e-02 -1.84486173e-02]
[ 4.97324243e-02 1.14006259e-01 -4.68446128e-02]]]
[[[ 7.06592649e-02 1.04841381e-01 -8.21368843e-02]
[ 7.56961759e-03 3.07901427e-02 1.26209920e-02]
[-4.03140374e-02 1.02723790e-02 -3.61617506e-02]]
[[-1.27743632e-01 7.13284835e-02 -1.00524239e-02]
[ 8.56978819e-03 -8.83464962e-02 -1.46895722e-01]
[-9.48935673e-02 -3.92020792e-02 -5.77922463e-02]]
[[ 6.93405345e-02 -6.33155406e-02 -1.07146412e-01]
[-2.40081158e-02 1.51393469e-02 -8.50542560e-02]
[ 1.03771903e-01 3.33332270e-02 -3.12288590e-02]]
[[ 8.57113898e-02 -5.51949926e-02 1.79728121e-02]
[ 6.85685426e-02 7.82678053e-02 -2.39495300e-02]
[-5.95061481e-02 -1.02192089e-01 -9.46415141e-02]]
[[-1.26110375e-01 -2.46208310e-02 -1.84309453e-01]
[ 1.56330317e-02 -1.09103754e-01 7.10281078e-03]
[ 2.81431363e-03 2.14111097e-02 -6.03581294e-02]]
[[-3.36449742e-02 -1.04325727e-01 4.81084827e-03]
[ 3.85972895e-02 -1.06708854e-01 1.05884507e-01]
[-5.22216298e-02 -1.23345144e-02 1.25243125e-04]]
[[ 6.98298663e-02 -7.50838518e-02 2.16020737e-02]
[ 2.92224884e-02 -2.44561583e-03 9.43801999e-02]
[-2.86634099e-02 -3.26344781e-02 2.32272912e-02]]
[[ 1.64350141e-02 4.88247387e-02 6.04826063e-02]
[-2.23416858e-03 2.94138119e-02 7.57313818e-02]
[-7.45175313e-03 -8.91718641e-02 1.07973032e-01]]
[[-5.71223162e-02 -1.00419804e-01 -9.37241390e-02]
[-6.41234219e-02 1.57135073e-02 -1.89927071e-02]
[ 3.19132023e-02 3.52603830e-02 -5.65539226e-02]]]
[[[-7.09892437e-02 -7.83143938e-02 -6.47149161e-02]
[-3.67178880e-02 -7.28780478e-02 4.98269778e-03]
[-8.59382451e-02 7.90102500e-03 8.22086632e-02]]
[[-9.37657654e-02 2.11495031e-02 -9.26158875e-02]
[-4.10797484e-02 -3.01056765e-02 -1.08754538e-01]
[-1.06991671e-01 -9.82748047e-02 -1.05811022e-01]]
[[ 1.04221590e-01 6.28329888e-02 -3.75137329e-02]
[ 3.30754891e-02 3.89316007e-02 -7.92906582e-02]
[ 7.55222887e-03 5.62973954e-02 -1.11638568e-02]]
[[ 8.80549848e-03 -6.03972003e-02 4.41536307e-03]
[-3.37611958e-02 1.01789005e-01 -7.89848119e-02]
[-9.12546590e-02 1.03225388e-01 -9.06298384e-02]]
[[-1.10043190e-01 -2.76146475e-02 -5.13514876e-02]
[-5.09044081e-02 1.09844722e-01 4.63054404e-02]
[ 1.08196847e-01 -9.77096856e-02 -6.03941232e-02]]
[[ 5.21519445e-02 -9.87846181e-02 -8.58242884e-02]
[ 7.53185526e-02 -1.06183343e-01 -2.14587171e-02]
[-1.01366051e-01 1.07559510e-01 -7.94448555e-02]]
[[ 6.74566850e-02 9.86777246e-04 5.59085682e-02]
[ 2.23980322e-02 2.54910961e-02 -9.46119949e-02]
[ 1.42240748e-02 8.70785415e-02 1.08000107e-01]]
[[ 3.61804152e-03 -4.88866568e-02 -6.17030747e-02]
[-9.81834158e-02 -3.27483080e-02 8.97164792e-02]
[-8.79103914e-02 4.35663499e-02 -5.99438958e-02]]
[[-3.83332074e-02 9.47101191e-02 -1.09892726e-01]
[-7.83142447e-03 -8.67927223e-02 6.75773546e-02]
[-1.04694366e-02 -5.16315363e-02 9.21809003e-02]]]
[[[ 1.57988280e-01 3.60716939e-01 2.89625138e-01]
[ 3.33296895e-01 3.04307610e-01 2.04062670e-01]
[ 3.17273408e-01 2.26779416e-01 2.79705107e-01]]
[[ 1.01867160e-02 -7.60785118e-02 6.69518635e-02]
[ 5.54578081e-02 2.87681669e-02 -4.60735569e-03]
[ 1.03337668e-01 -1.09475963e-02 -3.04092001e-02]]
[[-9.26796421e-02 -4.36972715e-02 2.43944284e-02]
[ 5.30454889e-02 2.84075048e-02 1.89190935e-02]
[ 8.29062089e-02 -4.56020273e-02 -4.39854078e-02]]
[[-7.51409829e-02 7.07833990e-02 -2.27690194e-04]
[ 5.10067791e-02 2.99685933e-02 -6.13842085e-02]
[-6.48679212e-03 1.06581427e-01 4.64117266e-02]]
[[-8.37772042e-02 -1.55725271e-01 -1.20179079e-01]
[ 1.86880562e-03 1.55261280e-02 -1.57763153e-01]
[-4.79777083e-02 2.06353310e-02 -1.45830363e-01]]
[[ 2.58288588e-02 2.36822367e-02 5.96350171e-02]
[-2.38394365e-02 2.01987803e-01 7.84047414e-03]
[ 6.91130161e-02 1.77542195e-01 7.41390362e-02]]
[[-3.30275223e-02 9.67713445e-02 -8.22414979e-02]
[-2.45989729e-02 1.26603991e-01 -5.13266437e-02]
[ 1.19841211e-01 8.46858248e-02 -3.77128050e-02]]
[[ 1.54069349e-01 1.45480275e-01 9.25109535e-03]
[ 1.04882568e-01 1.83798082e-03 6.50073215e-02]
[ 6.25121519e-02 1.17242105e-01 -2.45001912e-03]]
[[ 1.30424738e-01 1.92920014e-01 5.10507822e-03]
[ 7.14448616e-02 1.48681432e-01 -2.15764381e-02]
[ 1.92923009e-01 1.89543799e-01 -3.10594495e-02]]]]
key : conv2.bias
key : fc1.weight
key : fc1.bias
key : fc2.weight
key : fc2.bias
key : fc3.weight
key : fc3.bias
8.训练模型源代码
import torch
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
# Preprocessing applied to every image. The pipeline is bound to `transform`
# (singular) so that it does not overwrite the imported `transforms` module.
transform = transforms.Compose([
    transforms.ToTensor(),  # normalize the image and convert it to a Tensor
    transforms.Grayscale(1),  # convert the image to single-channel grayscale
])

# ImageFolder reads the images under each root directory and assigns labels.
path = r'train_data'
path_test = r'test_data'
data_train = datasets.ImageFolder(path, transform=transform)
data_test = datasets.ImageFolder(path_test, transform=transform)
print("size of train_data:", len(data_train))
print("size of test_data:", len(data_test))

data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)

# Sanity check: print the tensor shapes of the first batch of each loader.
for loader in (data_loader, data_loader_test):
    for images, labels in loader:
        print(images.shape)
        print(labels.shape)
        break
class Net(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages, then three linear layers.

    ``fc1`` expects 5 * 27 * 27 features per sample, which is what the two
    3x3 convolutions and 2x2 poolings produce for e.g. a 1 x 116 x 116 input
    (116 -> 114 -> 57 -> 55 -> 27). The network outputs 2 raw class scores.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)  # kernel_size, stride
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected layer 1
        self.fc2 = nn.Linear(1200, 64)  # fully connected layer 2
        self.fc3 = nn.Linear(64, 2)  # fully connected layer 3 (class scores)

    def forward(self, x):
        """Return class scores of shape (batch, 2) for an image batch ``x``."""
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. A
        # `view(-1, 27 * 27 * 5)` would silently regroup samples whenever the
        # spatial size is not the expected one; this way `fc1` raises a clear
        # shape error instead.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
model = Net()
criterion = torch.nn.CrossEntropyLoss()  # loss function: cross-entropy
optimizer = optim.SGD(model.parameters(), lr=0.1)  # optimizer: stochastic gradient descent
epochs = 10
log_interval = 10  # report the mean loss every `log_interval` batches

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(data_loader):
        images, label = data
        out = model(images)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (i + 1) % log_interval == 0:
            # Divide by the number of batches actually accumulated so the
            # printed value is the true mean loss over the window.
            print('[%d %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0
print('finished train')

# Save the model: torch.save(model.state_dict(), model_path)
# Only the state dict (learned weights and biases) is stored, not the module
# object itself.
torch.save(model.state_dict(), 'model_name1.pth')

# Load the model: a state-dict checkpoint must be loaded into a freshly
# constructed network; torch.load alone returns only the parameter mapping.
model = Net()
model.load_state_dict(torch.load('model_name1.pth'))
9.测试模型源代码
# https://blog.csdn.net/qq_53345829/article/details/124308515
import torch
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
# Preprocessing applied to every image. The pipeline is bound to `transform`
# (singular) so that it does not overwrite the imported `transforms` module.
transform = transforms.Compose([
    transforms.ToTensor(),  # normalize the image and convert it to a Tensor
    transforms.Grayscale(1),  # convert the image to single-channel grayscale
])

# ImageFolder reads the images under each root directory and assigns labels.
path = r'train_data'
path_test = r'test_data'
data_train = datasets.ImageFolder(path, transform=transform)
data_test = datasets.ImageFolder(path_test, transform=transform)
print("size of train_data:", len(data_train))
print("size of test_data:", len(data_test))

data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)
# Number of batches in each loader.
print(len(data_loader))
print(len(data_loader_test))
class Net(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages, then three linear layers.

    ``fc1`` expects 5 * 27 * 27 features per sample, which is what the two
    3x3 convolutions and 2x2 poolings produce for e.g. a 1 x 116 x 116 input
    (116 -> 114 -> 57 -> 55 -> 27). The network outputs 2 raw class scores.
    The layer attribute names define the state-dict keys of the checkpoint
    loaded below, so they must stay as they are.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)  # kernel_size, stride
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected layer 1
        self.fc2 = nn.Linear(1200, 64)  # fully connected layer 2
        self.fc3 = nn.Linear(64, 2)  # fully connected layer 3 (class scores)

    def forward(self, x):
        """Return class scores of shape (batch, 2) for an image batch ``x``."""
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. A
        # `view(-1, 27 * 27 * 5)` would silently regroup samples whenever the
        # spatial size is not the expected one; this way `fc1` raises a clear
        # shape error instead.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Load the model: rebuild the network, then restore the trained parameters.
model = Net()
model.load_state_dict(torch.load('model_name1.pth', map_location='cpu'))  # load the network parameters
model.eval()  # switch to inference mode before evaluating
# Reference: https://blog.csdn.net/qq_41360787/article/details/104332706
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed during evaluation
    for images, labels in data_loader_test:  # iterate over the test set
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)  # number of samples in this batch
        correct += (predicted == labels).sum().item()  # number of correct predictions
print('Accuracy of the network on the test images: %f %%' % (100. * correct / total))
# Explanation of the "_," idiom: https://blog.csdn.net/weixin_48249563/article/details/111387501
总结
本次实验实现了图片数据集的识别:自己划分了训练集与测试集,完整地进行了测试过程,并对结果进行了可视化,能够清晰地看出图片经过卷积后的变化。卷积是课程的重点,应该花更多时间进行学习。