NNDL 作业7 基于CNN的XO识别

最新推荐文章于 2024-09-13 17:56:14 发布

02（网络界泥石流）

最新推荐文章于 2024-09-13 17:56:14 发布

阅读量36

点赞数 1

文章标签：深度学习人工智能

本文链接：https://blog.csdn.net/m0_63591032/article/details/134363242

版权

一、用自己的语言解释以下概念

1.局部感知、权值共享

局部感知是指在神经网络中，每个神经元只关注周边一小部分的输入数据，而不是整个输入。这种局部观察的方式帮助神经元捕捉局部的模式和特征。

权值共享是指在卷积神经网络中，同一个卷积核在输入的不同位置上重复使用同一组权重参数，也就是同一特征图上的多个神经元共享相同的权重。这样做可以大大减少网络的参数量，提高计算效率，并且有助于提升网络的学习效果和泛化能力。

2.池化（子采样、降采样、汇聚）。会带来哪些好处和坏处？

池化是一种操作，在神经网络中常用于减小特征图的尺寸。子采样、降采样和汇聚都是池化的不同叫法，常见的具体方式有最大池化和平均池化。池化操作的好处是减少计算量和资源占用、减少后续层的参数数量，并且提高模型的平移不变性和对位置的容忍度。然而，池化也可能会导致信息的丢失，例如丢失细节和位置精确性。

3.全卷积网络（课上讲的这个概念不准确，同学们查资料纠正一下）

全卷积网络是一种神经网络结构，通常用于图像语义分割任务。与传统的卷积神经网络不同，全卷积网络只使用卷积层和转置卷积层来处理数据，没有全连接层。这种架构允许网络接受任意尺寸的输入，并能够在像素级别对图像进行分类，从而实现图像的分割。

4.低级特征、中级特征、高级特征

低级特征、中级特征和高级特征是在神经网络中提取的不同层次的特征表示。低级特征包括一些基本的视觉特征，比如边缘、纹理等；中级特征相对抽象一些，可以表示一些形状、结构和局部的特征；高级特征更加语义化，能够表示更复杂的概念，如物体、场景等。

5.多通道。N输入，M输出是如何实现的？

多通道是指神经网络中输入和输出的数据具有多个通道。例如，如果有N个输入通道和M个输出通道，就需要M个卷积核，每个卷积核包含N个通道（权重形状为M×N×k×k）：每个卷积核在N个输入通道上分别做卷积后求和（再加上偏置），得到1个输出通道，M个卷积核共得到M个输出通道。多通道的设计可以帮助提取更丰富的特征信息，并增加模型的表达能力和性能。

6.1×1的卷积核有什么作用

1×1的卷积核在卷积神经网络中起到多个作用。首先，它可以用来调整特征的通道数，即降维或升维操作。其次，1×1卷积核可以对同一位置上各通道的特征做线性组合，实现跨通道的信息融合；配合其后的激活函数，还能为网络增加非线性。此外，它还可以用来控制网络的复杂度和减少参数数量。在实践中，1×1卷积核常常用于深度残差网络和瓶颈结构，以提高网络的表示能力和效率。

二、使用CNN进行XO识别

1.复现参考资料中的代码

复现代码

from torch.utils.data import DataLoader
from torchvision import transforms, datasets
# 可视化数据集
import matplotlib.pyplot as plt
import torch.nn as nn
import torch
import matplotlib.pyplot as plt

# Build the datasets.
# The pipeline gets its own name: rebinding `transforms` would shadow the
# torchvision module and break every later `transforms.<Something>` lookup.
data_transforms = transforms.Compose([
    transforms.ToTensor(),  # image -> float tensor (8-bit images are scaled to [0, 1])
    transforms.Grayscale(1),  # the argument is the number of output channels
])

# ImageFolder reads the images under the given root folder and takes each
# image's class label from the sub-directory it is stored in.
data_train = datasets.ImageFolder('train', data_transforms)
data_test = datasets.ImageFolder('test', data_transforms)
train_loader = DataLoader(data_train, batch_size=64, shuffle=True)
test_loader = DataLoader(data_test, batch_size=64, shuffle=True)

# Sanity check: print the tensor shapes of one batch from each loader.
# `images` / `labels` end up holding the batch drawn from the test loader.
for loader in (train_loader, test_loader):
    images, labels = next(iter(loader))
    print(images.shape)
    print(labels.shape)


# Visualise the first five samples whose label is 0 (circles).
# NOTE: everything between the triple quotes below is a bare string
# expression, so the plotting code inside it is disabled and never runs.
'''a = 0
plt.figure()
index = 0
for i in labels:
    if i == 0 and a < 5:
        plt.subplot(151 + a)
        plt.imshow(images[index].data.squeeze().numpy(), cmap='gray')
        plt.title('circle ' + str(a + 1))
        a += 1
    if a == 5:
        break
    index += 1
plt.show()
#可视化前五个标签是1的（叉）
a = 0
plt.figure()
index = 0
for i in labels:
    if i == 1 and a < 5:
        plt.subplot(151 + a)
        plt.imshow(images[index].data.squeeze().numpy(), cmap='gray')
        plt.title('crosses ' + str(a + 1))
        a += 1
    if a == 5:
        break
    index += 1
plt.show()
'''


class CNN(nn.Module):
    """Two-stage convolutional classifier producing two class scores.

    Each stage is an unpadded 3x3 convolution followed by ReLU and a shared
    2x2 max-pool. The pooled maps are flattened to ``27 * 27 * 5`` values and
    passed through three fully connected layers.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

        # Classifier head.
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=27 * 27 * 5, out_features=1200)
        self.fc2 = nn.Linear(in_features=1200, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=2)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the images in ``x``."""
        stage1 = self.conv1(x)
        stage1 = self.relu(stage1)
        stage1 = self.maxpool(stage1)

        stage2 = self.conv2(stage1)
        stage2 = self.relu(stage2)
        stage2 = self.maxpool(stage2)

        # One row of 27 * 27 * 5 values per flattened feature vector.
        flat = stage2.view(-1, 27 * 27 * 5)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Model training
model = CNN()
loss = nn.CrossEntropyLoss()
opti = torch.optim.SGD(model.parameters(), lr=0.1)

epochs = 10
log_every = 10  # report the mean loss once per this many mini-batches

for epoch in range(epochs):
    total_loss = 0.0
    for i, data in enumerate(train_loader):
        images, labels = data
        out = model(images)
        one_loss = loss(out, labels)
        opti.zero_grad()
        # Back-propagate: the gradient of the loss with respect to every
        # parameter is stored in that parameter's .grad attribute.
        one_loss.backward()
        opti.step()  # update the weights from the gradients computed above
        # .item() yields a plain Python float, so the running total does not
        # keep autograd history alive between iterations.
        total_loss += one_loss.item()
        if (i + 1) % log_every == 0:
            # Average over exactly the batches accumulated since the last report.
            print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, total_loss / log_every))
            total_loss = 0.0

print('finished train')

# Save the model
torch.save(model, 'model.pth')  # pickles the whole module object, not only its weights
torch.save(model.state_dict(), 'model_name1.pth')  # only the parameter tensors



# Model evaluation

# Load the whole pickled module written by torch.save(model, 'model.pth').
# NOTE(review): unpickling can execute arbitrary code, so only load files you
# created yourself. weights_only=False is needed on PyTorch >= 2.6, where
# torch.load defaults to weights_only=True and rejects whole-module pickles
# (the keyword itself requires PyTorch >= 1.13).
model_load = torch.load('model.pth', weights_only=False)
model_load.eval()

# Classify a single image, images[0], from the most recently loaded batch.
print("labels[0] truth:\t", labels[0])
x = images[0].unsqueeze(0)  # add the batch dimension: [C, H, W] -> [1, C, H, W]
with torch.no_grad():
    # torch.max along dim 1 returns (values, indices); the index of the
    # largest score is the predicted class.
    predicted = torch.max(model_load(x), 1)
print("labels[0] predict:\t", predicted.indices)

img = images[0].data.squeeze().numpy()  # drop the channel axis for plotting
plt.imshow(img, cmap='gray')
plt.show()

# Accuracy on the test set (the model loaded above is reused).
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for data in test_loader:
        images, labels = data
        outputs = model_load(images)
        predicted = outputs.argmax(dim=1)  # index of the largest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the  test images: %f %%' % (100. * correct / total))

# 看看每层的 卷积核 长相，特征图 长相
# 获取网络结构的特征矩阵并可视化
import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader

# Image preprocessing (must match the preprocessing used when the model was trained).
# The pipeline gets its own name: rebinding `transforms` would shadow the
# torchvision module and break every later `transforms.<Something>` lookup.
data_transforms = transforms.Compose([
    transforms.ToTensor(),  # image -> float tensor (8-bit images are scaled to [0, 1])
    transforms.Grayscale(1)  # the argument is the number of output channels
])
path = r'train'
data_train = datasets.ImageFolder(path, transform=data_transforms)
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)

# Draw one batch and report its tensor shapes.
images, labels = next(iter(data_loader))
print(images.shape)
print(labels.shape)


class Net(nn.Module):
    """Feature-map probe for the first convolution stage.

    Every layer of the two-convolution classifier is declared so that a state
    dict with the keys ``conv1``, ``conv2``, ``fc1``, ``fc2`` and ``fc3`` can
    be loaded, but ``forward`` only evaluates the first stage and returns its
    intermediate activations.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 9, 3)  # in_channels, out_channels, kernel_size
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(9, 5, 3)  # in_channels, out_channels, kernel_size

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected layer 1
        self.fc2 = nn.Linear(1200, 64)  # fully connected layer 2
        self.fc3 = nn.Linear(64, 2)  # fully connected layer 3

    def forward(self, x):
        """Return ``[conv1 output, ReLU output, max-pool output]`` for ``x``."""
        conv_out = self.conv1(x)
        relu_out = self.relu(conv_out)
        pooled = self.maxpool(relu_out)
        # conv2 and the fully connected head are deliberately not evaluated:
        # their result is not part of the returned list, and running them
        # would only waste time and reject inputs whose size does not
        # flatten to 27 * 27 * 5.
        return [conv_out, relu_out, pooled]


# create model
model1 = Net()

# Load the trained weights (a state dict) into the probe network.
model_weight_path = "model_name1.pth"
model1.load_state_dict(torch.load(model_weight_path))

# Print the model structure.
print(model1)

# Add the batch dimension so the network receives [N, C, H, W] input.
x = images[0].unsqueeze(0)

# Forward pass; gradients are not needed for visualisation.
with torch.no_grad():
    out_put = model1(x)

for feature_map in out_put:
    # [1, C, H, W] -> [C, H, W]; only the batch axis is dropped.
    im = np.squeeze(feature_map.detach().numpy(), axis=0)
    # [C, H, W] -> [H, W, C]
    im = np.transpose(im, [1, 2, 0])
    print(im.shape)

    # Show up to nine channels in a 3 x 3 grid; each channel is a 2-D map
    # and is drawn like a grey-scale image.
    plt.figure()
    for i in range(min(9, im.shape[2])):
        plt.subplot(3, 3, i + 1)  # rows, columns, 1-based position in the grid
        plt.imshow(im[:, :, i], cmap='gray')
    plt.show()

# Inspect the convolution kernels of the trained model.

# Load the whole pickled module.
# NOTE(review): unpickling can execute arbitrary code, so only load files you
# created yourself. weights_only=False is needed on PyTorch >= 2.6, where
# torch.load defaults to weights_only=True and rejects whole-module pickles
# (the keyword itself requires PyTorch >= 1.13).
model = torch.load('model.pth', weights_only=False)
print(model)

state = model.state_dict()
for key in state:
    print("key :", key)
    # Conv weight layout: [out_channels, in_channels, kernel_height, kernel_width]
    if key == "conv1.weight":
        weight_t = state[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, 0, :, :]  # input channel 0 of every kernel
        # Nine kernels, one input channel each, in a 3 x 3 grid.
        plt.figure()
        for i in range(9):
            plt.subplot(3, 3, i + 1)  # rows, columns, 1-based position in the grid
            plt.imshow(k[i, :, :], cmap='gray')
            title_name = 'kernel' + str(i) + ',channel1'
            plt.title(title_name)
        plt.show()

    if key == "conv2.weight":
        weight_t = state[key].numpy()
        print("weight_t.shape", weight_t.shape)
        k = weight_t[:, :, :, :]  # all kernels, all input channels
        print(k.shape)
        print(k)

        for c in range(9):
            # Start a fresh figure for every input channel instead of relying
            # on show() to discard the previous one.
            plt.figure()
            channel = k[:, c, :, :]
            for i in range(5):
                plt.subplot(2, 3, i + 1)  # rows, columns, 1-based position in the grid
                plt.imshow(channel[i, :, :], cmap='gray')
                title_name = 'kernel' + str(i) + ',channel' + str(c)
                plt.title(title_name)
            plt.show()

感谢这个世界上所有的前人栽树，后人乘凉。

言归正传，解析一下代码流程

代码的详细流程：

导入必要的库：torch.utils.data、transforms、datasets、nn和matplotlib.pyplot。
创建数据集的数据转换操作，将图像转换为深度学习可用的格式，使用ImageFolder函数创建一个数据训练集和测试集，该函数会自动根据图像所在子文件夹的名称进行分类（每个类别对应一个子文件夹），创建数据加载器，用于批量加载和处理数据，使用数据加载器遍历训练集和测试集，打印出每个批次的图像和标签的形状。学习网址：pytorch加载自己的图片数据集的两种方法_pytorch中加载自建数据集,数据集组织形式如imagenet,每一个类一个文件夹__-周-_的博客-CSDN博客
定义一个卷积神经网络模型，该模型包括卷积层、最大池化层、ReLU激活函数和全连接层。
定义模型的前向传播方法，该方法根据输入计算模型的输出。
创建模型实例。定义损失函数和优化器，使用交叉熵损失函数和随机梯度下降优化器，进行模型训练，使用训练数据集进行多个epochs的训练。在每个epoch中，遍历训练数据集的每个批次，计算模型的输出和损失，并更新模型的权重。
保存模型，分别保存整个模型和模型的权重。
加载模型，用于模型测试。使用加载的模型预测一张图片的标签，并可视化图片，计算模型在测试集上的准确率。
可视化模型的特征图和卷积核。

在这个卷积神经网络中，使用了卷积层、最大池化层、ReLU激活函数和全连接层。卷积层用于提取图像的特征，最大池化层用于降低特征图的空间维度，ReLU激活函数用于引入非线性，全连接层用于将特征映射到类别。通过训练模型，可以使其学习到适合分类任务的特征提取方式，并用于对新的图像进行分类。

数据集

模型训练

计算模型准确率

查看训练好的卷积核

2.重新设计网络结构

至少增加一个卷积层，卷积层达到三层以上

class ModifiedCNN(nn.Module):
    """Four-convolution classifier without pooling, ending in two class scores.

    Args:
        input_size: Side length in pixels of the square, single-channel input
            images. Each unpadded 3x3 convolution trims 2 pixels, so the map
            entering ``fc1`` has side ``input_size - 8``. The default of 30
            yields a 22x22 map, i.e. ``64 * 22 * 22`` input features.
            NOTE(review): confirm the real image size of the dataset and pass
            it explicitly; without pooling the size of ``fc1`` grows
            quadratically with ``input_size``.

    Raises:
        ValueError: If ``input_size`` is too small for four 3x3 convolutions.
    """

    def __init__(self, input_size=30):
        super(ModifiedCNN, self).__init__()
        # Four unpadded 3x3 convolutions remove 2 pixels each.
        self.feature_side = input_size - 4 * 2
        if self.feature_side < 1:
            raise ValueError(
                "input_size=%d is too small for four 3x3 convolutions" % input_size
            )

        self.conv1 = nn.Conv2d(1, 9, 3)
        self.conv2 = nn.Conv2d(9, 16, 3)  # second convolution
        self.conv3 = nn.Conv2d(16, 32, 3)  # third convolution
        self.conv4 = nn.Conv2d(32, 64, 3)  # fourth convolution

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(64 * self.feature_side * self.feature_side, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the raw class scores for the images in ``x``.

        Raises:
            ValueError: If the images do not have the ``input_size`` the model
                was built for.
        """
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        # view(-1, K) would silently re-split a wrongly sized batch into a
        # different number of rows whenever the element count is divisible by
        # K, so the feature-map shape is checked first.
        expected = (64, self.feature_side, self.feature_side)
        actual = tuple(x.shape[-3:])
        if actual != expected:
            raise ValueError(
                "feature map shape %s does not match the expected %s; "
                "build the model with the matching input_size" % (actual, expected)
            )
        x = x.view(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

去掉池化层，对比“有无池化”的效果

class ModifiedCNN(nn.Module):
    """Two-convolution classifier with the pooling layers removed.

    Args:
        input_size: Side length in pixels of the square, single-channel input
            images. Each unpadded 3x3 convolution trims 2 pixels, so the map
            entering ``fc1`` has side ``input_size - 4``. The default of 27
            yields a 23x23 map, i.e. ``23 * 23 * 5`` input features.
            NOTE(review): confirm the real image size of the dataset and pass
            it explicitly.

    Raises:
        ValueError: If ``input_size`` is too small for two 3x3 convolutions.
    """

    def __init__(self, input_size=27):
        super(ModifiedCNN, self).__init__()
        # Two unpadded 3x3 convolutions remove 2 pixels each.
        self.feature_side = input_size - 2 * 2
        if self.feature_side < 1:
            raise ValueError(
                "input_size=%d is too small for two 3x3 convolutions" % input_size
            )

        self.conv1 = nn.Conv2d(1, 9, 3)
        self.conv2 = nn.Conv2d(9, 5, 3)

        self.relu = nn.ReLU()
        # Input width of fc1 when no pooling is applied.
        self.fc1 = nn.Linear(self.feature_side * self.feature_side * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the raw class scores for the images in ``x``.

        Raises:
            ValueError: If the images do not have the ``input_size`` the model
                was built for.
        """
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        # view(-1, K) would silently re-split a wrongly sized batch into a
        # different number of rows whenever the element count is divisible by
        # K, so the feature-map shape is checked first.
        expected = (5, self.feature_side, self.feature_side)
        actual = tuple(x.shape[-3:])
        if actual != expected:
            raise ValueError(
                "feature map shape %s does not match the expected %s; "
                "build the model with the matching input_size" % (actual, expected)
            )
        x = x.view(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

修改“通道数”等超参数，观察变化

class ModifiedCNN(nn.Module):
    """Two-stage classifier with wider convolutions (16 and 32 channels).

    Each stage is an unpadded 3x3 convolution, ReLU and a 2x2 max-pool.

    Args:
        input_size: Side length in pixels of the square, single-channel input
            images. The map entering ``fc1`` has side
            ``((input_size - 2) // 2 - 2) // 2``. The default of 54 yields a
            12x12 map, i.e. ``12 * 12 * 32`` input features (any side from 54
            to 57 gives the same map size).
            NOTE(review): confirm the real image size of the dataset and pass
            it explicitly.

    Raises:
        ValueError: If ``input_size`` is too small for the two stages.
    """

    def __init__(self, input_size=54):
        super(ModifiedCNN, self).__init__()
        # Per stage: the 3x3 convolution trims 2 pixels, the pool halves (floor).
        self.feature_side = ((input_size - 2) // 2 - 2) // 2
        if self.feature_side < 1:
            raise ValueError(
                "input_size=%d is too small for two conv/pool stages" % input_size
            )

        self.conv1 = nn.Conv2d(1, 16, 3)  # 16 output channels
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3)  # 32 output channels

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(self.feature_side * self.feature_side * 32, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the raw class scores for the images in ``x``.

        Raises:
            ValueError: If the images do not produce the feature-map size the
                model was built for.
        """
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        # view(-1, K) would silently re-split a wrongly sized batch into a
        # different number of rows whenever the element count is divisible by
        # K, so the feature-map shape is checked first.
        expected = (32, self.feature_side, self.feature_side)
        actual = tuple(x.shape[-3:])
        if actual != expected:
            raise ValueError(
                "feature map shape %s does not match the expected %s; "
                "build the model with the matching input_size" % (actual, expected)
            )
        x = x.view(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

3.可视化

选择自己的最优模型
可视化部分卷积核和特征图
探索低级特征、中级特征、高级特征

确保你的权重文件路径一致

import torch
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms, datasets
import torch.nn as nn
from torch.utils.data import DataLoader

# Image preprocessing (must match the preprocessing used when the model was trained).
preprocessing_steps = [
    transforms.ToTensor(),  # convert the image to a tensor
    transforms.Grayscale(num_output_channels=1),  # reduce to one grey channel
]
data_transforms = transforms.Compose(preprocessing_steps)

# One image per batch, drawn in random order from the 'train' folder.
path = 'train'
data_train = datasets.ImageFolder(root=path, transform=data_transforms)
data_loader = DataLoader(dataset=data_train, batch_size=1, shuffle=True)


class Net(nn.Module):
    """Three-stage convolutional feature extractor used for visualisation.

    Every stage is an unpadded 3x3 convolution followed by ReLU and a shared
    2x2 max-pool. The fully connected layers are registered on the module,
    but ``forward`` does not call them.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=5, out_channels=5, kernel_size=3)

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=5 * 5 * 5, out_features=480)
        self.fc2 = nn.Linear(in_features=480, out_features=320)
        self.fc3 = nn.Linear(in_features=320, out_features=2)

    def forward(self, x):
        """Return the pooled activations of the three stages as a 3-tuple."""
        stage_outputs = []
        current = x
        for conv in (self.conv1, self.conv2, self.conv3):
            current = self.maxpool(self.relu(conv(current)))
            stage_outputs.append(current)
        return tuple(stage_outputs)


# create model
model = Net()

# Load the trained weights.
# NOTE(review): load_state_dict() expects a state dict saved from a network
# with these same layers - confirm that "model.pth" is such a file and not a
# whole pickled module.
model_weight_path = "model.pth"
model.load_state_dict(torch.load(model_weight_path))

# Print the model structure.
print(model)

# Fetch one sample from the loader. The built-in next() is used because the
# Python-2 style `.next()` method is not available on DataLoader iterators in
# recent PyTorch releases.
data_iterator = iter(data_loader)
images, labels = next(data_iterator)

# Forward pass; gradients are not needed for visualisation.
with torch.no_grad():
    conv1_output, conv2_output, conv3_output = model(images)

# Show the feature maps of each stage, one figure per stage.
for feature_map in [conv1_output, conv2_output, conv3_output]:
    # [1, C, H, W] -> [C, H, W]; only the batch axis is dropped.
    im = np.squeeze(feature_map.detach().numpy(), axis=0)
    im = np.transpose(im, [1, 2, 0])  # [C, H, W] -> [H, W, C]

    plt.figure()
    n_channels = im.shape[2]
    for i in range(n_channels):
        plt.subplot(1, n_channels, i + 1)  # one row, one column per channel, 1-based index
        plt.imshow(im[:, :, i], cmap='gray')
    plt.show()