NNDL 作业6:基于CNN的XO识别

本文详细介绍了如何使用numpy和PyTorch手动实现卷积、池化和激活操作,并通过可视化展示卷积核和特征图。接着,构建了一个基于CNN的XO棋识别模型,包括数据预处理、模型构建、训练、测试和模型评估。通过观察训练好的模型的卷积核,理解其对图像特征的强化和抑制作用。最后,展示了模型在不同层的特征图,揭示了卷积层如何逐步提取图像特征。
摘要由CSDN通过智能技术生成

一、实现卷积-池化-激活

1. Numpy版本:手工实现 卷积-池化-激活

自定义卷积算子、池化算子实现

#coding:utf-8
import numpy as np
from matplotlib import pyplot as plt

# 9x9 test picture: an "X" drawn with intensity 225 on a zero background,
# surrounded by a one-pixel empty border.
img = np.array([
    [0,   0,   0,   0,   0,   0,   0,   0, 0],
    [0, 225,   0,   0,   0,   0,   0, 225, 0],
    [0,   0, 225,   0,   0,   0, 225,   0, 0],
    [0,   0,   0, 225,   0, 225,   0,   0, 0],
    [0,   0,   0,   0, 225,   0,   0,   0, 0],
    [0,   0,   0, 225,   0, 225,   0,   0, 0],
    [0,   0, 225,   0,   0,   0, 225,   0, 0],
    [0, 225,   0,   0,   0,   0,   0, 225, 0],
    [0,   0,   0,   0,   0,   0,   0,   0, 0],
])

# +1 along the main diagonal, -1 elsewhere.
kernel1 = np.array([
    [ 1, -1, -1],
    [-1,  1, -1],
    [-1, -1,  1],
])

# +1 on both diagonals (a small "X"), -1 elsewhere.
kernel2 = np.array([
    [ 1, -1,  1],
    [-1,  1, -1],
    [ 1, -1,  1],
])

# +1 along the anti-diagonal, -1 elsewhere.
kernel3 = np.array([
    [-1, -1,  1],
    [-1,  1, -1],
    [ 1, -1, -1],
])

def rgb_to_gray(rgb):
    """Return the weighted sum of the three colour planes of ``rgb``.

    ``rgb`` is indexed as ``rgb[:, :, c]`` for c = 0, 1, 2; the planes are
    weighted 0.2989, 0.5870 and 0.1140 respectively and added together.
    """
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue


def adjust(img):
    """Clamp every value of ``img`` to the range [0, 255], in place.

    Values above 255 become 255 and values below 0 become 0; everything else
    is left untouched.  The clamping is done by a single vectorised
    ``np.clip`` call writing back into ``img`` instead of a Python loop over
    each pixel, so arrays of any number of dimensions are accepted.

    Args:
        img: NumPy array to clamp; it is modified in place.

    Returns:
        The same array object that was passed in.
    """
    np.clip(img, 0, 255, out=img)
    return img

def myConv(img,kernel):
    """Slide ``kernel`` over ``img`` without padding and return the feature map.

    At every position where the kernel fits completely inside the image, the
    window under the kernel is multiplied element-wise with the kernel and
    summed (the kernel is not flipped).  The window size is taken from the
    kernel's own shape, so kernels other than 3x3 work as well.

    Args:
        img: 2-D array-like input image.
        kernel: 2-D array-like kernel.

    Returns:
        Array of shape ``(H - kh + 1, W - kw + 1)`` for an ``H x W`` image and
        a ``kh x kw`` kernel.

    Raises:
        ValueError: if the kernel is so much larger than the image that the
            output would have a negative size.
    """
    img = np.asarray(img)
    kernel = np.asarray(kernel)
    k_rows, k_cols = kernel.shape
    out_rows = img.shape[0] - k_rows + 1
    out_cols = img.shape[1] - k_cols + 1
    if out_rows < 0 or out_cols < 0:
        raise ValueError('kernel must not be larger than the image')

    out_img = np.empty((out_rows, out_cols), dtype=np.result_type(img, kernel))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window whose top-left corner is (r, c) and whose size equals the kernel's.
            sight = img[r:r + k_rows, c:c + k_cols]
            out_img[r, c] = np.sum(sight * kernel)
    return out_img

def show_img(img):
    """Open a new figure and display ``img`` with the gray colormap.

    The argument itself is drawn; previously the function ignored it and
    read a global named ``out_img`` instead.
    """
    plt.figure()
    plt.imshow(img, cmap='gray')
    plt.show()

if __name__=='__main__':
    # One row per kernel: input image | kernel | resulting feature map.
    labels=['kernel1','kernel2','kernel3']
    plt.figure()
    for row, (label, kernel) in enumerate(zip(labels, [kernel1,kernel2,kernel3])):
        out_img=myConv(img,kernel=kernel)
        panels = [(img, 'original'), (kernel, label), (out_img, 'output_img')]
        for col, (picture, title) in enumerate(panels, start=1):
            # Subplot indices run 1..9, row by row.
            plt.subplot(3, 3, 3 * row + col)
            plt.imshow(picture, cmap='gray')
            plt.title(title)
    plt.show()


在这里插入图片描述

2. Pytorch版本:调用函数实现 卷积-池化-激活

调用框架自带算子实现,对比自定义算子

#coding:utf-8
import numpy as np
import torch
from matplotlib import pyplot as plt

# 9x9 test picture: an "X" drawn with intensity 225 on a zero background,
# surrounded by a one-pixel empty border.
img = np.array([
    [0,   0,   0,   0,   0,   0,   0,   0, 0],
    [0, 225,   0,   0,   0,   0,   0, 225, 0],
    [0,   0, 225,   0,   0,   0, 225,   0, 0],
    [0,   0,   0, 225,   0, 225,   0,   0, 0],
    [0,   0,   0,   0, 225,   0,   0,   0, 0],
    [0,   0,   0, 225,   0, 225,   0,   0, 0],
    [0,   0, 225,   0,   0,   0, 225,   0, 0],
    [0, 225,   0,   0,   0,   0,   0, 225, 0],
    [0,   0,   0,   0,   0,   0,   0,   0, 0],
])

# +1 along the main diagonal, -1 elsewhere.
kernel1 = np.array([
    [ 1, -1, -1],
    [-1,  1, -1],
    [-1, -1,  1],
])

# +1 on both diagonals (a small "X"), -1 elsewhere.
kernel2 = np.array([
    [ 1, -1,  1],
    [-1,  1, -1],
    [ 1, -1,  1],
])

# +1 along the anti-diagonal, -1 elsewhere.
kernel3 = np.array([
    [-1, -1,  1],
    [-1,  1, -1],
    [ 1, -1, -1],
])

def rgb_to_gray(rgb):
    """Return the weighted sum of the three colour planes of ``rgb``.

    ``rgb`` is indexed as ``rgb[:, :, c]`` for c = 0, 1, 2; the planes are
    weighted 0.2989, 0.5870 and 0.1140 respectively and added together.
    """
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue


def adjust(img):
    """Clamp every value of ``img`` to the range [0, 255], in place.

    Values above 255 become 255 and values below 0 become 0; everything else
    is left untouched.  The clamping is done by a single vectorised
    ``np.clip`` call writing back into ``img`` instead of a Python loop over
    each pixel, so arrays of any number of dimensions are accepted.

    Args:
        img: NumPy array to clamp; it is modified in place.

    Returns:
        The same array object that was passed in.
    """
    np.clip(img, 0, 255, out=img)
    return img

def conv(img, kernel):
    """Convolve ``img`` with ``kernel`` using PyTorch and return a NumPy array.

    Both inputs are converted to float32 tensors of shape (1, 1, H, W) and fed
    to ``torch.nn.functional.conv2d`` with no bias term, so the result is the
    pure sliding-window product-sum.  (Building an ``nn.Conv2d`` with default
    arguments adds a randomly initialised bias, which offsets every output
    value by a random constant.)

    Args:
        img: 2-D NumPy array, the input image.
        kernel: 2-D NumPy array, the convolution kernel.

    Returns:
        The feature map as a NumPy array with singleton dimensions removed.
    """
    in_img = torch.from_numpy(img.astype(np.float32)).reshape((1, 1, img.shape[0], img.shape[1]))
    weight = torch.from_numpy(kernel.astype(np.float32)).reshape((1, 1, kernel.shape[0], kernel.shape[1]))
    out_img = torch.nn.functional.conv2d(in_img, weight)
    # Drop the batch and channel dimensions so the result can be plotted directly.
    return np.squeeze(out_img.numpy())

def show_img(img):
    """Open a new figure and display ``img`` with the gray colormap.

    The argument itself is drawn; previously the function ignored it and
    read a global named ``out_img`` instead.
    """
    plt.figure()
    plt.imshow(img, cmap='gray')
    plt.show()

if __name__=='__main__':
    # One row per kernel: input image | kernel | resulting feature map.
    labels=['kernel1','kernel2','kernel3']
    plt.figure()
    for row, (label, kernel) in enumerate(zip(labels, [kernel1,kernel2,kernel3])):
        out_img=conv(img,kernel=kernel)
        panels = [(img, 'original'), (kernel, label), (out_img, 'output_img')]
        for col, (picture, title) in enumerate(panels, start=1):
            # Subplot indices run 1..9, row by row.
            plt.subplot(3, 3, 3 * row + col)
            plt.imshow(picture, cmap='gray')
            plt.title(title)
    plt.show()


在这里插入图片描述

3. 可视化:了解数字与图像之间的关系

可视化卷积核和特征图
手动numpy实现与调用pytorch实现得到的特征图,显示效果一致。
观察卷积核的权重可以发现:特征图强化了原图像中与卷积核模式相符的部分,同时抑制了与卷积核模式不符的部分。局部图案与卷积核越不相似,被抑制的程度越大;反之,越相似的位置响应值越大。
参考代码
卷积-池化-激活_HBU_David的博客-CSDN博客

二、 基于CNN的XO识别

模拟的网络结构图
在这里插入图片描述
其中conv代表的是卷积层,relu代表relu激活层,fc代表的是全连接层。

1. 数据集

QQ群下载

【注】

QQ群内下载的数据集没有分测试集和训练集。

共2000张图片,X、O各1000张。

从X、O文件夹,分别取出150张作为测试集。

文件夹train_data:放置训练集 1700张图片

文件夹test_data: 放置测试集 300张图片

已经按照要求修改好。
在这里插入图片描述

2. 构建模型

class Net(nn.Module):
    """Small CNN: two conv -> ReLU -> max-pool stages, then three linear layers.

    The flattened size 27 * 27 * 5 corresponds to a 1 x 116 x 116 input
    (116 -> 114 -> 57 -> 55 -> 27 through the conv/pool stages).  The network
    outputs two raw scores per image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept so saved weights still match.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the two class scores for ``x``."""
        feats = self.conv1(x)
        feats = self.relu(feats)
        feats = self.maxpool(feats)

        feats = self.conv2(feats)
        feats = self.relu(feats)
        feats = self.maxpool(feats)

        # -1 lets view() work out the leading (batch) dimension.
        flat = feats.view(-1, 27 * 27 * 5)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

3. 训练模型

def train(model, criterion, optimizer, data_loader, epochs=10):
    """Train ``model`` and save it to disk.

    For every batch the loss is computed, back-propagated and an optimizer
    step is taken.  Every 10 batches the mean loss of those 10 batches is
    printed (the sum is divided by 10, the number of batches accumulated).

    After training, two files are written to the working directory:
    'model_name.pth' (the whole pickled model) and 'model_name1.pth' (the
    state_dict only, which is what show_features / show_kernels load by
    default).

    Args:
        model: network to train.
        criterion: callable ``criterion(output, label)`` returning the loss.
        optimizer: optimizer updating ``model``'s parameters.
        data_loader: iterable yielding ``(images, label)`` batches.
        epochs: number of passes over ``data_loader`` (default 10).
    """
    print('start training')
    log_every = 10  # batches between two progress lines
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (images, label) in enumerate(data_loader):
            out = model(images)
            loss = criterion(out, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0
    print('finished train')
    torch.save(model, 'model_name.pth')  # whole model object, not only weights
    torch.save(model.state_dict(), 'model_name1.pth')  # parameters only

4. 测试训练好的模型

def test(model_path,images,labels):
    """Load a saved model, classify ``images[0]`` and display that image.

    Prints ``labels[0]`` and the predicted class index, then shows the image
    with the gray colormap.
    """
    # NOTE(review): torch.load unpickles the file; only load files you trust.
    model_load = torch.load(model_path)

    print("labels[0] truth:\t", labels[0])
    sample = images[0]
    # torch.max over dim 1 yields (values, indices); the index is the class.
    predicted = torch.max(model_load(sample), 1)
    print("labels[0] predict:\t", predicted.indices)

    # Drop singleton dimensions so imshow receives a 2-D array.
    plt.imshow(sample.data.squeeze().numpy(), cmap='gray')
    plt.show()

5. 计算模型的准确率

def accuracy(model_path,data_loader_test):
    """Load a saved model and print its accuracy (in percent) on a data loader."""
    # NOTE(review): torch.load unpickles the file; only load files you trust.
    model_load = torch.load(model_path)
    n_correct = 0
    n_seen = 0
    with torch.no_grad():  # evaluation only, no gradients needed
        for images, labels in data_loader_test:
            scores = model_load(images)
            # Index of the largest score in each row is the predicted class.
            predicted = torch.max(scores.data, 1).indices
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the  test images: %f %%' % (100. * n_correct / n_seen))

6. 查看训练好的模型的特征图

def show_features(data_loader,model_weight_path = "model_name1.pth"):
    """Plot the feature maps after conv1, its ReLU and the first max-pool.

    Takes the first image of the first batch from ``data_loader``, runs it
    through a network rebuilt from the state_dict at ``model_weight_path``
    and shows one figure (nine maps each) per collected stage.
    """
    # Only the first batch is needed.
    for images, labels in data_loader:
        print(images.shape)
        print(labels.shape)
        break

    class _Net(nn.Module):
        """Same layers as the trained network; forward returns intermediate maps."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(27 * 27 * 5, 1200)
            self.fc2 = nn.Linear(1200, 64)
            self.fc3 = nn.Linear(64, 2)

        def forward(self, x):
            after_conv = self.conv1(x)
            after_relu = self.relu(after_conv)
            after_pool = self.maxpool(after_relu)

            # The remainder of the network is still evaluated; its result is discarded.
            rest = self.maxpool(self.relu(self.conv2(after_pool)))
            rest = rest.view(-1, 27 * 27 * 5)
            rest = self.relu(self.fc1(rest))
            rest = self.relu(self.fc2(rest))
            rest = self.fc3(rest)
            return [after_conv, after_relu, after_pool]

    model1 = _Net()
    model1.load_state_dict(torch.load(model_weight_path))

    # The loader decides which image comes first.
    out_put = model1(images[0])
    titles=['conv1 output','relu of conv1','maxpool1 output']
    for title, feature_map in zip(titles, out_put):
        # Remove singleton dims, then reorder [C, H, W] -> [H, W, C] for plotting.
        im = np.transpose(np.squeeze(feature_map.detach().numpy()), [1, 2, 0])
        print(im.shape)
        plt.figure(title)
        for channel in range(9):
            # 3 x 3 grid, one cell per channel.
            plt.subplot(3, 3, channel + 1)
            plt.imshow(im[:, :, channel], cmap='gray')
        plt.show()

if __name__=='__main__':
    # Image preprocessing (must match the preprocessing used during training).
    # The pipeline is bound to `transform` so that the imported `transforms`
    # module is not overwritten; re-running the script section would otherwise
    # fail because a Compose object has no `Compose` attribute.
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert the image to a Tensor
        transforms.Grayscale(1)  # reduce to a single grayscale channel
    ])
    # Dataset that has not been split into train / test folders.
    path = r'training_data_sm'
    data_train = datasets.ImageFolder(path, transform=transform)
    data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
    # Show the feature maps.
    show_features(data_loader)

如果要看第二次卷积的特征图,改变临时模型forward中outputs.append的位置即可。
第一次卷积输入1个通道,输出9个通道。outputs依次收集了第一次卷积、第一次激活和第一次池化的输出,每个输出对应一张包含9个特征图的图片。

torch.Size([64, 1, 116, 116])
torch.Size([64])
(114, 114, 9)
(114, 114, 9)
(57, 57, 9)

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

改变output的收集位置后,观看第二次卷积的特征图:

(55, 55, 5)
(55, 55, 5)
(27, 27, 5)

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
全部一起观察:

def show_features(data_loader,model_weight_path = "model_name1.pth"):
    """Plot the feature maps after every conv, ReLU and max-pool stage.

    Takes the first image of the first batch from ``data_loader``, runs it
    through a network rebuilt from the state_dict at ``model_weight_path``
    and shows six figures: three with nine maps (first stage) and three with
    five maps (second stage).
    """
    # Only the first batch is needed.
    for images, labels in data_loader:
        print(images.shape)
        print(labels.shape)
        break

    class _Net(nn.Module):
        """Same layers as the trained network; forward returns intermediate maps."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(27 * 27 * 5, 1200)
            self.fc2 = nn.Linear(1200, 64)
            self.fc3 = nn.Linear(64, 2)

        def forward(self, x):
            outputs = []
            # conv -> relu -> pool, twice; record the tensor after every layer.
            for conv_layer in (self.conv1, self.conv2):
                for layer in (conv_layer, self.relu, self.maxpool):
                    x = layer(x)
                    outputs.append(x)
            # The classifier head is still evaluated; its result is discarded.
            x = x.view(-1, 27 * 27 * 5)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            x = self.fc3(x)
            return outputs

    model1 = _Net()
    model1.load_state_dict(torch.load(model_weight_path))

    # The loader decides which image comes first.
    out_put = model1(images[0])
    # (figure title, number of channels to draw) for each collected tensor.
    stages = [('conv1 output', 9), ('relu of conv1', 9), ('maxpool1 output', 9),
              ('conv2 output', 5), ('relu of conv2', 5), ('maxpool2 output', 5)]
    for (title, n_maps), feature_map in zip(stages, out_put):
        # Remove singleton dims, then reorder [C, H, W] -> [H, W, C] for plotting.
        im = np.transpose(np.squeeze(feature_map.detach().numpy()), [1, 2, 0])
        print(im.shape)
        plt.figure(title)
        for channel in range(n_maps):
            # 3 x 3 grid, one cell per channel.
            plt.subplot(3, 3, channel + 1)
            plt.imshow(im[:, :, channel], cmap='gray')
        plt.show()

这个一会再用。

7. 查看训练好的模型的卷积核

def show_kernels(data_loader, model_weight_path = "model_name1.pth"):
    """Plot the learned kernels of conv1 and conv2.

    A network is rebuilt from the state_dict at ``model_weight_path``.  The
    nine conv1 kernels are shown in one 3 x 3 figure; for conv2 the five
    kernels of each of the nine input channels are shown channel by channel.
    """
    # Only the first batch is needed.
    for images, labels in data_loader:
        break

    class Net(nn.Module):
        """Same layers as the trained network, used only to hold the weights."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(27 * 27 * 5, 1200)
            self.fc2 = nn.Linear(1200, 64)
            self.fc3 = nn.Linear(64, 2)

        def forward(self, x):
            x = self.maxpool(self.relu(self.conv1(x)))
            pooled = self.maxpool(self.relu(self.conv2(x)))
            # The classifier head is still evaluated; its result is discarded.
            x = pooled.view(-1, 27 * 27 * 5)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            x = self.fc3(x)
            return [pooled]

    model1 = Net()
    model1.load_state_dict(torch.load(model_weight_path))
    # The forward result is not used below.
    _ = model1(images[0])

    for key, tensor in model1.state_dict().items():
        print("key :", key)
        # Conv weights are laid out as [kernel_number, kernel_channel, height, width].
        if key == "conv1.weight":
            weight_t = tensor.numpy()
            print("weight_t.shape", weight_t.shape)
            # conv1 has a single input channel; take it for all nine kernels.
            k = weight_t[:, 0, :, :]
            plt.figure()
            for idx in range(9):
                plt.subplot(3, 3, idx + 1)
                plt.imshow(k[idx, :, :], cmap='gray')
                plt.title('kernel' + str(idx) + ',channel1')
            plt.show()

        if key == "conv2.weight":
            weight_t = tensor.numpy()
            print("weight_t.shape", weight_t.shape)
            k = weight_t[:, :, :, :]  # all kernels, all input channels
            print(k.shape)
            print(k)

            plt.figure()
            for c in range(9):
                # Slice of every kernel that acts on input channel c.
                per_channel = k[:, c, :, :]
                for idx in range(5):
                    plt.subplot(2, 3, idx + 1)
                    plt.imshow(per_channel[idx, :, :], cmap='gray')
                    plt.title('kernel' + str(idx) + ',channel' + str(c))
                plt.show()

if __name__=='__main__':
    # Image preprocessing (must match the preprocessing used during training).
    # The pipeline is bound to `transform` so that the imported `transforms`
    # module is not overwritten; re-running the script section would otherwise
    # fail because a Compose object has no `Compose` attribute.
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert the image to a Tensor
        transforms.Grayscale(1)  # reduce to a single grayscale channel
    ])
    # Dataset that has not been split into train / test folders.
    path = r'training_data_sm'
    data_train = datasets.ImageFolder(path, transform=transform)
    data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
    # Feature-map visualisation is disabled here:
    #show_features(data_loader)
    # Show the convolution kernels.
    show_kernels(data_loader)

第一次卷积有1个输入通道、9个输出通道,因此有1x9个卷积核;第二次卷积有9个输入通道、5个输出通道,所以有9x5个卷积核。
第一次卷积:
在这里插入图片描述
第二次卷积:
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

8. 全部源代码

import numpy as np
import torch
from matplotlib import pyplot as plt
from torch import nn, optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets

# Figure text setup: use the SimHei font so Chinese labels render, and turn
# off the Unicode minus glyph so negative signs display correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

class Net(nn.Module):
    """Small CNN: two conv -> ReLU -> max-pool stages, then three linear layers.

    The flattened size 27 * 27 * 5 corresponds to a 1 x 116 x 116 input
    (116 -> 114 -> 57 -> 55 -> 27 through the conv/pool stages).  The network
    outputs two raw scores per image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept so saved weights still match.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(27 * 27 * 5, 1200)
        self.fc2 = nn.Linear(1200, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Return the two class scores for ``x``."""
        feats = self.conv1(x)
        feats = self.relu(feats)
        feats = self.maxpool(feats)

        feats = self.conv2(feats)
        feats = self.relu(feats)
        feats = self.maxpool(feats)

        # -1 lets view() work out the leading (batch) dimension.
        flat = feats.view(-1, 27 * 27 * 5)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

def train(model ,criterion,  optimizer  ,data_loader,epochs = 10):
    """Train ``model`` and save it to disk.

    For every batch the loss is computed, back-propagated and an optimizer
    step is taken.  Every 10 batches the mean loss of those 10 batches is
    printed (the sum is divided by 10, the number of batches accumulated).

    After training, two files are written to the working directory:
    'model_name.pth' (the whole pickled model) and 'model_name1.pth' (the
    state_dict only).

    Args:
        model: network to train.
        criterion: callable ``criterion(output, label)`` returning the loss.
        optimizer: optimizer updating ``model``'s parameters.
        data_loader: iterable yielding ``(images, label)`` batches.
        epochs: number of passes over ``data_loader`` (default 10).
    """
    log_every = 10  # batches between two progress lines
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (images, label) in enumerate(data_loader):
            out = model(images)
            loss = criterion(out, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[%d  %5d]   loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0
    print('finished train')
    torch.save(model, 'model_name.pth')  # whole model object, not only weights
    torch.save(model.state_dict(),'model_name1.pth')  # parameters only

def test(model_path,images,labels):
    """Load a saved model, classify ``images[0]`` and display that image.

    Prints ``labels[0]`` and the predicted class index, then shows the image
    with the gray colormap.
    """
    # NOTE(review): torch.load unpickles the file; only load files you trust.
    model_load = torch.load(model_path)

    print("labels[0] truth:\t", labels[0])
    sample = images[0]
    # torch.max over dim 1 yields (values, indices); the index is the class.
    predicted = torch.max(model_load(sample), 1)
    print("labels[0] predict:\t", predicted.indices)

    # Drop singleton dimensions so imshow receives a 2-D array.
    plt.imshow(sample.data.squeeze().numpy(), cmap='gray')
    plt.show()

def accuracy(model_path,data_loader_test):
    """Load a saved model and print its accuracy (in percent) on a data loader."""
    # NOTE(review): torch.load unpickles the file; only load files you trust.
    model_load = torch.load(model_path)
    n_correct = 0
    n_seen = 0
    with torch.no_grad():  # evaluation only, no gradients needed
        for images, labels in data_loader_test:
            scores = model_load(images)
            # Index of the largest score in each row is the predicted class.
            predicted = torch.max(scores.data, 1).indices
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the  test images: %f %%' % (100. * n_correct / n_seen))

def show_kernels(data_loader, model_weight_path = "model_name1.pth"):
    """Plot the learned kernels of conv1 and conv2.

    A network is rebuilt from the state_dict at ``model_weight_path``.  The
    nine conv1 kernels are shown in one 3 x 3 figure; for conv2 the five
    kernels of each of the nine input channels are shown channel by channel.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches; only the
            first image of the first batch is used, for one forward pass.
        model_weight_path: path of the saved state_dict to load.
    """
    # The whole body is indented with spaces; a tab-indented first line here
    # previously made the function a syntax error.
    for i, data in enumerate(data_loader):
        images, labels = data
        break

    class Net(nn.Module):
        """Same layers as the trained network, used only to hold the weights."""

        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 9, 3)  # in_channel , out_channel , kernel_size
            self.maxpool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(9, 5, 3)  # in_channel , out_channel , kernel_size

            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(27 * 27 * 5, 1200)  # fully connected 1
            self.fc2 = nn.Linear(1200, 64)  # fully connected 2
            self.fc3 = nn.Linear(64, 2)  # fully connected 3

        def forward(self, x):
            outputs = []
            x = self.maxpool(self.relu(self.conv1(x)))
            x = self.maxpool(self.relu(self.conv2(x)))
            outputs.append(x)
            x = x.view(-1, 27 * 27 * 5)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            x = self.fc3(x)
            return outputs

    model1 = Net()
    model1.load_state_dict(torch.load(model_weight_path))
    x = images[0]
    _ = model1(x)  # the forward result is not used below

    for key, tensor in model1.state_dict().items():
        # Conv weights are laid out as [kernel_number, kernel_channel, height, width].
        if key == "conv1.weight":
            weight_t = tensor.numpy()
            print("weight_t.shape", weight_t.shape)
            # conv1 has a single input channel; take it for all nine kernels.
            k = weight_t[:, 0, :, :]
            plt.figure()
            for i in range(9):
                plt.subplot(3, 3, i + 1)
                plt.imshow(k[i, :, :], cmap='gray')
                title_name = 'kernel' + str(i) + ',channel1'
                plt.title(title_name)
            plt.show()

        if key == "conv2.weight":
            weight_t = tensor.numpy()
            print("weight_t.shape", weight_t.shape)
            k = weight_t[:, :, :, :]  # all kernels, all input channels
            print(k.shape)
            print(k)

            plt.figure()
            for c in range(9):
                # Slice of every kernel that acts on input channel c.
                channel = k[:, c, :, :]
                for i in range(5):
                    plt.subplot(2, 3, i + 1)  # 2 x 3 grid, five cells used
                    plt.imshow(channel[i, :, :], cmap='gray')
                    title_name = 'kernel' + str(i) + ',channel' + str(c)
                    plt.title(title_name)
                plt.show()


def show_features(data_loader,model_weight_path = "model_name1.pth"):
    """Plot the feature maps after every conv, ReLU and max-pool stage.

    Takes the first image of the first batch from ``data_loader``, runs it
    through a network rebuilt from the state_dict at ``model_weight_path``
    and shows six figures: three with nine maps (first stage) and three with
    five maps (second stage).
    """
    # Only the first batch is needed.
    for images, labels in data_loader:
        break

    class _Net(nn.Module):
        """Same layers as the trained network; forward returns intermediate maps."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=9, kernel_size=3)
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=9, out_channels=5, kernel_size=3)

            self.relu = nn.ReLU()
            self.fc1 = nn.Linear(27 * 27 * 5, 1200)
            self.fc2 = nn.Linear(1200, 64)
            self.fc3 = nn.Linear(64, 2)

        def forward(self, x):
            outputs = []
            # conv -> relu -> pool, twice; record the tensor after every layer.
            for conv_layer in (self.conv1, self.conv2):
                for layer in (conv_layer, self.relu, self.maxpool):
                    x = layer(x)
                    outputs.append(x)
            # The classifier head is still evaluated; its result is discarded.
            x = x.view(-1, 27 * 27 * 5)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            x = self.fc3(x)
            return outputs

    model1 = _Net()
    model1.load_state_dict(torch.load(model_weight_path))

    # The loader decides which image comes first.
    out_put = model1(images[0])
    # (figure title, number of channels to draw) for each collected tensor.
    stages = [('conv1 output', 9), ('relu of conv1', 9), ('maxpool1 output', 9),
              ('conv2 output', 5), ('relu of conv2', 5), ('maxpool2 output', 5)]
    for (title, n_maps), feature_map in zip(stages, out_put):
        # Remove singleton dims, then reorder [C, H, W] -> [H, W, C] for plotting.
        im = np.transpose(np.squeeze(feature_map.detach().numpy()), [1, 2, 0])
        print(im.shape)
        plt.figure(title)
        for channel in range(n_maps):
            # 3 x 3 grid, one cell per channel.
            plt.subplot(3, 3, channel + 1)
            plt.imshow(im[:, :, channel], cmap='gray')
        plt.show()

if __name__=='__main__':
    # Image preprocessing (must match the preprocessing used during training).
    # The pipeline is bound to `transform` so that the imported `transforms`
    # module is not overwritten; re-running the script section would otherwise
    # fail because a Compose object has no `Compose` attribute.
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert the image to a Tensor
        transforms.Grayscale(1)  # reduce to a single grayscale channel
    ])
    # Dataset that has not been split into train / test folders; used only
    # for the two visualisations below.
    path = r'training_data_sm'
    data_train = datasets.ImageFolder(path, transform=transform)
    data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
    # NOTE(review): both visualisations load 'model_name1.pth' by default, so a
    # state_dict from an earlier training run must already exist at this point.
    # Show the feature maps.
    show_features(data_loader)
    # Show the convolution kernels.
    show_kernels(data_loader)
    # Dataset split into train / test folders.
    path = r'train_data'
    path_test = r'test_data'

    data_train = datasets.ImageFolder(path, transform=transform)
    data_test = datasets.ImageFolder(path_test, transform=transform)

    print("size of train_data:", len(data_train))
    print("size of test_data:", len(data_test))

    data_loader = DataLoader(data_train, batch_size=64, shuffle=True)
    data_loader_test = DataLoader(data_test, batch_size=64, shuffle=True)

    model = Net()
    criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(model.parameters(), lr=0.1)  # stochastic gradient descent
    # Train the model.
    train(model=model,criterion=criterion,optimizer=optimizer,data_loader=data_loader,epochs=10)
    # Print the accuracy on the test set.
    accuracy(model_path='model_name.pth',data_loader_test=data_loader_test)


参考代码
CNN实现XO识别_HBU_David的博客-CSDN博客
comefrom:
NNDL 作业6:基于CNN的XO识别

总结:对老师给的代码进行了细致的分析,发现其中有很多值得学习的地方,比如imshow输入图像时的维度变换、建立临时网络模型的思想等,收获很大。通过复现老师的代码,我理解了不同层卷积核的位置与数目之间的关系;同时观察了自己训练的模型的卷积核可视化结果,深深地体会到卷积核参数的难以解释性。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值