PyTorch入门六 || 卷积神经网络（基础）

Anthony_CH

已于 2022-03-20 19:02:38 修改

阅读量1.3k

点赞数 2

于 2022-02-06 13:28:58 首次发布

本文链接：https://blog.csdn.net/qq_56039091/article/details/122797569

版权

深度学习专栏收录该内容

15 篇文章 2 订阅

订阅专栏

由于全连接层直接将图片的像素行连接起来会丢失图片的空间信息，故引入CNN

CNN的过程

卷积过程

多通道图像卷积过程

三个通道变成了一个：

n个通道变成m个：

每一个卷积核的通道数与输入通道数一致，有多少个这样的卷积核则输出就有多少个通道

CNN参数的核心就是输入的通道，输出的通道，卷积核的大小

code

import numpy as np
import torch
# Demo: push one random batch through a Conv2d layer and inspect the shapes.
in_channels, out_channels = 5, 10
width, height = 100, 100

# Most convolution kernels use an odd size.
kernel_size = 3
batch_size = 1

# Random input tensor with four axes: batch, channels and two spatial axes.
input = torch.randn(batch_size,
                    in_channels,
                    width,
                    height)

# Use the kernel_size declared above instead of a hard-coded (5, 3), so the
# hyper-parameter actually controls the layer.
conv_layer = torch.nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=kernel_size)

output = conv_layer(input)

print(input.shape)              # torch.Size([1, 5, 100, 100])
print(output.shape)             # torch.Size([1, 10, 98, 98])
print(conv_layer.weight.shape)  # torch.Size([10, 5, 3, 3])

padding

如果想保持图像卷积后大小不变，则需要在卷积前对原始图像做padding操作，即填充数字，最常用填充0

N：原始图像的边长；n：卷积核尺寸（一般为奇数）；p：每一侧填充的圈数

不做 padding 时，卷积后的输出尺寸为 N-n+1；每侧填充 p 后输出尺寸为 N+2p-n+1，取 p=(n-1)/2 即可使输出与输入大小相同

stride

stride即为步长，卷积过程并不是一个接一个，如果步长为2，则中间会跳一个，如果步长为3，则中间会跳两个

N：原始图像的边长；n：卷积核尺寸（一般为奇数）；s：步长

卷积后的输出尺寸为 ⌊(N-n)/s⌋ + 1 ；其中 (N-n)/s 为下取整（例如 N=5，n=3，s=2 时输出为 2）

code

import numpy as np
import torch

# 5x5 single-channel image, written one row per line.
pixel_rows = [[3, 4, 6, 5, 7],
              [2, 4, 6, 8, 2],
              [1, 6, 7, 8, 4],
              [9, 7, 4, 6, 2],
              [3, 7, 5, 4, 1]]

# Reshape to four axes; the leading 1 is the mini-batch size.
input = torch.Tensor(pixel_rows).view(1, 1, 5, 5)

# Stride-2 convolution without padding or bias.
# (A padding=1 layer with the default stride would keep the 5x5 size instead.)
conv_layer = torch.nn.Conv2d(in_channels=1,
                             out_channels=1,
                             kernel_size=3,
                             stride=2,
                             bias=False)

# Replace the random initial weights with a fixed 3x3 kernel holding 1..9 so
# the result can be checked by hand.
kernel = torch.Tensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]]).view(1, 1, 3, 3)
conv_layer.weight.data = kernel.data

output = conv_layer(input)
print(output)

下采样

最常用的是Max Pooling，该下采样无参数

n*n的Max Pooling 默认stride是n；图片的通道数量不变，每个通道单独计算

例如上述，将图片划分为2*2一组，则每组中选择一个最大的值

import numpy as np
import torch

# 5x5 single-channel image, written one row per line.
pixel_rows = [[3, 4, 6, 5, 7],
              [2, 4, 6, 8, 2],
              [1, 6, 7, 8, 4],
              [9, 7, 4, 6, 2],
              [3, 7, 5, 4, 1]]

# Reshape to four axes; the leading 1 is the mini-batch size.
input = torch.Tensor(pixel_rows).view(1, 1, 5, 5)

# With only kernel_size given, the stride defaults to the kernel size, so the
# 2x2 windows do not overlap.
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)

output = maxpooling_layer(input)
print(output)

示例

采用 GPU 训练：

# GPU acceleration: use the first CUDA device when one is available,
# otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model.to(device)

# After a training batch has been loaded, move it to the same device as the
# model; test batches are handled the same way.
x = x.to(device)
y = y.to(device)

所有代码

import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
import matplotlib.pyplot as plt

# Preprocessing applied to every MNIST image.
transform = transforms.Compose([
    # Convert the image to a float tensor with values in [0, 1]. The 2-D
    # layout of the image is kept; nothing is flattened at this stage.
    transforms.ToTensor(),
    # Standardise with the pre-computed MNIST mean (first tuple) and
    # standard deviation (second tuple).
    transforms.Normalize((0.1307,), (0.3081,)),
])

batch_size = 64

# download=True fetches the files into `root` only when they are missing, so
# the script also runs on a machine without a local copy of the dataset.
train_dataset = datasets.MNIST(root='../dataset/mnist',
                               train=True,
                               download=True,
                               transform=transform)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset,
                          shuffle=True,
                          batch_size=batch_size)

test_dataset = datasets.MNIST(root='../dataset/mnist',
                              train=False,
                              download=True,
                              transform=transform)

# Evaluation order does not matter, so the test set is not shuffled.
test_loader = DataLoader(test_dataset,
                         shuffle=False,
                         batch_size=batch_size)

class Net(torch.nn.Module):
    """Small CNN: two conv -> max-pool -> ReLU stages and a linear classifier.

    For an (n, 1, 28, 28) input the spatial size goes 28 -> 24 -> 12 after the
    first stage and 12 -> 8 -> 4 after the second, which leaves
    20 * 4 * 4 = 320 features per sample for the final linear layer.
    The output has shape (n, 10).
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.maxpooling = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each sample in the batch ``x``."""
        # Remember the mini-batch size so the feature maps can be flattened
        # to one row per sample afterwards.
        n_samples = x.size(0)

        # Each stage: convolution, then 2x2 max pooling, then ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(self.maxpooling(conv(x)))

        # One row per sample; no activation on the output layer.
        flat = x.view(n_samples, -1)
        return self.fc(flat)

# Network, loss function and optimiser shared by the training and
# evaluation code.
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)

# GPU acceleration: run on the first CUDA device if there is one,
# otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model.to(device)

def train(epoch):
    """Run one pass over ``train_loader`` and report the loss periodically.

    ``epoch`` is the zero-based epoch index and is only used in the progress
    line. The function relies on the module-level ``model``, ``criterion``,
    ``optimizer``, ``device`` and ``train_loader``.
    """
    loss_sum = 0.0
    # With 60000 training samples and a batch size of 64 this loop runs
    # 938 times per epoch.
    for step, (x, y) in enumerate(train_loader, start=1):
        # Move the batch to the device the model lives on.
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        loss = criterion(model(x), y)  # cross-entropy loss
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()

        # Print the mean loss of the last 300 mini-batches, then start over.
        if step % 300 == 0:
            print("[%d,%5d] loss:%.3f" % (epoch + 1, step, loss_sum / 300))
            loss_sum = 0.0

def test():
    """Evaluate ``model`` on ``test_loader`` and return the accuracy in percent.

    The accuracy is also printed. The function relies on the module-level
    ``model``, ``device`` and ``test_loader``. It returns the value instead of
    appending to a global list, so it can be called without the script's
    ``__main__`` section having run.
    """
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            # Index of the largest score in each row is the predicted class.
            _, predicted = torch.max(y_pred, dim=1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
    accuracy = 100 * correct / total
    print('accuracy on test set:%d%% [%d/%d]' % (accuracy, correct, total))
    return accuracy


if __name__ == '__main__':
    num_epochs = 10
    accuracy_list = []
    for epoch in range(num_epochs):
        train(epoch)
        accuracy_list.append(test())
    # One accuracy value per epoch, plotted against epochs 1..num_epochs.
    plt.plot(np.linspace(1, num_epochs, num_epochs), accuracy_list)
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()

练习：

答案

#其余保持不变，关键计算第一个线性层的输入
class Net(torch.nn.Module):
    """CNN with three conv -> max-pool -> ReLU stages and a three-layer head.

    For an (n, 1, 28, 28) input the spatial size goes 28 -> 24 -> 12 after
    stage one, 12 -> 12 -> 6 after stage two (padding keeps the conv output
    size) and 6 -> 6 -> 3 after stage three, which leaves 30 * 3 * 3 = 270
    features per sample for the first linear layer. The output has shape
    (n, 10).
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(20, 30, kernel_size=3, padding=1)
        self.maxpooling = nn.MaxPool2d(kernel_size=2)
        self.linear1 = nn.Linear(270, 128)
        self.linear2 = nn.Linear(128, 64)
        self.linear3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each sample in the batch ``x``."""
        # Remember the mini-batch size so the feature maps can be flattened
        # to one row per sample afterwards.
        n_samples = x.size(0)

        # Each stage: convolution, then 2x2 max pooling, then ReLU.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(self.maxpooling(conv(x)))

        # One row per sample.
        x = x.view(n_samples, -1)

        # Two hidden layers with ReLU; no activation on the output layer.
        for hidden in (self.linear1, self.linear2):
            x = F.relu(hidden(x))
        return self.linear3(x)