《PyTorch深度学习实践》11CNN高级_Inception块(GPU版本)

这篇博客介绍了如何使用PyTorch实现GoogleNet中的InceptionA块,详细展示了代码实现过程。作者通过构建InceptionA模块和整合模型,结合MNIST数据集进行训练,验证了GPU相对于CPU在运行速度上的优势。文章还分享了训练与测试的代码,并给出了实际运行时GPU比CPU快约三分之一的时间对比。
摘要由CSDN通过智能技术生成

1. 说明

本系列博客记录B站课程《PyTorch深度学习实践》的实践代码。课程链接请点我。

2. InceptionA块

作用:
卷积的超参数(例如卷积核大小)很难事先选定。Inception块将多条不同卷积核的分支并联,再把各分支的输出按通道拼接,让网络在训练中自动学习最优的卷积组合。
在这里插入图片描述

3. 代码如下

# ---------------------------
# @Time     : 2022/4/21 16:05
# @Author   : lcq
# @File     : 11_CNN_GPU_advance.py
# @Function : CNN高级--GoogleNet的Inception实验
# ---------------------------
import torch
import torch.nn as nn
import torchvision.utils
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import time

# 1. Data preparation
batch_size = 64
# Convert each image to a tensor, then standardise it with mean 0.1307 and
# std 0.3081 (presumably the MNIST training-set statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# 1.1: Training split; downloaded into the root directory if not already there.
train_dataset = datasets.MNIST('../dataset/mnist/', train=True, transform=transform, download=True)
# 1.2: Serve the training split in shuffled mini-batches of `batch_size`.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split, wrapped the same way as the training split.
test_dataset = datasets.MNIST(
    '../dataset/mnist/',
    train=False,
    transform=transform,
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# # 打印图片
# data_iter = iter(test_loader)
# images, labels = data_iter.__next__()
# plt.imshow(images[0].resize(images.shape[2], images.shape[3]), cmap="Greys")
# plt.title("label = {} ".format(labels[0]))
# plt.show()


# 2. 设计基础模型
class InceptionA(torch.nn.Module):
    """Inception-style block with four parallel branches concatenated on channels.

    Every convolution and the pooling step use stride 1 with padding chosen so
    the spatial size is unchanged (1x1: none, 3x3: 1, 5x5: 2). Only the channel
    count changes: the output always has 16 + 24 + 24 + 24 = 88 channels,
    whatever ``inChannels`` is.

    Args:
        inChannels: number of channels of the input feature map.
    """

    def __init__(self, inChannels):
        super().__init__()
        # Pooling branch: 3x3 average pool (applied in forward) -> 1x1 conv, 24 channels.
        self.branch1_1x1 = nn.Conv2d(inChannels, 24, kernel_size=1)

        # Plain 1x1 branch, 16 channels.
        self.branch2_1x1 = nn.Conv2d(inChannels, 16, kernel_size=1)

        # 5x5 branch: 1x1 reduction to 16 channels, then 5x5 conv to 24 channels.
        self.branch3_1_1x1 = nn.Conv2d(inChannels, 16, kernel_size=1)
        self.branch3_2_5x5 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        # Double 3x3 branch: 1x1 reduction to 16 channels, then two 3x3 convs (24 channels).
        self.branch4_1_1x1 = nn.Conv2d(inChannels, 16, kernel_size=1)
        self.branch4_2_3x3 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch4_3_3x3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

    def forward(self, X_input):
        """Run all four branches on ``X_input`` and concatenate them along dim 1.

        The channel layout of the result is
        [1x1 branch (16) | 5x5 branch (24) | double 3x3 branch (24) | pool branch (24)].
        """
        pool_path = self.branch1_1x1(
            F.avg_pool2d(X_input, kernel_size=3, stride=1, padding=1)
        )
        pointwise_path = self.branch2_1x1(X_input)
        five_path = self.branch3_2_5x5(self.branch3_1_1x1(X_input))
        three_path = self.branch4_3_3x3(
            self.branch4_2_3x3(self.branch4_1_1x1(X_input))
        )
        # dim=1 is the channel axis of (batch, channel, h, w); e.g. joining
        # (1, 2, 3, 4) with (1, 4, 3, 4) on dim=1 yields (1, 2 + 4, 3, 4).
        return torch.cat((pointwise_path, five_path, three_path, pool_path), dim=1)


# 3. 整合模型
class Net(torch.nn.Module):
    """Small CNN classifier: conv -> InceptionA -> conv -> InceptionA -> linear.

    The linear layer expects 1408 flattened features. Assuming 1x28x28 MNIST
    inputs, that is 88 channels * 4 * 4 after the second InceptionA block.
    The network outputs 10 raw class scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv_1 = nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels: InceptionA concatenates 16 + 24 + 24 + 24 channels.
        self.conv_2 = nn.Conv2d(88, 20, kernel_size=5)
        # Each block's inChannels must match the preceding conv's out_channels.
        self.inceptionA_1 = InceptionA(inChannels=10)
        self.inceptionA_2 = InceptionA(inChannels=20)

        self.maxPool = nn.MaxPool2d(2)
        # in_features must equal the per-sample feature count produced by the
        # flatten step in forward().
        self.fullConnect = nn.Linear(1408, 10)

    def forward(self, X_input):
        """Return class scores of shape (batch, 10) for a batch of images."""
        # Stage 1: convolution -> max pooling -> ReLU.
        features = F.relu(self.maxPool(self.conv_1(X_input)))
        # Stage 2: first Inception block.
        features = self.inceptionA_1(features)
        # Stage 3: convolution -> max pooling -> ReLU again.
        features = F.relu(self.maxPool(self.conv_2(features)))
        # Stage 4: second Inception block.
        features = self.inceptionA_2(features)
        # Stage 5: flatten (batch, c, h, w) to (batch, c*h*w), e.g.
        # (64, 2, 3, 4) -> (64, 24), then apply the fully connected layer.
        flattened = features.view(X_input.size(0), -1)
        return self.fullConnect(flattened)


# 4. Model, loss function and optimizer
# Prefer CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net()
model.to(device)

# Cross-entropy loss. For a one-hot target y = [0, 1, 0] and predicted
# probabilities p = [0.1, 0.6, 0.3], the loss is -sum(y * ln(p)) = -ln(0.6).
# nn.CrossEntropyLoss works on the raw scores and applies log-softmax itself.
Loss = nn.CrossEntropyLoss()

# SGD with momentum. Plain SGD updates w_t = w_{t-1} - lr * dw; with momentum:
#   (1) v_t = momentum * v_{t-1} + dw
#   (2) w_t = w_{t-1} - lr * v_t
optimizer = optim.SGD(
    model.parameters(),  # the learnable parameters to update
    lr=0.01,
    momentum=0.9,
)


# 5. 训练
def train(epoch):
    """Train the module-level ``model`` for one pass over ``train_loader``.

    Each mini-batch is moved to ``device``, run through the model, scored with
    ``Loss`` and used for one ``optimizer`` step. After every 300 batches the
    mean loss over those 300 batches is printed.

    Args:
        epoch: index of the current epoch; only used in the progress message.
    """
    # Make sure layers that behave differently in training (dropout, batch
    # norm, ...) are in training mode, even if an evaluation ran before.
    model.train()

    running_loss = 0.0
    for batch_index, (X_input, Y_label) in enumerate(train_loader):
        X_input, Y_label = X_input.to(device), Y_label.to(device)

        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that registered
        # hooks are executed as well.
        y_pred = model(X_input)
        loss = Loss(y_pred, Y_label)    # mean loss over the samples of this batch
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_index % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch, batch_index, running_loss / 300))
            running_loss = 0.0


# 6. 测试
def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and print accuracy.

    The accuracy is printed as a whole-number percentage (the ``%d`` format
    drops the fractional part). Gradients are disabled during evaluation and
    the model's training/eval mode is restored before returning.
    """
    correct = 0
    total = 0

    # Evaluate in eval mode, but remember the previous mode so that training
    # can continue unchanged afterwards.
    was_training = model.training
    model.eval()
    with torch.no_grad():
        for X_test_input, Y_test_label in test_loader:
            X_test_input, Y_test_label = X_test_input.to(device), Y_test_label.to(device)
            # Call the module itself rather than .forward() so hooks run.
            y_test_pred = model(X_test_input)

            # dim=1 reduces over the class scores of each sample; the index of
            # the largest score is the predicted class.
            predicted = y_test_pred.argmax(dim=1)
            total += Y_test_label.size(0)
            correct += (predicted == Y_test_label).sum().item()
    model.train(was_training)

    print('测试集正确率: %d %% ' % (100 * correct / total))


if __name__ == '__main__':
    # Label the timing with the device that was actually selected, so a run
    # that fell back to the CPU is not reported as a GPU measurement.
    device_label = "GPU" if device.type == "cuda" else "CPU"

    # perf_counter() is a monotonic clock intended for measuring elapsed time.
    startTime = time.perf_counter()
    for epoch in range(1):
        train(epoch)
        test()
    endTime = time.perf_counter()
    print("%s耗时: " % device_label, endTime - startTime)

4. 使用GPU跑和CPU跑模型的区别

对于图像化的运算,以前听说GPU跑起来比较快,我不太信。于是就自己多次测试了一下。
当 epoch=1 时,Time_cpu = 76s,Time_gpu = 54s。
GPU耗时比CPU少了约29%(22s / 76s),即快了差不多三分之一。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值