简单记录GoogLeNet网络，pytorch+GoogLeNet+CIFAR10

子根

已于 2022-08-02 13:38:04 修改

阅读量1.2k

点赞数 1

于 2022-07-30 12:06:22 首次发布

本文链接：https://blog.csdn.net/qq_42792802/article/details/126070175

版权

数据集

数据集使用的是 CIFAR10。该数据集一共有 50000 张训练图片和 10000 张测试图片，两个数据集里面的图片都是 png 彩色图片，图片大小是 32 x 32 x 3，是一个 10 分类问题，类别分别为飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船和卡车。CIFAR10 是衡量网络性能的一个非常重要的基准：一般来说，如果一个网络在这个数据集上的表现超过另外一个网络，那么它的性能通常也比另外一个网络好。目前这个数据集上最好的结果是 95% 左右的测试集准确率。

from torchvision.datasets import CIFAR10
import torch
import cv2
import numpy as np

def data_tf(x):
    """Turn one dataset image into a normalized, channel-first float tensor.

    The image is converted to float32, divided by 255, shifted and scaled
    with mean 0.5 / std 0.5, resized to 224x224 and transposed from
    (H, W, C) to (C, H, W).

    Args:
        x: image accepted by ``np.array`` (presumably the HWC 8-bit image
            handed over by the dataset -- confirm against the caller).

    Returns:
        A ``torch.Tensor`` whose last two dimensions are 224 x 224.
    """
    pixels = np.array(x, dtype='float32') / 255
    normalized = (pixels - 0.5) / 0.5
    resized = cv2.resize(normalized, (224, 224))
    # PyTorch expects the channel axis first.
    channel_first = resized.transpose((2, 0, 1))
    return torch.from_numpy(channel_first)

## Load the CIFAR10 train/test splits from ./data (download is disabled here)
train_set = CIFAR10(
    root='./data',
    train=True,
    transform=data_tf,
    download=False,
)
test_set = CIFAR10(
    root='./data',
    train=False,
    transform=data_tf,
    download=False,
)

Net

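这是 GoogLeNet 的网络结构（保存为 four_net.py，后面的训练脚本会 import 它）。下面的代码用到了 torch、nn 和 F，运行前需要先在文件开头加上 import torch、import torch.nn as nn 和 import torch.nn.functional as F。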

class BasicConv2d(nn.Module):
    """Convolution -> batch normalization -> ReLU building block.

    Args:
        in_channels: channel count of the input feature map.
        out_channels: number of convolution filters (output channels).
        kernel: convolution kernel size, forwarded as ``kernel_size``.
        stride: convolution stride (default 1).
        padding: padding passed to the convolution (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel, stride=1, padding=0):
        super().__init__()
        # The convolution is created without a bias term; it is followed
        # directly by batch normalization.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Run ``x`` through conv, batch norm and an in-place ReLU."""
        return F.relu(self.bn(self.conv(x)), inplace=True)

class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        in_channels: channels of the input feature map.
        out_channels_1x1: output channels of the stand-alone 1x1 convolution.
        out_channels_1x1_3: output channels of the 1x1 convolution placed
            in front of the 3x3 convolution.
        out_channels_3x3: output channels of the 3x3 convolution.
        out_channels_1x1_5: output channels of the 1x1 convolution placed
            in front of the 5x5 convolution.
        out_channels_5x5: output channels of the 5x5 convolution.
        out_channels_pool: output channels of the 1x1 convolution that
            follows the max pooling.
    """

    def __init__(self, in_channels, out_channels_1x1,
                 out_channels_1x1_3,  out_channels_3x3,
                 out_channels_1x1_5, out_channels_5x5,
                 out_channels_pool ):
        super().__init__()

        ## Branch 1: plain 1x1 convolution
        self.branch1x1 = BasicConv2d(in_channels, out_channels_1x1, 1)

        ## Branch 2: 1x1 reduction, then 3x3 convolution (stride 1, padding 1)
        reduce_3x3 = BasicConv2d(in_channels, out_channels_1x1_3, 1)
        conv_3x3 = BasicConv2d(out_channels_1x1_3, out_channels_3x3, 3, 1, 1)
        self.branch3x3 = nn.Sequential(reduce_3x3, conv_3x3)

        ## Branch 3: 1x1 reduction, then 5x5 convolution (stride 1, padding 2)
        reduce_5x5 = BasicConv2d(in_channels, out_channels_1x1_5, 1)
        conv_5x5 = BasicConv2d(out_channels_1x1_5, out_channels_5x5, 5, 1, 2)
        self.branch5x5 = nn.Sequential(reduce_5x5, conv_5x5)

        ## Branch 4: 3x3 max pooling (stride 1, padding 1), then 1x1 convolution
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        pool_proj = BasicConv2d(in_channels, out_channels_pool, 1)
        self.branch_pool = nn.Sequential(pool, pool_proj)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results on dim 1."""
        branches = (
            self.branch1x1,
            self.branch3x3,
            self.branch5x5,
            self.branch_pool,
        )
        return torch.cat([branch(x) for branch in branches], 1)
    
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier: five feature stages plus a linear head.

    Args:
        in_channels: number of channels of the input images.
        num_class: number of output classes (width of the final linear layer).
    """

    def __init__(self, in_channels, num_class):
        super(GoogLeNet, self).__init__()
        ## Stage 1: 7x7 stride-2 convolution, then a stride-2 max pool
        # NOTE(review): the stem convolutions in stages 1 and 2 are plain
        # nn.Conv2d layers with no normalization or activation between them,
        # unlike the BasicConv2d used inside Inception. Left unchanged so
        # existing checkpoints keep loading -- confirm this is intended.
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, 7, 2, 3),
            nn.MaxPool2d(3, 2, 1)
        )
        ## Stage 2: two 3x3 convolutions, then a stride-2 max pool
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 192, 3, 1, 1),
            nn.Conv2d(192, 192, 3, 1, 1),
            nn.MaxPool2d(3, 2, 1)
        )
        ## Stage 3: two Inception modules (192 -> 256 -> 480 channels)
        self.block3 = nn.Sequential(
            Inception(192, 64, 96, 128, 16, 32, 32),
            Inception(256, 128, 128, 192, 32, 96, 64),
            nn.MaxPool2d(3, 2, 1)
        )
        ## Stage 4: five Inception modules (480 -> 832 channels)
        self.block4 = nn.Sequential(
            Inception(480, 192, 96, 208, 16, 48, 64),
            # Author's note: the full version of the network emits an extra
            # output at this point (not implemented here).
            Inception(512, 160, 112, 224, 24, 64, 64),
            Inception(512, 128, 128, 256, 24, 64, 64),
            Inception(512, 112, 144, 288, 32, 64, 64),
            # Author's note: the full version also emits an extra output here.
            Inception(528, 256, 160, 320, 32, 128, 128),
            nn.MaxPool2d(3, 2, 1)
        )
        ## Stage 5: two Inception modules (832 -> 1024 channels) and global pooling
        self.block5 = nn.Sequential(
            Inception(832, 256, 160, 320, 32, 128, 128),
            Inception(832, 384, 192, 384, 48, 128, 128),
            # Global average pooling down to 1x1. For a 224x224 input the
            # feature map here is 7x7, so this matches the former
            # AvgPool2d(7, 1); other input sizes now also yield the 1024
            # features the classifier expects. The layer has no parameters,
            # so state_dict keys are unchanged.
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(1024, num_class),
        )

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)

        # Collapse (batch, 1024, 1, 1) to (batch, 1024) for the linear head.
        x = torch.flatten(x, 1)
        return self.classifier(x)

详细内容可以查看我这篇博客

简单记录一下，几个经典的网络结构_子根的博客-CSDN博客

接下来把训练代码模块化，放到 uilt 模块（uilt.py）中。

uilt

#  开发人员：    骆根强
#  开发时间：    2022/7/30 10:50
#  功能作用：    未知

import torch
import time
import tqdm

from torch.autograd import Variable

def train(epoches, train_data, model, device, criterion, optimizer, pth_name):
    """Train ``model`` for ``epoches`` passes over ``train_data``.

    After every epoch the model's ``state_dict`` is written to
    ``./params/{pth_name}_{epoches}.pth`` and the loss/accuracy histories
    collected so far are printed. The total wall-clock time is printed at
    the end.

    Args:
        epoches: number of training epochs.
        train_data: iterable of ``(image_batch, label_batch)`` tensor pairs
            that also supports ``len()`` (presumably a ``DataLoader`` --
            confirm against the caller).
        model: the network being trained.
        device: device each batch is moved to before the forward pass.
        criterion: loss function, called as ``criterion(output, labels)``.
        optimizer: optimizer that updates the model's parameters.
        pth_name: file-name prefix of the saved parameter file.

    Returns:
        ``(losses, accuracies)``: two lists with one entry per epoch, the
        mean per-batch loss and the mean per-batch accuracy.
    """
    import os

    # Create the checkpoint directory up front so a missing folder does not
    # abort the run only after a whole epoch has been trained.
    os.makedirs('./params', exist_ok=True)

    losses_men = []
    acces_men = []

    start = time.time()
    for epoche in range(epoches):
        train_loss = 0
        train_acc = 0
        time1 = time.time()
        print()
        print(f'开始，第 {epoche + 1} / {epoches} 个Epoche中：')
        for image_data, image_label in tqdm.tqdm(train_data):
            image_data = image_data.to(device)
            image_label = image_label.to(device)

            ## Forward pass
            out = model(image_data)
            loss = criterion(out, image_label)

            ## Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            ## Accumulate the batch loss
            train_loss += loss.item()

            ## Accumulate the batch accuracy (correct predictions / batch size)
            _, pred_label = out.max(1)
            num_correct = (pred_label == image_label).sum().item()
            train_acc += num_correct / out.shape[0]

        losses_men.append(train_loss / len(train_data))
        acces_men.append(train_acc / len(train_data))
        time2 = time.time()

        # NOTE(review): the file name contains the total epoch count, not the
        # current epoch, so every epoch overwrites the same file -- confirm
        # that keeping only the latest parameters is intended.
        torch.save(model.state_dict(), f'./params/{pth_name}_{epoches}.pth')

        print('Epoch_time : ', time2 - time1)
        print()
        print('train_loss : ', losses_men)
        print()
        print('train_acc : ', acces_men)

    # Split the elapsed time into hours / minutes / seconds; the minutes are
    # the remainder within the hour, not the total number of minutes.
    elapsed = int(time.time() - start)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'All time : {hours} H {minutes} m {seconds} s  ')

    return losses_men, acces_men

然后就进行完整的训练过程

train

import torch
import cv2
import numpy as np
import torch.nn as nn

import four_net
import uilt

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch import optim

## Training hyperparameters and device selection
epoches = 1
batch_size = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use CUDA when available, otherwise the CPU

def data_tf(x):
    """Turn one dataset image into a normalized, channel-first float tensor.

    The image is converted to float32, divided by 255, shifted and scaled
    with mean 0.5 / std 0.5, resized to 224x224 and transposed from
    (H, W, C) to (C, H, W).

    Args:
        x: image accepted by ``np.array`` (presumably the HWC 8-bit image
            handed over by the dataset -- confirm against the caller).

    Returns:
        A ``torch.Tensor`` whose last two dimensions are 224 x 224.
    """
    pixels = np.array(x, dtype='float32') / 255
    normalized = (pixels - 0.5) / 0.5
    resized = cv2.resize(normalized, (224, 224))
    # PyTorch expects the channel axis first.
    channel_first = resized.transpose((2, 0, 1))
    return torch.from_numpy(channel_first)

## Load the CIFAR10 train/test splits from ./data (download is disabled here)
train_set = CIFAR10('./data', train=True, transform=data_tf, download=False)
test_set = CIFAR10('./data', train=False,  transform=data_tf, download=False)

# Wrap the datasets in batch loaders; only the training split is shuffled.
train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# 3 input channels, 10 output classes.
model = four_net.GoogLeNet(3,10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-1)

print()
print('目前使用的是： ', device)
# The original call was missing its closing parenthesis (a SyntaxError).
uilt.train(epoches, train_data, model, device, criterion, optimizer, pth_name='googlenet')

总结

****************************************************************************************************************************************************************************************************************************

train_acc : [0.4877117966751918, 0.7165121483375959, 0.8027293797953964, 0.8466272378516624, 0.8776974104859335, 0.9011149296675192, 0.9178388746803069, 0.9341432225063938, 0.9479699488491049, 0.9559223145780051, 0.9642543158567775, 0.9674312659846548, 0.9730458759590793, 0.9789402173913043, 0.9809782608695652, 0.984934462915601, 0.9881713554987213, 0.9888507033248082, 0.9883112212276215, 0.9879515664961637, 0.9934063299232737, 0.9944453324808185, 0.9930466751918159, 0.9949048913043478, 0.9937859654731458, 0.9963834718670077, 0.9977221867007673, 0.998321611253197, 0.9973025895140665, 0.9927070012787724]

画出准确度曲线：

import matplotlib.pyplot as plt
# Per-epoch training accuracy, copied from the training output.
acces_men = [0.4877117966751918, 0.7165121483375959, 0.8027293797953964,
             0.8466272378516624, 0.8776974104859335, 0.9011149296675192,
             0.9178388746803069, 0.9341432225063938, 0.9479699488491049,
             0.9559223145780051, 0.9642543158567775, 0.9674312659846548,
             0.9730458759590793, 0.9789402173913043, 0.9809782608695652,
             0.984934462915601, 0.9881713554987213, 0.9888507033248082,
             0.9883112212276215, 0.9879515664961637, 0.9934063299232737,
             0.9944453324808185, 0.9930466751918159, 0.9949048913043478,
             0.9937859654731458, 0.9963834718670077, 0.9977221867007673,
             0.998321611253197, 0.9973025895140665, 0.9927070012787724]

### Plot the training-accuracy curve (only accuracy was recorded here)
# range() gives the same 0-based x positions as np.arange without needing
# numpy, which this snippet never imports.
plt.plot(range(len(acces_men)), acces_men, label='train acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()  # without a legend the label passed to plot() is never shown
plt.show()

GoogLeNet 模型训练的收敛速度比 VGG 还快：光是第一个 epoch，准确率就超过了 0.3（上面的记录约为 0.49），要知道 VGG（加了 BatchNorm1d）是经过了第三个 epoch 才超过 0.3 的；如果 VGG 没有 BatchNorm1d，那就是个弟弟，经过 20 轮的训练精确度连 0.1 都达不到。

GoogLeNet经过20轮精确度高到99%

最后，老婆压场

喜欢的话，给我老婆点个赞吧☺

子根

关注

1
点赞
踩
27

收藏

觉得还不错? 一键收藏
0
评论
简单记录GoogLeNet网络，pytorch+GoogLeNet+CIFAR10

GoogLeNet模型训练的收敛速度比VGG还快，光是第一个epoch，准确率就达到了0.3，要知道VGG（经过BatchNorm1d）是经过了第三个epoch才超过0.3的，如果Vgg没有BatchNorm1d那就是个弟弟，经过20轮的训练精确度连0.1都达不到最后，老婆压场https。............
复制链接

扫一扫