GoogleNet代码复现

最新推荐文章于 2023-12-28 10:19:59 发布

小蒋的学习笔记

最新推荐文章于 2023-12-28 10:19:59 发布

阅读量86

点赞数

分类专栏：深度学习文章标签：深度学习人工智能

桂林电子科技大学计算机与信息安全学院蒋熹煜

本文链接：https://blog.csdn.net/qq_61735602/article/details/133942111

版权

深度学习专栏收录该内容

54 篇文章 3 订阅

订阅专栏

卷积比较常见的参数组合，能够保证输入输出维度大小一致
- k=3，p=1，s=1
- k=5，p=2，s=1
- k=7，p=3，s=1
代码可以对照着上篇GoogleNet的网络结构流程图和参数表来看
不懂的地方问GPT
一键免费部署你的跨平台私人 ChatGPT 应用
GoogleNet源码如下：

import torch.nn as nn
import torch
import torch.nn.functional as F


class BasicConv2d(nn.Module):
    """A 2-D convolution immediately followed by an in-place ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (e.g. ``kernel_size``,
            ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        # In-place activation: overwrites the convolution output buffer.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        return self.relu(self.conv(x))

class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3red: Channels of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: Output channels of the 3x3 branch.
        ch5x5red: Channels of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 projection after max pooling.

    The output has ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels; every
    branch uses stride 1 with size-preserving padding, so the spatial size
    is unchanged.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: plain 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 conv (padding=1 keeps H and W).
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then 5x5 conv (padding=2 keeps H and W).
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max pool, then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        # Tensors are [batch, channel, H, W]; dim 1 is the channel axis.
        return torch.cat([branch(x) for branch in branches], 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    Pipeline: 5x5 average pool (stride 3) -> 1x1 conv to 128 channels ->
    flatten -> dropout -> fc(2048 -> 1024) + ReLU -> dropout -> fc to
    ``num_classes``.

    Args:
        in_channels: Channels of the incoming feature map.
        num_classes: Number of output logits.

    Note:
        ``fc1`` is hard-wired to 2048 = 128 * 4 * 4 input features, so the
        pooled map must be 4x4 (which is what a 14x14 input produces).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        # For a 14x14 input the pooled map is 4x4, so this yields [N, 128, 4, 4].
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return auxiliary logits of shape ``N x num_classes``."""
        active = self.training  # dropout is a no-op outside training mode

        # N x C x 14 x 14 -> N x C x 4 x 4 -> N x 128 x 4 x 4
        reduced = self.conv(self.averagePool(x))

        # N x 128 x 4 x 4 -> N x 2048
        flat = F.dropout(torch.flatten(reduced, 1), 0.5, training=active)

        # N x 2048 -> N x 1024
        hidden = F.relu(self.fc1(flat), inplace=True)
        hidden = F.dropout(hidden, 0.5, training=active)

        # N x 1024 -> N x num_classes
        return self.fc2(hidden)

class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) image classifier.

    Args:
        num_classes: Number of output logits.
        aux_logits: When true, two ``InceptionAux`` heads are built (after
            inception4a and inception4d) and used in training mode.
        init_weights: When true, run ``_initialize_weights`` at the end of
            construction.

    Returns (from ``forward``):
        In training mode with ``aux_logits`` enabled, the tuple
        ``(logits, aux2, aux1)``; otherwise just ``logits``.
    """

    def __init__(self, num_classes=1000, aux_logits=False, init_weights=False):
        super().__init__()
        self.aux_logits = aux_logits

        # Stem.
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 3: 192 -> 256 -> 480 channels.
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 4: 480 -> 512 -> 512 -> 512 -> 528 -> 832 channels.
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 5: 832 -> 832 -> 1024 channels.
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary heads exist only when requested.
        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Compute class logits (plus auxiliary logits while training).

        Shape comments assume an ``N x 3 x 224 x 224`` input.
        """
        # Auxiliary heads only run in training mode; eval skips them.
        use_aux = self.training and self.aux_logits

        # N x 3 x 224 x 224 -> N x 512 x 14 x 14
        up_to_4a = (
            self.conv1,        # N x 64 x 112 x 112
            self.maxpool1,     # N x 64 x 56 x 56
            self.conv2,        # N x 64 x 56 x 56
            self.conv3,        # N x 192 x 56 x 56
            self.maxpool2,     # N x 192 x 28 x 28
            self.inception3a,  # N x 256 x 28 x 28
            self.inception3b,  # N x 480 x 28 x 28
            self.maxpool3,     # N x 480 x 14 x 14
            self.inception4a,  # N x 512 x 14 x 14
        )
        for layer in up_to_4a:
            x = layer(x)
        if use_aux:
            aux1 = self.aux1(x)

        # N x 512 x 14 x 14 -> N x 528 x 14 x 14
        for layer in (self.inception4b, self.inception4c, self.inception4d):
            x = layer(x)
        if use_aux:
            aux2 = self.aux2(x)

        # N x 528 x 14 x 14 -> N x 1024 x 1 x 1
        tail = (
            self.inception4e,  # N x 832 x 14 x 14
            self.maxpool4,     # N x 832 x 7 x 7
            self.inception5a,  # N x 832 x 7 x 7
            self.inception5b,  # N x 1024 x 7 x 7
            self.avgpool,      # N x 1024 x 1 x 1
        )
        for layer in tail:
            x = layer(x)

        # N x 1024 -> N x num_classes
        x = self.fc(self.dropout(torch.flatten(x, 1)))

        if use_aux:
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Re-initialise every ``nn.Conv2d`` and ``nn.Linear`` submodule.

        Conv weights: Kaiming normal (``fan_out``, ReLU gain); biases zero.
        Linear weights: normal with mean 0 and std 0.01; biases zero.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

def googlenet(num_classes, aux_logits=False, init_weights=False):
    """Build a ``GoogLeNet`` model.

    Args:
        num_classes: Number of output logits of the classifier.
        aux_logits: Forwarded to ``GoogLeNet``; when true the model is built
            with its two auxiliary classifier heads. Defaults to ``False``,
            matching the previous behaviour of this factory.
        init_weights: Forwarded to ``GoogLeNet``; when true the model runs
            its custom weight initialisation. Defaults to ``False``,
            matching the previous behaviour of this factory.

    Returns:
        The constructed ``GoogLeNet`` instance.
    """
    return GoogLeNet(
        num_classes=num_classes,
        aux_logits=aux_logits,
        init_weights=init_weights,
    )

forward函数什么时候调用？

在PyTorch中，当你使用创建的模型对象调用model(input)时，PyTorch将自动调用模型中的forward方法。

代码段解释

    def _initialize_weights(self):
        """Re-initialise the parameters of every Conv2d and Linear submodule.

        Walks ``self.modules()`` and overwrites weights in place; modules of
        any other type are left untouched.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Kaiming normal init computed from fan_out with the ReLU gain.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                # A conv layer may have been created without a bias term.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Normal init with mean 0 and std 0.01; bias set to zero.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

这段代码是一个用于初始化神经网络权重的辅助函数 _initialize_weights。在这个函数中，遍历了网络中的所有模块（modules() 方法返回网络中所有的模块），并针对不同类型的模块执行不同的权重初始化操作。
- 对于类型为 nn.Conv2d 的模块，使用 Kaiming 正态分布初始化权重（nn.init.kaiming_normal_），其中 mode='fan_out' 表示按 fan_out（输出通道数 × 卷积核面积）计算权重的标准差，nonlinearity='relu' 表示按 ReLU 激活函数对应的增益进行缩放。如果存在偏置（bias），则将偏置初始化为 0（nn.init.constant_）。
- 对于类型为 nn.Linear 的模块，使用正态分布初始化权重（nn.init.normal_），其中均值为 0，标准差为 0.01。同时将偏置初始化为 0（nn.init.constant_）。
这样的权重初始化操作有助于确保神经网络在训练初始阶段能够更快地收敛，以及避免梯度消失或爆炸的问题。 Kaiming 初始化方法是专门针对具有 ReLU 激活函数的神经网络层设计的一种初始化策略，有助于缓解梯度消失问题。

代码段解释

self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

这段代码创建了一个最大池化层 nn.MaxPool2d。下面是对其中参数的解释：
- 3：表示池化窗口大小为 3x3。这意味着在输入上应用 3x3 的滑动窗口来执行池化操作。
- stride=2：表示在应用池化操作时的步长为 2。这意味着池化窗口在输入上以步长为 2 的间隔滑动。
- ceil_mode=True：表示计算输出尺寸时向上取整（默认是向下取整）。当 (输入尺寸 − 池化窗口大小) / 步长 不是整数时，“ceil” 模式会在边缘多保留一个不完整的池化窗口，这样输入的边缘部分也会被考虑在内。例如输入为 112×112 时，输出尺寸为 ceil((112 − 3) / 2) + 1 = 56，而默认模式下为 floor((112 − 3) / 2) + 1 = 55。
这样的最大池化操作将对输入张量进行降采样操作，选择每个池化窗口中的最大值作为输出。这有助于减少特征图的空间维度，同时保留最显著的特征信息，以便在后续层中进行处理。