深度学习——AlexNet 网络模型代码（AlexNet.py）

书呆狗

已于 2023-08-03 14:35:15 修改

阅读量395

点赞数

分类专栏：深度学习入门 # 图像分类  文章标签：网络 深度学习 机器学习

于 2023-08-02 15:24:51 首次发布

本文链接：https://blog.csdn.net/weixin_73223851/article/details/132063635

版权

图像分类同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

深度学习入门

3 篇文章 0 订阅

订阅专栏

import torch.nn as nn
import torch
from torchsummary import summary

class AlexNet(nn.Module):
    """AlexNet-style convolutional network for image classification.

    The model has two stages:

    * ``features``: five convolution layers, each followed by ReLU, with
      max-pooling after the 1st, 2nd and 5th convolution.
    * ``classifier``: three fully connected layers, with dropout (p=0.5)
      in front of the first two and ReLU after each of them.

    The first fully connected layer expects a flattened ``256 x 6 x 6``
    feature map, which is what ``features`` produces for a
    ``3 x 224 x 224`` input.

    Spatial sizes quoted in the comments below follow the usual rule for
    both convolution and pooling::

        out = floor((in - kernel_size + 2 * padding) / stride) + 1

    Args:
        num_classes: Width of the final output layer (number of classes).
        init_weights: If true, re-initialize all Conv2d/Linear parameters
            via ``_initialize_weights`` instead of keeping PyTorch's
            default initialization.
    """

    def __init__(self, num_classes: int = 1000, init_weights: bool = False) -> None:
        # Run nn.Module's own initialization before registering submodules.
        super().__init__()

        # Feature extractor: convolution + ReLU + max-pooling layers.
        self.features = nn.Sequential(
            # Conv2d(in_channels, out_channels, kernel_size, stride, padding).
            # padding=2 adds two rows/columns of zeros on every side, and the
            # division is floored: floor((224 - 11 + 2 * 2) / 4) + 1 = 55.
            # Many AlexNet write-ups use 227x227 inputs with padding=0
            # instead, which gives (227 - 11) / 4 + 1 = 55 as well.
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # [3, 224, 224] -> [96, 55, 55]
            # inplace=True overwrites the input tensor instead of allocating
            # a new one for the activation output.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> [96, 27, 27]

            nn.Conv2d(96, 256, kernel_size=5, padding=2),           # -> [256, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> [256, 13, 13]

            nn.Conv2d(256, 384, kernel_size=3, padding=1),          # -> [384, 13, 13]
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),          # -> [384, 13, 13]
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),          # -> [256, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> [256, 6, 6]
        )

        # Classifier: maps the flattened feature map onto class scores.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # 256 * 6 * 6 is the number of values in the flattened output of
            # ``features``; it is projected to 4096 hidden units.
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            # Second hidden layer: extra non-linearity and capacity before
            # the final projection.
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            # Output layer: one score per class (no softmax applied here).
            nn.Linear(4096, num_classes),
        )

        if init_weights:
            self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the feature extractor and classifier on a batch of images.

        Args:
            x: Input batch; the layer sizes assume shape ``(N, 3, 224, 224)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` with one score per class.
        """
        x = self.features(x)
        # torch.flatten defaults to start_dim=0, which would also merge the
        # batch dimension; start_dim=1 keeps one row per sample.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Re-initialize every Conv2d and Linear layer of this module.

        Conv2d weights use Kaiming-normal initialization (``fan_out`` mode,
        ReLU gain); Linear weights are drawn from N(0, 0.01^2). Biases are
        set to zero wherever a layer has one.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                # Same guard as for Conv2d: a layer created with bias=False
                # has ``bias is None`` and must be skipped, not filled.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

def alexnet(num_classes=1000, init_weights=False):
    """Build an ``AlexNet`` model.

    Args:
        num_classes: Size of the model's output layer. Defaults to 1000,
            the same default as the ``AlexNet`` constructor.
        init_weights: Forwarded to ``AlexNet``; when true the model
            re-initializes its Conv2d/Linear parameters on construction.

    Returns:
        A newly constructed ``AlexNet`` instance.
    """
    return AlexNet(num_classes=num_classes, init_weights=init_weights)

# net = AlexNet(num_classes=1000)
# summary(net.to('cuda'), (3,224,224))
#########################################################################################################################################
# Total params: 62,378,344
# Trainable params: 62,378,344
# Non-trainable params: 0
# ----------------------------------------------------------------
# Input size (MB): 0.57
# Forward/backward pass size (MB): 11.09
# Params size (MB): 237.95
# Estimated Total Size (MB): 249.62
# ----------------------------------------------------------------
# conv_parameters:  3,747,200
# fnn_parameters:  58,631,144   约占总参数量的 94%（58,631,144 / 62,378,344）