VGG模型——pytorch实现

CV_Peach

已于 2023-01-22 14:17:16 修改

阅读量452

点赞数 1

文章标签： pytorch 深度学习 人工智能

于 2022-12-31 18:36:58 首次发布

本文链接：https://blog.csdn.net/Peach_____/article/details/128507561

版权

论文传送门：VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION

class VGG(nn.Module):
    """VGG-style network: a feature extractor followed by a 3-layer classifier.

    Args:
        features: Module used as the feature extractor. Its flattened
            per-sample output must contain 512 * 7 * 7 values, because that
            is the input size of the first linear layer of the classifier.
        num_classes: Number of outputs of the final linear layer.
        init_weights: When true, re-initialize Conv2d / BatchNorm2d / Linear
            parameters via ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=2, init_weights=False):
        super().__init__()
        # Feature extraction network supplied by the caller.
        self.features = features
        # Fully connected classification head.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),  # regularization against overfitting
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),  # one output per class
        )
        if init_weights:
            # The original call was misspelled (`_initialize_weigthts`) and
            # raised AttributeError whenever init_weights was true.
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor, flatten, then classify.

        Args:
            x: Input batch; dimension 0 is treated as the batch dimension.

        Returns:
            Tensor of shape (n, num_classes).
        """
        x = self.features(x)
        # Keep the batch dimension and collapse everything else into one
        # vector per sample so it can be fed to the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def _initialize_weights(self):
        """Initialize parameters of all Conv2d, BatchNorm2d and Linear submodules."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Kaiming-normal initialization for ReLU, fan-out mode.
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                # Scale 1 and shift 0.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Normal distribution with mean 0 and std 0.01, zero bias.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def make_layers(cfg, in_channels=1):
    """Build the feature extraction network from a layer configuration.

    Args:
        cfg: Iterable describing the layers in order. The string ``"M"``
            adds a 2x2 max-pooling layer with stride 2; any other entry is
            used as the output channel count of a 3x3 convolution (padding 1)
            followed by an in-place ReLU.
        in_channels: Number of channels of the input image. Defaults to 1,
            which was previously hard-coded; pass 3 for RGB input.

    Returns:
        An ``nn.Sequential`` holding the generated layers.
    """
    layers = []
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            # The next convolution consumes what this one produced.
            in_channels = v
    return nn.Sequential(*layers)


# Feature-extractor layouts keyed by model name. Integers are convolution
# output channel counts; "M" marks a max-pooling layer.
cfgs = {
    "vgg16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
}


def vgg16(**kwargs):
    """Create a VGG16 model.

    Builds the feature extractor from the ``"vgg16"`` entry of ``cfgs`` and
    forwards any keyword arguments unchanged to the ``VGG`` constructor.
    """
    feature_extractor = make_layers(cfgs["vgg16"])
    return VGG(feature_extractor, **kwargs)