GoogLeNet网络详解_googlenet辅助输出头-CSDN博客

本文链接：https://blog.csdn.net/sxn0121/article/details/127309179

GoogLeNet网络介绍

AlexNet和VGG都只有一个输出层

而GoogLeNet有三个输出层（其中两个是辅助分类层，仅在训练模式下参与前向计算并返回输出；评估/预测时只返回主输出）

网络表格

inception结构

辅助分类器介绍

模型脚本

model.py（训练脚本和预测脚本通过 `from model import GoogLeNet` 导入该文件）

import torch.nn as nn
import torch
import torch.nn.functional as F



# GoogLeNet image classifier with two optional auxiliary classifier heads.
class GoogLeNet(nn.Module):
    """GoogLeNet: convolutional stem, nine Inception blocks, linear classifier.

    Args:
        num_classes: Number of output classes for the main classifier and for
            both auxiliary classifiers.
        aux_logits: If true, ``aux1`` and ``aux2`` are created and their
            outputs are returned by ``forward`` while in training mode.
        init_weights: If true, ``_initialize_weights`` is called at the end of
            construction.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super().__init__()
        # Remember whether the auxiliary heads exist; forward() checks this.
        self.aux_logits = aux_logits

        # Stem: conv -> pool -> conv -> conv -> pool.
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Inception arguments: (in_channels, ch1x1, ch3x3red, ch3x3,
        # ch5x5red, ch5x5, pool_proj). Each block's in_channels equals the
        # sum of the previous block's four branch widths.
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        if self.aux_logits:
            # aux1 reads the inception4a output (512 channels),
            # aux2 reads the inception4d output (528 channels).
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        # Adaptive pooling yields a 1 x 1 map whatever the input height/width.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(in_features=1024, out_features=num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the network.

        Returns:
            ``(logits, aux2, aux1)`` when the module is in training mode and
            ``aux_logits`` is true; otherwise only ``logits``.
        """
        # The auxiliary heads are evaluated only while training.
        use_aux = self.training and self.aux_logits

        # Shape comments below assume an N x 3 x 224 x 224 input.
        up_to_4a = (
            self.conv1,        # N x 64 x 112 x 112
            self.maxpool1,     # N x 64 x 56 x 56
            self.conv2,        # N x 64 x 56 x 56
            self.conv3,        # N x 192 x 56 x 56
            self.maxpool2,     # N x 192 x 28 x 28
            self.inception3a,  # N x 256 x 28 x 28
            self.inception3b,  # N x 480 x 28 x 28
            self.maxpool3,     # N x 480 x 14 x 14
            self.inception4a,  # N x 512 x 14 x 14
        )
        for layer in up_to_4a:
            x = layer(x)
        if use_aux:
            aux1 = self.aux1(x)

        up_to_4d = (
            self.inception4b,  # N x 512 x 14 x 14
            self.inception4c,  # N x 512 x 14 x 14
            self.inception4d,  # N x 528 x 14 x 14
        )
        for layer in up_to_4d:
            x = layer(x)
        if use_aux:
            aux2 = self.aux2(x)

        up_to_5b = (
            self.inception4e,  # N x 832 x 14 x 14
            self.maxpool4,     # N x 832 x 7 x 7
            self.inception5a,  # N x 832 x 7 x 7
            self.inception5b,  # N x 1024 x 7 x 7
        )
        for layer in up_to_5b:
            x = layer(x)

        # N x 1024 x 1 x 1 -> N x 1024 -> N x num_classes
        x = torch.flatten(self.avgpool(x), 1)
        x = self.fc(self.dropout(x))

        if use_aux:
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Re-initialise every Conv2d and Linear submodule in place.

        Conv2d weights use Kaiming-normal (fan_out, relu) and Linear weights
        use a normal distribution with mean 0 and std 0.01; biases are zeroed.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, mean=0, std=0.01)
                nn.init.zeros_(module.bias)


# Inception block: four parallel branches concatenated along the channel axis.
class Inception(nn.Module):
    """Inception block with 1x1, 3x3, 5x5 and pooled-projection branches.

    Args:
        in_channels: Channel depth of the input feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3red: Output channels of the 1x1 reduction before the 3x3 conv.
        ch3x3: Output channels of the 3x3 conv.
        ch5x5red: Output channels of the 1x1 reduction before the 5x5 conv.
        ch5x5: Output channels of the 5x5 conv.
        pool_proj: Output channels of the 1x1 conv after the max-pool.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: single 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a 3x3 convolution; padding=1
        # keeps the output height/width equal to the input's.
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction followed by a 5x5 convolution; padding=2
        # keeps the output height/width equal to the input's.
        # The official torchvision implementation actually uses a 3x3 kernel
        # here instead of 5x5; this code keeps 5x5.
        # Please see https://github.com/pytorch/vision/issues/906 for details.
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: size-preserving 3x3 max-pool followed by a 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Feed ``x`` to all four branches and concatenate along dim 1 (channels)."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

# Auxiliary classifier head attached to an intermediate Inception output.
class InceptionAux(nn.Module):
    """Auxiliary classifier: avg-pool -> 1x1 conv -> fc1 -> fc2, with dropout.

    Args:
        in_channels: Channel depth of the incoming feature map.
        num_classes: Number of output classes.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        # A 1x1 convolution changes only the channel depth (to 128).
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)

        # fc1 expects 2048 = 128 * 4 * 4 flattened features, i.e. a 4 x 4 map
        # after pooling (the case for the 14 x 14 inputs used by GoogLeNet).
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape N x num_classes."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        pooled = self.averagePool(x)
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        # conv -> N x 128 x 4 x 4, then flatten from the channel dim -> N x 2048
        features = torch.flatten(self.conv(pooled), start_dim=1)
        # Dropout is active only in training mode (self.training is True).
        features = F.dropout(features, p=0.5, training=self.training)
        # N x 1024
        hidden = F.relu(self.fc1(features), inplace=True)
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        # N x num_classes
        return self.fc2(hidden)


# Convolution + ReLU unit used throughout the network.
class BasicConv2d(nn.Module):
    """A 2-D convolution followed by an in-place ReLU.

    Args:
        in_channels: Channel depth of the input feature map.
        out_channels: Number of convolution filters (output channel depth).
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (for example
            ``kernel_size``, ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, then the ReLU activation."""
        return self.relu(self.conv(x))

训练脚本

train

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import GoogLeNet


def main():
    """Train GoogLeNet on the flower dataset and keep the best checkpoint.

    Reads an ``ImageFolder`` layout from ``<cwd>/../../data_set/flower_data``
    (``train`` and ``val`` sub-directories), writes the index-to-class mapping
    to ``class_indices.json``, trains for a fixed number of epochs and saves
    the state dict with the highest validation accuracy to ``./googleNet.pth``.

    Raises:
        AssertionError: If the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to index -> class name so predictions can be mapped back to names.
    cla_dict = {val: key for key, val in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so that min() never compares None with an int.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Instantiate the model: 5 flower classes, auxiliary classifiers enabled,
    # custom weight initialisation enabled.
    net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
    # To use the official pretrained weights, load them into the official
    # torchvision model, not into this hand-written one: the official model
    # uses BN layers and different parameters, so the two cannot be mixed.
    # import torchvision
    # net = torchvision.models.googlenet(num_classes=5)
    # model_dict = net.state_dict()
    # # pretrained weights: https://download.pytorch.org/models/googlenet-1378be20.pth
    # pretrain_model = torch.load("googlenet.pth")
    # del_list = ["aux1.fc2.weight", "aux1.fc2.bias",
    #             "aux2.fc2.weight", "aux2.fc2.bias",
    #             "fc.weight", "fc.bias"]
    # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}
    # model_dict.update(pretrain_dict)
    # net.load_state_dict(model_dict)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0003)

    epochs = 30
    best_acc = 0.0
    save_path = './googleNet.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            # Move the labels to the device once; all three losses reuse them.
            labels = labels.to(device)
            optimizer.zero_grad()
            # In training mode the network returns the main output followed by
            # the outputs of auxiliary classifier 2 and auxiliary classifier 1.
            logits, aux_logits2, aux_logits1 = net(images.to(device))
            loss0 = loss_function(logits, labels)
            loss1 = loss_function(aux_logits1, labels)
            loss2 = loss_function(aux_logits2, labels)
            # Auxiliary losses are added with weight 0.3 each.
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()  # in eval mode the network returns only the main output
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))  # eval model only have last output layer
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the checkpoint with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()

预测脚本

predict

import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import GoogLeNet


def main():
    """Classify a single image with trained GoogLeNet weights and plot it.

    Loads ``../tulip.jpg``, the class mapping from ``./class_indices.json``
    and the weights from ``./googleNet.pth``, prints the probability of every
    class and shows the image titled with the top prediction.

    Raises:
        AssertionError: If the image, class-index file or weights file is
            missing.
        RuntimeError: If the checkpoint does not match the model (missing
            parameters, or unexpected keys other than the auxiliary heads).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: the 3-channel Normalize and the network's first
    # convolution both fail on grayscale or RGBA input.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model. The auxiliary classifiers are not needed for prediction,
    # so the model is built without them.
    model = GoogLeNet(num_classes=5, aux_logits=False).to(device)

    # load model weights
    weights_path = "./googleNet.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # strict=False because a checkpoint trained with aux_logits=True also
    # contains the auxiliary classifiers' parameters.
    missing_keys, unexpected_keys = model.load_state_dict(torch.load(weights_path, map_location=device),
                                                          strict=False)
    # Only the auxiliary-head keys may be skipped. Anything else means the
    # checkpoint does not belong to this model and predictions would come from
    # partly random weights.
    stray_keys = [key for key in unexpected_keys
                  if not key.startswith(("aux1.", "aux2."))]
    if missing_keys or stray_keys:
        raise RuntimeError(
            "checkpoint '{}' does not match the model; missing keys: {}, unexpected keys: {}".format(
                weights_path, list(missing_keys), stray_keys))

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = int(torch.argmax(predict))

    # class_indices.json stores the class indices as string keys.
    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


if __name__ == '__main__':
    main()