基于pytorch的VGG网络实现

最新推荐文章于 2024-05-24 21:37:22 发布

尘小风

最新推荐文章于 2024-05-24 21:37:22 发布

阅读量347

点赞数

文章标签： pytorch 人工智能 python 深度学习分类网络

本文链接：https://blog.csdn.net/Qcherry/article/details/131561116

版权

B站UP：霹雳吧啦Wz

课程合集链接：1.1 卷积神经网络基础_哔哩哔哩_bilibili

代码参考B站UP：霹雳吧啦Wz的《深度学习-图像分类篇章》视频，代码根据个人编程习惯及本人自用数据集进行少量改动，欢迎大佬们批评指正。

1 网络结构

2 代码实现

2.1 模型搭建

import torchvision.models.vgg # 按住ctrl点击model.vgg即可跳转官方实现源码

import torch
from typing import cast

# Download URLs for pretrained VGG weights hosted on download.pytorch.org,
# keyed by the same variant names used in `cfgs`.
# NOTE(review): nothing in the visible code reads this dict; the training
# script loads a local "vgg16_bn-6c64b313.pth" file, which is not listed here.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}


class VGG(torch.nn.Module):
    """VGG-style classifier: convolutional feature extractor plus a 3-layer fully connected head.

    Args:
        features: module used as the feature extractor (normally the
            ``torch.nn.Sequential`` returned by ``make_layers``). Its output must
            have 512 channels, because the first linear layer expects
            ``512 * 7 * 7`` inputs after pooling.
        num_classes: width of the final linear layer.
        init_weights: when True, every Conv2d / BatchNorm2d / Linear sub-module
            is re-initialised by ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super().__init__()
        self.features = features
        # Adaptive pooling pins the spatial size to 7x7, so the classifier input
        # width does not depend on the input image resolution.
        self.avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(512 * 7 * 7, 4096),  # hidden width is 4096 in both hidden layers
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for an image batch ``x``."""
        # e.g. N x 3 x 224 x 224 -> N x 512 x 7 x 7 for the configurations in this file
        x = self.features(x)
        x = self.avgpool(x)
        # N x 512 x 7 x 7 -> N x (512*7*7)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Re-initialise all Conv2d, BatchNorm2d and Linear sub-modules in place.

        Conv2d weights use Kaiming-normal (fan_out, relu); BatchNorm2d uses
        weight=1 / bias=0; Linear weights use N(0, 0.01). All biases are zeroed.
        Parameters that do not exist (``bias=False`` or ``affine=False``
        modules) are skipped instead of raising.
        """
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.BatchNorm2d):
                # affine=False batch-norm layers carry no weight/bias
                if m.weight is not None:
                    torch.nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)


def make_layers(cfg: list, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Each entry of ``cfg`` is either the string ``"M"`` (append a 2x2, stride-2
    max-pool) or a channel count (append a 3x3 convolution with padding 1,
    optionally followed by BatchNorm2d, then an in-place ReLU). The first
    convolution takes 3 input channels; every later one takes the previous
    convolution's output channels.

    Returns:
        A ``torch.nn.Sequential`` holding the layers in ``cfg`` order.
    """
    modules = []
    channels_in = 3
    for entry in cfg:
        if entry == "M":
            modules.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        channels_out = cast(int, entry)
        modules.append(torch.nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(torch.nn.BatchNorm2d(channels_out))
        modules.append(torch.nn.ReLU(inplace=True))
        # the next convolution consumes what this one produced
        channels_in = channels_out

    # Sequential takes the layers as positional arguments, hence the unpacking
    return torch.nn.Sequential(*modules)


# Layer layouts consumed by make_layers: an integer is the output channel count
# of a 3x3 convolution, 'M' is a 2x2 max-pool.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # VGG11 layout
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # VGG13 layout
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], # VGG16 layout
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # VGG19 layout
}


def vgg(model_name, batch_norm, **kwargs):
    """Construct a VGG model for one of the layouts registered in ``cfgs``.

    Args:
        model_name: key into ``cfgs`` (e.g. "vgg16").
        batch_norm: forwarded to ``make_layers``; adds BatchNorm2d after each conv.
        **kwargs: forwarded unchanged to the ``VGG`` constructor
            (e.g. ``num_classes``, ``init_weights``).

    Returns:
        The freshly built ``VGG`` instance.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_layers(cfgs[model_name], batch_norm=batch_norm)
    return VGG(feature_extractor, **kwargs)

2.2 模型训练

import os
import json
import sys
import torch
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

from VGGnet_model_choice import vgg
from utils import train_one_epoch, evaluate

# Train on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

# Directory that receives one checkpoint per epoch. exist_ok=True replaces the
# separate exists()/makedirs() pair, so an already-present directory is fine.
os.makedirs("./dataset2_SGD0.01_BNweights", exist_ok=True)

# TensorBoard event files for this run are written to this directory.
tb_writer = SummaryWriter("dataset2_SGD0.01_BNruns")

batch_size = 64

# Preprocessing pipelines, selected by split name.
# "train" adds a random resized crop and a random horizontal flip as data
# augmentation; "val" uses a deterministic resize followed by a center crop.
# Both end with tensor conversion and the same per-channel normalisation.
# Background reading: https://blog.csdn.net/see_you_yu/article/details/106722787
transform = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]),
)


# Locate the dataset relative to the current working directory.
# os.getcwd() is the directory the process was started from (not necessarily the
# directory containing this script), "../" steps one level up, and abspath turns
# the result into an absolute path. The dataset folder is expected to sit next
# to the working directory's parent; a layout nested one level deeper would
# need "../.." instead.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
image_path = data_root + "/dataset/SIPaKMeD/"
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

# Training split: augmented preprocessing, reshuffled every epoch.
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train2"), transform=transform["train"])
train_num = len(train_dataset)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Validation split: deterministic preprocessing, fixed order.
# Built with os.path.join like the training path (avoids the doubled "/").
val_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val2"), transform=transform["val"])
val_num = len(val_dataset)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Persist the index -> class-name mapping so a prediction script can translate
# predicted indices back into labels. ImageFolder exposes name -> index, so the
# mapping is inverted before it is written.
class_to_idx = train_dataset.class_to_idx
cla_dict = {index: name for name, index in class_to_idx.items()}
with open('class_indices.json', 'w', encoding='utf-8') as json_file:
    json.dump(cla_dict, json_file, indent=4)

print("using {} images for training, {} images for Validation.".format(train_num, val_num))

# Transfer learning.
# The network is built with the default 1000-class head so that every key of the
# pretrained checkpoint has a matching parameter; the head is swapped for a
# 5-class layer only after the weights are loaded.
model_name = "vgg16"
model = vgg(model_name=model_name, batch_norm=True).to(device)
model_weight_path = "./vgg16_bn-6c64b313.pth"  # local path of the pretrained checkpoint
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
# strict=False loads every key that matches and skips the rest without raising.
missing_keys, unexpected_keys = model.load_state_dict(torch.load(model_weight_path, map_location='cpu'), strict=False)
# Because strict=False never raises, report mismatches explicitly; otherwise a
# wrong checkpoint would leave the network at its random initialisation unnoticed.
if missing_keys or unexpected_keys:
    print("missing keys: {}".format(missing_keys))
    print("unexpected keys: {}".format(unexpected_keys))

# Replace the last fully connected layer with a fresh 5-class output layer,
# keeping its input width.
inchannel = model.classifier[6].in_features
model.classifier[6] = torch.nn.Linear(inchannel, 5).to(device)

# Optional: freeze the convolutional backbone so only the classifier is trained.
# for param in model.features.parameters():
#     param.requires_grad = False

loss_function = torch.nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimizer, so
# the freezing option above works without further changes.
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9, weight_decay=5E-5)
# Alternatives tried by the author:
# optimizer = torch.optim.Adam(params, lr=0.01)
# scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)


# Training: 50 epochs, each followed by a validation pass, TensorBoard logging
# and a checkpoint. The writer is closed in `finally` so that buffered events
# are flushed to disk even when training is interrupted.
try:
    for epoch in range(50):
        # one pass over the training set
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch)

        # one pass over the validation set
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tb_writer.add_scalars('loss/', {'train': train_loss, 'val': val_loss}, epoch)
        tb_writer.add_scalars('accuracy/', {'train': train_acc, 'val': val_acc}, epoch)
        tb_writer.add_scalar('learning_rate', optimizer.param_groups[0]["lr"], epoch)

        # Save the weights after every epoch as vgg16model-<epoch>.pth
        # (epoch 0 -> vgg16model-0.pth).
        torch.save(model.state_dict(), "./dataset2_SGD0.01_BNweights/vgg16model-{}.pth".format(epoch))
finally:
    tb_writer.close()

2.3 模型预测

import os
import torch
from PIL import Image
from torchvision import transforms
from VGGnet_model_choice import vgg


# Run on the first CUDA GPU when available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Deterministic preprocessing: resize, center crop, tensor conversion, normalisation.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Collect the path of every .bmp file in the test folder, which is resolved
# relative to the parent of the current working directory.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
data_path = data_root + "/dataset/SIPaKMeD/test3/"
img_path_list = [
    os.path.join(data_path, file_name)
    for file_name in os.listdir(data_path)
    if file_name.endswith(".bmp")
]

# Class names in index order; compared against the label encoded in each file name.
classes = ["im_Dyskeratotic", "im_Koilocytotic", "im_Metaplastic", "im_Parabasal", "im_Superficial_Intermediate"]

# Build the 5-class network (no re-initialisation, the weights are loaded next)
# and restore the trained parameters.
model = vgg(model_name="vgg16", num_classes=5, batch_norm=True, init_weights=False).to(device)
weights_path = "vgg16model-49.pth"
model.load_state_dict(torch.load(weights_path, map_location=device))

# Inference mode: disables dropout and makes batch-norm use its running statistics.
model.eval()
batch_size = 8  # number of images forwarded through the network at once
true_num = 0    # correctly classified images
i = 0           # images evaluated so far
with torch.no_grad():
    # Step through the list in strides of batch_size. The final slice may be
    # shorter, so trailing images are evaluated too instead of being dropped.
    for start in range(0, len(img_path_list), batch_size):
        batch_paths = img_path_list[start:start + batch_size]

        batch_imgs = []
        for img_path in batch_paths:
            assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
            with Image.open(img_path) as img:
                # Force 3 channels so grayscale/palette files fit the 3-channel
                # normalisation and the network input.
                batch_imgs.append(transform(img.convert("RGB")))

        # One forward pass per batch; the model is in eval mode, so each
        # sample's output does not depend on the others in the batch.
        output = model(torch.stack(batch_imgs, dim=0).to(device)).cpu()
        predict_cla = torch.argmax(output, dim=1).tolist()

        for img_path, cla in zip(batch_paths, predict_cla):
            img_name = os.path.basename(img_path)
            # The ground-truth label is the file name minus its last 10 characters.
            # NOTE(review): presumably a fixed-width "<index>.bmp" suffix - confirm
            # against the dataset's naming scheme.
            if img_name[:-10] == classes[cla]:
                true_num += 1

        i += len(batch_paths)

if i:
    print("The accuacy of prediction is {:.3f}%".format(100 * true_num / i))
else:
    # Avoid a ZeroDivisionError when the folder holds no matching images.
    print("No .bmp images found in {}".format(data_path))