基于pytorch的VGG网络实现

B站UP:霹雳吧啦Wz

课程合集链接:1.1 卷积神经网络基础_哔哩哔哩_bilibili

代码参考B站UP:霹雳吧啦Wz的《深度学习-图像分类篇章》视频,代码根据个人编程习惯及本人自用数据集进行少量改动,欢迎大佬们批评指正。

1 网络结构

 2 代码实现

2.1 模型搭建

import torchvision.models.vgg # 按住ctrl点击model.vgg即可跳转官方实现源码

import torch
from typing import cast

# Download URLs of the official PyTorch pretrained weights, keyed by architecture name.
# NOTE(review): this dict is not referenced by the code visible in this file, and it has no
# batch-norm ("*_bn") entries — confirm whether it is still needed.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}


class VGG(torch.nn.Module):
    """VGG classifier: a convolutional feature extractor followed by a 3-layer dense head.

    Args:
        features: Module used as the convolutional feature extractor (normally the
            ``torch.nn.Sequential`` returned by ``make_layers``). Its output must have
            512 channels, because the classifier expects ``512 * 7 * 7`` inputs.
        num_classes: Number of output logits of the last fully connected layer.
        init_weights: When True, every Conv2d / BatchNorm2d / Linear layer is
            re-initialised by ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        # Force a 7x7 spatial size so the classifier input size does not depend on the image size.
        self.avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(512*7*7, 4096),  # hidden width is 4096 (not a reduced 2048)
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Input batch; the original author annotates it as N x 3 x 224 x 224.

        Returns:
            Tensor of shape (N, num_classes).
        """
        x = self.features(x)
        # N x 512 x 7 x 7 after the adaptive pooling
        x = self.avgpool(x)
        # Keep the batch dimension, flatten the rest: N x 512*7*7
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise all Conv2d, BatchNorm2d and Linear sub-modules in place.

        Conv2d weights use Kaiming-normal (fan_out, ReLU), BatchNorm2d is set to
        scale 1 / shift 0, Linear weights are drawn from N(0, 0.01). Biases are
        zeroed. Layers created without bias / affine parameters are skipped for
        the missing parameter instead of raising.
        """
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.BatchNorm2d):
                # weight / bias are None when the layer was built with affine=False
                if m.weight is not None:
                    torch.nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, torch.nn.Linear):
                torch.nn.init.normal_(m.weight, 0, 0.01)
                # bias is None when the layer was built with bias=False
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)


def make_layers(cfg: list, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Layer specification. The string ``"M"`` adds a 2x2 max-pool with
            stride 2; any other entry is taken as the output channel count of a
            3x3 convolution (padding 1) followed by ReLU.
        batch_norm: When True, a BatchNorm2d layer is inserted between each
            convolution and its ReLU.

    Returns:
        A ``torch.nn.Sequential`` holding the layers in specification order.
    """
    modules = []
    channels_in = 3  # the first convolution takes 3 input channels
    for spec in cfg:
        if spec == "M":
            modules.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        channels_out = cast(int, spec)
        modules.append(torch.nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(torch.nn.BatchNorm2d(channels_out))
        modules.append(torch.nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels_in = channels_out

    # Unpack the list so every layer is passed to Sequential as a positional argument.
    return torch.nn.Sequential(*modules)


# Layer specifications per architecture name, consumed by make_layers:
# an integer is the output channel count of a 3x3 convolution, 'M' is a max-pool layer.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # VGG11 layer layout
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # VGG13 layer layout
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], # VGG16 layer layout
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # VGG19 layer layout
}


def vgg(model_name, batch_norm, **kwargs):
    """Create a VGG model for the architecture named ``model_name``.

    Args:
        model_name: Key into ``cfgs`` (e.g. ``"vgg16"``).
        batch_norm: Forwarded to ``make_layers``; adds BatchNorm2d after each convolution.
        **kwargs: Forwarded unchanged to the ``VGG`` constructor
            (``num_classes``, ``init_weights``).

    Returns:
        The constructed ``VGG`` instance.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_layers(cfgs[model_name], batch_norm=batch_norm)
    return VGG(feature_extractor, **kwargs)

2.2 模型训练

import os
import json
import sys
import torch
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

from VGGnet_model_choice import vgg
from utils import train_one_epoch, evaluate

# Training script: fine-tunes a batch-norm VGG16 on an ImageFolder dataset, logging
# loss / accuracy / learning rate to TensorBoard and saving one checkpoint per epoch.

# Use the first CUDA GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

# Folder that receives one checkpoint per epoch. exist_ok=True replaces the former
# check-then-create sequence, so an already existing folder is not an error.
# NOTE(review): the folder / run names say "SGD0.01" while the optimizer below uses lr=0.0003.
os.makedirs("./dataset2_SGD0.01_BNweights", exist_ok=True)

tb_writer = SummaryWriter("dataset2_SGD0.01_BNruns")

batch_size = 64

# Pre-processing. The training pipeline additionally applies a random resized crop and a
# random horizontal flip as data augmentation; validation uses a deterministic resize + center crop.
# Background: https://blog.csdn.net/see_you_yu/article/details/106722787
transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]),
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])}

# Locate the dataset relative to the current working directory: the "dataset" folder is
# expected to sit in the parent directory ("../") of the directory the script is run from.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
image_path = data_root + "/dataset/SIPaKMeD/"
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

# Training set: one sub-folder per class, loaded in shuffled mini-batches.
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train2"), transform=transform["train"])
train_num = len(train_dataset)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Validation set: same folder layout, loaded in a fixed order.
# os.path.join is used (as for the training set) to avoid a doubled path separator.
val_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val2"), transform=transform["val"])
val_num = len(val_dataset)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Persist the index -> class-name mapping so it can be read back at prediction time.
# class_to_idx maps class name -> index; invert it before writing.
flower_list = train_dataset.class_to_idx  # name kept from the tutorial; holds this dataset's classes
cla_dict = dict((index, key) for key, index in flower_list.items())

json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_str)

print("using {} images for training, {} images for Validation.".format(train_num, val_num))

# Transfer learning: build the network with its default 1000-way head so that the pretrained
# checkpoint matches, load the weights, and only then swap in a head for this dataset.
model_name = "vgg16"
model = vgg(model_name=model_name, batch_norm=True).to(device)
model_weight_path = "./vgg16_bn-6c64b313.pth"  # pretrained checkpoint on disk
# strict=False loads every key that matches and leaves the rest at their initial values;
# report mismatches instead of ignoring them so a wrong checkpoint does not go unnoticed.
missing_keys, unexpected_keys = model.load_state_dict(torch.load(model_weight_path, map_location='cpu'), strict=False)
if missing_keys or unexpected_keys:
    print("missing keys: {}, unexpected keys: {}".format(missing_keys, unexpected_keys))

# Replace the last fully connected layer with one sized for the classes found in the training set
# (5 for the dataset this script was written for).
num_classes = len(cla_dict)
inchannel = model.classifier[6].in_features
model.classifier[6] = torch.nn.Linear(inchannel, num_classes).to(device)

# Uncomment to freeze the convolutional feature extractor and train the classifier only:
# for param in model.features.parameters():
#     param.requires_grad = False

# NOTE(review): loss_function is not passed to train_one_epoch / evaluate below — confirm
# whether those helpers define their own loss.
loss_function = torch.nn.CrossEntropyLoss()
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9, weight_decay=5E-5)
# Alternative optimizer: torch.optim.Adam(params, lr=0.01)
# Optional learning-rate schedule: lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

try:
    for epoch in range(50):
        # One pass over the training set.
        train_loss, train_acc = train_one_epoch(model=model,
                                                optimizer=optimizer,
                                                data_loader=train_loader,
                                                device=device,
                                                epoch=epoch)

        # One pass over the validation set.
        val_loss, val_acc = evaluate(model=model,
                                     data_loader=val_loader,
                                     device=device,
                                     epoch=epoch)

        tb_writer.add_scalars('loss/', {'train': train_loss, 'val': val_loss}, epoch)
        tb_writer.add_scalars('accuracy/', {'train': train_acc, 'val': val_acc}, epoch)
        tb_writer.add_scalar('learning_rate', optimizer.param_groups[0]["lr"], epoch)

        # Save the weights of every epoch as vgg16model-<epoch>.pth (epoch 0 -> vgg16model-0.pth).
        torch.save(model.state_dict(), "./dataset2_SGD0.01_BNweights/vgg16model-{}.pth".format(epoch))
finally:
    # Flush pending events and release the log file even if training is interrupted.
    tb_writer.close()

2.3 模型预测

import os
import torch
from PIL import Image
from torchvision import transforms
from VGGnet_model_choice import vgg


# Prediction script: classifies every .bmp image in the test folder with the trained
# VGG16-BN model and prints the accuracy against the label encoded in each file name.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Same deterministic pre-processing as used for validation during training.
transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Collect the paths of all images to predict.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
data_path = data_root + "/dataset/SIPaKMeD/test3/"
img_path_list = [os.path.join(data_path, i) for i in os.listdir(data_path) if i.endswith(".bmp")]

# Build the network with a 5-way head; weights come from the checkpoint, so skip initialisation.
model = vgg(model_name="vgg16", num_classes=5, batch_norm=True, init_weights=False).to(device)
# NOTE(review): the training script saves checkpoints under ./dataset2_SGD0.01_BNweights/ —
# confirm that this file has been copied next to the script.
weights_path = "vgg16model-49.pth"
model.load_state_dict(torch.load(weights_path, map_location=device))

# Class names by output index.
# NOTE(review): must match the index order written to class_indices.json by training — confirm.
classes = ["im_Dyskeratotic", "im_Koilocytotic", "im_Metaplastic", "im_Parabasal", "im_Superficial_Intermediate"]
model.eval()
batch_size = 8  # number of images stacked into one forward pass
true_num = 0    # correctly classified images
i = 0           # images processed so far
with torch.no_grad():
    # Step through the list in strides of batch_size; the last slice may be shorter,
    # so images beyond the last full batch are no longer skipped.
    for start in range(0, len(img_path_list), batch_size):
        batch_paths = img_path_list[start:start + batch_size]
        batch_imgs = []
        for img_path in batch_paths:
            assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
            # Close the file promptly and force 3 channels, which Normalize above requires.
            with Image.open(img_path) as img:
                batch_imgs.append(transform(img.convert("RGB")))

        batch = torch.stack(batch_imgs, dim=0)
        output = model(batch.to(device)).cpu()
        # Index of the largest logit per image.
        predict_cla = torch.argmax(output, dim=1).tolist()

        for img_path, cla in zip(batch_paths, predict_cla):
            img_name = os.path.basename(img_path)
            # The ground-truth label is the file name without its last 10 characters
            # (presumably a numeric suffix plus ".bmp" — TODO confirm naming scheme).
            if img_name[:-10] == classes[cla]:
                true_num += 1
        i += len(batch_paths)

if i == 0:
    # Avoid a division by zero when the folder contains no matching image.
    print("No .bmp images found in {}".format(data_path))
else:
    print("The accuacy of prediction is {:.3f}%".format(100 * true_num / i))

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
PyTorch VGG是基于深度卷积神经网络的图像分类模型,它是由Simonyan和Zisserman提出的。这个模型使用了非常小的3x3卷积核和最大池化层,并通过堆叠多个小卷积层使网络达到较深的层数(11~19个权重层)。在网络的顶部,使用了几个全连接层来进行分类。

以下是PyTorch VGG代码的实现步骤:

1.导入PyTorch库和VGG网络模型:

```python
import torch
import torch.nn as nn

class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
```

2.定义卷积块:

```python
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)
```

3.定义不同深度的VGG模型:

```python
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
    if pretrained:
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model

def vgg11(pretrained=False, progress=True, **kwargs):
    r"""VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
```

4.使用定义好的模型进行训练或预测。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值