VGG pytorch代码

VGG 模型

# model.py
import torch.nn as nn
import torch

# Download URLs of the official pretrained weights, keyed by VGG variant name.
model_urls = dict(
    vgg11='https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    vgg13='https://download.pytorch.org/models/vgg13-c768596a.pth',
    vgg16='https://download.pytorch.org/models/vgg16-397923af.pth',
    vgg19='https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
)


class VGG(nn.Module):
    """VGG-style image classifier: a conv feature extractor plus a 3-layer MLP head.

    Args:
        features: Module that maps an ``N x 3 x H x W`` batch to a feature map
            with 512 channels (for example the ``nn.Sequential`` returned by
            ``make_features``).
        num_classes: Number of outputs of the final linear layer.
        init_weights: When true, re-initialise every conv and linear layer
            with Xavier-uniform weights and zero biases.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features
        # Parameter-free pooling to a fixed 7x7 grid. For 224x224 inputs the
        # feature map is already 7x7, so this is an identity; for other input
        # sizes it keeps the classifier's 512*7*7 input width valid instead of
        # failing in the first Linear layer. It adds no state_dict entries.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            # Dropout randomly zeroes activations with probability 0.5 to
            # reduce over-fitting; it is placed in front of each linear layer.
            nn.Dropout(p=0.5),
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the class scores (``N x num_classes``) for an image batch ``x``."""
        # N x 3 x H x W  ->  N x 512 x h x w   (7 x 7 when H = W = 224)
        x = self.features(x)
        # N x 512 x h x w  ->  N x 512 x 7 x 7
        x = self.avgpool(x)
        # Flatten everything after the batch dimension: N x (512*7*7)
        x = torch.flatten(x, start_dim=1)
        return self.classifier(x)

    def _initialize_weights(self):
        """Apply Xavier-uniform weights and zero biases to conv and linear layers."""
        for m in self.modules():  # visits every sub-module, including nested ones
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers created with bias=False have no bias tensor to reset.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """Build the convolutional feature extractor described by ``cfg``.

    Every entry equal to ``"M"`` adds a 2x2 max-pool with stride 2. Any other
    entry is taken as an output channel count and adds a 3x3 convolution
    (padding 1) followed by an in-place ReLU. The first convolution expects
    3 input channels; each later one takes the previous convolution's output.

    Returns:
        An ``nn.Sequential`` holding the layers in configuration order.
    """
    modules = []
    prev_channels = 3
    for entry in cfg:
        if entry != "M":
            modules.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
            modules.append(nn.ReLU(True))
            # The next convolution consumes what this one produced.
            prev_channels = entry
            continue
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*modules)

# Layer layouts per VGG variant, in the format make_features consumes:
# a number is the output channel count of a conv layer, 'M' is a max-pool.
# Each row below is one stage (the convs up to and including a pooling layer).
cfgs = {
    'vgg11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'vgg19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}


def vgg(model_name="vgg16", **kwargs):
    """Construct the VGG variant whose layout is stored under ``model_name`` in ``cfgs``.

    Any extra keyword arguments are forwarded unchanged to the ``VGG``
    constructor. An unknown ``model_name`` trips the assertion below.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    feature_extractor = make_features(cfgs[model_name])
    return VGG(feature_extractor, **kwargs)

VGG模型训练

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import vgg


def main():
    """Train VGG-16 on the flower dataset and keep the best-scoring weights.

    Steps: pick the device, build the train/val ``ImageFolder`` datasets from
    ``<cwd>/../../data_set/flower_data``, write the index-to-class mapping to
    ``class_indices.json``, then train for 30 epochs with Adam and
    cross-entropy loss. After every epoch the validation accuracy is measured
    and the model's ``state_dict`` is saved to ``./vgg16Net.pth`` whenever the
    accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        # Training: random crop resized to 224x224 and random horizontal flip
        # as augmentation, then conversion to a tensor and normalization.
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     # NOTE: the original author points out that when fine-tuning
                                     # ImageNet-pretrained weights, the ImageNet statistics should
                                     # be used instead (they cite subtracting the per-channel
                                     # means [123.68, 116.78, 103.94]).
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Validation: deterministic resize only, same normalization.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    # The dataset is expected two directories above the current working directory.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx looks like {'daisy': 0, 'dandelion': 1, ...}; invert it so a
    # predicted index can be mapped back to its class name, and persist it.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # Number of DataLoader workers, capped at 8. os.cpu_count() may return
    # None when the count cannot be determined; fall back to 1 in that case
    # instead of letting min() fail on a None/int comparison.
    nw = min(os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8)
    print('Using {} dataloader workers every process'.format(nw))

    # NOTE: the original author remarks that on Windows num_workers had to be 0.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- train ----
        net.train()  # enables dropout
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Read the scalar once; it feeds both the running total and the bar.
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # ---- validate ----
        net.eval()  # disables dropout
        acc = 0.0  # number of correctly classified validation images this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Index of the highest score along the class dimension.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


# Start training only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()

单点调试

  1. 设置断点
  2. 在 `make_features` 中查看 `layers` 变量,可以看到每一层的结构
VGG是一种经典的卷积神经网络模型,由Karen Simonyan和Andrew Zisserman在2014年提出。它的名称来自作者所在的研究组Visual Geometry Group(视觉几何组),因此被称为VGG。VGG模型主要用于图像分类任务。

在PyTorch中,可以使用torchvision库来加载和使用预训练的VGG模型。下面是一个加载VGG模型并进行图像分类的示例代码:

```python
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# 加载预训练的VGG模型
vgg = models.vgg16(pretrained=True)

# 设置为评估模式
vgg.eval()

# 图像预处理
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 加载并预处理图像
image = Image.open('image.jpg')
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)

# 使用VGG模型进行预测
with torch.no_grad():
    output = vgg(input_batch)

# 加载标签文件
with open('imagenet_classes.txt') as f:
    labels = f.readlines()

# 预测结果
_, predicted_idx = torch.max(output, 1)
predicted_label = labels[predicted_idx.item()]
print(predicted_label)
```

上述代码中,首先通过`models.vgg16(pretrained=True)`来加载预训练的VGG模型,然后使用`transforms`模块对输入图像进行预处理,接着将预处理后的图像输入到VGG模型中进行推理,最后根据输出结果进行分类预测。

需要注意的是,上述代码中的`image.jpg`是待分类的图像文件,`imagenet_classes.txt`是包含ImageNet数据集标签的文件。你可以将自己的图像和标签文件进行替换。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值