VGG-16网络结构

在这里插入图片描述在这里插入图片描述在这里插入图片描述在这里插入图片描述
在这里插入图片描述

此图为博主手绘的VGG-16的流程图

在这里插入图片描述

模型

import torch.nn as nn
import torch


class VGG(nn.Module):
    """VGG classification network: a convolutional feature trunk followed
    by a fully-connected classifier head.

    Args:
        features: module (built by ``make_features``) mapping an
            N x 3 x 224 x 224 batch to N x 512 x 7 x 7 feature maps.
        num_classes: number of output classes.
        init_weights: when True, Xavier-initialize conv/linear weights.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        self.features = features
        # Classifier head: two dropout-regularized hidden FC layers
        # (2048 units, a slimmed-down version of the paper's 4096),
        # then the output projection.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(512 * 7 * 7, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # N x 3 x 224 x 224 -> N x 512 x 7 x 7
        feats = self.features(x)
        # Flatten everything except the batch dimension: N x (512*7*7)
        flat = torch.flatten(feats, start_dim=1)
        # N x num_classes (raw logits; no softmax here)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Xavier-initialize every conv/linear weight; zero the biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)

# Build the convolutional feature-extraction trunk.
def make_features(cfg: list):
    """Assemble the conv trunk from a configuration list.

    Each integer entry adds a 3x3 conv (padding 1, stride 1) with that many
    output channels followed by an inplace ReLU; each ``"M"`` entry adds a
    2x2 max-pool with stride 2.  Returns everything as one nn.Sequential.
    """
    layers = []
    channels = 3  # RGB input depth
    for item in cfg:
        if item == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            # this layer's output depth becomes the next conv's input depth
            channels = item
    return nn.Sequential(*layers)


# Per-variant layer configurations: an int is the number of 3x3 conv
# kernels (output channels) for one conv layer, 'M' is a 2x2 max-pool.
# Each key selects a VGG depth variant for make_features().
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
        }

# Factory: instantiate a VGG network by configuration name.
def vgg(model_name="vgg16", **kwargs):
    """Instantiate a VGG model from a named configuration.

    Args:
        model_name: key into ``cfgs`` ('vgg11', 'vgg13', 'vgg16', 'vgg19').
        **kwargs: forwarded to ``VGG`` (e.g. num_classes, init_weights).

    Returns:
        A ``VGG`` instance with its conv trunk built by ``make_features``.

    Raises:
        ValueError: if ``model_name`` is not a known configuration.
    """
    try:
        cfg = cfgs[model_name]
    except KeyError:
        # Fix: the original used a bare ``except`` (which hid unrelated
        # errors) and called exit(-1); a library helper should raise,
        # not terminate the whole process.
        raise ValueError(
            "Warning: model number {} not in cfgs dict!".format(model_name)
        ) from None
    # make_features builds the conv trunk; kwargs configure the head.
    model = VGG(make_features(cfg), **kwargs)
    return model

训练

import os
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

from model import vgg


def main():
    """Train a 5-class VGG-16 on the flower dataset, keeping the best
    checkpoint (by validation accuracy) in ./vgg16Net.pth."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Random augmentation for training; deterministic resize for validation.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class-name -> index, e.g. {'daisy':0, 'dandelion':1, 'roses':2, ...};
    # invert it and dump to JSON so the prediction script can recover names.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    # Fix: typo in the log message ("fot" -> "for").
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # --- train: enable dropout ---
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # accumulate for the per-epoch average printed below
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # --- validate: disable dropout and gradient tracking ---
        net.eval()
        acc = 0.0  # number of correct predictions this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # index of the max logit is the predicted class
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Checkpoint only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()

预测

import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import vgg


def main():
    """Classify a single image with the trained VGG-16 and display the
    predicted class and probability."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Must match the validation transform used during training.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image (fix: "dose" -> "does" in the assert message)
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read the class-index -> class-name mapping written at training time
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    # Fix: use a context manager so the file handle is always closed
    # (the original opened the file and never closed it).
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model with the same head size used during training
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    # load model weights
    weights_path = "./vgg16Net.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()  # disable dropout for inference
    with torch.no_grad():
        # forward the single image, drop the batch dim, move to CPU
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    # JSON keys are strings, hence str(predict_cla)
    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    print(print_res)
    plt.show()


if __name__ == '__main__':
    main()

  • 5
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
以下是用MATLAB搭建VGG-16网络结构的示例代码: ```matlab layers = [ imageInputLayer([224 224 3]) convolution2dLayer(3,64,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,64,'Padding',[1 1 1 1]) reluLayer() maxPooling2dLayer(2,'Stride',2) convolution2dLayer(3,128,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,128,'Padding',[1 1 1 1]) reluLayer() maxPooling2dLayer(2,'Stride',2) convolution2dLayer(3,256,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,256,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,256,'Padding',[1 1 1 1]) reluLayer() maxPooling2dLayer(2,'Stride',2) convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() maxPooling2dLayer(2,'Stride',2) convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() convolution2dLayer(3,512,'Padding',[1 1 1 1]) reluLayer() maxPooling2dLayer(2,'Stride',2) fullyConnectedLayer(4096) reluLayer() dropoutLayer(0.5) fullyConnectedLayer(4096) reluLayer() dropoutLayer(0.5) fullyConnectedLayer(1000) softmaxLayer() classificationLayer()]; % 将网络结构保存为一个变量 vgg16 = layerGraph(layers); ``` 这个代码定义了一个由 16 个卷积层和 3 个全连接层组成的VGG-16网络结构。由于该网络是在ImageNet数据集上进行训练的,因此最后一层是一个具有1000个类别的softmax分类器。 如果你需要其他网络结构的代码,可以在MATLAB的文档中查找相应的示例代码。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值