VGG Notes

1. CNN Receptive Field

Each element of a convolution's output depends only on a local region of the input; the input region that an output element maps back to is its receptive field.

The spatial output size of a convolution (or pooling) layer is out = (W - F + 2P) / S + 1, where W is the input size, F the kernel size, P the padding, and S the stride. For example, a 3x3 kernel with stride 2 and no padding on a 9x9 input, followed by a 2x2 kernel with stride 2:

out = (9 - 3 + 2*0) / 2 + 1 = 4

out = (4 - 2 + 2*0) / 2 + 1 = 2
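The same formula can be wrapped in a small helper for sanity-checking such calculations (a minimal sketch; the kernel/stride/padding values are the ones from the two examples above):

def conv_output_size(w_in: int, kernel: int, stride: int = 1, padding: int = 0) -> int:
    """Spatial output size of a conv/pool layer: (W - F + 2P) // S + 1."""
    return (w_in - kernel + 2 * padding) // stride + 1

print(conv_output_size(9, kernel=3, stride=2, padding=0))  # 4
print(conv_output_size(4, kernel=2, stride=2, padding=0))  # 2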

Stacking several small convolution kernels needs fewer parameters than a single large kernel with the same receptive field (e.g. three 3x3 convolutions cover the same 7x7 region as one 7x7 convolution).

Reference: an introduction to receptive fields.
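As a concrete parameter comparison (a quick sketch, not from the original notes; C = 64 channels is chosen arbitrarily), three stacked 3x3 convolutions cover the same 7x7 receptive field as one 7x7 convolution but use roughly half the parameters:

import torch.nn as nn

c = 64  # arbitrary channel count, just for illustration
three_3x3 = nn.Sequential(*[nn.Conv2d(c, c, kernel_size=3, padding=1, bias=False) for _ in range(3)])
one_7x7 = nn.Conv2d(c, c, kernel_size=7, padding=3, bias=False)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(three_3x3))  # 3 * c * c * 3 * 3 = 110592
print(count(one_7x7))    # c * c * 7 * 7     = 200704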

2. Network Construction

2.1 VGG network parameters

All VGG variants share the same overall structure and differ only in how many convolutional layers each stage has. In the configs below, an integer is the output channel count of a 3x3 convolution and 'M' marks a 2x2 max-pooling layer.

cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
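The numbers in the names can be read straight off the configs: each integer entry is one 3x3 convolution, and the classifier defined later adds three fully connected layers, so vgg16 has 13 + 3 = 16 weight layers and vgg19 has 16 + 3 = 19. A quick check (sketch):

for name, cfg in cfgs.items():
    n_conv = sum(1 for v in cfg if v != 'M')
    print(name, f"{n_conv} conv + 3 FC = {n_conv + 3} weight layers")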

2.2 Define the feature-extraction network

import torch
import torch.nn as nn


def make_features(cfg: list):
    layers = []
    in_channels = 3                     # 3-channel RGB input
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(True)]
            in_channels = v             # the channel count changes after each conv
    # *layers unpacks the list so each layer is passed as a separate argument
    return nn.Sequential(*layers)
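For the standard 224x224 input, each of the five 'M' pooling layers halves the spatial size (224 -> 112 -> 56 -> 28 -> 14 -> 7), so the extractor ends with 512x7x7 features, which is exactly the 512*7*7 input size of the first fully connected layer in the classifier defined next. A quick shape check (sketch):

features = make_features(cfgs["vgg16"])
x = torch.randn(1, 3, 224, 224)   # dummy batch with one RGB image
print(features(x).shape)          # torch.Size([1, 512, 7, 7])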

2.3 Define the classification network

Initialize the model and define the fully connected (classifier) layers.

class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        self.features = features               # feature-extraction backbone

        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),                 # random dropout
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            # the final layer feeds a softmax, so no ReLU here
            nn.Linear(4096, num_classes)       # number of output classes
        )
        if init_weights:                       # optionally initialize the weights
            self._initialize_weights()

Forward pass

    def forward(self, x):
        x = self.features(x)                   # (N, 512, 7, 7) for a 224x224 input
        x = torch.flatten(x, start_dim=1)      # flatten to (N, 512*7*7)
        x = self.classifier(x)
        return x

Initialize the weights using xavier_uniform_.

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

3. Instantiating the Model

# instantiate the model
# **kwargs forwards an arbitrary dict of keyword arguments to VGG()
def vgg(model_name="vgg16", **kwargs):
    assert model_name in cfgs, "Warning: model name {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]

    model = VGG(make_features(cfg), **kwargs)
    return model
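Usage example (a sketch; num_classes=5 matches the 5-class flower dataset used for training below):

net = vgg(model_name="vgg16", num_classes=5, init_weights=True)
print(sum(p.numel() for p in net.parameters()))  # roughly 134 million parameters with the 5-class head
out = net(torch.randn(2, 3, 224, 224))
print(out.shape)                                 # torch.Size([2, 5])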

4. Training

import json

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from model import vgg


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    train_path = "zhang/train"
    val_path = "zhang/val"

    train_data = datasets.ImageFolder(root=train_path,transform=data_transform["train"])
    val_data = datasets.ImageFolder(root=val_path,transform = data_transform["val"])
    train_dataset = DataLoader(train_data,batch_size=32,shuffle=True)
    val_dataset = DataLoader(val_data, batch_size=32, shuffle=True)
    val_num = len(val_data)


    flower_list = train_data.class_to_idx
    class_dict = dict((val, key) for key, val in flower_list.items())  # invert the mapping: index -> class name
    json_str = json.dumps(class_dict, indent=4)  # indent=4 for readability
    with open("class_indices.json", "w", encoding="utf-8") as f:
        f.write(json_str)


    # start training
    net = vgg("vgg16",num_classes=5,init_weights=True)
    net.to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)
    best_acc = 0.0  # best validation accuracy seen so far

    for epoch in range(10):
        print(f"---------- epoch {epoch + 1} training start ----------")
        # train: Dropout layers require switching between .train() and .eval()
        net.train()
        for i, (images, labels) in enumerate(train_dataset):
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            # update the parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # val
        net.eval()
        accuracy = 0.0
        with torch.no_grad():
            for data in val_dataset:
                test_Img, target = data
                output = net(test_Img.to(device))
                # argmax over the class dimension (dim=1)
                predict_y = torch.argmax(output, dim=1)
                accuracy += (predict_y == target.to(device)).sum().item()
            acc_rate = accuracy / val_num
            # keep the checkpoint with the best validation accuracy
            if acc_rate > best_acc:
                best_acc = acc_rate
                torch.save(net.state_dict(), './VGG.pth')
            print(acc_rate)

if __name__ == '__main__':
    main()
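The loop above only reports the validation accuracy. If the training loss should be tracked as well, the training part of each epoch can be pulled into a small helper that returns the mean loss (a hypothetical refactor, not in the original notes; train_one_epoch is an assumed name):

def train_one_epoch(net, loader, loss_function, optimizer, device):
    """Run one training epoch and return the mean loss over all batches."""
    net.train()
    running_loss = 0.0
    for images, labels in loader:
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

Inside main() this would replace the inner training loop, e.g. mean_loss = train_one_epoch(net, train_dataset, loss_function, optimizer, device).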

5. Prediction

Largely the same as the AlexNet prediction script.

import json
import torch
from PIL import Image
from torchvision import transforms
from model import vgg

data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
img_path = "./th.jpg"
img = Image.open(img_path)
img = data_transform(img)
# add a batch dimension; the transformed image is only 3-D (C, H, W)
img = torch.unsqueeze(img, dim=0)
try:
    with open("class_indices.json", "r") as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)
# build the network and load the trained weights
model = vgg("vgg16", num_classes=5, init_weights=False)
model_path = "VGG.pth"
model.load_state_dict(torch.load(model_path))
model.eval()
with torch.no_grad():
    output = torch.squeeze(model(img))          # squeeze out the batch dimension
    predict = torch.softmax(output, dim=0)      # convert logits to probabilities
    predict_cla = torch.argmax(predict).numpy()

print(class_indict[str(predict_cla)], predict[predict_cla].item())
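If the top few candidates are wanted instead of only the single best class, torch.topk can be applied to the same probability vector (sketch, continuing the script above):

top_prob, top_idx = torch.topk(predict, k=3)     # three most likely classes out of the 5
for p, i in zip(top_prob, top_idx):
    print(class_indict[str(i.item())], f"{p.item():.3f}")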
