Pytorch:实现VGG16做汽车分类

github地址:https://github.com/SPECTRELWF/pytorch-cnn-study

网络介绍:

image.png

VGG是牛津大学Visual Geometry Group小组发表在ICLR 2015上的一篇文章。受AlexNet通过加深卷积层带来性能提升的启发,VGG网络同样在卷积层的数量(即网络深度)上做工作。
与AlexNet相比,VGG使用更小的卷积核去替代AlexNet中的大卷积核。实验证明,两个3×3的卷积核堆叠可以替代一个5×5的卷积核,三个3×3的卷积核堆叠可以替代一个7×7的卷积核(感受野相同),而且小卷积核的参数量更少。
image.png
本次实现的就是上图中D的那个网络,总共16层,其中13个卷积层,3个全连接层,每个卷积块之后都接了MAXPOOLING。

数据集介绍:

数据集是来自网络上的一个公开数据集,做了一些简单处理,其中包含十类汽车,包括BUS、SUV、TAXI等类别。
数据集下载地址:链接: https://pan.baidu.com/s/1x9Zxu8e9Sr0_9GucwgMNhQ 密码: mqw6

网络结构

# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/5 下午4:16
import torch
import torch.nn as nn
import torchvision
class VGG16(nn.Module):
    """VGG-16 style image classifier with batch normalisation.

    The network consists of five convolutional blocks (2, 2, 3, 3 and 3
    ``Conv2d -> BatchNorm2d -> ReLU`` stages respectively, 13 convolutions
    in total), each closed by a 2x2 max-pool, followed by three fully
    connected layers (25088 -> 4096 -> 1024 -> ``num_classes``).

    Because ``fc1`` expects ``512 * 7 * 7`` features and every block halves
    the spatial size, inputs must be ``(N, 3, 224, 224)``. Any other spatial
    size raises a shape error in ``fc1``.

    The output is raw, un-normalised class scores (no softmax is applied),
    which is what ``nn.CrossEntropyLoss`` expects.

    Args:
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.block1 = self._make_block(3, 64, num_convs=2)
        self.block2 = self._make_block(64, 128, num_convs=2)
        self.block3 = self._make_block(128, 256, num_convs=3)
        self.block4 = self._make_block(256, 512, num_convs=3)
        # AvgPool2d with kernel_size=1/stride=1 is an identity operation; it is
        # kept only so that the module layout matches the original definition.
        self.block5 = self._make_block(
            512, 512, num_convs=3,
            trailing=(nn.AvgPool2d(kernel_size=1, stride=1),),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
        )
        # Final layer emits logits; no activation on purpose.
        self.fc3 = nn.Sequential(
            nn.Linear(1024, num_classes),
        )

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs, trailing=()):
        """Build ``num_convs`` Conv-BN-ReLU stages followed by a 2x2 max-pool.

        Layers are appended in the order conv, batch-norm, ReLU so that the
        indices inside the returned ``nn.Sequential`` (and therefore the
        ``state_dict`` keys) are identical to a hand-written block.

        Args:
            in_channels: Channels of the tensor entering the block.
            out_channels: Channels produced by every convolution in the block.
            num_convs: How many Conv-BN-ReLU stages to stack.
            trailing: Extra modules appended after the max-pool.
        """
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(
                nn.Conv2d(channels, out_channels, kernel_size=3, stride=1, padding=1)
            )
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        layers.extend(trailing)
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for a batch ``x``."""
        for block in (self.block1, self.block2, self.block3, self.block4, self.block5):
            x = block(x)
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 512*7*7)`` this never moves data into the batch axis, so
        # a wrongly sized input fails loudly in fc1 instead of silently
        # producing a different batch size.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

Train

我在训练过程中发现,一开始只设置了30个EPOCH,网络很难收敛,得到的效果特别差,一直以为是结构上出了问题,后面把epoch设置多一点就慢慢收敛了。

# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/5 下午4:37

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms as transforms
import torch.optim as optim
from dataload.car_dataload import CAR_DATASET
from vgg16 import VGG16
import torch.nn as nn
from utils import plot_curve

# Prefer the GPU when CUDA is usable; otherwise train on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Training hyper-parameters.
epochs, batch_size, lr = 200, 32, 0.01

# Preprocessing: resize to the 224x224 input the network expects, convert to
# a tensor, then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Shuffled mini-batches over the training split.
train_dataset = CAR_DATASET(r'dataset/train', transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Model, SGD-with-momentum optimiser and classification loss.
model = VGG16().to(device)
opt = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)
cri = nn.CrossEntropyLoss()

import os

# Create the checkpoint directory up front: torch.save does not create
# missing directories, so without this the first epoch would be lost.
os.makedirs('model/new', exist_ok=True)

# Make the training mode explicit (Dropout active, BatchNorm uses batch stats).
model.train()

# Loss of every optimisation step, in order, for the final curve plot.
train_loss = []
for epoch in range(epochs):
    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)

        opt.zero_grad()
        pred = model(x)
        loss = cri(pred, y)
        loss.backward()
        opt.step()

        # Read the scalar once; each .item() call forces a device sync.
        loss_value = loss.item()
        train_loss.append(loss_value)

        print('[epoch : %d  ,batch : %d  ,loss : %.3f]' %(epoch,batch_idx,loss_value))
    # Save one checkpoint per epoch so any epoch can be evaluated later.
    torch.save(model.state_dict(), 'model/new/epoch'+str(epoch)+'.pth')
plot_curve(train_loss)

test

# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/4 下午1:29

import torch
import torchvision
from dataload.car_dataload import CAR_DATASET
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from vgg16 import VGG16

# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Evaluation preprocessing: resize to 224x224, convert to a tensor and
# normalise every channel with mean 0.5 / std 0.5.
# No random augmentation (e.g. horizontal flip) is applied at test time.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Iterate the test split in a fixed order, 32 samples at a time.
test_dataset = CAR_DATASET('dataset/test', transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)




def predict(checkpoint_path='model/new/epoch75.pth'):
    """Evaluate a saved VGG16 checkpoint on ``test_loader`` and report accuracy.

    The labels and predictions of every batch are printed, followed by the
    overall accuracy.

    Args:
        checkpoint_path: Path of the ``state_dict`` file to load. Defaults to
            the epoch-75 checkpoint written by the training script.

    Returns:
        The fraction of test samples classified correctly, as a float.

    Raises:
        ValueError: If the test dataset contains no samples.
    """
    total_num = len(test_loader.dataset)
    if total_num == 0:
        raise ValueError('test dataset is empty; cannot compute accuracy')

    net = VGG16().to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    # Inference mode: disables Dropout and makes BatchNorm use its running
    # statistics; without this the reported accuracy is noisy and too low.
    net.eval()
    print(net)

    total_correct = 0
    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)
            print('y',y)
            out = net(x)
            # Predicted class = index of the largest logit.
            pred = out.argmax(dim=1)
            print('pred',pred)
            total_correct += pred.eq(y).sum().item()

    acc = total_correct / total_num
    print("test acc:", acc)
    return acc


predict()


仅使用测试准确率作为简单的评价。

  • 0
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值