GitHub repo: https://github.com/SPECTRELWF/pytorch-cnn-study
Network introduction:
VGG comes from the Visual Geometry Group at the University of Oxford and was presented at ICLR 2015. Building on what AlexNet showed about the benefit of deeper convolutional stacks, VGG pushes further on the number of convolutional layers.
Compared with AlexNet, VGG replaces the large convolution kernels with smaller ones. Experiments show that two stacked 3×3 convolutions cover the same receptive field as one 5×5 convolution, and three stacked 3×3 convolutions can replace one 7×7 convolution, while using fewer parameters.
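As a rough sanity check on the parameter savings (my own back-of-the-envelope example, not from the original post), counting only convolution weights and assuming C input channels and C output channels:

C = 64                            # example channel count
p_5x5 = 5 * 5 * C * C             # one 5x5 conv
p_3x3_x2 = 2 * (3 * 3 * C * C)    # two stacked 3x3 convs, same receptive field
p_7x7 = 7 * 7 * C * C             # one 7x7 conv
p_3x3_x3 = 3 * (3 * 3 * C * C)    # three stacked 3x3 convs, same receptive field
print(p_3x3_x2 / p_5x5)           # 0.72  -> 28% fewer weights than one 5x5
print(p_3x3_x3 / p_7x7)           # ~0.55 -> about 45% fewer weights than one 7x7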
This post implements configuration D from the VGG paper: 16 layers in total, 13 convolutional layers plus 3 fully connected layers, with a max-pooling layer after each convolutional block.
Dataset introduction:
The dataset is a public one found online, with some light cleanup applied. It contains ten classes of vehicles, including BUS, SUV, TAXI, and so on.
Dataset download: https://pan.baidu.com/s/1x9Zxu8e9Sr0_9GucwgMNhQ (access code: mqw6)
Network structure
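The CAR_DATASET class imported by the training and test scripts below (dataload/car_dataload.py) is not shown in the post. A minimal sketch, assuming the archive unpacks into one sub-folder per class (dataset/train/<class_name>/*.jpg, dataset/test/<class_name>/*.jpg; the real layout may differ), could look like this:

# dataload/car_dataload.py -- a minimal sketch under the assumed folder layout
import os
from PIL import Image
from torch.utils.data import Dataset

class CAR_DATASET(Dataset):
    def __init__(self, root, transform=None):
        self.transform = transform
        self.samples = []
        # Map each sub-folder name to an integer label, sorted for a stable order.
        classes = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
        for label, cls in enumerate(classes):
            cls_dir = os.path.join(root, cls)
            for fname in os.listdir(cls_dir):
                self.samples.append((os.path.join(cls_dir, fname), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label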
# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/5 下午4:16
import torch
import torch.nn as nn
import torchvision
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.block5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.AvgPool2d(kernel_size=1, stride=1),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(1024, 10),
            # nn.ReLU(inplace=True),
            # nn.Softmax(),
        )

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        # print(x.shape)
        x = x.view(-1, 512 * 7 * 7)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
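A quick shape check (my addition, not part of the repository script), assuming the 224×224 RGB inputs used during training, confirms the 512*7*7 flatten size and the 10-class output:

if __name__ == '__main__':
    model = VGG16()
    dummy = torch.randn(1, 3, 224, 224)  # one fake 224x224 RGB image
    print(model(dummy).shape)            # expected: torch.Size([1, 10])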
Train
During training I found that with only 30 epochs the network barely converged and the results were very poor. I kept assuming something was wrong with the architecture, but after raising the epoch count it slowly converged.
# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/5 下午4:37
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms as transforms
import torch.optim as optim
from dataload.car_dataload import CAR_DATASET
from vgg16 import VGG16
import torch.nn as nn
from utils import plot_curve
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs = 200
batch_size = 32
lr = 0.01
transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
train_dataset = CAR_DATASET(r'dataset/train', transform=transform)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
model = VGG16().to(device)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
cri = nn.CrossEntropyLoss()
train_loss = []
for epoch in range(epochs):
    sum_loss = 0
    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        pred = model(x)
        opt.zero_grad()
        loss = cri(pred, y)
        loss.backward()
        opt.step()
        train_loss.append(loss.item())
        print('[epoch : %d ,batch : %d ,loss : %.3f]' % (epoch, batch_idx, loss.item()))
    torch.save(model.state_dict(), 'model/new/epoch' + str(epoch) + '.pth')
plot_curve(train_loss)
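The plot_curve helper imported from utils is not included in the post either; a minimal sketch, assuming it simply plots the recorded per-batch loss values with matplotlib:

# utils.py -- assumed implementation of plot_curve (sketch)
import matplotlib.pyplot as plt

def plot_curve(values):
    plt.figure()
    plt.plot(range(len(values)), values, label='train loss')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.legend()
    plt.show()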
Test
# !/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:WeiFeng Liu
# @Time: 2021/11/4 下午1:29
import torch
import torchvision
from dataload.car_dataload import CAR_DATASET
# use the GPU if one is available
from torch.utils.data import DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import torchvision.transforms as transforms
from vgg16 import VGG16
transform = transforms.Compose([
    transforms.Resize([224, 224]),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
])
test_dataset = CAR_DATASET('dataset/test', transform=transform)
test_loader = DataLoader(test_dataset,
                         batch_size=32,
                         shuffle=False,
                         )

def predict():
    net = VGG16().to(device)
    net.load_state_dict(torch.load('model/new/epoch75.pth'))
    net.eval()  # put BatchNorm/Dropout into inference mode
    print(net)
    total_correct = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for batch_idx, (x, y) in enumerate(test_loader):
            x = x.to(device)
            # print(x.shape)
            y = y.to(device)
            print('y', y)
            out = net(x)
            # print(out)
            pred = out.argmax(dim=1)
            print('pred', pred)
            correct = pred.eq(y).sum().float().item()
            total_correct += correct
    total_num = len(test_loader.dataset)
    acc = total_correct / total_num
    print("test acc:", acc)
predict()
Only the overall test accuracy is used as a simple evaluation metric.