【pytorch实现VGG网络的构建】
1. 构建vgg_block函数
定义 vgg_block 函数,作为 VGG 网络的基础模块。
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG stage: `num_convs` 3x3 conv+ReLU pairs, then a 2x2 max-pool.

    The first convolution maps in_channels -> out_channels; the remaining ones
    keep out_channels. The final pooling layer halves the spatial height/width.
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        channels = out_channels  # after the first conv, channels stay constant
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halve H and W
    return nn.Sequential(*layers)
提示:以下是本篇文章正文内容,下面案例可供参考
2. 定义VGG网络
在第一步中我们构建了 vgg_block 函数,它实现了 VGG 网络中卷积层的堆叠,使网络结构更加清晰、一目了然。
def VGG(conv_arch, fc_features, fc_hidden_neurons=4096):
    """Assemble a VGG network: stacked vgg_block stages plus an FC classifier.

    conv_arch is an iterable of (num_convs, in_channels, out_channels) tuples,
    one per stage. fc_features is the flattened feature size entering the
    first fully connected layer; fc_hidden_neurons sizes the two hidden FC
    layers. The head ends in a 10-way linear output (Fashion-MNIST classes).
    """
    net = nn.Sequential()
    # Feature extractor: one pooled conv stage per conv_arch entry.
    for idx, (num_convs, in_channels, out_channels) in enumerate(conv_arch, start=1):
        net.add_module(f"vgg_block{idx}", vgg_block(num_convs, in_channels, out_channels))
    # Classifier head: flatten, two (Linear -> ReLU -> Dropout) blocks, output.
    classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(fc_features, fc_hidden_neurons),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_neurons, fc_hidden_neurons),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_neurons, 10),
    )
    net.add_module("fc", classifier)
    return net
2.1 初始化VGG网络中特征提取的参数
在《动手学深度学习》这本书中,conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512)) 最开始是这样定义的,但是由于计算过于复杂,所以就变成了下面代码中的 small_conv_arch。
# Flattened feature size entering the classifier: 512 channels on a 7x7 map
# (a 224x224 input halved by five pooling stages).
fc_features = 512 * 7 * 7 # c * w * h
fc_hidden_neurons = 4096 # arbitrary width for the hidden FC layers
# VGG-11 is computationally heavier than AlexNet, so for testing purposes we
# build a narrower network (all channel counts divided by `ratio`) and train
# it on Fashion-MNIST.
ratio = 8
small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio),
(2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]
# The FC sizes are scaled down by the same ratio as the channel counts.
net = VGG(small_conv_arch, fc_features // ratio, fc_hidden_neurons // ratio)
3. 获取数据集
不同于之前博客中写的获取数据集函数,VGG 网络中获取数据集后,对数据集进行了缩放、随机水平翻转和归一化处理,以提高网络训练中的精确率。
def gain_datasets(batch_size):
    """Load Fashion-MNIST and return (train_iter, test_iter) DataLoaders.

    Images are resized to 224x224 (the VGG input size), randomly flipped
    horizontally for augmentation, converted to tensors, and normalized.

    Args:
        batch_size: mini-batch size used by both loaders.
    """
    data_path = '../../../Datasets'
    data_tf = transforms.Compose([
        transforms.Resize(224),            # VGG expects 224x224 inputs
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # NOTE(review): Normalize(0, 0.01) divides pixel values by 0.01
        # (i.e. scales them 100x); typical Fashion-MNIST stats are
        # ~(0.286, 0.353) — confirm this scaling is intentional.
        transforms.Normalize(0, 0.01)
    ])
    mnist_train = mnist.FashionMNIST(data_path, train=True, transform=data_tf, download=True)
    mnist_test = mnist.FashionMNIST(data_path, train=False, transform=data_tf, download=True)
    train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    # Fix: honor the batch_size argument (it was hard-coded to 128) and do not
    # shuffle the test set — evaluation order does not matter and unshuffled
    # iteration is reproducible.
    test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)
    return train_iter, test_iter
4. 其他步骤
构建网络的其他步骤和我的之前的两个博客类似,我就不再详细介绍了,代码没有太多的改动,大家可以自行参考:
5.完整代码
VGG网络整体代码如下:
import torch
from torch import nn
import torch.utils.data as data
from torchvision.datasets import mnist
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
import time
import sys
sys.path.append('../..')  # make project-level helper modules importable
# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#使用函数vgg_block来实现VGG的基础模块
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG stage: `num_convs` 3x3 conv+ReLU pairs, then a 2x2 max-pool.

    The first convolution maps in_channels -> out_channels; the remaining ones
    keep out_channels. The final pooling layer halves the spatial height/width.
    """
    layers = []
    channels = in_channels
    for _ in range(num_convs):
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        channels = out_channels  # after the first conv, channels stay constant
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halve H and W
    return nn.Sequential(*layers)
#定义网络VGG
def VGG(conv_arch, fc_features, fc_hidden_neurons=4096):
    """Assemble a VGG network: stacked vgg_block stages plus an FC classifier.

    conv_arch is an iterable of (num_convs, in_channels, out_channels) tuples,
    one per stage. fc_features is the flattened feature size entering the
    first fully connected layer; fc_hidden_neurons sizes the two hidden FC
    layers. The head ends in a 10-way linear output (Fashion-MNIST classes).
    """
    net = nn.Sequential()
    # Feature extractor: one pooled conv stage per conv_arch entry.
    for idx, (num_convs, in_channels, out_channels) in enumerate(conv_arch, start=1):
        net.add_module(f"vgg_block{idx}", vgg_block(num_convs, in_channels, out_channels))
    # Classifier head: flatten, two (Linear -> ReLU -> Dropout) blocks, output.
    classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(fc_features, fc_hidden_neurons),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_neurons, fc_hidden_neurons),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_neurons, 10),
    )
    net.add_module("fc", classifier)
    return net
# Flattened feature size entering the classifier: 512 channels on a 7x7 map
# (a 224x224 input halved by five pooling stages).
fc_features = 512 * 7 * 7 # c * w * h
fc_hidden_neurons = 4096 # arbitrary width for the hidden FC layers
# VGG-11 is computationally heavier than AlexNet, so for testing purposes we
# build a narrower network (all channel counts divided by `ratio`) and train
# it on Fashion-MNIST.
ratio = 8
small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio),
(2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]
# The FC sizes are scaled down by the same ratio as the channel counts.
net = VGG(small_conv_arch, fc_features // ratio, fc_hidden_neurons // ratio)
#获取数据集
def gain_datasets(batch_size):
    """Load Fashion-MNIST and return (train_iter, test_iter) DataLoaders.

    Images are resized to 224x224 (the VGG input size), randomly flipped
    horizontally for augmentation, converted to tensors, and normalized.

    Args:
        batch_size: mini-batch size used by both loaders.
    """
    data_path = '../../../Datasets'
    data_tf = transforms.Compose([
        transforms.Resize(224),            # VGG expects 224x224 inputs
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # NOTE(review): Normalize(0, 0.01) divides pixel values by 0.01
        # (i.e. scales them 100x); typical Fashion-MNIST stats are
        # ~(0.286, 0.353) — confirm this scaling is intentional.
        transforms.Normalize(0, 0.01)
    ])
    mnist_train = mnist.FashionMNIST(data_path, train=True, transform=data_tf, download=True)
    mnist_test = mnist.FashionMNIST(data_path, train=False, transform=data_tf, download=True)
    train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    # Fix: honor the batch_size argument (it was hard-coded to 128) and do not
    # shuffle the test set — evaluation order does not matter and unshuffled
    # iteration is reproducible.
    test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)
    return train_iter, test_iter
# Mini-batch size for training; build the Fashion-MNIST loaders.
batch_size = 256
train_iter,test_iter = gain_datasets(batch_size)
def evaluate_accuracy(data_iter, net, device=None):
    """Return the classification accuracy of `net` over `data_iter`.

    For nn.Module models, evaluation runs on `device` (defaulting to the
    device of the model's parameters) with eval mode toggled on around each
    batch so dropout is disabled. Plain-function models are called as-is;
    if they accept an `is_training` argument it is passed as False.
    """
    is_module = isinstance(net, nn.Module)
    if device is None and is_module:
        # No device given: use wherever the model's parameters live.
        device = next(iter(net.parameters())).device
    correct, total = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if is_module:
                net.eval()  # disable dropout for evaluation
                preds = net(X.to(device)).argmax(dim=1)
                correct += (preds == y.to(device)).float().sum().cpu().item()
                net.train()  # restore training mode
            elif 'is_training' in net.__code__.co_varnames:
                correct += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
            else:
                correct += (net(X).argmax(dim=1) == y).float().sum().item()
            total += y.shape[0]
    return correct / total
#画图函数
def draw_function(x_vals, y_vals, x_label, y_label, y2_vals=None, y3_vals=None, legend=None):
    """Plot training curves: accuracy on the left axis, loss on a twin axis.

    Args:
        x_vals: shared x coordinates (epoch numbers).
        y_vals: first curve (train accuracy), blue with circle markers.
        x_label, y_label: labels for the primary axis.
        y2_vals: optional second curve (test accuracy), drawn in red.
        y3_vals: optional loss curve, drawn dashed green on a right-hand axis.
        legend: optional list of legend labels.
    """
    fig, ax1 = plt.subplots()
    plt.title('VGG')
    ax1.plot(x_vals, y_vals, marker='o')
    # Fix: the original plotted y2_vals/y3_vals and passed `legend`
    # unconditionally, so calling with the declared defaults (None) raised.
    # Draw the optional elements only when they are supplied.
    if y2_vals is not None:
        ax1.plot(x_vals, y2_vals, color='r', marker='o')
    ax1.set_xlabel(x_label)
    ax1.set_ylabel(y_label)
    if legend is not None:
        plt.legend(legend)
    if y3_vals is not None:
        ax2 = ax1.twinx()  # second y-axis sharing the same x-axis
        ax2.plot(x_vals, y3_vals, linestyle='--', color='g')
        ax2.set_ylabel('Loss')
    plt.show()
# Learning rate and number of training epochs.
lr,num_epoches = 0.001,20
optimizer = torch.optim.Adam(net.parameters(),lr)
# Optional learning-rate scheduling (currently disabled):
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
#训练
def train_VGG(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train `net` with cross-entropy loss, print per-epoch metrics, then plot.

    Args:
        net: model to train; moved onto `device` first.
        train_iter, test_iter: DataLoaders yielding (X, y) batches.
        batch_size: unused here; kept for call-site compatibility.
        optimizer: optimizer updating net.parameters().
        device: torch.device to train on.
        num_epochs: number of passes over the training set.
    """
    net = net.to(device)
    print("training on ", device)
    loss = nn.CrossEntropyLoss()
    loss_list, train_list, test_list = [], [], []
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n, batch_count, startTime = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_loss_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc_sum = evaluate_accuracy(test_iter, net)
        # Fix: `l` is the per-batch *mean* loss, so the running sum must be
        # averaged over batches (batch_count), not over samples (n); dividing
        # by n under-reported the loss by roughly a factor of the batch size.
        epoch_loss = train_loss_sum / batch_count
        loss_list.append(epoch_loss)
        train_list.append(train_acc_sum / n)
        test_list.append(test_acc_sum)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, epoch_loss, train_acc_sum / n, test_acc_sum, time.time() - startTime))
    draw_function(range(1, num_epochs + 1), train_list, 'epochs', 'Accuracy',
                  test_list, loss_list, ['train', 'test', 'loss'])
# Kick off training with the network, loaders, and optimizer configured above.
train_VGG(net,train_iter,test_iter,batch_size,optimizer,device,num_epoches)
6.实现效果
6.1 画图效果
6.2 迭代效果
参考内容
https://tangshusen.me/Dive-into-DL-PyTorch/#/chapter05_CNN/5.7_vgg