VGG16的特点:
VGGNet使用了更深的结构:AlexNet只有8层网络,而VGGNet有16层;它不再使用大的卷积核,只使用3*3卷积核和2*2的池化层
之所以使用小的滤波器,是因为层叠很多小的滤波器的感受野和一个大的滤波器的感受野是相同的,还能减少参数,同时得到更深的网络结构。其实它只是不断地对网络层进行叠加,并没有太多的创新,但增加深度确实可以在一定程度上改善模型的效果。
代码如下
- 导入必要的包
-
import torch from torch.autograd import Variable import numpy as np import matplotlib.pyplot as plt from torch import nn,optim from torch.utils.data import DataLoader from torchvision import datasets,transforms
- 定义模型结构
-
class VGG(nn.Module): def __init__(self,num_classes): super(VGG,self).__init__() self.features=nn.Sequential( nn.Conv2d(3,64,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(64,64,kernel_size=3,padding=1), nn.ReLU(True), nn.MaxPool2d(kernel_size=2,stride=2), nn.Conv2d(64,128,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(128,128,kernel_size=3,padding=1), nn.ReLU(True), nn.MaxPool2d(kernel_size=2,stride=2), nn.Conv2d(128,256,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(256,256,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(256,256,kernel_size=3,padding=1), nn.ReLU(True), nn.MaxPool2d(kernel_size=2,stride=2), nn.Conv2d(256,512,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(512,512,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(512,512,kernel_size=3,padding=1), nn.ReLU(True), nn.MaxPool2d(kernel_size=2,stride=2), nn.Conv2d(512,512,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(512,512,kernel_size=3,padding=1), nn.ReLU(True), nn.Conv2d(512,512,kernel_size=3,padding=1), nn.ReLU(True), nn.MaxPool2d(kernel_size=2,stride=2),) self.classifier=nn.Sequential( nn.Linear(512,4096), nn.ReLU(True), nn.Dropout(), nn.Linear(4096,4096), nn.ReLU(True), nn.Dropout(), nn.Linear(4096,num_classes),) def forward(self,x): x=self.features(x) x=x.view(x.size(0),-1) x=self.classifier(x) return x
- 训练网络
-
# --- Train VGG-16 on CIFAR-10 ---
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Hyperparameters
batch_size = 128
learning_rate = 1e-2
n_epochs = 50

# Preprocessing: random crop + horizontal flip for augmentation, then
# normalization with the standard CIFAR-10 per-channel mean/std.
data_tf = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Datasets and loaders (training set is downloaded on first run).
train_dataset = datasets.CIFAR10(root='./data1', train=True, transform=data_tf, download=True)
test_dataset = datasets.CIFAR10(root='./data1', train=False, transform=data_tf)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model, loss, optimizer.  `.to(device)` replaces the deprecated
# Variable + manual .cuda() pattern of the original.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VGG(10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_epochs):
    # BUG FIX: the original executed `epoch += 1` inside the *batch* loop,
    # mutating the outer loop variable once per batch, which corrupted the
    # epoch counter and made the `epoch % 50` progress prints fire at
    # essentially random batches.  The counter is now left untouched and
    # per-epoch statistics are reported after each full pass.
    total = 0
    running_loss = 0.0
    running_correct = 0
    print("epoch {}/{}".format(epoch, n_epochs))
    print("-" * 100)
    for img, label in train_loader:
        img = img.to(device)
        label = label.to(device)
        out = model(img)                 # forward pass -> logits
        loss = criterion(out, label)     # cross-entropy loss
        optimizer.zero_grad()            # clear accumulated gradients
        loss.backward()                  # backprop
        optimizer.step()                 # parameter update
        running_loss += loss.item()
        # Track training accuracy for this epoch.  `.item()` converts the
        # 0-dim tensor to a Python int so the accumulator stays on the CPU.
        _, predicted = torch.max(out.data, 1)
        total += label.size(0)
        running_correct += (predicted == label).sum().item()
    print('epoch:{},loss:{:.4f}'.format(epoch, running_loss / len(train_loader)))
    print('第%d个epoch的识别准确率为:%d%%' % (epoch + 1, (100 * running_correct /
total)))