Kaggle猫狗大战的数据集下载链接:https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition
这是VGG的结构,红色框的则是VGG11。
一,写VGG代码时,首先定义一个 vgg_block(n,in,out)方法,用来构建VGG中每个block里的卷积层和池化层:
n是这个block中卷积层的数目,in是输入的通道数,out是输出的通道数
有了block以后,我们还需要一个方法把形成的block叠在一起,我们定义这个方法叫vgg_stack:
def vgg_stack(num_convs, channels):
    """Stack several VGG blocks into one sequential network.

    Example:
        vgg_net = vgg_stack((1, 1, 2, 2, 2),
                            ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))

    Args:
        num_convs: number of conv layers for each block, e.g. (1, 1, 2, 2, 2).
        channels: one (in_channels, out_channels) pair per block.

    Returns:
        nn.Sequential holding one vgg_block per entry, in order.

    Raises:
        ValueError: if num_convs and channels describe a different number
            of blocks (zip would otherwise drop the extras silently).
    """
    num_convs = tuple(num_convs)
    channels = tuple(channels)
    if len(num_convs) != len(channels):
        raise ValueError(
            "num_convs has %d entries but channels has %d"
            % (len(num_convs), len(channels))
        )
    blocks = [
        vgg_block(n, in_c, out_c)
        for n, (in_c, out_c) in zip(num_convs, channels)
    ]
    return nn.Sequential(*blocks)
右边的注释:
vgg_net = vgg_stack((1, 1, 2, 2, 2), ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))
里,(1, 1, 2, 2, 2)表示五个block里各自的卷积层数目,((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))表示每个block中卷积层的输入/输出通道数,如(3,64)表示这个block的输入通道数是3,输出通道数是64。vgg_stack方法返回的是vgg11的卷积(特征提取)部分,全连接分类层则在下面的vgg类中定义。
接着定义一个vgg类,用vgg_stack构建出的网络作为特征提取部分(self.feature),后面再接上全连接分类层:
# vgg class
class vgg(nn.Module):
    """VGG11-style classifier: stacked conv blocks followed by a small MLP head.

    The head is sized for 224x224 inputs: each of the five blocks ends in a
    2x2 max-pool, so the feature map is 512 x 7 x 7 = 25088 values.

    Args:
        num_classes: number of output logits (2 for cat vs. dog).
    """

    # VGG11 layout: conv layers per block and (in, out) channels per block.
    NUM_CONVS = (1, 1, 2, 2, 2)
    CHANNELS = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))

    def __init__(self, num_classes=2):
        super().__init__()
        # Build a fresh stack per instance; reusing one module-level network
        # would make every vgg() instance share the same weights.
        self.feature = vgg_stack(self.NUM_CONVS, self.CHANNELS)
        self.fc = nn.Sequential(
            nn.Linear(512 * 7 * 7, 64),
            nn.ReLU(True),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten everything except the batch axis
        return self.fc(x)
最后:
net = vgg() # instantiating the class gives you the VGG network
那么构建vgg网络完整的pytorch代码是:
from torch import nn
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` 3x3 convolutions then a 2x2 max-pool.

    Args:
        num_convs: number of conv layers in the block (at least 1).
        in_channels: channels of the block input.
        out_channels: channels produced by every conv in the block.

    Returns:
        nn.Sequential containing the conv/activation layers and the pool.

    Raises:
        ValueError: if num_convs is smaller than 1 (the block always
            contains the first conv, so 0 would be silently ignored).
    """
    if num_convs < 1:
        raise ValueError("num_convs must be >= 1, got %r" % (num_convs,))

    # The first conv maps in_channels -> out_channels.
    # NOTE(review): unlike the following convs it has no BatchNorm; kept
    # as-is so the layer layout of the network does not change.
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(True),
    ]
    # Remaining convs keep the channel count: conv -> batch norm -> ReLU.
    for _ in range(num_convs - 1):
        layers.extend([
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        ])
    layers.append(nn.MaxPool2d(2, 2))  # pooling layer halves height and width
    return nn.Sequential(*layers)
# Stack several vgg blocks on top of each other.
def vgg_stack(num_convs, channels):
    """Stack several VGG blocks into one sequential network.

    Example:
        vgg_net = vgg_stack((1, 1, 2, 2, 2),
                            ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))

    Args:
        num_convs: number of conv layers for each block, e.g. (1, 1, 2, 2, 2).
        channels: one (in_channels, out_channels) pair per block.

    Returns:
        nn.Sequential holding one vgg_block per entry, in order.

    Raises:
        ValueError: if num_convs and channels describe a different number
            of blocks (zip would otherwise drop the extras silently).
    """
    num_convs = tuple(num_convs)
    channels = tuple(channels)
    if len(num_convs) != len(channels):
        raise ValueError(
            "num_convs has %d entries but channels has %d"
            % (len(num_convs), len(channels))
        )
    blocks = [
        vgg_block(n, in_c, out_c)
        for n, (in_c, out_c) in zip(num_convs, channels)
    ]
    return nn.Sequential(*blocks)
# Module-level conv stack: five blocks with 1, 1, 2, 2, 2 conv layers, going from 3 input channels up to 512.
vgg_net = vgg_stack((1, 1, 2, 2, 2), ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))
# vgg class
class vgg(nn.Module):
    """VGG11-style classifier: stacked conv blocks followed by a small MLP head.

    The head is sized for 224x224 inputs: each of the five blocks ends in a
    2x2 max-pool, so the feature map is 512 x 7 x 7 = 25088 values.

    Args:
        num_classes: number of output logits (2 for cat vs. dog).
    """

    # VGG11 layout: conv layers per block and (in, out) channels per block.
    NUM_CONVS = (1, 1, 2, 2, 2)
    CHANNELS = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))

    def __init__(self, num_classes=2):
        super().__init__()
        # Build a fresh stack per instance; reusing one module-level network
        # would make every vgg() instance share the same weights.
        self.feature = vgg_stack(self.NUM_CONVS, self.CHANNELS)
        self.fc = nn.Sequential(
            nn.Linear(512 * 7 * 7, 64),
            nn.ReLU(True),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten everything except the batch axis
        return self.fc(x)
def get_vgg():
    """Create and return a new vgg network instance."""
    return vgg()
至此,VGG的网络就构建完毕。
二,基于VGG11的猫狗大战训练代码:
import os
import random
from PIL import Image
import torch.utils.data as data
import numpy as np
import torchvision.transforms as transforms
import torch
from vgg import get_vgg
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import *
import torchvision.transforms as transforms
import numpy as np
import os
# Dataset reader for the Dogs-vs-Cats image folders
class DogCat(data.Dataset):
    """Dataset over a flat folder of Dogs-vs-Cats images.

    Expected file names:
        test:  data/test1/8973.jpg          (label = image id)
        train: data/train/cat.10004.jpg     (label = 1 for dog, 0 for cat)

    Args:
        root: directory that directly contains the image files.
        transform: optional callable applied to the loaded RGB PIL image.
        train: when not in test mode, True selects the first 70% of the
            shuffled files and False selects the remaining 30%.
        test: if True, use every file in numeric order without splitting.
        seed: seed for the train/validation shuffle. With the same seed,
            a train=True and a train=False instance over the same folder
            are disjoint and together cover every file. Pass None for a
            different, non-reproducible split on every construction.
    """

    def __init__(self, root, transform=None, train=True, test=False, seed=0):
        self.test = test
        self.train = train
        self.transform = transform
        imgs = [os.path.join(root, name) for name in os.listdir(root)]
        # Sort on (numeric id, file name) so the order, and therefore the
        # seeded shuffle below, does not depend on os.listdir ordering.
        imgs.sort(key=self._sort_key)
        if self.test:
            self.imgs = imgs
            return
        # A private RNG keeps the split reproducible without touching the
        # global random state.
        random.Random(seed).shuffle(imgs)
        split = int(0.7 * len(imgs))
        self.imgs = imgs[:split] if self.train else imgs[split:]

    @staticmethod
    def _image_id(path):
        """Return the number just before the extension ('cat.12.jpg' -> 12)."""
        # basename handles the native path separator; splitting on '/'
        # breaks for paths produced by os.path.join on Windows.
        return int(os.path.basename(path).split('.')[-2])

    @classmethod
    def _sort_key(cls, path):
        """Deterministic sort key: numeric id first, file name as tie-break."""
        return cls._image_id(path), os.path.basename(path)

    def _label_for(self, path):
        """Return the image id in test mode, else 1 for dog and 0 for cat."""
        if self.test:
            return self._image_id(path)
        # Look only at the file name so a 'dog' in a directory name
        # cannot affect the label.
        return 1 if 'dog' in os.path.basename(path) else 0

    # Required for indexing / iteration by DataLoader
    def __getitem__(self, index):
        """Return (image, label) for the file at position `index`."""
        img_path = self.imgs[index]
        label = self._label_for(img_path)
        # Close the file promptly and force 3 channels so grayscale or
        # RGBA files still match the 3-channel normalisation.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Return the number of images in this split."""
        return len(self.imgs)
# Preprocessing / augmentation for the training set
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),       # first resize the image to 256x256
    transforms.RandomCrop((224, 224)),   # then take a random 224x224 crop
    transforms.RandomHorizontalFlip(),   # random left-right mirror
    transforms.ToTensor(),
    # Normalise with the ImageNet mean / std. The blue-channel std is 0.225
    # (the original 0.2225 was a typo and differed from the validation and
    # prediction transforms).
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
# Preprocessing for the validation set: deterministic resize, no augmentation
transform_val = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use CUDA when it is available
trainset = DogCat('F:/数据集/Cats_Dogs/train/all', transform=transform_train)
# train=False selects the held-out 30% slice; with the default train=True the
# "validation" set was just another training-side 70% split.
# NOTE(review): the two datasets are only disjoint if DogCat shuffles the file
# list identically in both instances -- confirm the shuffle is deterministic.
valset = DogCat('F:/数据集/Cats_Dogs/train/all', transform=transform_val, train=False)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=0)
valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=False, num_workers=0)
model = get_vgg()
model = model.to(device)
# Training hyper-parameters: SGD with momentum and weight decay.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
# StepLR's gamma is left at its library default; step_size=3 changes the
# learning rate every 3 scheduler steps.
scheduler = StepLR(optimizer, step_size=3)
criterion = nn.CrossEntropyLoss()
def get_acc(output, label):
    """Return the fraction of rows in `output` whose arg-max equals `label`.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        label: 1-D tensor of target class indices, one per sample.

    Returns:
        Accuracy as a Python float in [0, 1]; 0.0 for an empty batch
        (instead of raising ZeroDivisionError).
    """
    total = output.shape[0]
    if total == 0:
        return 0.0
    pred_label = output.argmax(dim=1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / total
def train(epoch):
    """Run one training epoch over `trainloader` and log per-batch metrics.

    Uses the module-level `model`, `optimizer`, `criterion`, `scheduler`
    and `device`.

    Args:
        epoch: epoch number, used only in the log output.
    """
    print('\nEpoch: %d' % epoch)
    model.train()
    num_batches = len(trainloader)
    for batch_idx, (img, label) in enumerate(trainloader):
        # Follow the selected device instead of a hard-coded .cuda(), so the
        # CPU fallback chosen at start-up actually works. The deprecated
        # Variable wrapper is not needed: tensors track gradients directly.
        image = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        out = model(image)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        train_acc = get_acc(out, label)
        print("Epoch:%d [%d|%d] loss:%f acc:%f" % (epoch, batch_idx, num_batches, loss.item(), train_acc))
    # Advance the LR schedule once per epoch, after the optimizer updates;
    # stepping it first skips the initial learning-rate value.
    scheduler.step()
def val(epoch):
    """Evaluate the module-level `model` on `valloader` and print accuracy.

    Args:
        epoch: epoch number, used only in the log output.
    """
    print("\nValidation Epoch: %d" % epoch)
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for img, label in valloader:
            # Use the selected device rather than a hard-coded .cuda().
            image = img.to(device)
            label = label.to(device)
            out = model(image)
            predicted = out.argmax(dim=1)
            total += image.size(0)
            # .item() keeps `correct` a plain int, so an empty loader
            # no longer fails on int.numpy().
            correct += (predicted == label).sum().item()
    accuracy = correct / total if total else 0.0
    print("Acc: %f " % accuracy)
# Alternate one training pass and one validation pass for 21 epochs.
for epoch in range(21):
    train(epoch)
    val(epoch)

# Save the whole model object (not just its state dict) once training is done.
torch.save(model, 'modelcatdog.pth')
训练效果:
训练完成后会生成一个名为 modelcatdog.pth 的模型文件。
三,预测代码
import torch
import cv2
import torch.nn.functional as F
from vgg import vgg ##重要,虽然显示灰色(即在次代码中没用到),但若没有引入这个模型代码,加载模型时会找不到模型
from torchvision import datasets, transforms
from PIL import Image
classes = ('cat', 'dog')  # index = training label: 0 is cat, 1 is dog

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # SECURITY: torch.load unpickles the file; only load checkpoints you trust.
    # map_location lets a model saved on a GPU be loaded on a CPU-only machine.
    # NOTE(review): recent PyTorch releases may need weights_only=False to
    # load a fully pickled model -- confirm against the installed version.
    model = torch.load('modelcatdog.pth', map_location=device)  # load the model
    model = model.to(device)
    model.eval()  # switch the model to inference mode

    img = cv2.imread("dog.jpg")  # read the image to classify
    if img is None:
        # cv2.imread returns None instead of raising when the read fails.
        raise FileNotFoundError("could not read image: dog.jpg")
    # OpenCV loads BGR; convert to RGB before handing the image to PIL.
    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    img = trans(img)
    img = img.to(device)
    # Add a batch axis: the model expects [batch, channels, height, width],
    # while a single image is only [channels, height, width].
    img = img.unsqueeze(0)

    with torch.no_grad():  # no gradients are needed for prediction
        output = model(img)
        prob = F.softmax(output, dim=1)  # probabilities of the 2 classes
    print(prob)
    value, predicted = torch.max(output.data, 1)
    print(predicted.item())
    print(value)
    pred_class = classes[predicted.item()]
    print(pred_class)
预测效果:
先找一张有小狗的图片: