用VGG16训练分类模型

系列文章目录

@[TOC](文章目录)

前言

话不多说,直接上代码

一、训练过程

示例:下面的脚本自定义数据集(MyDataset)、搭建带 BatchNorm 的 VGG16 网络,并使用 FocalLoss 进行训练和验证。

import torch
import torch.nn as nn
from torch import  optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset
from Focalloss import FocalLoss

class MyDataset(Dataset):
    """Image-classification dataset driven by a plain-text index file.

    Every non-blank line of the index file holds an image path followed by
    an integer class label, separated by whitespace, e.g.::

        D:\\class\\0001.jpg 0

    Args:
        txt_path: Path of the index file.
        transform: Optional callable applied to the loaded RGB PIL image.
        target_transform: Optional callable applied to the integer label.
    """

    def __init__(self, txt_path, transform=None, target_transform=None):
        imgs = []
        # The context manager closes the file even if a line fails to parse.
        with open(txt_path, 'r') as fh:
            for line in fh:
                words = line.split()
                if not words:
                    # Tolerate blank lines (typically a trailing newline).
                    continue
                imgs.append((words[0], int(words[1])))
        # Assigned after the loop so that an empty index file still yields a
        # fully initialised (zero-length) dataset.
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.imgs)


# Hyper-parameters.
batch_size = 16         # training mini-batch size (the value the loader was already using)
learning_rate = 1e-3    # SGD learning rate
num_epoches = 100       # number of passes over the training set

classes = ('circle','triangle','square','basketball','volleyball','football')

root=r'D:\\class'   # folder holding the images and the label (.txt) index files

# Channel statistics shared by the training and the evaluation pipeline.
normalize = transforms.Normalize(mean = [0.22358292 ,0.22358292, 0.22358292],
                                 std  =  [0.15551882, 0.15551882, 0.15551882])

# Training pipeline: random crop + flip augmentation, output is 3x128x128.
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(128),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
    ])

# Evaluation pipeline: deterministic resize to the same 128x128 input size so
# that validation metrics are reproducible from epoch to epoch.
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    normalize,
    ])


# Alternative data source: the CIFAR-10 10-class dataset can be downloaded
# and used instead of the custom dataset below.
#train_dataset = datasets.CIFAR10('./data', train=True, transform=transforms.ToTensor(), download=True)
#train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
#test_dataset = datasets.CIFAR10('./data', train=False, transform=transforms.ToTensor(), download=True)
#test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Images live in the `class` folder. Each line of train.txt / valid.txt is
# "<absolute image path> <class index>", e.g. "D:\class\0001.jpg 0".
train_dataset = MyDataset(txt_path=root + r'\train.txt', transform=data_transform)
test_dataset = MyDataset(txt_path=root + r'\valid.txt', transform=eval_transform)
train_loader = DataLoader(dataset = train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=2)


# Network definition.
class VGG16(nn.Module):
    """VGG-16 with batch normalisation for 3-channel 128x128 images.

    ``features`` holds 13 Conv-BatchNorm-ReLU stages interleaved with five
    2x2 max-pools; ``classifier`` holds three fully connected layers. The
    first linear layer expects 8192 = 512 * 4 * 4 inputs, which is what the
    feature extractor produces for a 128x128 input (128 / 2**5 = 4).

    Args:
        num_classes: Size of the output layer (number of classes).
    """

    # Output channels of each conv stage; 'M' marks a 2x2 max-pool.
    _CFG = (64, 64, 'M',
            128, 128, 'M',
            256, 256, 256, 'M',
            512, 512, 512, 'M',
            512, 512, 512, 'M')

    def __init__(self, num_classes=6):
        super(VGG16, self).__init__()
        layers = []
        in_channels = 3
        for v in self._CFG:
            if v == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers += [
                    nn.Conv2d(in_channels, v, kernel_size=3, padding=1),
                    nn.BatchNorm2d(v),
                    nn.ReLU(True),
                ]
                in_channels = v
        # A 1x1 / stride-1 average pool leaves the values unchanged; it is
        # kept so the module indices inside `features` stay the same.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            #14
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            #15
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            #16 -- output size follows `num_classes` instead of a fixed 6
            nn.Linear(4096, num_classes),
            )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        out = self.features(x)
        # Flatten (N, C, H, W) -> (N, C*H*W) for the fully connected layers.
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out


# Create the model instance and move it to the GPU when one is available.
model = VGG16(num_classes=len(classes))

# To resume from a checkpoint, load it here, e.g.:
#model.load_state_dict(torch.load("/home/jc/CenterNet/src/vgg16.pth"))
use_gpu = torch.cuda.is_available()  # True when CUDA acceleration can be used
if use_gpu:
    model = model.cuda()


# Loss function and optimizer.
criterion = FocalLoss(class_num = len(classes))
optimizer = optim.SGD(model.parameters(), lr=learning_rate)


# Training: one pass over train_loader per epoch, followed by an evaluation
# pass over test_loader. Loss and accuracy are accumulated per sample and
# divided by the dataset size when reported.
for epoch in range(num_epoches):
    print('*' * 25, 'epoch {}'.format(epoch + 1), '*' * 25)
    running_loss = 0.0
    running_acc = 0.0
    model.train()
    # Wrapping the loader itself lets tqdm read its length and show progress.
    for img, label in tqdm(train_loader):
        if use_gpu:
            img = img.cuda()
            label = label.cuda()
        # Forward pass.
        out = model(img)
        loss = criterion(inputs = out, targets = label)
        # The loss is a batch mean; weight it by the batch size.
        running_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)     # index of the highest score = predicted class
        running_acc += (pred == label).sum().item()
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
        epoch + 1, running_loss / (len(train_dataset)), running_acc / (len(train_dataset))))

    model.eval()    # switch BatchNorm / Dropout to inference behaviour
    eval_loss = 0.0
    eval_acc = 0
    # The whole evaluation pass runs without autograd bookkeeping, not just
    # the tensor transfers, so no graph is built for the forward passes.
    with torch.no_grad():
        for img, label in test_loader:
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            eval_acc += (pred == label).sum().item()
    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
        test_dataset)), eval_acc / (len(test_dataset))))
    print()

# Save the trained weights.
torch.save(model.state_dict(), './vgg16.pth')


二、FocalLoss

代码如下(示例):

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class FocalLoss(nn.Module):
    r"""
        This criterion is an implementation of Focal Loss, which is proposed in
        Focal Loss for Dense Object Detection.
            Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])
        The losses are averaged across observations for each minibatch.
        Args:
            class_num(int): number of classes; used to build the default alpha.
            alpha(Tensor or sequence of float): per-class weighting factors, one
                                   value per class, given either as shape (class_num,)
                                   or (class_num, 1). Defaults to all ones.
            gamma(float, double) : gamma > 0; reduces the relative loss for well-classified examples (p > .5),
                                   putting more focus on hard, misclassified examples
            size_average(bool): By default, the losses are averaged over observations for each minibatch.
                                However, if the field size_average is set to False, the losses are
                                instead summed for each minibatch.
    """
    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            alpha = torch.ones(class_num, 1)
        elif not torch.is_tensor(alpha):
            # Accept plain Python sequences as well as tensors.
            alpha = torch.tensor(alpha, dtype=torch.float32)
        self.alpha = alpha
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: raw class scores; indexed as (N, C) by the softmax over dim 1.
            targets: integer class indices, one per row of ``inputs``.

        Returns:
            A scalar tensor: mean of the per-sample losses when
            ``size_average`` is true, their sum otherwise.
        """
        ids = targets.view(-1, 1)
        # log_softmax stays finite where softmax(...).log() would underflow
        # to log(0) = -inf for confidently misclassified samples.
        log_p = F.log_softmax(inputs, dim=1).gather(1, ids)   # (N, 1) log-prob of the true class
        probs = log_p.exp()
        # Keep alpha on the same device as the inputs.
        if self.alpha.device != inputs.device:
            self.alpha = self.alpha.to(inputs.device)
        # Flatten alpha first so a 1-D alpha and a (C, 1) alpha both give an
        # (N, 1) column instead of broadcasting to an (N, N) matrix.
        alpha = self.alpha.view(-1)[ids.view(-1)].view(-1, 1)
        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * log_p
        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss


三、预测过程

代码如下(示例):

import torch
import torch.nn as nn
from torch import  optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import torchvision.models as models
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset
import os
from glob import glob


# Inference pipeline. It is deterministic on purpose: random crops and flips
# are training-time augmentation and would make the prediction for the same
# image change from run to run. Output is a normalised 3x128x128 tensor.
data_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.22358292 ,0.22358292, 0.22358292],
                         std  =  [0.15551882, 0.15551882, 0.15551882]),
    ])

# Class names, indexed by the network's output position.
classes = ('circle','triangle','square','basketball','volleyball','football')

# Folder containing the images to classify.
demo_fold = 'D:/Pic'


class VGG16(nn.Module):
    """VGG-16 with batch normalisation for 3-channel 128x128 images.

    ``features`` holds 13 Conv-BatchNorm-ReLU stages interleaved with five
    2x2 max-pools; ``classifier`` holds three fully connected layers. The
    first linear layer expects 8192 = 512 * 4 * 4 inputs, which is what the
    feature extractor produces for a 128x128 input (128 / 2**5 = 4).

    Args:
        num_classes: Size of the output layer (number of classes).
    """

    # Output channels of each conv stage; 'M' marks a 2x2 max-pool.
    _CFG = (64, 64, 'M',
            128, 128, 'M',
            256, 256, 256, 'M',
            512, 512, 512, 'M',
            512, 512, 512, 'M')

    def __init__(self, num_classes=6):
        super(VGG16, self).__init__()
        layers = []
        in_channels = 3
        for v in self._CFG:
            if v == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers += [
                    nn.Conv2d(in_channels, v, kernel_size=3, padding=1),
                    nn.BatchNorm2d(v),
                    nn.ReLU(True),
                ]
                in_channels = v
        # A 1x1 / stride-1 average pool leaves the values unchanged; it is
        # kept so the module indices inside `features` stay the same.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            #14
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            #15
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            #16 -- output size follows `num_classes` instead of a fixed 6
            nn.Linear(4096, num_classes),
            )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        out = self.features(x)
        # Flatten (N, C, H, W) -> (N, C*H*W) for the fully connected layers.
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out
    
# Models already loaded by fenlei(), keyed by weights path, so repeated calls
# do not rebuild the network and re-read the checkpoint from disk.
_FENLEI_MODELS = {}


def fenlei(inputs, model=None, weights_path="D:/xzz/研一/vgg16/vgg16.pth"):
    """Classify a single image and return its class name.

    Args:
        inputs: Image accepted by ``data_transform`` (the script passes RGB
            PIL images).
        model: Optional, already-loaded network to use. When omitted, a
            ``VGG16`` is built from ``weights_path`` on first use and reused
            on later calls.
        weights_path: Checkpoint file holding the model's ``state_dict``.

    Returns:
        The entry of ``classes`` with the highest predicted score.
    """
    if model is None:
        model = _FENLEI_MODELS.get(weights_path)
        if model is None:
            model = VGG16()
            # map_location lets a checkpoint saved from a CUDA model load on
            # a machine without a GPU.
            model.load_state_dict(torch.load(weights_path, map_location='cpu'))
            _FENLEI_MODELS[weights_path] = model
    model.eval()
    device = next(model.parameters()).device
    with torch.no_grad():
        # Add the batch dimension: (3, H, W) -> (1, 3, H, W).
        img = data_transform(inputs).unsqueeze(0).to(device)
        out = model(img)
    _, pred = torch.max(out, 1)
    pred = classes[pred.item()]
    return pred



# Build the network and load the trained weights once. map_location lets a
# checkpoint saved from a CUDA model load on a machine without a GPU.
model =  VGG16()
model.load_state_dict(torch.load("D:/xzz/研一/vgg16/vgg16.pth", map_location='cpu'))
model.eval()    # inference behaviour for BatchNorm / Dropout

# Classify every file in demo_fold, in sorted order for reproducible output.
fList = sorted(os.listdir(demo_fold))
with torch.no_grad():   # no gradients are needed for prediction
    for i in fList:
        path = os.path.join(demo_fold, i)
        if not os.path.isfile(path):
            # Sub-directories cannot be opened as images.
            continue
        img = Image.open(path).convert('RGB')
        # Add the batch dimension: (3, H, W) -> (1, 3, H, W).
        img = data_transform(img).unsqueeze(0)
        out = model(img)
        _, pred = torch.max(out, 1)
        pred = classes[pred.item()]
        print(pred)

说明:上面的脚本会遍历 demo_fold 目录下的图片,逐张输出模型预测的类别名称。


总结

后续还会更新一些ap值计算的test.py文件,敬请关注

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值