系列文章目录
@[TOC](文章目录)

前言
话不多说,直接上代码
一、训练过程
示例:下面的脚本使用 PyTorch 训练一个 VGG16 六分类模型,损失函数为 FocalLoss。
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset
from Focalloss import FocalLoss
class MyDataset(Dataset):
    """Image-classification dataset driven by a plain-text index file.

    Every non-blank line of the index file is split on whitespace: the first
    token is used as the image path and the second is parsed as the integer
    class label (for example ``path/to/0001.jpg 0``).

    Args:
        txt_path: Path of the index file.
        transform: Optional callable applied to the loaded RGB image.
        target_transform: Optional callable applied to the integer label.
    """

    def __init__(self, txt_path, transform=None, target_transform=None):
        samples = []
        # The context manager closes the index file even when a line fails
        # to parse.
        with open(txt_path, 'r') as index_file:
            for line in index_file:
                words = line.split()
                if not words:
                    # Blank / whitespace-only lines (e.g. a trailing newline)
                    # do not describe a sample.
                    continue
                samples.append((words[0], int(words[1])))
        self.imgs = samples
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.imgs)
# Hyper-parameters.
# batch_size holds the value the training loader actually used (16); the
# previously declared value was never read.
batch_size = 16  # mini-batch size of the training loader
learning_rate = 1e-3  # learning rate
num_epoches = 100  # number of passes over the training set
classes = ('circle','triangle','square','basketball','volleyball','football')
root=r'D:\\class'  # folder holding the images and the label (.txt) files

# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(mean = [0.22358292 ,0.22358292, 0.22358292],
                                 std = [0.15551882, 0.15551882, 0.15551882])

# Training pipeline: random crop + horizontal flip as data augmentation.
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(128),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: no random augmentation, so the reported loss/accuracy
# is reproducible from one epoch to the next.
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    normalize,
])

# Disabled alternative: CIFAR-10 loaders.
#train_dataset = datasets.CIFAR10('./data', train=True, transform=transforms.ToTensor(), download=True)
#train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
#test_dataset = datasets.CIFAR10('./data', train=False, transform=transforms.ToTensor(), download=True)
#test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Images live in the "class" folder; each line of train.txt / valid.txt is
# "<absolute image path> <class index>", e.g. D:\class\0001.jpg 0
train_dataset = MyDataset(txt_path=r'D:\\class\train.txt', transform=data_transform)
test_dataset = MyDataset(txt_path=r'D:\\class\valid.txt', transform=eval_transform)
train_loader = DataLoader(dataset = train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=2)
# Network definition.
class VGG16(nn.Module):
    """VGG-16 style classifier with batch normalisation after every conv.

    The feature extractor is thirteen 3x3 convolutions (padding 1), each
    followed by BatchNorm and ReLU, with five 2x2 max-pooling steps. The
    classifier's first layer takes 8192 inputs, which matches 3x128x128
    images (512 channels x 4 x 4 after the five poolings).

    Args:
        num_classes: Size of the final linear layer's output.
    """

    # Output channels of each 3x3 convolution; 'M' marks a 2x2 max-pool.
    _LAYOUT = (64, 64, 'M',
               128, 128, 'M',
               256, 256, 256, 'M',
               512, 512, 512, 'M',
               512, 512, 512, 'M')

    def __init__(self, num_classes=6):
        super(VGG16, self).__init__()
        layers = []
        in_channels = 3
        # Modules are appended in the same order as the hand-written stack,
        # so state_dict keys (features.0, features.1, ...) are unchanged and
        # existing checkpoints still load.
        for item in self._LAYOUT:
            if item == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.append(nn.Conv2d(in_channels, item, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(item))
            layers.append(nn.ReLU(True))
            in_channels = item
        # A 1x1 / stride-1 average pool leaves the tensor unchanged; it is kept
        # so the module list matches previously trained models.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # Honour num_classes instead of a hard-coded 6.
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out
# Create the model and move it to the GPU when one is available.
# The class count is derived from `classes` so the network head and the loss
# cannot drift apart from the label list.
model = VGG16(num_classes=len(classes))
#model.load_state_dict(torch.load("/home/jc/CenterNet/src/vgg16.pth"))
use_gpu = torch.cuda.is_available()  # whether CUDA acceleration is available
if use_gpu:
    model = model.cuda()

# Loss function and optimiser.
criterion = FocalLoss(class_num = len(classes))
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# Train the model; after every epoch report training metrics and evaluate on
# the validation loader.
for epoch in range(num_epoches):
    print('*' * 25, 'epoch {}'.format(epoch + 1), '*' * 25)
    running_loss = 0.0
    running_acc = 0.0
    model.train()
    # tqdm wraps the loader directly so it can read len(train_loader) and show
    # a real progress bar (wrapping enumerate() hides the length).
    for img, label in tqdm(train_loader):
        if use_gpu:
            img = img.cuda()
            label = label.cuda()
        # Forward pass.
        out = model(img)
        loss = criterion(inputs = out, targets = label)
        # loss is a batch mean; weight it by the batch size so the epoch
        # figure below is a per-sample average.
        running_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)  # index of the highest logit = predicted class
        running_acc += (pred == label).sum().item()
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
        epoch + 1, running_loss / (len(train_dataset)), running_acc / (len(train_dataset))))

    model.eval()  # evaluation mode
    eval_loss = 0
    eval_acc = 0
    # The whole evaluation pass, including the forward call, runs without
    # autograd bookkeeping.
    with torch.no_grad():
        for img, label in test_loader:
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            eval_acc += (pred == label).sum().item()
    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
        test_dataset)), eval_acc / (len(test_dataset))))
    print()

# Save the trained weights.
torch.save(model.state_dict(), './vgg16.pth')
二、FocalLoss
代码如下(示例):
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class FocalLoss(nn.Module):
    r"""Multi-class focal loss, as proposed in
    "Focal Loss for Dense Object Detection".

        Loss(x, class) = - \alpha[class] (1 - softmax(x)[class])^gamma \log(softmax(x)[class])

    Args:
        class_num (int): number of classes; sizes the default ``alpha``.
        alpha (Tensor, sequence or number, optional): per-class weighting
            factor. Accepts shape ``(class_num,)`` or ``(class_num, 1)``; a
            single value is applied to every class. Defaults to all ones.
        gamma (float): gamma > 0 reduces the relative loss of well-classified
            examples, putting more focus on hard, misclassified ones.
        size_average (bool): if True (default) the per-sample losses are
            averaged over the minibatch, otherwise they are summed.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            self.alpha = torch.ones(class_num, 1)
        elif isinstance(alpha, torch.Tensor):
            self.alpha = alpha
        else:
            # Lists / tuples / plain numbers are converted to a float tensor.
            self.alpha = torch.as_tensor(alpha, dtype=torch.float)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: Logits of shape ``(N, C)``.
            targets: Class indices, ``N`` elements in total.

        Returns:
            A scalar tensor: mean (or sum) of the per-sample losses.
        """
        target_idx = targets.reshape(-1, 1).long()
        # log_softmax stays finite where softmax(...).log() would underflow
        # to log(0) = -inf for very unlikely target classes.
        log_p = F.log_softmax(inputs, dim=1).gather(1, target_idx)  # (N, 1)
        probs = log_p.exp()

        # Follow the logits to whatever device they live on.
        if self.alpha.device != inputs.device:
            self.alpha = self.alpha.to(inputs.device)
        alpha = self.alpha.reshape(-1)
        if alpha.numel() > 1:
            # Always a column vector, so the product below stays (N, 1) even
            # when alpha was supplied as a 1-D tensor (a 1-D alpha would
            # otherwise broadcast against (N, 1) into an (N, N) matrix).
            alpha = alpha[target_idx.reshape(-1)].reshape(-1, 1)

        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * log_p
        if self.size_average:
            return batch_loss.mean()
        return batch_loss.sum()
三、预测过程
代码如下(示例):
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import torchvision.models as models
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset
import os
from glob import glob
# Inference-time preprocessing. It is deterministic (plain resize to 128x128,
# no random crop or flip) so the same image always yields the same prediction.
data_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.22358292 ,0.22358292, 0.22358292],
                         std = [0.15551882, 0.15551882, 0.15551882]),
])
# Class names, indexed by the network's output position.
classes = ('circle','triangle','square','basketball','volleyball','football')
# Folder containing the images to classify.
demo_fold = 'D:/Pic'
class VGG16(nn.Module):
    """VGG-16 style classifier with batch normalisation after every conv.

    The feature extractor is thirteen 3x3 convolutions (padding 1), each
    followed by BatchNorm and ReLU, with five 2x2 max-pooling steps. The
    classifier's first layer takes 8192 inputs, which matches 3x128x128
    images (512 channels x 4 x 4 after the five poolings).

    Args:
        num_classes: Size of the final linear layer's output.
    """

    # Output channels of each 3x3 convolution; 'M' marks a 2x2 max-pool.
    _LAYOUT = (64, 64, 'M',
               128, 128, 'M',
               256, 256, 256, 'M',
               512, 512, 512, 'M',
               512, 512, 512, 'M')

    def __init__(self, num_classes=6):
        super(VGG16, self).__init__()
        layers = []
        in_channels = 3
        # Modules are appended in the same order as the hand-written stack,
        # so state_dict keys (features.0, features.1, ...) are unchanged and
        # existing checkpoints still load.
        for item in self._LAYOUT:
            if item == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.append(nn.Conv2d(in_channels, item, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(item))
            layers.append(nn.ReLU(True))
            in_channels = item
        # A 1x1 / stride-1 average pool leaves the tensor unchanged; it is kept
        # so the module list matches previously trained models.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # Honour num_classes instead of a hard-coded 6.
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out
def fenlei(inputs, model=None, weights_path="D:/xzz/研一/vgg16/vgg16.pth"):
    """Classify one image and return its class name.

    Args:
        inputs: Image accepted by ``data_transform`` (the script passes RGB
            PIL images).
        model: Optional already-loaded network. Pass one when classifying
            many images so the weights are not re-read from disk on every
            call. When omitted, a ``VGG16`` is built and ``weights_path`` is
            loaded into it.
        weights_path: Checkpoint used when ``model`` is not supplied.

    Returns:
        The entry of ``classes`` at the index of the highest logit.
    """
    if model is None:
        model = VGG16()
        # map_location lets a checkpoint saved from a GPU model load on a
        # machine without CUDA.
        model.load_state_dict(torch.load(weights_path, map_location='cpu'))
    model.eval()
    # Add the batch dimension and put the input on the model's device.
    device = next(model.parameters()).device
    img = data_transform(inputs).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out = model(img)
    _, pred = torch.max(out, 1)
    return classes[pred.item()]
# Load the trained weights once; map_location lets a checkpoint saved from a
# GPU model load on a machine without CUDA.
model = VGG16()
model.load_state_dict(torch.load("D:/xzz/研一/vgg16/vgg16.pth", map_location='cpu'))
model.eval()  # set once: BatchNorm/Dropout switch to inference behaviour

# Classify every file in demo_fold and print the predicted class name.
fList=os.listdir(demo_fold)
with torch.no_grad():  # inference only, no autograd bookkeeping
    for file_name in fList:
        file_path = os.path.join(demo_fold, file_name)
        if not os.path.isfile(file_path):
            # Sub-directories cannot be opened as images.
            continue
        img = Image.open(file_path).convert('RGB')
        img = data_transform(img).unsqueeze(0)  # add the batch dimension
        out = model(img)
        _, pred = torch.max(out, 1)
        pred = classes[pred.item()]
        print(pred)
该脚本读取 demo_fold 文件夹中的图片,并逐张输出预测的类别名称。
总结
后续还会更新用于计算 AP 值的 test.py 文件,敬请关注。