数据集介绍
The Oxford-IIIT Pet Dataset是一个宠物图像数据集,包含37种宠物,每种宠物200张左右宠物图片,并同时包含宠物轮廓标注信息。下载链接:
https://www.robots.ox.ac.uk/~vgg/data/pets/
导入库
# 运行环境:PyTorch 1.9.1,CUDA 10.2,GTX 1080 Ti
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torchvision
from torchvision import transforms
import os
from lxml import etree
from matplotlib.patches import Rectangle
import glob
from PIL import Image
数据可视化
下面展示图片及其标签的可视化。
# First look at the data: open one image and display it.
pil_img = Image.open(r'dataset/images/Abyssinian_1.jpg')  # change to your own image path
np_img = np.array(pil_img)  # convert the PIL image to a NumPy array
print(np_img.shape)  # the original post reports (400, 600, 3); the last axis is the channel count
plt.imshow(np_img)
plt.show()

# Read the annotation that belongs to the image. The context manager makes
# sure the file handle is closed as soon as the text has been read.
with open(r'dataset/annotations/xmls/Abyssinian_1.xml') as xml_file:
    xml = xml_file.read()
sel = etree.HTML(xml)  # parse the markup so the fields can be queried with XPath

# Image size and bounding-box corners, all converted to int for consistency.
width = int(sel.xpath('//size/width/text()')[0])
height = int(sel.xpath('//size/height/text()')[0])
xmin = int(sel.xpath('//bndbox/xmin/text()')[0])
ymin = int(sel.xpath('//bndbox/ymin/text()')[0])
xmax = int(sel.xpath('//bndbox/xmax/text()')[0])
ymax = int(sel.xpath('//bndbox/ymax/text()')[0])

# Draw the image again with the annotated box as a red, unfilled rectangle.
plt.imshow(np_img)
rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
ax = plt.gca()
ax.add_patch(rect)
plt.show()
结果图
读取数据
下面展示数据读取的代码。
# Build the dataset file lists.
images = glob.glob('dataset/images/*.jpg')  # every JPEG image path, as a list
xmls = glob.glob('dataset/annotations/xmls/*.xml')  # every annotation path, as a list


def file_stem(path):
    """Return the file name of *path* without its directory and extension.

    Forward and backward slashes are both treated as directory separators,
    so paths produced on Windows and on POSIX systems give the same stem.
    """
    return os.path.splitext(os.path.basename(path.replace('\\', '/')))[0]


def match_annotated_images(image_paths, xml_paths):
    """Pair images with their annotation files by file stem.

    Only stems present in both inputs are kept, and both returned lists are
    sorted by stem, so element ``k`` of the first list always belongs to
    element ``k`` of the second list.

    Args:
        image_paths: iterable of image file paths.
        xml_paths: iterable of annotation file paths.

    Returns:
        A ``(images, annotations)`` tuple of two equally long lists.
    """
    image_by_stem = {file_stem(p): p for p in image_paths}
    xml_by_stem = {file_stem(p): p for p in xml_paths}
    shared_stems = sorted(image_by_stem.keys() & xml_by_stem.keys())
    return ([image_by_stem[s] for s in shared_stems],
            [xml_by_stem[s] for s in shared_stems])


# Drop images without an annotation (and annotations without an image) and
# bring both lists into the same order, so labels parsed from `xmls` line up
# with the paths in `imgs`.
imgs, xmls = match_annotated_images(images, xmls)
xmls_names = [file_stem(x) for x in xmls]  # stems of the annotation files that were kept
# Helper that reads one annotation file.
scal = 224  # not referenced by to_labels; kept as a module-level constant


def to_labels(path):
    """Read one annotation file and return its bounding box as fractions.

    Args:
        path: path of the annotation XML file.

    Returns:
        ``[xmin/width, ymin/height, xmax/width, ymax/height]``, where the
        box corners and the image size are the integers stored in the file.

    Raises:
        IndexError: if one of the queried fields is missing from the file.
    """
    # Close the file as soon as its text has been read.
    with open(path) as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)

    def field(xpath):
        # First text node matched by the XPath expression, as an int.
        return int(sel.xpath(xpath)[0])

    width = field('//size/width/text()')
    height = field('//size/height/text()')
    xmin = field('//bndbox/xmin/text()')
    ymin = field('//bndbox/ymin/text()')
    xmax = field('//bndbox/xmax/text()')
    ymax = field('//bndbox/ymax/text()')
    return [xmin / width, ymin / height, xmax / width, ymax / height]
# Parse every annotation file; each entry holds the four values returned by to_labels.
labels = [to_labels(xml_path) for xml_path in xmls]
# Transpose the per-file entries into four per-coordinate sequences.
out1_label, out2_label, out3_label, out4_label = zip(*labels)

# One random permutation is shared by the images and all four label columns,
# so every image keeps its own labels after shuffling.
index = np.random.permutation(len(imgs))
images = np.array(imgs)[index]
out1_label, out2_label, out3_label, out4_label = (
    np.array(coord).astype(np.float32).reshape(-1, 1)[index]
    for coord in (out1_label, out2_label, out3_label, out4_label)
)

# Train/test split: the first 80% of the shuffled data is used for training.
i = int(len(imgs) * 0.8)
train_images, test_images = images[:i], images[i:]
out1_train_label, out1_test_label = out1_label[:i], out1_label[i:]
out2_train_label, out2_test_label = out2_label[:i], out2_label[i:]
out3_train_label, out3_test_label = out3_label[:i], out3_label[i:]
out4_train_label, out4_test_label = out4_label[:i], out4_label[i:]
# Preprocessing applied to every image: resize to 224x224, then convert to a tensor.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)
# Dataset that yields an image tensor together with its four box labels.
class Oxford_dataset(data.Dataset):
    """Pairs image files with four per-image label sequences.

    Args:
        img_paths: sequence of image file paths.
        out1_label, out2_label, out3_label, out4_label: sequences indexed in
            parallel with ``img_paths``; element ``k`` of each belongs to
            image ``k``.
        transform: callable applied to the RGB PIL image of every sample.
    """

    def __init__(self, img_paths, out1_label, out2_label,
                 out3_label, out4_label, transform):
        self.imgs = img_paths
        self.out1_label = out1_label
        self.out2_label = out2_label
        self.out3_label = out3_label
        self.out4_label = out4_label
        self.transforms = transform

    def __getitem__(self, index):
        """Return ``(image, label1, label2, label3, label4)`` for one sample."""
        # Convert every image to RGB: grayscale files become three equal
        # channels, and RGBA / palette / CMYK files also end up with exactly
        # three channels. The file is closed once the pixels are loaded.
        with Image.open(self.imgs[index]) as pil_img:
            rgb_img = pil_img.convert('RGB')
        img_tensor = self.transforms(rgb_img)
        return (img_tensor,
                self.out1_label[index],
                self.out2_label[index],
                self.out3_label[index],
                self.out4_label[index])

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs)
# Number of samples per batch. The original script used BATCH_SIZE without
# ever defining it; 8 is a conservative choice — tune it to your GPU memory.
BATCH_SIZE = 8

# Wrap the training and test splits in datasets.
train_dataset = Oxford_dataset(train_images, out1_train_label,
                               out2_train_label, out3_train_label,
                               out4_train_label, transform)
test_dataset = Oxford_dataset(test_images, out1_test_label,
                              out2_test_label, out3_test_label,
                              out4_test_label, transform)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_dl = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_dl = data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
)
建立模型
下面展示建立模型的代码。
# Build the model via transfer learning
resnet = torchvision.models.resnext101_32x8d(pretrained=True)  # load ResNeXt-101 (32x8d) with pretrained weights
in_f = resnet.fc.in_features  # input width of the backbone's final fully connected layer
class Net(nn.Module):
    """Backbone without its last child, followed by four one-unit linear heads.

    Uses the module-level ``resnet`` as the backbone and ``in_f`` as the
    input width of every head.
    """

    def __init__(self):
        super().__init__()
        # Every child of the backbone except the last one.
        backbone_layers = list(resnet.children())[:-1]
        self.conv_base = nn.Sequential(*backbone_layers)
        # One independent single-output head per predicted value.
        self.fc1 = nn.Linear(in_f, 1)
        self.fc2 = nn.Linear(in_f, 1)
        self.fc3 = nn.Linear(in_f, 1)
        self.fc4 = nn.Linear(in_f, 1)

    def forward(self, x):
        """Return the four head outputs for the batch ``x`` as a tuple."""
        features = self.conv_base(x)
        # Flatten everything after the batch dimension.
        features = features.view(features.size(0), -1)
        heads = (self.fc1, self.fc2, self.fc3, self.fc4)
        return tuple(head(features) for head in heads)
# Create the model and move it to the GPU when one is available.
model = Net()
if torch.cuda.is_available():
    model = model.to('cuda')

# Loss function: mean squared error.
loss_fn = nn.MSELoss()

# Optimizer plus a scheduler that multiplies the learning rate by 0.1
# after every 7 scheduler steps.
from torch.optim import lr_scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
## 训练模型
下面展示训练函数的代码。
# Train and evaluate the model for one epoch.
def fit(epoch, model, trainloader, testloader):
    """Run one training pass and one evaluation pass, and report both losses.

    Relies on the module-level ``loss_fn``, ``optimizer`` and
    ``exp_lr_scheduler`` objects.

    Args:
        epoch: epoch number, used only in the printed summary.
        model: module that returns four predictions for a batch.
        trainloader: loader yielding ``(x, y1, y2, y3, y4)`` training batches.
        testloader: loader yielding ``(x, y1, y2, y3, y4)`` evaluation batches.

    Returns:
        ``(epoch_loss, epoch_test_loss)``: the summed four-head loss averaged
        per sample over the training and the evaluation data.
    """
    # Keep the data on whatever device the model lives on instead of
    # assuming 'cuda'.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    def batch_loss(x, targets):
        # Sum of the per-head losses for one batch.
        predictions = model(x)
        total_loss = loss_fn(predictions[0], targets[0])
        for pred, target in zip(predictions[1:], targets[1:]):
            total_loss = total_loss + loss_fn(pred, target)
        return total_loss

    seen = 0
    running_loss = 0.0
    model.train()
    for x, y1, y2, y3, y4 in trainloader:
        x = x.to(device)
        targets = [y.to(device) for y in (y1, y2, y3, y4)]
        loss = batch_loss(x, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # loss_fn reports a per-batch average, so weight it by the batch size
        # before accumulating; dividing by the sample count below then gives a
        # true per-sample average even when the last batch is smaller.
        running_loss += loss.item() * x.size(0)
        seen += x.size(0)
    # The learning-rate schedule advances once per epoch.
    exp_lr_scheduler.step()
    epoch_loss = running_loss / max(seen, 1)

    test_seen = 0
    test_running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y1, y2, y3, y4 in testloader:
            x = x.to(device)
            targets = [y.to(device) for y in (y1, y2, y3, y4)]
            loss = batch_loss(x, targets)
            test_running_loss += loss.item() * x.size(0)
            test_seen += x.size(0)
    epoch_test_loss = test_running_loss / max(test_seen, 1)

    print('epoch: ', epoch,
          'loss: ', round(epoch_loss, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          )
    return epoch_loss, epoch_test_loss
下面开始训练。
# Train the model and record the loss history of every epoch.
epochs = 10  # the original author notes that about 4 epochs are already enough
train_loss, test_loss = [], []
for epoch in range(epochs):
    # fit returns (training loss, test loss) for this epoch.
    epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
    train_loss += [epoch_loss]
    test_loss += [epoch_test_loss]
可视化你的训练结果
下面展示训练过程的输出结果。
epoch: 0 loss: 0.005 test_loss: 0.001
epoch: 1 loss: 0.001 test_loss: 0.001
epoch: 2 loss: 0.001 test_loss: 0.001
epoch: 3 loss: 0.0 test_loss: 0.0
epoch: 4 loss: 0.0 test_loss: 0.0
epoch: 5 loss: 0.0 test_loss: 0.0
epoch: 6 loss: 0.0 test_loss: 0.0
epoch: 7 loss: 0.0 test_loss: 0.0
epoch: 8 loss: 0.0 test_loss: 0.0
epoch: 9 loss: 0.0 test_loss: 0.0
# 绘制训练损失与验证损失曲线
# Plot the recorded training and validation losses against the epoch number.
plt.figure()
epoch_axis = range(1, len(train_loss) + 1)  # epochs are numbered from 1 on the x axis
for series, fmt, legend_name in (
        (train_loss, 'r', 'Training loss'),
        (test_loss, 'bo', 'Validation loss'),
):
    plt.plot(epoch_axis, series, fmt, label=legend_name)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.legend()
plt.show()
测试与应用
模型保存与测试
# 模型保存与测试代码
# Save the model
PATH = 'location_model.pth'  # file the model's state dict is written to
torch.save(model.state_dict(), PATH)
# Model test: draw the predicted boxes for up to six images of one batch.
plt.figure(figsize=(8, 24))
# Take the batch from the held-out test loader, so the check is not done on
# images the model was trained on.
imgs, _, _, _, _ = next(iter(test_dl))
# Follow the model's device instead of assuming a GPU is present.
device = next(model.parameters()).device
imgs = imgs.to(device)
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    out1, out2, out3, out4 = model(imgs)
# Never index past the end of a batch that holds fewer than six images.
for i in range(min(6, len(imgs))):
    plt.subplot(6, 1, i + 1)
    plt.imshow(imgs[i].permute(1, 2, 0).cpu().numpy())
    # Scale the four predictions by 224, the side length the images are drawn at.
    xmin, ymin, xmax, ymax = (out1[i].item() * 224,
                              out2[i].item() * 224,
                              out3[i].item() * 224,
                              out4[i].item() * 224)
    rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
    ax = plt.gca()
    ax.add_patch(rect)
plt.show()
单图应用
如果你想测试自己家的小猫小狗,在下面的路径可以改为你的图片测试喔
# 单图应用代码
# Apply the model to a single image of your own.
# Convert to RGB first so grayscale or RGBA files also give three channels.
with Image.open('dataset/test2/c3.jpg') as test_img:  # try a photo of your own cat or dog
    images_test = transform(test_img.convert('RGB'))
images_test = images_test.unsqueeze(0)  # add the batch dimension
# Follow the model's device instead of assuming a GPU is present.
imgs = images_test.to(next(model.parameters()).device)
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    out1, out2, out3, out4 = model(imgs)
# Show the only image of the batch (the original referenced an undefined `imgs2`).
plt.imshow(imgs[0].permute(1, 2, 0).cpu().numpy())
# Scale the four predictions by 224, the side length the image is drawn at.
xmin, ymin, xmax, ymax = (out1.item() * 224,
                          out2.item() * 224,
                          out3.item() * 224,
                          out4.item() * 224)
rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
ax = plt.gca()
ax.add_patch(rect)
plt.show()
备注
此代码参考学习日月光华老师的pytorch深度学习课程,需要更多资料可以留言。