# Image localization (bounding-box regression)
import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from torch.utils import data
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
import os
from lxml import etree #对xml文件进行解码
from matplotlib.patches import Rectangle #绘制矩形框
import glob
from PIL import Image
# Collect the image and annotation paths. glob returns matches in arbitrary,
# filesystem-dependent order, so both lists are sorted to make the listing
# reproducible from run to run.
images = sorted(glob.glob(r'dataset/images/*jpg'))
anno = sorted(glob.glob(r'dataset/annotations/xmls/*.xml'))
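# The image list and the annotation list differ in length, so keep only the images that have a matching annotation file.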
# Keep only the images whose file stem also appears among the annotations.
# os.path is used instead of splitting on '\\' so the comparison also works
# where the path separator is '/'; splitext removes just the extension.
xml_name = [os.path.splitext(os.path.basename(x))[0] for x in anno]
_annotated_stems = set(xml_name)  # constant-time lookups instead of list scans
imgs = [x for x in images
        if os.path.splitext(os.path.basename(x))[0] in _annotated_stems]
# Read the bounding-box corner coordinates from each XML annotation.
def to_labels(path):
    """Read one annotation file and return its bounding box as fractions.

    Args:
        path: Path of the XML annotation file.

    Returns:
        ``[xmin/width, ymin/height, xmax/width, ymax/height]`` where the box
        comes from the first ``bndbox`` match and width/height from ``size``.

    Raises:
        IndexError: If one of the expected elements is missing.
        ValueError: If an element's text is not an integer.
        ZeroDivisionError: If the recorded width or height is 0.
    """
    # Context manager so the file handle is released even if reading fails.
    with open(path) as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)  # parse the markup so it can be queried with XPath

    def first_int(expression):
        # Only the first match is used, as integer text.
        return int(sel.xpath(expression)[0])

    xmin = first_int('//bndbox/xmin/text()')
    xmax = first_int('//bndbox/xmax/text()')
    ymin = first_int('//bndbox/ymin/text()')
    ymax = first_int('//bndbox/ymax/text()')
    width = first_int('//size/width/text()')
    height = first_int('//size/height/text()')
    return [xmin / width, ymin / height, xmax / width, ymax / height]
# Build the labels in the same order (and length) as `imgs`: look up each
# image's annotation by file stem rather than iterating `anno`, whose order
# and length are not guaranteed to match the filtered image list.
_xml_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in anno}
labels = [to_labels(_xml_by_stem[os.path.splitext(os.path.basename(p))[0]])
          for p in imgs]
# Shuffle the samples and split them into train and test sets.
# One shared random permutation is applied to both arrays, then the first
# 80 % of the shuffled samples become the training set and the rest the test set.
index = np.random.permutation(len(imgs))
imgs = np.array(imgs)[index]
labels = np.array(labels)[index].astype(np.float32)
i = int(len(imgs) * 0.8)  # index of the first test sample
train_imgs, test_imgs = imgs[:i], imgs[i:]
train_labels, test_labels = labels[:i], labels[i:]
# Create the Dataset and DataLoader objects.
# Preprocessing applied to every image: resize to 224x224, then convert the
# PIL image to a tensor.
_preprocess_steps = [transforms.Resize((224, 224)), transforms.ToTensor()]
transform = transforms.Compose(_preprocess_steps)
class OXford_Dataset(data.Dataset):
    """Dataset pairing image paths with four-value labels.

    Each item is the preprocessed RGB image followed by the four label
    values of the same index, returned as separate elements.
    """

    def __init__(self, img_paths, labels_list):
        self.imgs, self.labels = img_paths, labels_list

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        path = self.imgs[index]
        # Force three channels before applying the module-level transform.
        rgb_image = Image.open(path).convert("RGB")
        tensor = transform(rgb_image)
        # Unpacking requires the label to hold exactly four values.
        first, second, third, fourth = self.labels[index]
        return tensor, first, second, third, fourth
# Wrap both splits in datasets and batch them by 16; only the training
# loader shuffles.
train_dataset = OXford_Dataset(img_paths=train_imgs, labels_list=train_labels)
test_dataset = OXford_Dataset(img_paths=test_imgs, labels_list=test_labels)
train_dl = data.DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_dl = data.DataLoader(dataset=test_dataset, batch_size=16)
# Plot sample images with their bounding boxes.
# Preview the first two samples of one training batch with their label boxes.
imgs_batch, out1_b, out2_b, out3_b, out4_b = next(iter(train_dl))
plt.figure(figsize=(12, 8))
_preview = zip(imgs_batch[:2], out1_b[:2], out2_b[:2], out3_b[:2], out4_b[:2])
for i, (img, l1, l2, l3, l4) in enumerate(_preview):
    img = img.permute(1, 2, 0).numpy()  # move channels last for imshow
    plt.subplot(1, 2, i + 1)
    plt.imshow(img)
    # The labels are produced in (xmin, ymin, xmax, ymax) order as fractions
    # of the image size, so name them accordingly and scale by the displayed
    # image's own width/height instead of a hard-coded 224.
    height, width = img.shape[:2]
    xmin, ymin = l1.item() * width, l2.item() * height
    xmax, ymax = l3.item() * width, l4.item() * height
    rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin),
                     fill=False, color='red')
    ax = plt.gca()  # current axes
    ax.add_patch(rect)  # draw the box on top of the image
# Use a pretrained model.
# Load ResNet-101 with pretrained weights.
resnet = torchvision.models.resnet101(pretrained=True)
# Take the leading layers as the feature extractor.
_resnet_children = list(resnet.children())
len(_resnet_children)  # notebook-style inspection; the value is discarded
# Every child module except the last one, chained in order.
conv_base = nn.Sequential(*_resnet_children[:-1])
# To build the fully connected heads we need their input size, which equals the in_features of ResNet-101's own fully connected layer.
class Net(nn.Module):
    """ResNet feature extractor followed by four single-output linear heads.

    The backbone is every child of the module-level ``resnet`` except its
    last one. ``forward`` returns a tuple of four tensors, one per head.
    """

    def __init__(self):
        super().__init__()
        self.conv_base = nn.Sequential(*list(resnet.children())[:-1])
        # `in_f` was referenced but never defined anywhere in the file; the
        # heads replace resnet's final fc layer, so they take its input width.
        in_f = resnet.fc.in_features
        self.fc1 = nn.Linear(in_f, 1)
        self.fc2 = nn.Linear(in_f, 1)
        self.fc3 = nn.Linear(in_f, 1)
        self.fc4 = nn.Linear(in_f, 1)

    def forward(self, x):
        x = self.conv_base(x)
        x = x.view(x.size(0), -1)  # flatten everything after the batch axis
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        x4 = self.fc4(x)
        return x1, x2, x3, x4
# Train the model.
# Instantiate the network and move it to the GPU when one is available.
model = Net()
if torch.cuda.is_available():
    model = model.to('cuda')
# Mean-squared-error loss, Adam optimiser, and a step scheduler that scales
# the learning rate by 0.1 every 7 scheduler steps.
loss_fn = nn.MSELoss()
from torch.optim import lr_scheduler
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)
def _coordinate_loss(model, batch):
    """Return the summed loss over all outputs of `model` for one batch."""
    x, *targets = batch
    # Follow the model's own device instead of re-checking CUDA availability.
    device = next(model.parameters()).device
    predictions = model(x.to(device))
    total = 0
    for pred, target in zip(predictions, targets):
        target = target.to(device=device, dtype=pred.dtype)
        # With the model and loaders in this file, each prediction is
        # (batch, 1) while each target is (batch,). Passing them as-is makes
        # MSELoss broadcast to (batch, batch), so reshape the target first.
        total = total + loss_fn(pred, target.view_as(pred))
    return total


def fit(epoch, model, trainloader, testloader):
    """Train for one epoch, evaluate, print and return the mean batch losses.

    Uses the module-level ``loss_fn`` and ``optimizer``.

    Args:
        epoch: Epoch number, used only in the printed summary.
        model: Network returning one prediction tensor per target.
        trainloader: Iterable of ``(x, y1, y2, y3, y4)`` training batches.
        testloader: Iterable of ``(x, y1, y2, y3, y4)`` evaluation batches.

    Returns:
        ``(epoch_loss, epoch_test_loss)``, each the summed batch loss divided
        by the number of batches in the corresponding loader.
    """
    # NOTE(review): exp_lr_scheduler is created at module level but is never
    # stepped here; confirm whether a per-epoch step was intended.
    running_loss = 0
    model.train()
    for batch in trainloader:
        loss = _coordinate_loss(model, batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(trainloader)

    test_running_loss = 0
    model.eval()
    with torch.no_grad():
        # Iterate the loader that was passed in (the original read the global
        # test_dl here but averaged over len(testloader)).
        for batch in testloader:
            test_running_loss += _coordinate_loss(model, batch).item()
    epoch_test_loss = test_running_loss / len(testloader)

    print('epoch: ', epoch,
          'loss: ', round(epoch_loss, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          )
    return epoch_loss, epoch_test_loss
# Run ten epochs and record the per-epoch train and test losses.
# train_acc and test_acc are created but not filled by this loop.
epochs = 10
train_loss, train_acc = [], []
test_loss, test_acc = [], []
for epoch in range(epochs):
    epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    test_loss.append(epoch_test_loss)
# Save the trained weights and inspect the predictions.
# Save the trained weights.
# NOTE(review): the '.path' extension looks like a typo for the conventional
# '.pth'; it is left unchanged because other code may load this exact name.
PATH = 'location_model.path'
torch.save(model.state_dict(), PATH)

# Predict boxes for one test batch and draw up to six of them.
plt.figure(figsize=(8, 24))
imgs, _, _, _, _ = next(iter(test_dl))
# Use the model's device rather than a hard-coded 'cuda', so this also runs
# on CPU-only machines like the rest of the script.
imgs = imgs.to(next(model.parameters()).device)
model.eval()
with torch.no_grad():  # inference only, no gradients needed
    out1, out2, out3, out4 = model(imgs)
# The batch may hold fewer than six images.
for i in range(min(6, len(imgs))):
    plt.subplot(6, 1, i + 1)
    picture = imgs[i].permute(1, 2, 0).cpu().numpy()  # channels last for imshow
    plt.imshow(picture)
    # Outputs are treated as (xmin, ymin, xmax, ymax) fractions and scaled by
    # the displayed image size.
    height, width = picture.shape[:2]
    xmin, ymin, xmax, ymax = (out1[i].item() * width,
                              out2[i].item() * height,
                              out3[i].item() * width,
                              out4[i].item() * height)
    rect = Rectangle((xmin, ymin), (xmax - xmin), (ymax - ymin), fill=False, color='red')
    ax = plt.gca()
    ax.add_patch(rect)