在pytorch中如何实现图像定位

图像定位

import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from torch.utils import data
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms
import os 

from lxml import etree #对xml文件进行解码
from matplotlib.patches import Rectangle #绘制矩形框
import glob

from PIL import Image
# Collect image and annotation paths.
# glob.glob returns matches in arbitrary, filesystem-dependent order, so both
# listings are sorted to make the run deterministic across platforms.
# The image pattern is '*.jpg' (with the dot) so only real .jpg files match.
images = sorted(glob.glob('dataset/images/*.jpg'))
anno = sorted(glob.glob('dataset/annotations/xmls/*.xml'))

由于图片与标注文件的数量不一致,需要对其进行筛选,只保留有对应xml标注的图片

# Keep only the images that have an annotation file with the same base name.
# os.path is used instead of splitting on '\\' so the match also works on
# systems whose path separator is '/'.
xml_name = [os.path.splitext(os.path.basename(x))[0] for x in anno]
# A set makes each membership test O(1) instead of scanning the whole list.
_xml_name_set = set(xml_name)
imgs = [x for x in images
        if os.path.splitext(os.path.basename(x))[0] in _xml_name_set]

从xml中获得矩形框左上角(xmin, ymin)和右下角(xmax, ymax)的坐标,并按图片的宽高进行归一化

def to_labels(path):
    """Read one annotation XML file and return its normalised bounding box.

    The file is expected to contain ``bndbox`` (xmin/ymin/xmax/ymax) and
    ``size`` (width/height) elements. Only the first match of each element
    is used.

    Args:
        path: Path of the annotation XML file.

    Returns:
        ``[xmin/width, ymin/height, xmax/width, ymax/height]`` as floats.

    Raises:
        IndexError: If one of the expected elements is missing.
        ZeroDivisionError: If the annotated width or height is 0.
    """
    # Use a context manager so the file handle is closed right away; this
    # function is called once per annotation file.
    with open(path) as xml_file:
        xml = xml_file.read()
    sel = etree.HTML(xml)  # parse the markup so it can be queried with XPath

    def first_int(query):
        # First text node matched by the query, converted to int.
        return int(sel.xpath(query)[0])

    xmin = first_int('//bndbox/xmin/text()')
    xmax = first_int('//bndbox/xmax/text()')
    ymin = first_int('//bndbox/ymin/text()')
    ymax = first_int('//bndbox/ymax/text()')
    width = first_int('//size/width/text()')
    height = first_int('//size/height/text()')
    return [xmin/width, ymin/height, xmax/width, ymax/height]

# Build the labels in the same order as `imgs`, so that labels[k] always
# belongs to imgs[k]. Iterating over `anno` directly only lines up when both
# glob listings happen to have identical order and length.
_xml_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in anno}
labels = [to_labels(_xml_by_stem[os.path.splitext(os.path.basename(p))[0]])
          for p in imgs]

乱序并区分train和test数据

# Shuffle images and labels with one shared permutation so that every image
# stays paired with its own box.
index = np.random.permutation(len(imgs))
imgs = np.array(imgs)[index]
labels = np.array(labels)[index].astype(np.float32)

# The first 80% of the shuffled samples are used for training, the rest for
# testing.
i = int(len(imgs)*0.8)
train_imgs, test_imgs = imgs[:i], imgs[i:]
train_labels, test_labels = labels[:i], labels[i:]

创建dataloader

# Preprocessing applied to every image: resize to 224x224, then convert the
# PIL image to a tensor.
_resize_step = transforms.Resize((224, 224))
_to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([_resize_step, _to_tensor_step])
class OXford_Dataset(data.Dataset):
    """Dataset that pairs image paths with four box coordinates each.

    Args:
        img_paths: Indexable collection of image file paths.
        labels_list: Indexable collection whose items unpack into exactly
            four values (the box coordinates of the matching image).
    """

    def __init__(self, img_paths, labels_list):
        self.imgs = img_paths
        self.labels = labels_list

    def __len__(self):
        """Return the number of image paths."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image_tensor, c1, c2, c3, c4)`` for the given index."""
        # Force three channels before applying the module-level `transform`.
        rgb_image = Image.open(self.imgs[index]).convert("RGB")
        tensor = transform(rgb_image)
        # Each label row holds exactly four values; hand them back one by one.
        c1, c2, c3, c4 = self.labels[index]
        return tensor, c1, c2, c3, c4
# Wrap both splits in datasets and batch them 16 samples at a time.
# Only the training loader reshuffles its samples.
train_dataset = OXford_Dataset(img_paths=train_imgs, labels_list=train_labels)
test_dataset = OXford_Dataset(img_paths=test_imgs, labels_list=test_labels)
train_dl = data.DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_dl = data.DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

绘制图片

# Draw the first two training images together with their ground-truth boxes.
imgs_batch, out1_b, out2_b, out3_b, out4_b = next(iter(train_dl))
plt.figure(figsize=(12, 8))
preview = zip(imgs_batch[:2], out1_b[:2], out2_b[:2], out3_b[:2], out4_b[:2])
for i, (img, l1, l2, l3, l4) in enumerate(preview):
    # Move the channel axis last, which is the layout imshow expects.
    img = img.permute(1, 2, 0).numpy()
    plt.subplot(1, 2, i+1)
    plt.imshow(img)
    # to_labels returns the box as (xmin, ymin, xmax, ymax) fractions, so the
    # four values are scaled back to the 224x224 resized image in that order.
    # float() turns the 0-d tensors into plain numbers for matplotlib.
    xmin, ymin, xmax, ymax = (float(v) * 224 for v in (l1, l2, l3, l4))
    rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
    ax = plt.gca()  # current axes
    ax.add_patch(rect)  # draw the box on the current axes

利用预训练模型

# Load ResNet-101 with pretrained weights; its layers are reused below as the
# feature extractor.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing.
resnet = torchvision.models.resnet101(pretrained=True)

取出前面的特征提取器

# children() yields the top-level layers of the network; materialise them once.
resnet_children = list(resnet.children())
len(resnet_children)  # notebook-style inspection of the layer count
# Everything except the last child is kept as the feature extractor.
conv_base = nn.Sequential(*resnet_children[:-1])

建立全连接层时,需要知道其输入特征数(in_features),它等于resnet101原全连接层的输入特征数,即 resnet.fc.in_features

class Net(nn.Module):
    """Feature extractor followed by four single-output linear heads.

    Args:
        backbone: Module whose children, except the last one, are used as the
            feature extractor, and whose ``fc.in_features`` gives the input
            size of the heads. Defaults to the module-level ``resnet``.
    """

    def __init__(self, backbone=None):
        super(Net, self).__init__()
        if backbone is None:
            # Default to the pretrained network loaded at module level.
            backbone = resnet
        self.conv_base = nn.Sequential(*list(backbone.children())[:-1])
        # The heads consume the same features as the backbone's own final
        # layer, so they need the same input width.
        in_f = backbone.fc.in_features
        self.fc1 = nn.Linear(in_f, 1)
        self.fc2 = nn.Linear(in_f, 1)
        self.fc3 = nn.Linear(in_f, 1)
        self.fc4 = nn.Linear(in_f, 1)

    def forward(self, x):
        """Return four tensors, one per head, each with one value per sample."""
        x = self.conv_base(x)
        # Flatten everything after the batch dimension for the linear heads.
        x = x.view(x.size(0), -1)
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        x4 = self.fc4(x)
        return x1, x2, x3, x4

模型的训练

from torch.optim import lr_scheduler

# Build the network and move it to the GPU when one is available.
model = Net()
if torch.cuda.is_available():
    model.to('cuda')

# Mean squared error is applied to each of the four coordinates.
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
# StepLR multiplies the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)
def _batch_loss(model, batch, device, criterion):
    """Return the summed per-output loss of *model* on one batch."""
    x, *targets = batch
    preds = model(x.to(device))
    total = 0
    for pred, target in zip(preds, targets):
        # Give the target the prediction's shape. Without this, a (B, 1)
        # prediction compared with a (B,) target is broadcast to (B, B) and
        # the loss is averaged over every mismatched pair.
        total = total + criterion(pred, target.to(device).reshape(pred.shape))
    return total


def fit(epoch, model, trainloader, testloader, opt=None, criterion=None):
    """Train *model* for one epoch, then evaluate it.

    Args:
        epoch: Epoch number; only used in the printed summary.
        model: Module returning one prediction tensor per target.
        trainloader: Sized iterable of ``(x, y1, y2, y3, y4)`` batches used
            for training.
        testloader: Sized iterable of batches used for evaluation.
        opt: Optimizer to step. Defaults to the module-level ``optimizer``.
        criterion: Loss function. Defaults to the module-level ``loss_fn``.

    Returns:
        ``(epoch_loss, epoch_test_loss)``: the mean per-batch loss on the
        training and evaluation data.
    """
    # Fall back to the script-level objects so existing four-argument calls
    # keep working.
    opt = optimizer if opt is None else opt
    criterion = loss_fn if criterion is None else criterion

    # Put the data wherever the model lives instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0
    model.train()
    for batch in trainloader:
        loss = _batch_loss(model, batch, device, criterion)
        opt.zero_grad()
        loss.backward()
        opt.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(trainloader)

    test_running_loss = 0
    model.eval()
    with torch.no_grad():
        # Evaluate on the loader that was passed in, not on a global one.
        for batch in testloader:
            test_running_loss += _batch_loss(model, batch, device, criterion).item()
    epoch_test_loss = test_running_loss / len(testloader)

    print('epoch: ', epoch,
          'loss: ', round(epoch_loss, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          )

    return epoch_loss, epoch_test_loss
epochs = 10
train_loss = []
train_acc = []  # never filled: fit() reports losses only
test_loss = []
test_acc = []  # never filled: fit() reports losses only

for epoch in range(epochs):
    epoch_loss, epoch_test_loss = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    test_loss.append(epoch_test_loss)
    # Advance the StepLR schedule once per epoch; without this call the
    # learning rate never decays.
    exp_lr_scheduler.step()
    

存储数据并查看效果

# Save the trained weights.
PATH = 'location_model.path'
torch.save(model.state_dict(), PATH)

# Predict boxes for one test batch and draw up to six of them.
plt.figure(figsize=(8, 24))
imgs, _, _, _, _ = next(iter(test_dl))
# Use the model's own device so this also runs on machines without CUDA.
device = next(model.parameters()).device
imgs = imgs.to(device)
model.eval()
with torch.no_grad():  # inference only, so no gradients are needed
    out1, out2, out3, out4 = model(imgs)
# The last batch of a loader can hold fewer than six images.
for i in range(min(6, len(imgs))):
    plt.subplot(6, 1, i+1)
    plt.imshow(imgs[i].permute(1, 2, 0).cpu().numpy())
    # Scale the predicted fractions back to the 224x224 resized image.
    xmin, ymin, xmax, ymax = (out1[i].item()*224,
                              out2[i].item()*224,
                              out3[i].item()*224,
                              out4[i].item()*224)
    rect = Rectangle((xmin, ymin), (xmax-xmin), (ymax-ymin), fill=False, color='red')
    ax = plt.gca()
    ax.add_patch(rect)
  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值