今天根据实际数据读取和锚框产生的情况对之前的代码进行了修改
数据读取
import os
import torch
from torchvision import transforms
import torch.utils.data as data
from PIL import Image
import numpy as np
import xml.etree.ElementTree as ET
# Class names of the author's custom dataset (unused in the VOC experiment below).
classname = ['redbox','matrix','bluebox','beer','redbull','ball','AD','milk']
# Pascal VOC's 20 object classes; list position is the class index.
# Labels produced below are index + 1 so that 0 is reserved for background.
VOC_CLASSES = [ # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
# 读取第i张图片的xml信息和jpg图像
def get_example(self, i):
    """Load the i-th sample: parse its VOC XML annotation and JPEG image.

    Args:
        i: integer index into self.ids.

    Returns:
        (img, target) where img is the transformed image tensor and
        target has shape (num_objects, 5), each row being
        [class_label, xmin, ymin, xmax, ymax] with coordinates
        normalized to [0, 1] by the original image width/height.
    """
    id_ = self.ids[i]
    anno = ET.parse(os.path.join(self.root_dir, 'Annotations', id_ + '.xml'))
    bbox = []
    label = []
    # Use a context manager so the image file handle is closed promptly
    # (the original leaked it).
    with Image.open(os.path.join(self.root_dir, 'JPEGImages', id_ + '.jpg')) as im:
        w, h = im.size
        img = self.transform(im)
    for obj in anno.findall('object'):
        bndbox_anno = obj.find('bndbox')
        # VOC stores 1-based pixel coordinates; subtract 1, then normalize.
        tem = [
            (int(bndbox_anno.find('xmin').text) - 1) / w,
            (int(bndbox_anno.find('ymin').text) - 1) / h,
            (int(bndbox_anno.find('xmax').text) - 1) / w,
            (int(bndbox_anno.find('ymax').text) - 1) / h,
        ]
        bbox.append(tem)
        name = obj.find('name').text
        # +1 so label 0 can serve as the background class.
        label.append([VOC_CLASSES.index(name) + 1])
    # BUG FIX: the original applied .squeeze(0), which collapsed a
    # single-object (1, 4) tensor to shape (4,) and made the dim=1 cat
    # below fail. Keep the (N, 4) shape, and handle the no-object case
    # explicitly instead of building tensors from empty lists.
    if bbox:
        bbox_t = torch.Tensor(bbox)
        label_t = torch.Tensor(label)
    else:
        bbox_t = torch.zeros((0, 4))
        label_t = torch.zeros((0, 1))
    # Debug print removed.
    return img, torch.cat((label_t, bbox_t), dim=1)
# 数据读取
class my_date(data.Dataset):
    """VOC-style detection dataset.

    Expects root_dir to contain Annotations/, JPEGImages/ and
    ImageSets/<name>.txt listing one sample id per line.
    """

    def __init__(self, root_dir, name):
        self.root_dir = root_dir
        self.annopath = os.path.join(root_dir, 'Annotations')
        self.imgpath = os.path.join(root_dir, 'JPEGImages')
        self.idpath = os.path.join(root_dir, 'ImageSets', name + '.txt')
        # Map pixels to [-1, 1]: ToTensor gives [0, 1], then (x-0.5)/0.5.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])
        # FIX: use a context manager so the id-list file is closed
        # (the original `for line in open(...)` leaked the handle).
        with open(self.idpath) as f:
            self.ids = [line.strip('\n') for line in f]

    def __getitem__(self, idx):
        # Delegates to the module-level get_example helper.
        return get_example(self, idx)

    def __len__(self):
        return len(self.ids)
# Demo: run only when executed as a script, not on import.
# FIX: the original bound the result to `data`, shadowing the module
# alias `torch.utils.data as data` imported above.
if __name__ == "__main__":
    dataset = my_date('./czkdata', 'train')
    print(dataset[0])
这里拿VOC训练集做了实验,读取完返回的格式为(tensor([图片]), tensor([类别, xmin, ymin, xmax, ymax]))以便与产生的锚框格式对应
锚框产生部分
import torch
import math
import numpy as np
from PIL import Image
# Sample image used by the anchor-generation code below.
img = Image.open('./czkdata/JPEGImages/1.jpg')
# Class names of the custom dataset (duplicated from the data-loading section).
classname = ['redbox','matrix','bluebox','beer','redbull','ball','AD','milk']
#锚框产生
def MultiBoxPrior(feature_map, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5]):
"""
Args:
feature_map: torch tensor, Shape: [N, C, H, W].
sizes: List of sizes (0~1) of generated MultiBoxPriores.
ratios: List of aspect ratios (non-negative) of generated MultiBoxPriores.
Returns:
anchors of shape (1, num_anchors, 4). 由于batch里每个都一样, 所以第一维为1
"""
pairs = [] # pair of (size, sqrt(ration))
for r in ratios:
pairs.append([sizes[0], math.sqrt(r)])
for s in sizes[1:]:
pairs.append([s, math.sqrt(ratios[0])])
pairs = np.array(pairs)
ss1 = pairs[:, 0] * pairs[:, 1] # size * sqrt(ration)
ss2 &