这是一个读取VOC数据集的例子,供大家参考。但是我想这更适合作为一个云备份,哈哈。
"""
I plan to work on object detection first,
so I am starting with the lightweight VOC dataset.
"""
import torch
import torch. utils. data as data
import numpy as np
import cv2
import xml # 标注是xml格式
try :
import xml. etree. cElementTree as ET # 解析xml的c语言版的模块
except ImportError:
import xml. etree. ElementTree as ET
# The 20 Pascal VOC object categories.
# This is an ordered tuple on purpose: iteration order of a set of strings is
# not defined and can differ between interpreter runs, which would give every
# run a different name -> index mapping below.
VOC_CLASSES = (
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
)

# Map each class name (str) to its integer index, i.e. its position in
# VOC_CLASSES; for example dict_classes['aeroplane'] is 0.
dict_classes = {name: idx for idx, name in enumerate(VOC_CLASSES)}
class ReadVOC(data.Dataset):
    """Map-style dataset that reads images and boxes from a VOC directory tree.

    The constructor only builds the lists of image and annotation paths from
    an image-set listing file; images and XML annotations are read lazily in
    ``__getitem__``.

    Each sample is ``(img, bbox)``: the resized image as a channels-first
    float tensor and the box of the FIRST object in the annotation, expressed
    as ``[xmin, ymin, xmax, ymax]`` relative to the original image width and
    height. Class indices are looked up while parsing but are not part of the
    returned sample.
    """

    # Target size handed to cv2.resize as ``dsize`` for every image.
    GOAL_SIZE = (400, 400)

    def __init__(self, root, image_set="train_val"):
        """Index the samples listed in ``<root>/ImageSets/Main/<image_set>.txt``.

        Args:
            root: Path of the VOC year directory, i.e. the directory holding
                ``ImageSets``, ``JPEGImages`` and ``Annotations``.
            image_set: Base name (without ``.txt``) of the listing file.
                NOTE(review): stock VOC devkits name the combined split
                ``trainval``; the default keeps the file name this code has
                always used -- confirm it matches your data layout.

        Raises:
            OSError: If the listing file cannot be opened.
        """
        super().__init__()
        print("reading voc...")
        self.root = root
        self.img_idx = []   # full path of every image file
        self.ano_idx = []   # full path of every annotation file
        self.bbox = []      # not filled in by this class
        self.obj_name = []  # category names; not filled in by this class
        train_txt_path = self.root + "/ImageSets/Main/" + image_set + ".txt"
        self.img_path = self.root + "/JPEGImages/"
        self.ano_path = self.root + "/Annotations/"
        # Read the listing file to collect the sample ids; the context manager
        # guarantees the file handle is closed again.
        with open(train_txt_path) as train_txt:
            for line in train_txt:
                fields = line.split()
                if not fields:
                    # Blank line (e.g. a trailing newline): nothing to index.
                    continue
                # Only the first column is the image id; further columns, if
                # any, are ignored.
                name = fields[0]
                self.img_idx.append(self.img_path + name + '.jpg')
                self.ano_idx.append(self.ano_path + name + '.xml')

    def __getitem__(self, item):
        """Load sample ``item`` and return ``(img, bbox)``.

        Raises:
            IndexError: If ``item`` is outside the indexed range.
            OSError: If the image file cannot be read.
            KeyError: If an object's class name is not in ``dict_classes``.
            ValueError: If the annotation lists no objects.
        """
        img_file = self.img_idx[item]
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError("failed to read image: " + img_file)
        height, width = img.shape[:2]
        # The annotation is parsed on access rather than up front.
        targets = ET.parse(self.ano_idx[item])
        # Flat annotation list: [bbox_0, class_0, bbox_1, class_1, ...].
        res = []
        for obj in targets.iter("object"):
            name = obj.find('name').text.lower().strip()
            class_idx = dict_classes[name]
            bndbox = obj.find('bndbox')
            obj_bbox = []
            for i, pt in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
                # Going through float() also accepts coordinates written as
                # decimals, which int() alone rejects.
                cur_pt = int(float(bndbox.find(pt).text))
                # Even positions are x values (scaled by width), odd positions
                # are y values (scaled by height).
                obj_bbox.append(cur_pt / width if i % 2 == 0 else cur_pt / height)
            res.append(obj_bbox)
            res.append(class_idx)
        img, res = self.data_trans(img, res)
        return img, res

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.img_idx)

    def data_trans(self, img_input, bbox_input):
        """Resize the image and convert image and first box to tensors.

        Args:
            img_input: Image array as returned by ``cv2.imread``.
            bbox_input: Annotation list built by ``__getitem__``; only its
                first entry (the first object's relative box) is used.

        Returns:
            ``(img, bbox)``: the image resized to ``GOAL_SIZE`` as a
            channels-first float tensor, and the first box as a tensor.

        Raises:
            ValueError: If ``bbox_input`` is empty. A ValueError is used
                rather than letting an IndexError escape, because an
                IndexError raised from ``__getitem__`` is treated as
                end-of-data when the dataset is iterated directly.
        """
        if len(bbox_input) == 0:
            raise ValueError("annotation contains no objects")
        # Boxes are relative to width/height, so the image can be resized
        # freely without touching them.
        img = cv2.resize(img_input, self.GOAL_SIZE)
        # Height x width x channels array -> channels-first float tensor.
        img = torch.from_numpy(img).permute(2, 0, 1).float()
        bbox = torch.tensor(bbox_input[0])
        return img, bbox
if __name__ == "__main__":
    # Smoke run: build the sample index for a local VOC2012 directory.
    voc_root = '/home/.../data/VOCdevkit/VOC2012'
    ReadVOC(root=voc_root)