voc2007_extract_car_label
将上一步提取到的car转化为KITTI格式
1,第一步
获取所有图片的文件名,并去掉后缀。我这里用了比较笨的方法:先把图片名保存到文本文件,然后在gedit里查找替换
保存图片名python代码:
import os
# Dump the name of every entry in the training image folder to ImageID.txt,
# one name per line. os.listdir returns the names in arbitrary order.
dirlist = os.listdir("/home/mahnx0/data/voc_extract_car/train_img/")
# The with-block guarantees the file is flushed and closed even if a write
# raises, which the manual open()/close() pair did not.
with open("ImageID.txt", "w") as fp:
    for name in dirlist:
        # The file extension is still part of `name` at this point.
        fp.write(name)
        fp.write("\n")
用gedit打开ImageID.txt,查找“.jpg”并全部替换为空
替换后如下:
开始转换,python代码如下:
# coding: utf-8
# In[24]:
import cv2, os, shutil
import xml.etree.ElementTree as ET
# In[2]:
# Directory that readXmlAnno searches for "<image id>.xml" annotation files.
# NOTE(review): "train_lable" is presumably the real on-disk spelling - confirm
# before "fixing" it.
ann_DIR = "/home/mahnx0/data/voc_extract_car/train_lable"
# Directory the source "<image id>.jpg" files are copied from.
img_DIR = "/home/mahnx0/data/voc_extract_car/train_img"
# Text file read line by line, one image id per line, for the training split.
trainval_file = "/home/mahnx0/data/voc_extract_car/train0.list"
# Same layout as trainval_file; its images are written to the val_* folders.
test_file = "/home/mahnx0/data/voc_extract_car/val0.list"
# In[42]:
def readXmlAnno(im_fn, ann_DIR):
    """Parse ``<ann_DIR>/<im_fn>.xml`` into a plain dictionary.

    Args:
        im_fn: annotation file name without the ``.xml`` extension.
        ann_DIR: directory that contains the annotation file.

    Returns:
        ``None`` when the file has no ``<object>`` element, otherwise a dict
        with two keys:

        * ``'size'``  - ``width`` / ``height`` / ``depth`` exactly as the
          text found in the XML (strings, not numbers).
        * ``'l_obj'`` - one dict per ``<object>``: its ``name``, the four
          ``bndbox`` corners converted to float, and ``truncated`` /
          ``difficult`` / ``occluded`` which are always the constant
          ``'0.0'`` (they are not read from the XML).
    """
    xml_path = os.path.join(ann_DIR, im_fn + '.xml')
    root = ET.parse(xml_path).getroot()

    size_node = root.find('size')
    parsed = {
        'size': {tag: size_node.find(tag).text
                 for tag in ('width', 'height', 'depth')},
    }

    objects = []
    for node in root.findall('object'):
        box = node.find('bndbox')
        entry = {
            "name": node.find('name').text,
            "truncated": '0.0',
            "difficult": '0.0',
            "occluded": '0.0',
        }
        for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
            entry[corner] = float(box.find(corner).text)
        objects.append(entry)
    parsed['l_obj'] = objects

    # An annotation without any object is reported as "nothing to convert".
    if not objects:
        return None
    return parsed
def convertToKitti(p_anno):
    """Turn the objects of a parsed annotation into KITTI-style field dicts.

    Args:
        p_anno: dict with an ``'l_obj'`` list; each item provides ``name``,
            ``truncated``, ``occluded`` and the numeric ``xmin`` / ``ymin`` /
            ``xmax`` / ``ymax`` box corners.

    Returns:
        A list with one dict per object. ``type``, ``truncated`` and
        ``occluded`` are copied through, ``bbox`` is the four corners
        formatted with one decimal and separated by spaces, and ``alpha``,
        ``dimensions``, ``location`` and ``rotation_y`` are zero-valued
        placeholders.
    """
    # The triple "0.0 0.0 0.0" is shared by the dimensions and location fields.
    zero_triplet = "{:.1f} {:.1f} {:.1f}".format(0, 0, 0)
    return [
        {
            "type": item["name"],
            "truncated": item["truncated"],
            "occluded": item["occluded"],
            "alpha": '0.0',
            "bbox": "{:.1f} {:.1f} {:.1f} {:.1f}".format(
                item["xmin"], item["ymin"], item["xmax"], item["ymax"]),
            "dimensions": zero_triplet,
            "location": zero_triplet,
            "rotation_y": '0.0',
        }
        for item in p_anno['l_obj']
    ]
# ### Create our own kitti dataset
# In[23]:
# Destination for the JPEGs of the training split.
k_train_img_DIR = "/home/mahnx0/data/voc_extract_car_2_kitti/train_img"
# Destination for the KITTI "<image id>.txt" label files of the training split.
k_train_lab_DIR = "/home/mahnx0/data/voc_extract_car_2_kitti/train_label"
# Destination for the JPEGs of the validation split.
k_val_img_DIR = "/home/mahnx0/data/voc_extract_car_2_kitti/val_img"
# Destination for the KITTI "<image id>.txt" label files of the validation split.
k_val_lab_DIR = "/home/mahnx0/data/voc_extract_car_2_kitti/val_label"
# In[47]:
# Order in which the KITTI fields are written on each label line.
_KITTI_FIELD_ORDER = ('type', 'truncated', 'occluded', 'alpha',
                      'bbox', 'dimensions', 'location', 'rotation_y')


def _export_split(list_file, label_DIR, image_DIR):
    """Write KITTI labels and copy the images for one dataset split.

    Args:
        list_file: text file with one image id (file name without
            extension) per line.
        label_DIR: folder that receives one ``<image id>.txt`` per image,
            holding one space-separated KITTI line per object.
        image_DIR: folder that receives a copy of ``<image id>.jpg`` taken
            from ``img_DIR``.

    Images whose annotation contains no object (readXmlAnno returns None)
    are skipped entirely: no label file is written and the image is not
    copied.
    NOTE(review): the original indentation was lost; the copy is assumed to
    belong to the "has objects" branch - confirm.
    """
    # Create the output folders up front so that open()/copy2() below do not
    # fail merely because a target directory is missing.
    for out_dir in (label_DIR, image_DIR):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    with open(list_file) as in_f:
        for line in in_f:
            im_fn = line.strip()
            if not im_fn:
                # A blank line would otherwise be looked up as ".xml".
                continue

            p_anno = readXmlAnno(im_fn, ann_DIR)
            if p_anno is None:
                continue

            k_anno_file = os.path.join(label_DIR, im_fn + ".txt")
            with open(k_anno_file, 'w') as out_f:
                for k_anno in convertToKitti(p_anno):
                    fields = [str(k_anno[key]) for key in _KITTI_FIELD_ORDER]
                    out_f.write(' '.join(fields) + '\n')

            # copy2 also preserves the source file's metadata.
            from_file = os.path.join(img_DIR, im_fn + ".jpg")
            to_file = os.path.join(image_DIR, im_fn + ".jpg")
            shutil.copy2(from_file, to_file)


# Processing train data
_export_split(trainval_file, k_train_lab_DIR, k_train_img_DIR)

# Processing test data
_export_split(test_file, k_val_lab_DIR, k_val_img_DIR)
# In[ ]:
OK,至此voc数据集转化KITTI工作全部完成
之前把coco、voc、kitti都转化成voc格式,同时提取出了car类别,用yolov3训练后发现效果非常好。但是考虑到嵌入式平台TX2上速度无法达到实时(v3优化后1080p只有5fps),而用tensorrt又不方便推理自定义层,所以目前准备改用KITTI格式的数据集,利用nvidia detectnet实现实时检测
下一步:转化DetracMvtData数据集到KITTI格式,这个数据我已经转化成voc了。至此已经有60000张不同场景的大小目标数据