百度近期开源了自动驾驶大型数据集,官方网址:http://apolloscape.auto/index.html
数据集的具体介绍请参见官方网站。如何利用这个数据集做目标检测呢?只需把像素级的实例分割标注转换为包围框(xmin, xmax, ymin, ymax)即可。下面以 road04 为例,用代码展示具体做法。
import json
import os
from os import listdir, getcwd
from os.path import join
import os.path
rootdir='/home/wang/下载/数据集/apolloscape/Image/road04_ins' # absolute path of the image folder
def position(pos):
    """Return the axis-aligned bounding box of a polygon.

    pos is a sequence of points; element 0 of every point is used as the
    x coordinate and element 1 as the y coordinate.

    Returns the tuple (x_min, x_max, y_min, y_max) with every value
    converted to float.  An empty sequence makes max() raise ValueError.
    """
    xs = [point[0] for point in pos]
    ys = [point[1] for point in pos]
    # Extrema are evaluated max-first, exactly like the hand-written loop
    # version, so an empty polygon fails at the same call.
    right = max(xs)
    left = min(xs)
    bottom = max(ys)
    top = min(ys)
    return (float(left), float(right), float(top), float(bottom))
def convert(size, box):
    """Convert a pixel bounding box into normalised centre/size form.

    size is (image_width, image_height) and box is
    (x_min, x_max, y_min, y_max).

    Returns (x, y, w, h): x and y are the box centre shifted by one pixel,
    w and h are the box extents; x and w are scaled by 1/image_width,
    y and h by 1/image_height.
    """
    img_w, img_h = size[0], size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    # Scale by multiplying with the reciprocal (not by dividing) so the
    # floating-point results stay bit-identical to the original formula.
    inv_w = 1. / img_w
    inv_h = 1. / img_h

    # NOTE(review): the "- 1" shift presumably compensates for 1-based
    # pixel coordinates -- confirm that this applies to this dataset.
    centre_x = (x_min + x_max) / 2.0 - 1
    centre_y = (y_min + y_max) / 2.0 - 1
    width = x_max - x_min
    height = y_max - y_min

    return (centre_x * inv_w, centre_y * inv_h, width * inv_w, height * inv_h)
def convert_annotation(image_id, label_dir='./label/road04_ins',
                       out_dir='./voc_type/road04_ins'):
    """Convert one JSON instance annotation into a box-per-line text file.

    Reads ``<label_dir>/<image_id>.json`` and writes
    ``<out_dir>/<image_id>.txt``.  For every object whose label is one of
    the handled ids, one line ``"<cls_id> <x> <y> <w> <h>"`` is written,
    where the four numbers come from convert() applied to the bounding box
    of the object's first polygon.

    Parameters:
        image_id:  file name (without extension) of the annotation.
        label_dir: folder holding the .json label files.
        out_dir:   folder receiving the .txt output; it must already exist.

    Raises:
        IOError/OSError if a file cannot be opened, KeyError if the JSON
        lacks 'imgWidth', 'imgHeight', 'objects', 'label' or 'polygons'.
    """
    # Dataset label id -> output class id.
    # NOTE(review): 36/37 are presumably the person/rider classes and
    # 33/38/39/40 the vehicle classes of the ApolloScape label table --
    # confirm against the dataset documentation.
    label_to_cls = {
        '36': 2, '37': 2,
        '33': 1, '38': 1, '39': 1, '40': 1,
    }

    label_path = os.path.join(label_dir, '%s.json' % (image_id))
    out_path = os.path.join(out_dir, '%s.txt' % (image_id))

    # "with" guarantees both handles are closed, even on errors; the
    # original left them open for the lifetime of the process.
    with open(label_path, 'r') as load_f:
        load_dict = json.load(load_f)

    w = load_dict['imgWidth']
    h = load_dict['imgHeight']

    with open(out_path, 'w') as out_file:
        for obj in load_dict['objects']:
            # str() lets the lookup work for both integer and string labels.
            cls_id = label_to_cls.get(str(obj['label']))
            if cls_id is None:
                continue  # label is not one of the classes we export
            polygons = obj['polygons']
            if not polygons or not polygons[0]:
                continue  # no outline, hence no bounding box to write
            # Only the first polygon of an object is used for its box.
            b = position(polygons[0])
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
def image_id(rootdir):
    """Return the names of all files under rootdir without a '.jpg' suffix.

    The directory tree is walked recursively with os.walk; only the file
    names (not their folders) are returned, in walk order.  A name that
    does not end in '.jpg' is returned unchanged.
    """
    suffix = '.jpg'
    names = []
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # str.strip('.jpg') would remove any of the characters
            # '.', 'j', 'p', 'g' from BOTH ends (e.g. "pig.jpg" -> "i"),
            # so cut off the exact suffix instead.
            if filename.endswith(suffix):
                filename = filename[:-len(suffix)]
            names.append(filename)
    return names
# Convert the annotation belonging to every image found under rootdir.
names = image_id(rootdir)
# The loop variable must not be called "image_id": that would rebind the
# function of the same name and make it uncallable afterwards.
for name in names:
    convert_annotation(name)
如果发现图片数量比标签多,可以对两个文件名集合做差集,删除没有对应标签的图片:
#encoding:utf-8
import os
import os.path
from os import listdir, getcwd
from os.path import join
txt_dir='./label/road04_ins'  # folder scanned for the .json label files
pic_dir='./Image/road04_ins'  # folder scanned for the .jpg images
def txt(rootdir):
    """Return the names of all files under rootdir without a '.json' suffix.

    The directory tree is walked recursively with os.walk; only the file
    names (not their folders) are returned, in walk order.  A name that
    does not end in '.json' is returned unchanged.
    """
    suffix = '.json'
    names = []
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # str.strip('.json') would remove any of the characters
            # '.', 'j', 's', 'o', 'n' from BOTH ends (e.g. "person.json"
            # -> "per"), so cut off the exact suffix instead.
            if filename.endswith(suffix):
                filename = filename[:-len(suffix)]
            names.append(filename)
    return names
def pic(rootdir):
    """Return the names of all files under rootdir without a '.jpg' suffix.

    The directory tree is walked recursively with os.walk; only the file
    names (not their folders) are returned, in walk order.  A name that
    does not end in '.jpg' is returned unchanged.
    """
    suffix = '.jpg'
    names = []
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # str.strip('.jpg') would remove any of the characters
            # '.', 'j', 'p', 'g' from BOTH ends (e.g. "pig.jpg" -> "i"),
            # so cut off the exact suffix instead.
            if filename.endswith(suffix):
                filename = filename[:-len(suffix)]
            names.append(filename)
    return names
# Extension-less names of every label file and of every image file.
txt_set = set(txt(txt_dir))
pic_set = set(pic(pic_dir))

# Names that occur among the images but not among the labels.
# (Swap the operands to find labels that have no image instead.)
comp = pic_set.difference(txt_set)
print("ok")
print(len(comp))

# Delete every unlabelled image that still exists and report its path.
for item in comp:
    file = "%s/%s.jpg" % (pic_dir, item)
    if not os.path.exists(file):
        continue
    os.remove(file)
    print(file)
#for item in comp:
# file=txt_dir+'/'+item+'.json'
# if os.path.exists(file):
# os.remove(file)
# print(file)