VOC2012数据集的数据量感觉不太够,因此我把部分COCO数据集转换成了VOC格式。我做的是目标检测任务,所以主要处理的是annotation部分。
因为VOC格式中需要的数据其实主要是图片id、bbox数据以及分类信息,所以我先从COCO数据集的json文件中截取出了我需要的内容。
下面代码中的 className 列出的就是COCO与VOC重合的类别,共20类。
import json
# Maps a COCO category id to the PASCAL VOC class name, for the 20 classes
# the two datasets have in common. Names use the VOC spelling (one word, e.g.
# "aeroplane", "diningtable", "pottedplant"), not the COCO spelling, because
# the output is meant to be consumed as VOC-format annotations.
className = {
    1: 'person',
    16: 'bird',
    17: 'cat',
    21: 'cow',
    18: 'dog',
    19: 'horse',
    20: 'sheep',
    5: 'aeroplane',
    2: 'bicycle',
    9: 'boat',
    6: 'bus',
    3: 'car',
    4: 'motorbike',
    7: 'train',
    44: 'bottle',
    62: 'chair',
    67: 'diningtable',
    64: 'pottedplant',
    63: 'sofa',
    72: 'tvmonitor',
}
# Ascending list of the category ids to keep. Derived from className so the
# two can never drift apart when a class is added or removed.
classNum = sorted(className)
def writeNum(Num):
    """Append the text form of ``Num`` to ``COCO_train.json``.

    The file is opened in append mode, so it is created if missing and
    repeated calls accumulate. No separator or newline is added between
    writes.

    Args:
        Num: Any value; it is converted with ``str()`` before being written.
    """
    # Plain append mode is enough: the file is never read back here.
    with open("COCO_train.json", "a", encoding="utf-8") as f:
        f.write(str(Num))
# with open("instances_val2014.json","r+") as f:
# data = json.load(f)
# annData = data["annotations"]
# print(annData[0])
# for x in annData[0]:
# if(x == "image_id"):
# print(type(x))
# print(x+ ":" + str(annData[0][x]))
# if (x == "image_id" or x == "bbox" or x == "category_id"):
# print(x + ":" + annData[0][x])
# if (x == "image_id" or x == "bbox" or x == "category_id"):
# print(x+ ":" + annData[0][x])
# with open("test.json","w") as f:
# json.dump(annData, f, ensure_ascii=False)
# Accumulators for the extracted records (presumably filled by the loop that
# follows this section).
inputfile = []
inner = {}
# Load the COCO training annotation file. It is only read here, so it is
# opened read-only; "r+" would demand write permission on the dataset file
# for no reason. (The earlier note about writing to test.json did not
# describe this step.)
with open("instances_train2014.json", "r", encoding="utf-8") as f:
    allData = json.load(f)
# Only the "annotations" list is needed from the parsed JSON.
data = allData["annotations"]
# Debug output: show one sample annotation and confirm that loading finished.
print(data[1])
print("read ready")
for i in data:
if(i[