首先来了解一下voc数据集格式:
<annotation>
<folder>VOC2007</folder> #图片所在文件夹(数据集名称)
<filename>000001.jpg</filename> #图片名称
<source> #图片来源相关信息(不重要)
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
<flickrid>341012865</flickrid>
</source>
<owner> #所有者信息
<flickrid>Fried Camels</flickrid>
<name>Jinky the Fruit Bat</name>
</owner>
<size> #图像尺寸
<width>353</width>
<height>500</height>
<depth>3</depth>
</size>
<segmented>0</segmented> #是否用于分割(在目标检测任务中取0或1均可)
<object> #标注个体
<name>dog</name> #物体类别
<pose>Unspecified</pose> #拍摄角度
<truncated>1</truncated> #是否被截断(0表示完整)
<difficult>0</difficult> #目标是否难以识别(0表示容易识别)
<bndbox> #物体bbox位置,以左上角及右下角坐标表示,详细说明请查看本栏目另一篇标注文章
<xmin>48</xmin>
<ymin>240</ymin>
<xmax>195</xmax>
<ymax>371</ymax>
</bndbox>
</object>
</annotation>
再来看一下json的格式:其中label为物体类别,boxes中的四个数值依次为左上角(xmin、ymin)及右下角(xmax、ymax)的坐标
{
"shape": [
{
"label": "xxx",
"boxes": [
xxx,
xxx,
xxx,
xxx
],
"points": null
}
],
"imagePath": "xxxxxxxxxxxx.jpg"
}
代码:
import json
import os
import cv2
# Opening part of one annotation file. Placeholders, in order:
# image file name (%s), width (%d), height (%d), channel count (%d).
headstr = (
    "<annotation>\n"
    "<folder>VOC</folder>\n"
    "<filename>%s</filename>\n"
    "<source>\n"
    "<database>Database</database>\n"
    "</source>\n"
    "<size>\n"
    "<width>%d</width>\n"
    "<height>%d</height>\n"
    "<depth>%d</depth>\n"
    "</size>\n"
    "<segmented>0</segmented>\n"
)

# One <object> entry. Placeholders, in order:
# class name (%s), xmin, ymin, xmax, ymax (all %d).
objstr = (
    "<object>\n"
    "<name>%s</name>\n"
    "<pose>Unspecified</pose>\n"
    "<truncated>0</truncated>\n"
    "<difficult>0</difficult>\n"
    "<bndbox>\n"
    "<xmin>%d</xmin>\n"
    "<ymin>%d</ymin>\n"
    "<xmax>%d</xmax>\n"
    "<ymax>%d</ymax>\n"
    "</bndbox>\n"
    "</object>\n"
)

# Closing tag that ends the annotation file.
tailstr = "</annotation>\n"
def write_xml(anno_path, head, objs, tail):
    """Write one VOC-style annotation file.

    Args:
        anno_path: Destination path of the XML file; an existing file is
            overwritten.
        head: Already formatted header text, written first.
        objs: Iterable of ``[name, xmin, ymin, xmax, ymax]`` records. Each
            record is rendered through the module-level ``objstr`` template.
        tail: Closing text, written last.
    """
    # Imported locally so the block is self-contained.
    from xml.sax.saxutils import escape

    # The generated file has no XML declaration, so readers treat it as
    # UTF-8; write that encoding explicitly instead of the platform default.
    # The context manager guarantees the handle is closed even on error.
    with open(anno_path, "w", encoding="utf-8") as f:
        f.write(head)
        for obj in objs:
            # Escape &, < and > so an unusual label cannot break the XML.
            label = escape(str(obj[0]))
            f.write(objstr % (label, obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def json_to_xml(jpg_path, json_path, xml_path):
    """Convert every JSON annotation in ``json_path`` to a VOC-style XML file.

    Each ``<name>.json`` is paired with the image in ``jpg_path`` that has the
    same base name. The image is read to obtain its width, height and channel
    count, and ``<name>.xml`` is written to ``xml_path``. JSON files without a
    readable matching image are reported and skipped.

    Args:
        jpg_path: Directory containing the images.
        json_path: Directory containing the JSON annotations. Each file must
            hold a ``"shape"`` list whose items have a ``"label"`` and a
            ``"boxes"`` list ordered ``[xmin, ymin, xmax, ymax]``.
        xml_path: Output directory; created if it does not exist.
    """
    os.makedirs(xml_path, exist_ok=True)

    # Index the images by base name once, instead of rescanning the image
    # directory for every annotation. The first entry seen for a name wins.
    images = {}
    for image_file in os.listdir(jpg_path):
        image_stem, image_ext = os.path.splitext(image_file)
        if image_ext.lower() == ".json":
            # Annotations may share the directory with the images.
            continue
        images.setdefault(image_stem, image_file)

    for file in os.listdir(json_path):
        stem, ext = os.path.splitext(file)
        if ext.lower() != ".json":
            continue

        image_file = images.get(stem)
        if image_file is None:
            print("skip %s: no matching image in %s" % (file, jpg_path))
            continue

        # cv2.imread returns None instead of raising when it cannot read.
        img = cv2.imread(os.path.join(jpg_path, image_file))
        if img is None:
            print("skip %s: cannot read image %s" % (file, image_file))
            continue
        sp = img.shape
        height = sp[0]
        width = sp[1]
        # A 2-D array has no channel axis; report it as single-channel.
        depth = sp[2] if len(sp) > 2 else 1

        with open(os.path.join(json_path, file), "r", encoding="utf-8") as load_f:
            load_dict = json.load(load_f)
        shape = load_dict["shape"]

        # Use the real image file name so <filename> matches the file on disk.
        head = headstr % (image_file, width, height, depth)

        # Collect one [name, xmin, ymin, xmax, ymax] record per object.
        dataset = []
        for region in shape:
            name = region["label"]
            boxes = region["boxes"]
            xmin = boxes[0]
            ymin = boxes[1]
            xmax = boxes[2]
            ymax = boxes[3]
            print("xmin:", xmin, "\tymin:", ymin, "\txmax:", xmax, "\tymax:", ymax)
            dataset.append([name, xmin, ymin, xmax, ymax])

        # Swap only the extension; the rest of the file name is kept as is.
        write_xml(os.path.join(xml_path, stem + ".xml"), head, dataset, tailstr)
if __name__ == '__main__':
    # Script entry point: convert the JSON annotations to VOC XML files.
    json_to_xml(
        jpg_path="E:/data/img",    # directory holding the jpg images
        json_path="E:/data/json",  # directory holding the json annotations
        xml_path="E:/data/xml",    # directory receiving the generated xml files
    )
参考内容: