labelme2voc

本文转自

labelme格式转VOC2007数据集格式 - 超杰 (spytensor.com)

  • import os

  • import numpy as np

  • import codecs

  • import json

  • from glob import glob

  • import cv2

  • import shutil

  • from sklearn.model_selection import train_test_split

  # 1. Paths: where the raw labelme data lives and where the VOC2007 tree goes.
  labelme_path = "./labelme/"  # directory with the labelme *.json files and their *.jpg images
  saved_path = "./VOC2007/"  # root directory of the generated VOC2007 dataset

  # 2. Create the directory skeleton the rest of the script writes into.
  # exist_ok=True replaces the separate os.path.exists() checks: there is no
  # gap between "check" and "create", and re-running the script is harmless.
  for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
      os.makedirs(saved_path + sub_dir, exist_ok=True)

  # 3. Collect the base names (no directory, no extension) of the annotations to convert.
  # os.path.basename/splitext replace split("/") and split(".json"):
  #   - on Windows glob joins the directory and file name with a backslash, so
  #     split("/")[-1] would keep part of the directory in the name;
  #   - split(".json")[0] would cut a name short if ".json" also occurred in
  #     the middle of it.
  files = [
      os.path.splitext(os.path.basename(json_path))[0]
      for json_path in glob(labelme_path + "*.json")
  ]

  # 4. Read every labelme annotation and write the matching VOC-style XML file.
  for json_file_ in files:
      json_filename = labelme_path + json_file_ + ".json"
      # Context manager so the JSON handle is closed as soon as it is parsed.
      with open(json_filename, "r", encoding="utf-8") as json_handle:
          json_file = json.load(json_handle)

      # The image is expected next to the annotation, same base name, ".jpg".
      image_filename = labelme_path + json_file_ + ".jpg"
      image = cv2.imread(image_filename)
      # cv2.imread signals a missing/unreadable file by returning None instead
      # of raising; fail here with a message that names the offending file.
      if image is None:
          raise OSError(
              "cannot read image " + image_filename
              + " referenced by annotation " + json_filename
          )
      height, width, channels = image.shape

      with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml_out:
          # Fixed header: folder, file name, source, owner and image size.
          xml_out.writelines([
              '<annotation>\n',
              '\t<folder>' + 'UAV_data' + '</folder>\n',
              '\t<filename>' + json_file_ + ".jpg" + '</filename>\n',
              '\t<source>\n',
              '\t\t<database>The UAV autolanding</database>\n',
              '\t\t<annotation>UAV AutoLanding</annotation>\n',
              '\t\t<image>flickr</image>\n',
              '\t\t<flickrid>NULL</flickrid>\n',
              '\t</source>\n',
              '\t<owner>\n',
              '\t\t<flickrid>NULL</flickrid>\n',
              '\t\t<name>ChaojieZhu</name>\n',
              '\t</owner>\n',
              '\t<size>\n',
              '\t\t<width>' + str(width) + '</width>\n',
              '\t\t<height>' + str(height) + '</height>\n',
              '\t\t<depth>' + str(channels) + '</depth>\n',
              '\t</size>\n',
              '\t\t<segmented>0</segmented>\n',
          ])

          # One <object> per labelme shape; the box is the axis-aligned extent
          # of the shape's points.
          for shape in json_file["shapes"]:
              points = np.array(shape["points"])
              xmin = min(points[:, 0])
              xmax = max(points[:, 0])
              ymin = min(points[:, 1])
              ymax = max(points[:, 1])
              label = shape["label"]

              # Skip shapes whose extent has no width or no height.
              if xmax <= xmin or ymax <= ymin:
                  continue

              xml_out.write('\t<object>\n')
              # Write the shape's own label (previously hard-coded to "bubble",
              # which collapsed every class into one).
              xml_out.write('\t\t<name>' + label + '</name>\n')
              xml_out.write('\t\t<pose>Unspecified</pose>\n')
              xml_out.write('\t\t<truncated>1</truncated>\n')
              xml_out.write('\t\t<difficult>0</difficult>\n')
              xml_out.write('\t\t<bndbox>\n')
              xml_out.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
              xml_out.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
              xml_out.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
              xml_out.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
              xml_out.write('\t\t</bndbox>\n')
              xml_out.write('\t</object>\n')
              print(json_filename, xmin, ymin, xmax, ymax, label)

          xml_out.write('</annotation>')

  # 5. Copy the source images into the JPEGImages/ directory under saved_path.
  image_files = glob(labelme_path + "*.jpg")
  # The message is built from saved_path so it always names the real target
  # (the previous literal misspelled the folder as "VOC007").
  print("copy image files to " + saved_path + "JPEGImages/")
  for image in image_files:
      shutil.copy(image, saved_path + "JPEGImages/")

  # 6. Write the split lists (trainval / train / val / test) under ImageSets/Main/.
  txtsavepath = saved_path + "ImageSets/Main/"

  # Base names of all generated annotations.
  #   - The pattern is built from saved_path instead of a hard-coded "./VOC2007/",
  #     so changing saved_path keeps this step in sync.
  #   - basename/splitext instead of split("/"), which breaks on Windows paths.
  #   - sorted() because glob order depends on the file system; without it the
  #     fixed random_state below would not give a reproducible split.
  total_files = sorted(
      os.path.splitext(os.path.basename(xml_path))[0]
      for xml_path in glob(saved_path + "Annotations/*.xml")
  )

  # The with-statement closes all four files even if a step fails; previously
  # the test.txt handle was never closed. test.txt is still created (empty).
  with open(txtsavepath + "trainval.txt", "w") as ftrainval, \
          open(txtsavepath + "test.txt", "w") as ftest, \
          open(txtsavepath + "train.txt", "w") as ftrain, \
          open(txtsavepath + "val.txt", "w") as fval:
      # trainval.txt lists every sample.
      ftrainval.writelines(name + "\n" for name in total_files)

      # No test set is written by default. To add one, point test_filepath at
      # the directory of test images and enable:
      #     for name in os.listdir(test_filepath):
      #         ftest.write(name.split(".jpg")[0] + "\n")

      # 85 % train / 15 % val, fixed seed.
      train_files, val_files = train_test_split(total_files, test_size=0.15, random_state=42)
      ftrain.writelines(name + "\n" for name in train_files)
      fval.writelines(name + "\n" for name in val_files)

注:

  1. 训练集和验证集使用 sklearn.model_selection.train_test_split 进行划分。
  2. 默认图片格式 .jpg,如果图片格式有变化,请自行修改代码中的 .jpg 名称。
  3. 默认不添加测试集,如果有需要,自行解开注释即可。

 
 
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值