yolo格式、voc格式、coco格式相互转换(xml，json，txt)

最新推荐文章于 2024-08-05 11:19:50 发布

三寸光阴___

最新推荐文章于 2024-08-05 11:19:50 发布

阅读量1.9w

点赞数 11

分类专栏： python 深度学习

本文链接：https://blog.csdn.net/qq_38109843/article/details/90783347

版权

python 同时被 2 个专栏收录

24 篇文章 1 订阅

订阅专栏

深度学习

20 篇文章 5 订阅

订阅专栏

yolo转voc

keras版yolov3训练格式是name box class这种形式，转voc格式使用一下代码，根据别人的代码改了一点。list.txt为yolo的标签，转换的voc格式的标签为.xml文件，都存放在Annotations下。

from xml.dom.minidom import Document
from lxml.etree import Element, SubElement, tostring
import pprint
from xml.dom.minidom import parseString
import cv2
class XmlMaker:

    def __init__(self,txtpath,xmlpath):
        self.txtPath = txtpath
        self.xmlPath = xmlpath
        self.txtList = []

    def readtxt(self):
        jpg = []
        txtfile = open(self.txtPath,"r",encoding='gbk',errors='ignore')
        self.txtList = txtfile.readlines()
        for i in self.txtList:
            jpg = i.strip().split(" ")[0]
            xys = i.strip().split(" ")[1:]
            #print(xys)
            node_root = Element('annotation')
            node_folder = SubElement(node_root, 'folder')
            node_folder.text = 'VOC2012'
            node_filename = SubElement(node_root, 'filename')
            node_filename.text = jpg

            img = cv2.imread(jpg)
            shape = img.shape

            node_size = SubElement(node_root, 'size')
            node_width = SubElement(node_size, 'width')

            node_width.text = str(shape[1])

            node_height = SubElement(node_size, 'height')
            node_height.text = str(shape[0])

            node_depth = SubElement(node_size, 'depth')
            node_depth.text = '3'

            for xy in xys:
                list_xy = xy.split(",")
                for tmp in list_xy:
                    x_min = list_xy[0]
                    y_min = list_xy[1]
                    x_max = list_xy[2]
                    y_max = list_xy[3]
                    classes = list_xy[4]
                    node_object = SubElement(node_root, 'object')
                    node_name = SubElement(node_object, 'name')
                    node_name.text = 'person'
                    node_difficult = SubElement(node_object, 'difficult')
                    node_difficult.text = '0'
                    node_bndbox = SubElement(node_object, 'bndbox')
                    node_xmin = SubElement(node_bndbox, 'xmin')
                    node_xmin.text = str(x_min)
                    node_ymin = SubElement(node_bndbox, 'ymin')
                    node_ymin.text = str(y_min)
                    node_xmax = SubElement(node_bndbox, 'xmax')
                    node_xmax.text = str(x_max)
                    node_ymax = SubElement(node_bndbox, 'ymax')
                    node_ymax.text = str(y_max)


            xml = tostring(node_root, pretty_print=True)  # 格式化显示，该换行的换行
            xml_name = jpg.split("/")[-1][:-4]+".xml"
            print(xml_name)
            with open(self.xmlPath+"/"+xml_name, "wb") as f:
                f.write(xml)
                f.close()

if __name__ == "__main__":
    read =XmlMaker("list.txt","Annotations")
    read.readtxt()

voc转keras版yolo

import xml.etree.ElementTree as ET
from os import getcwd

sets=[('2012', 'train'), ('2012', 'val')]

classes = ["cola","milk tea","ice tea","beer","shampoo","toothpaste","soap","pear","apple","orange"]

def convert_annotation(year, image_id, list_file):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
    tree=ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

wd = getcwd()

for year, image_set in sets:
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt'%(year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
        convert_annotation(year, image_id, list_file)
        list_file.write('\n')
    list_file.close()

u版yolo格式转voc

import os, sys
import glob
from PIL import Image
import argparse

def txtLabel_to_xmlLabel(classes_file,source_txt_path,source_img_path,save_xml_path):
    if not os.path.exists(save_xml_path):
        os.makedirs(save_xml_path)
    classes = open(classes_file).read().splitlines()
    print(classes)
    for file in os.listdir(source_txt_path):
        img_path = os.path.join(source_img_path,file.replace('.txt','.png'))
        img_file = Image.open(img_path)
        txt_file = open(os.path.join(source_txt_path,file)).read().splitlines()
        print(txt_file)
        xml_file = open(os.path.join(save_xml_path,file.replace('.txt','.xml')), 'w')
        width, height = img_file.size
        xml_file.write('<annotation>\n')
        xml_file.write('\t<folder>simple</folder>\n')
        xml_file.write('\t<filename>' + str(file) + '</filename>\n')
        xml_file.write('\t<size>\n')
        xml_file.write('\t\t<width>' + str(width) + ' </width>\n')
        xml_file.write('\t\t<height>' + str(height) + '</height>\n')
        xml_file.write('\t\t<depth>' + str(3) + '</depth>\n')
        xml_file.write('\t</size>\n')

        for line in txt_file:
            print(line)
            line_split = line.split(' ')
            x_center = float(line_split[1])
            y_center = float(line_split[2])
            w = float(line_split[3])
            h = float(line_split[4])
            xmax = int((2*x_center*width + w*width)/2)
            xmin = int((2*x_center*width - w*width)/2)
            ymax = int((2*y_center*height + h*height)/2)
            ymin = int((2*y_center*height - h*height)/2)

            xml_file.write('\t<object>\n')
            xml_file.write('\t\t<name>'+ str(classes[int(line_split[0])]) +'</name>\n')
            xml_file.write('\t\t<pose>Unspecified</pose>\n')
            xml_file.write('\t\t<truncated>0</truncated>\n')
            xml_file.write('\t\t<difficult>0</difficult>\n')
            xml_file.write('\t\t<bndbox>\n')
            xml_file.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
            xml_file.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
            xml_file.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
            xml_file.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
            xml_file.write('\t\t</bndbox>\n')
            xml_file.write('\t</object>\n')
        xml_file.write('</annotation>')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--classes_file', type=str, default="person.names")
    parser.add_argument('--source_txt_path', type=str, default="")
    parser.add_argument('--source_img_path', type=str, default="")
    parser.add_argument('--save_xml_path', type=str, default="")
    opt = parser.parse_args()

    txtLabel_to_xmlLabel(opt.classes_file,opt.source_txt_path,opt.source_img_path,opt.save_xml_path)

voc转u版yolo

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import sys

classes = ['person']  
 
def convert(size, box):
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)
 
def convert_annotation(image_id,xml_path):
    in_file = open(xml_path+'%s.xml'%(image_id))
    out_file = open(sys.argv[4]+'%s.txt'%(image_id), 'w')
    tree=ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
 
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

if __name__ == "__main__":
    
    xml_path = sys.argv[1]
    xml_names = os.listdir(xml_path)
 
    list_file = open(sys.argv[2], 'w')
    print(list_file)
    for xml_name in xml_names:
        img_name = xml_name.replace(".xml",".png")    
        list_file.write(sys.argv[3]+'%s\n'%img_name)
        image_id = img_name[:-4]
        convert_annotation(image_id,xml_path)
        print(image_id)
    list_file.close()

coco转keras版yolo

import json
from collections import defaultdict

name_box_id = defaultdict(list)
id_name = dict()
f = open(
    "coco/annotations/instances_train2014.json",
    encoding='utf-8')
data = json.load(f)

annotations = data['annotations']
for ant in annotations:
    id = ant['image_id']
    name = 'coco/train2014/COCO_train2014_%012d.jpg' % id
    cat = ant['category_id']

    if cat >= 1 and cat <= 11:
        cat = cat - 1
    elif cat >= 13 and cat <= 25:
        cat = cat - 2
    elif cat >= 27 and cat <= 28:
        cat = cat - 3
    elif cat >= 31 and cat <= 44:
        cat = cat - 5
    elif cat >= 46 and cat <= 65:
        cat = cat - 6
    elif cat == 67:
        cat = cat - 7
    elif cat == 70:
        cat = cat - 9
    elif cat >= 72 and cat <= 82:
        cat = cat - 10
    elif cat >= 84 and cat <= 90:
        cat = cat - 11

    name_box_id[name].append([ant['bbox'], cat])

f = open('train.txt', 'w')
for key in name_box_id.keys():
    f.write(key)
    box_infos = name_box_id[key]
    for info in box_infos:
        x_min = int(info[0][0])
        y_min = int(info[0][1])
        x_max = x_min + int(info[0][2])
        y_max = y_min + int(info[0][3])

        box_info = " %d,%d,%d,%d,%d" % (
            x_min, y_min, x_max, y_max, int(info[1]))
        f.write(box_info)
    f.write('\n')
f.close()

coco2017抽取person类并转为u版yolo格式

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

#the path you want to save your results for coco to voc
savepath="../coco_person/"
img_dir=savepath+'images/train2017/'
anno_dir=savepath+'Annotations/train2017/'
# datasets_list=['train2014', 'val2014']
# datasets_list=['train2014']
datasets_list=['train2017']
classes_names = ["person"] 

#Store annotations and train2014/val2014/... in this folder
dataDir= '/mldb/dataset/COCO/'  

headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = '''\
</annotation>
'''

#if the dir is not exists,make it,else delete it
def mkr(path):
    if os.path.exists(path):
        shutil.rmtree(path)
        os.mkdir(path)
    else:
        os.mkdir(path)
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    classes=dict()
    for cls in coco.dataset['categories']:
        classes[cls['id']]=cls['name']
    return classes

def write_xml(anno_path,head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
    f.write(tail)


def save_annotations_and_imgs(coco,dataset,filename,objs):
    #eg:COCO_train2014_000000196610.jpg-->COCO_train2014_000000196610.xml
    anno_path=anno_dir+filename[:-3]+'xml'
    img_path=dataDir+dataset+'/'+filename
    # print(img_path)
    dst_imgpath=img_dir+filename
    print(img_path)

    img=cv2.imread(img_path)
    # print(img)
    if (img.shape[2] == 1):
        print(filename + " not a RGB image")
        return

    #shutil.copy(img_path, dst_imgpath)

    head=headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path,head, objs, tail)


def showimg(coco,dataset,img,classes,cls_id,show=True):
    global dataDir
    #I=Image.open('%s/%s/%s/%s'%(dataDir,'images',dataset,img['file_name']))
    #Get the annotated information by ID
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    # print(annIds)
    anns = coco.loadAnns(annIds)
    # print(anns)
    # coco.showAnns(anns)
    objs = []
    for ann in anns:
        class_name=classes[ann['category_id']]
        if class_name in classes_names:
            #print(class_name)
            if 'bbox' in ann:
                bbox=ann['bbox']
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                # draw = ImageDraw.Draw(I)
                # draw.rectangle([xmin, ymin, xmax, ymax])
    if show:
        plt.figure()
        plt.axis('off')
        plt.imshow(I)
        plt.show()

    return objs

for dataset in datasets_list:
    #./COCO/annotations/instances_train2014.json
    annFile='{}/annotations/instances_{}.json'.format(dataDir,dataset)

    #COCO API for initializing annotated data
    coco = COCO(annFile)
    '''
    When the COCO object is created, the following information will be output:
    loading annotations into memory...
    Done (t=0.81s)
    creating index...
    index created!
    So far, the JSON script has been parsed and the images are associated with the corresponding annotated data.
    '''
    #show all classes in coco
    classes = id2name(coco)
    #print(classes)
    #[1, 2, 3, 4, 6, 8]
    classes_ids = coco.getCatIds(catNms=classes_names)
    #print(classes_ids)
    # exit()
    for cls in classes_names:
        #Get ID number of this class
        cls_id=coco.getCatIds(catNms=[cls])
        img_ids=coco.getImgIds(catIds=cls_id)
        #print(cls,len(img_ids))
        # imgIds=img_ids[0:10]
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            # print(filename)
            objs=showimg(coco, dataset, img, classes,classes_ids,show=False)
            #print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

过滤没有object的

import os

Dir = '../coco_person/Annotations/train2017'
ImageDir = '../coco_person/images/train2017'
cnt = 0
for i, file_name in enumerate(os.listdir(Dir)):
    fsize = os.path.getsize(os.path.join(Dir,file_name))
    if fsize == 410:
        print('removing {} of size{}'.format(file_name,fsize))
        os.remove(os.path.join(Dir, file_name))
        cnt += 1

print('remove {} files'.format(cnt))

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
 
 
classes = ['person']  
 
 
 
def convert(size, box):
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)
 
def convert_annotation(image_id):
    in_file = open('../coco_person/Annotations/train2017/%s.xml'%(image_id))
    out_file = open('../coco_person/labels/train2017/%s.txt'%(image_id), 'w')
    tree=ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
 
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
 
 
data_path = '../coco_person/Annotations/train2017/'
xml_names = os.listdir(data_path)
 
list_file = open('../coco_person/class_train.txt', 'w')
for xml_name in xml_names:
    if not os.path.exists('../coco_person/labels/train2017'):
        os.makedirs('../coco_person/labels/train2017')
    img_name = xml_name.replace(".xml",".jpg")
 
    list_file.write('/mldb/dataset/COCO/train2017/%s\n'%img_name)
    image_id = img_name[:-4]
    convert_annotation(image_id)
    print(image_id)
 
list_file.close()