xml2txt

#!/usr/bin/env python
# -*- coding: utf8 -*-
import os
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
import cv2
from glob import glob

XML_EXT = '.xml'
ENCODE_METHOD = 'utf-8'

class PascalVocReader:
    """Read the object annotations stored in a Pascal VOC XML file.

    The file is parsed as soon as the reader is constructed.  Every
    ``<object>`` element becomes one entry of ``self.shapes``::

        (label,
         [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)],
         filename,
         difficult)

    Parsing is best-effort: if the file cannot be read or parsed, the
    problem is reported on stderr and ``shapes`` keeps whatever was
    collected before the failure (possibly nothing).

    Attributes:
        shapes: list of shape tuples as described above.
        filepath: path of the XML file given to the constructor.
        verified: True when the root element carries ``verified="yes"``.
    """

    def __init__(self, filepath):
        """Create the reader and immediately parse *filepath*."""
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        try:
            self.parseXML()
        except Exception as err:
            # Stay best-effort (callers just get fewer/no shapes), but do not
            # hide the reason, and let KeyboardInterrupt/SystemExit through.
            sys.stderr.write("Failed to parse %s: %r\n" % (filepath, err))

    def getShapes(self):
        """Return the list of shape tuples collected while parsing."""
        return self.shapes

    def addShape(self, label, bndbox, filename, difficult):
        """Append one annotated box to ``self.shapes``.

        Args:
            label: class name of the object.
            bndbox: element with ``xmin``/``ymin``/``xmax``/``ymax`` children.
            filename: value stored as the third item of the shape tuple.
            difficult: flag stored as the fourth item of the shape tuple.
        """
        # Go through float() first so values written as "12.0" are accepted;
        # plain integer text yields exactly the same result as int().
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        # Corners in the order: top-left, top-right, bottom-right, bottom-left.
        points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
        self.shapes.append((label, points, filename, difficult))

    def parseXML(self):
        """Parse ``self.filepath`` and fill ``self.shapes`` / ``self.verified``.

        Returns:
            True once every ``<object>`` element has been processed.

        Raises:
            ValueError: if ``self.filepath`` does not end with ``XML_EXT``.
        """
        # An explicit raise instead of ``assert``: asserts vanish under ``-O``.
        if not self.filepath.endswith(XML_EXT):
            raise ValueError("Unsupport file format")
        parser = etree.XMLParser(encoding=ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        # <path> may be missing or empty; fall back to <filename> instead of
        # failing on ``None.text`` and losing every object in the file.
        path = xmltree.findtext('path') or xmltree.findtext('filename')
        self.verified = xmltree.get('verified') == 'yes'

        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text

            # <difficult> is optional; when present it holds "0" or "1".
            difficult = False
            difficult_node = object_iter.find('difficult')
            if difficult_node is not None:
                difficult = bool(int(difficult_node.text))
            self.addShape(label, bndbox, path, difficult)
        return True


# Convert every Pascal VOC XML annotation in `dirpath` into a YOLO-style
# label file ("<class_idx> <xcen> <ycen> <w> <h>" per object, all values
# normalised by the image size) written to `dirpat_txt`.

# Maps class name -> integer class index written to the label files.
classes = dict()
# Next index to hand out to a class name that is not in `classes` yet.
num_classes = 0

# Python 2 compatibility shim for the (currently disabled) input() prompts.
try:
    input = raw_input
except NameError:
    pass

# Directory with the XML annotation files
# (previously asked via input("Directory path with XML files: ")).
dirpath =  r'E:\application\labelImg_windows_v1.8.1\data\train_0_10\train_0_10_xml'
# File listing the class names (previously input("File containing classes: ")).
classes_txt =  r'E:\application\labelImg_windows_v1.8.1\data\train_0_10\classes.txt'
# Image file extension (previously input("Image file extension: ")).
ext = '.jpg'
# Directory with the images the annotations refer to.
dirpath_img = r'E:\application\labelImg_windows_v1.8.1\data\train_0_10\train_0_10_jpg'
# Directory that receives the converted txt label files.
dirpat_txt =  r'E:\application\labelImg_windows_v1.8.1\data\train_0_10\train_0_10_txt'


if os.path.isfile(classes_txt):
    with open(classes_txt, "r") as f:
        class_list = f.read().strip().split()
        classes = {k : v for (v, k) in enumerate(class_list)}
        # Continue numbering after the predefined classes; starting again at 0
        # would give an unseen class the same index as an existing one.
        num_classes = len(class_list)

filePaths = glob(dirpath_img + "/*" + ext)  # image files; not used below
xmlNames =  os.listdir(dirpath)

# open() does not create missing directories.
if not os.path.isdir(dirpat_txt):
    os.makedirs(dirpat_txt)

for xmlName in xmlNames:
    # splitext keeps dots inside the base name ("img.001.xml" -> "img.001").
    stem, suffix = os.path.splitext(xmlName)
    if suffix.lower() != ".xml":
        continue

    imgFile = os.path.join(dirpath_img, stem + ext)
    txtFile = os.path.join(dirpat_txt, stem + ".txt")
    xmlFile = os.path.join(dirpath, stem + ".xml")
    # Check before opening the output so no stray empty label file is created.
    if not os.path.isfile(xmlFile):
        continue

    tVocParseReader = PascalVocReader(xmlFile)
    shapes = tVocParseReader.getShapes()

    # The image size is the same for every box of a file: read it once, and
    # only when there is at least one box to normalise.
    width = height = None
    if shapes:
        print(imgFile)
        image = cv2.imread(imgFile)
        if image is None:
            # cv2.imread returns None instead of raising on a missing or
            # unreadable image.
            sys.stderr.write(
                "Skipping %s: cannot read image %s\n" % (xmlName, imgFile))
            continue
        height, width = image.shape[:2]

    with open(txtFile, "w") as f:
        for shape in shapes:
            class_name = shape[0]
            box = shape[1]

            if class_name not in classes:
                classes[class_name] = num_classes
                num_classes += 1
            class_idx = classes[class_name]

            # box holds the four corners; [0] is (xmin, ymin), [2] is
            # (xmax, ymax).
            coord_min = box[0]
            coord_max = box[2]

            # float() keeps the division exact under Python 2 as well.
            xcen = float((coord_min[0] + coord_max[0])) / 2 / width
            ycen = float((coord_min[1] + coord_max[1])) / 2 / height
            w = float((coord_max[0] - coord_min[0])) / width
            h = float((coord_max[1] - coord_min[1])) / height

            f.write("%d %.06f %.06f %.06f %.06f\n" % (class_idx, xcen, ycen, w, h))
            print(class_idx, xcen, ycen, w, h)

# with open(dirpath + "/classes.txt", "w") as f:
#     for key in classes.keys():
#         f.write("%s\n" % key)
#         print(key)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值