Python 中的 XML 操作

提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档


前言

提示:本文主要介绍如何用 Python 操作 XML 标注文件

最近在训练模型时经常需要处理 XML 标注文件,为了方便以后复用,在此记录相关脚本。

import traceback
import xml.etree.ElementTree as ET
import os.path as osp
import os
import shutil
from pathlib import Path
from tqdm import tqdm

一、修改xml里的图片名和label

def rename_xml(xml_path, new_name, new_Annotations):
    """Point an annotation XML at a renamed image and normalise its labels.

    The ``filename`` element is set to ``new_name`` and ``folder`` to
    ``'JPEGImages'``.  Direct ``object`` children are then filtered by the
    text of their ``name`` element:

    * ``person`` / ``headshoulder`` are kept unchanged,
    * ``head_shoulder`` is rewritten to ``headshoulder``,
    * anything else is removed (the dropped label is printed).

    If at least one object remains, the tree is written to
    ``new_Annotations`` under ``new_name`` with its extension replaced by
    ``.xml``; otherwise ``'no labels !'`` is printed and nothing is written.

    Args:
        xml_path: Path of the annotation file to read.
        new_name: New image file name (e.g. ``'000001.jpg'``).
        new_Annotations: Existing directory that receives the rewritten XML.

    Returns:
        None.  Failures are reported on stdout (traceback plus the offending
        path) rather than raised, so a batch run keeps going.
    """
    try:
        # Let ElementTree open the file itself: it reads bytes (honouring the
        # XML encoding declaration) and closes the handle when done.
        tree = ET.parse(xml_path)
        root = tree.getroot()
        root.find('filename').text = new_name
        root.find('folder').text = 'JPEGImages'

        # findall() returns a list snapshot, so removing children below cannot
        # make the loop skip the sibling that follows a removed element.
        for obj in root.findall('object'):
            name_node = obj.find('name')
            label = name_node.text
            if label in ('person', 'headshoulder'):
                continue
            if label == 'head_shoulder':
                name_node.text = 'headshoulder'
            else:
                root.remove(obj)
                print(label)

        if root.findall('object'):
            # Swap only the extension; str.replace('jpg', 'xml') would also
            # rewrite a "jpg" that appears inside the stem.
            xml_name = osp.splitext(new_name)[0] + '.xml'
            tree.write(osp.join(new_Annotations, xml_name))
        else:
            print('no labels !')
    except Exception:
        # Best-effort batch helper: report and continue, but show the cause.
        traceback.print_exc()
        print(xml_path)

二、删除xml里的多余label

def delete_label(xml_path, labels, save_dir='/temp'):
    """Strip unwanted objects from an annotation XML and save it if complete.

    Direct ``object`` children are filtered by the text of their ``name``
    element: any label containing ``'shoulder'`` is rewritten to
    ``'headshoulder'``, ``'person'`` is kept, everything else is removed.
    The cleaned tree is written to ``save_dir`` (same file name as the
    input) only when both classes, ``headshoulder`` and ``person``, are
    present.

    Args:
        xml_path: Path of the annotation file to read.
        labels: Value handed back unchanged when the file has no objects or
            cannot be processed.
        save_dir: Output directory; created if missing.  Defaults to the
            previously hard-coded ``'/temp'``.

    Returns:
        A list of the distinct (normalised) class names kept from this file,
        or ``labels`` when the file has no ``object`` elements or processing
        failed.

    Raises:
        OSError: If ``xml_path`` cannot be opened.
    """
    # Binary mode lets the XML parser honour the file's own encoding
    # declaration; the context manager closes the handle on every exit path.
    with open(xml_path, 'rb') as in_file:
        try:
            tree = ET.parse(in_file)
            root = tree.getroot()
            objs = root.findall('object')
            if not objs:
                return labels

            kept = set()
            for obj in objs:
                name_node = obj.find('name')
                label = name_node.text
                if 'shoulder' in label:
                    name_node.text = 'headshoulder'
                    # Record the normalised name so different spellings of the
                    # same class are not counted as two classes below.
                    kept.add('headshoulder')
                elif label == 'person':
                    kept.add(label)
                else:
                    root.remove(obj)

            os.makedirs(save_dir, exist_ok=True)
            # Two distinct classes means both headshoulder and person remain.
            if len(kept) == 2:
                tree.write(str(Path(save_dir) / Path(xml_path).name))
            return list(kept)
        except Exception:
            # Best-effort batch helper: report the failure and keep going.
            traceback.print_exc()
            print(xml_path)
            return labels


def run(root='/media/VOC2012'):
    """Clean the annotation of every image under ``root`` and report the count.

    For each entry in ``<root>/JPEGImages`` the annotation with the same stem
    is looked up in ``<root>/Annotations`` and, if it exists, passed to
    ``delete_label``.  Afterwards the number of entries in the output
    directory is printed.

    Args:
        root: Dataset directory containing ``JPEGImages`` and ``Annotations``.
            Defaults to the previously hard-coded location.
    """
    JPEGImages = osp.join(root, 'JPEGImages')
    Annotations = osp.join(root, 'Annotations')
    labels = []
    for jpg_file in tqdm(os.listdir(JPEGImages)):
        # Swap only the extension; str.replace('jpg', 'xml') would also
        # rewrite a "jpg" that appears inside the stem.
        xml_name = osp.splitext(jpg_file)[0] + '.xml'
        xml_path = osp.join(Annotations, xml_name)
        if osp.exists(xml_path):
            labels = delete_label(xml_path, labels)

    # Count the files where delete_label saves them by default ('/temp');
    # the old hard-coded path pointed at an unrelated machine-specific folder.
    p = '/temp'
    # The directory is only created once a file with objects was processed.
    size = len(os.listdir(p)) if osp.isdir(p) else 0
    print('all num: ', size)


def mv_jpg(root='/VOC2012/', jpg_out='/JPEGImages_new'):
    """Copy the images that have a cleaned annotation into ``jpg_out``.

    For every file in ``<root>/temp`` the image with the same stem is looked
    up in ``<root>/JPEGImages`` and, if present, copied (not moved, despite
    the function name) into ``jpg_out``.

    Args:
        root: Dataset directory containing ``JPEGImages`` and ``temp``.
            Defaults to the previously hard-coded location.
        jpg_out: Destination directory; created if missing.  Defaults to the
            previously hard-coded location.
    """
    JPEGImages = osp.join(root, 'JPEGImages')
    Annotations = osp.join(root, 'temp')
    # shutil.copy does not create missing parent directories.
    os.makedirs(jpg_out, exist_ok=True)
    for xml_file in tqdm(os.listdir(Annotations)):
        # Swap only the extension; str.replace('xml', 'jpg') would also
        # rewrite an "xml" that appears inside the stem.
        jpg_name = osp.splitext(xml_file)[0] + '.jpg'
        jpg_path = osp.join(JPEGImages, jpg_name)
        if osp.exists(jpg_path):
            shutil.copy(jpg_path, osp.join(jpg_out, jpg_name))

# Script entry point: only the label clean-up pass (run) is executed here;
# rename_xml and mv_jpg are not called and have to be invoked separately.
if __name__ == '__main__':
    run()

2.读入数据

代码如下(示例):

# Sample snippet: download a CSV over HTTP and print its first rows.
# pandas is imported here because nothing else in this file brings `pd`
# into scope.
import pandas as pd

data = pd.read_csv(
    'https://labfile.oss.aliyuncs.com/courses/1283/adult.data.csv')
print(data.head())

此处的数据是通过 URL 网络请求获取的。


总结

提示:这里对文章进行总结:
以上就是今天要讲的内容,本文简单记录了用 Python 的 xml.etree.ElementTree 处理 XML 标注文件的几个脚本:修改图片名和 label、删除多余 label,以及拷贝对应的图片。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值