YOLOv5 常用数据预处理脚本


一、修改 xml 中的标签

import os
import shutil
from tqdm import tqdm
import xml.etree.ElementTree as ET
from loguru import logger

@logger.catch()
def datasets_info(source='./train/temp'):
    """Print annotation statistics for every ``.xml`` file under *source*.

    Walks *source* recursively, counts the ``.xml`` files found and tallies
    how many ``<object>`` elements carry each ``<name>`` text. The totals
    are printed to stdout; nothing is returned.

    Args:
        source: Directory to scan. Defaults to the path the script
            originally hard-coded.
    """
    instance = {}
    all_xml = 0
    for folder, _dirs, files in os.walk(source):
        for file in files:
            if not file.endswith('.xml'):
                continue
            all_xml += 1
            root = ET.parse(os.path.join(folder, file)).getroot()
            for obj in root.findall('object'):
                name_node = obj.find('name')
                if name_node is None:
                    # An <object> without <name> used to raise
                    # AttributeError and abort the whole scan; skip it.
                    continue
                name = name_node.text
                instance[name] = instance.get(name, 0) + 1

    print('total: %d' % all_xml)
    print('示例框:', instance)

def modify_label(source='./train/temp', old_name='front_whee',
                 new_name='front_wheel'):
    """Rename a label in every ``.xml`` annotation file under *source*.

    Each ``<object>`` whose ``<name>`` text equals *old_name* is changed to
    *new_name*. A file is rewritten only when at least one object changed.

    Args:
        source: Directory to scan recursively.
        old_name: Label text to replace. The original author's note lists
            ``front_whee`` and ``working_luggae_transfer_truck`` as
            misspelled labels.
        new_name: Replacement label text.
    """
    for folder, _dirs, files in os.walk(source):
        for file in files:
            if not file.endswith('.xml'):
                continue
            fp = os.path.join(folder, file)
            tree = ET.parse(fp)
            changed = False
            for obj in tree.getroot().findall('object'):
                name_node = obj.find('name')
                # Objects without a <name> child cannot match; skip them
                # instead of failing on ``None.text``.
                if name_node is None or name_node.text != old_name:
                    continue
                print('label modify: ', fp)
                name_node.text = new_name
                changed = True
            if changed:
                # The default encoding is US-ASCII, which rewrites every
                # non-ASCII character (e.g. Chinese folder/file names) as a
                # numeric character reference. Write UTF-8 to keep the text.
                tree.write(fp, encoding='utf-8')


# Script entry point: only the label rename runs when executed directly.
if __name__ == '__main__':
    modify_label()

二、移动文件以及删除空标签的xml

1、从 CVAT 下载数据后(例如 合肥机场加油.zip),解压得到的文件路径形如:
合肥机场加油/Annotations/合肥机场加油/JPEGImages/05号停机位枪机_4_0003306.xml
需要将 xml 等文件移动到“合肥机场加油/Annotations/”目录下。
2、删除没有标签(不含任何 object)的 xml 及其对应的图片。

import os
import shutil
import xml.etree.ElementTree as ET
from loguru import logger

def _hoist_file(fp, marker, label):
    """Move *fp* up into its first ancestor directory named *marker*.

    After the move, the first-level folder below the marker directory is
    removed, but only once no files remain anywhere beneath it.
    """
    parts = os.path.normpath(fp).split(os.sep)
    if marker not in parts[:-1]:
        # Not located under a marker directory: nothing to flatten.
        return
    idx = parts.index(marker)
    nested = parts[idx + 1:-1]
    if not nested:
        # Already sits directly inside the marker directory.
        return
    target_dir = os.sep.join(parts[:idx + 1])
    # Moving into a directory raises shutil.Error on a name collision
    # instead of silently overwriting an existing file.
    shutil.move(os.path.normpath(fp), target_dir)

    top = os.path.join(target_dir, nested[0])
    # Deleting this folder as soon as one deeper folder became empty would
    # destroy sibling folders that still hold files not yet moved.
    if os.path.isdir(top) and not any(f for _, _, f in os.walk(top)):
        shutil.rmtree(top)
        print('remove %s: ' % label, top)


def move_file(source='/media/deepstream/DATA/pan/hefei/2'):
    """Flatten nested image and annotation folders under *source*.

    Every ``.jpg`` found below a ``JPEGImages`` directory is moved directly
    into that directory, and every ``.xml`` found below an ``Annotations``
    directory is moved directly into that one. Nested folders are removed
    once they contain no files. Files that are already in place, or that
    are not under the corresponding directory at all, are left untouched.

    Args:
        source: Root directory to process. Defaults to the path the script
            originally hard-coded.

    Raises:
        shutil.Error: If a file of the same name already exists in the
            destination directory.
    """
    for root, _dirs, files in os.walk(source):
        for file in files:
            fp = os.path.join(root, file)
            if file.endswith('.jpg'):
                _hoist_file(fp, 'JPEGImages', 'jpg')
            elif file.endswith('.xml'):
                _hoist_file(fp, 'Annotations', 'xml')

@logger.catch()
def del_null_xml(source='./train/temp'):
    """Delete annotation files that contain no ``<object>`` plus their images.

    For every ``.xml`` under *source* with zero ``<object>`` elements, the
    XML file is removed together with the ``.jpg`` of the same stem located
    in the sibling directory obtained by replacing ``Annotations`` with
    ``JPEGImages`` in the folder path. The number of files removed per
    folder is logged.

    Args:
        source: Directory to scan recursively. Defaults to the path the
            script originally hard-coded.
    """
    for root, _dirs, files in os.walk(source):
        removed = []
        image_dir = root.replace('Annotations', 'JPEGImages')
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            # Swap only the extension: a blanket replace of 'xml' would
            # also rewrite folder or file names that contain "xml".
            jpg_name = os.path.join(image_dir,
                                    os.path.splitext(file)[0] + '.jpg')
            if ET.parse(xml_path).getroot().findall('object'):
                continue
            removed.append(file)
            logger.info(file)
            if os.path.exists(jpg_name):
                os.remove(jpg_name)
            else:
                # A missing image must not stop the empty XML from being
                # deleted, nor abort the rest of the scan.
                logger.warning('image not found: {}'.format(jpg_name))
            os.remove(xml_path)

        logger.info('{} files num: {}'.format(root, len(removed)))

# Script entry point: only the file move runs when executed directly.
if __name__ == '__main__':
    move_file()

三、根据 xml 文件给图片打马赛克(mosaic)

import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

def _mosaic(img):
    """Return *img* shrunk to 1x1 and scaled back to its original size."""
    original_size = img.size
    return img.resize((1, 1)).resize(original_size)


def mosaic(img, fx, fy, tx, ty):
    """Mask the box ``(fx, fy, tx, ty)`` of *img* and return *img*.

    The region is cropped, passed through ``_mosaic`` and pasted back onto
    the same box of the image that was passed in.
    """
    box = (fx, fy, tx, ty)
    patch = _mosaic(img.crop(box))
    img.paste(patch, box)
    return img


def read_xml(xml_path, f_path, source_dist):
    """Mask every object whose label contains ``'other'`` and drop it.

    The annotation at *xml_path* is parsed. For each ``<object>`` whose
    ``<name>`` contains ``'other'``, its ``<bndbox>`` region is masked in
    the image via ``mosaic`` and the object is removed from the XML. If at
    least one object was handled, the image is saved into *source_dist*
    under its original file name and the XML is rewritten in place.

    Args:
        xml_path: Path of the annotation file.
        f_path: Path of the image the annotation belongs to.
        source_dist: Directory the masked image is saved into.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Collect the targets first so the image is only opened when needed.
    targets = []
    for obj in root.findall('object'):
        name = obj.findtext('name') or ''
        if 'other' not in name:
            continue
        bndbox = obj.find('bndbox')
        box = tuple(round(float(bndbox.find(tag).text))
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        targets.append((obj, box))

    if not targets:
        return

    # The context manager makes sure the image file handle is released.
    with Image.open(f_path) as opened:
        img = opened
        for obj, box in targets:
            img = mosaic(img, *box)
            root.remove(obj)
        img.save(os.path.join(source_dist, os.path.basename(f_path)))

    # UTF-8 keeps non-ASCII text readable; the default (US-ASCII) would
    # turn it into numeric character references.
    tree.write(xml_path, encoding='utf-8')
    print(xml_path)

def mosaic_main(source='./train/temp'):
    """Run ``read_xml`` on every annotation/image pair under *source*.

    For each ``.xml`` file, the matching image is looked up under the same
    stem with a ``.jpg`` extension, in the folder path with ``Annotations``
    replaced by ``JPEGImages``. Pairs whose image does not exist are
    reported and skipped. The masked image is written back next to the
    original image.

    Args:
        source: Directory to scan recursively. Defaults to the path the
            script originally hard-coded.
    """
    for root, _dirs, files in os.walk(source):
        image_dir = root.replace('Annotations', 'JPEGImages')
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            # Swap only the extension: a blanket replace of 'xml' would
            # also rewrite folder or file names that contain "xml".
            f_path = os.path.join(image_dir,
                                  os.path.splitext(file)[0] + '.jpg')
            if not os.path.exists(f_path):
                # One missing image should not abort the whole batch.
                print('image not found, skip: ', f_path)
                continue
            read_xml(xml_path, f_path, os.path.dirname(f_path))


# Script entry point: run the mosaic pass when executed directly.
if __name__ == '__main__':
    mosaic_main()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值