labelimg 相关知识、数据处理脚本(VOC、XML)

👨‍💻个人简介: 深度学习图像领域工作者
🎉总结链接:
             链接中主要是个人工作的总结,每个链接都是一些常用demo,代码直接复制运行即可。包括:
                    📌1.工作中常用深度学习脚本
                    📌2.torch、numpy等常用函数详解
                    📌3.opencv 图片、视频等操作
                    📌4.个人工作中的项目总结(纯干货)
              链接: https://blog.csdn.net/qq_28949847/article/details/128552785
🎉视频讲解: 以上内容已在B站等平台发布了视频讲解,可搜索 ‘Python图像识别’ 进行观看
              B站:Python图像识别
              抖音:Python图像识别
              西瓜视频:Python图像识别


github网址:https://github.com/heartexlabs/labelImg

相关脚本

1. 筛选出没有标签xml文件的jpg图片(xml 和 jpg 必须对应)


import os
import shutil


# Move every .jpg that has no matching .xml annotation into a separate
# folder, so that only labelled images stay in the dataset.

# Directory holding the .xml annotation files
path_xml = r'\\SHARE\public\Time_Plus\traindata\bbox\tiaoshui3m\tiaoshui3m'
# Directory holding the .jpg images (here the same folder as the annotations)
path_img = path_xml
# Directory that receives the images without an annotation
save_result = r'./results'


def move_unlabeled_images(img_dir, xml_dir, dst_dir):
    """Move images without an annotation file into ``dst_dir``.

    ``img_dir`` is walked recursively.  For every ``.jpg`` found, the
    annotation is expected at the same relative location below ``xml_dir``
    with the suffix ``.xml``.  Images whose annotation does not exist are
    moved into ``dst_dir`` (created on demand).

    Args:
        img_dir: Root directory that contains the images.
        xml_dir: Root directory that contains the annotations; may be the
            same directory as ``img_dir``.
        dst_dir: Directory that receives the unlabelled images.

    Returns:
        List with the destination path of every image that was moved.
    """
    moved = []
    dst_abs = os.path.abspath(dst_dir)
    for root, dirs, files in os.walk(img_dir):
        # Never walk into the destination folder itself, otherwise images
        # that were just moved there would be examined again.
        dirs[:] = [d for d in dirs
                   if os.path.abspath(os.path.join(root, d)) != dst_abs]
        # Mirror the sub-directory of the image when looking for its label.
        rel_dir = os.path.relpath(root, img_dir)
        for file in files:
            file_name, file_suffix = os.path.splitext(file)
            if file_suffix.lower() != '.jpg':
                continue
            print(file_name)
            file_xml = os.path.normpath(
                os.path.join(xml_dir, rel_dir, file_name + '.xml'))
            if os.path.exists(file_xml):
                continue
            # Create the destination lazily: nothing is created when every
            # image is labelled.
            os.makedirs(dst_dir, exist_ok=True)
            target = os.path.join(dst_dir, file)
            shutil.move(os.path.join(root, file), target)
            moved.append(target)
    return moved


if __name__ == '__main__':
    move_unlabeled_images(path_img, path_xml, save_result)

2. 删除掉xml中没有标签的图片

import os
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element

# Delete annotation files that carry no usable label, together with the
# image that belongs to them.

# Directory holding the .xml annotation files
path_xml = r'\\SHARE\public\Time_Plus\traindata\bbox\tiaoyuan\20230418_gjx'
# Directory holding the matching .jpg images
path_jpg = r'\\SHARE\public\Time_Plus\traindata\bbox\tiaoyuan\20230418_gjx'


def _first_xmin(xml_root):
    """Return the <xmin> element of the first <object>, or None if absent."""
    first_obj = xml_root.find('object')
    if first_obj is None:
        return None
    bndbox = first_obj.find('bndbox')
    if bndbox is None:
        return None
    return bndbox.find('xmin')


def remove_unlabeled(xml_dir, jpg_dir):
    """Remove annotation files without a label and their images.

    An annotation counts as unlabelled when its first ``<object>`` element
    is missing or has no ``<bndbox>/<xmin>`` child.  For such a file the
    ``.xml`` is deleted, and the ``.jpg`` with the same base name at the
    same relative location below ``jpg_dir`` is deleted when it exists.

    Args:
        xml_dir: Root directory that is walked recursively for ``.xml``.
        jpg_dir: Root directory that contains the images.

    Returns:
        List of the annotation paths that were removed.

    Raises:
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
    """
    removed = []
    for root, _dirs, files in os.walk(xml_dir):
        rel_dir = os.path.relpath(root, xml_dir)
        for file in files:
            filename, extension = os.path.splitext(file)
            if extension.lower() != '.xml':
                continue
            xml_file = os.path.join(root, file)
            xmin = _first_xmin(ET.parse(xml_file).getroot())
            if xmin is not None:
                print(xmin.text)
                continue

            print('移除文件:', xml_file)
            os.remove(xml_file)
            removed.append(xml_file)
            jpg_file = os.path.normpath(
                os.path.join(jpg_dir, rel_dir, filename + '.jpg'))
            # The image may already be gone; that must not abort the run.
            if os.path.exists(jpg_file):
                os.remove(jpg_file)
    return removed


if __name__ == '__main__':
    remove_unlabeled(path_xml, path_jpg)

3. 统计文件夹下xml中所有label的数量,以及xml文件的数量

# -*- coding:utf-8 -*-
import os
import xml.etree.ElementTree as ET
import numpy as np

# Directory holding the .xml annotation files to be counted
xml_path = r'D:\lg\BaiduSyncdisk\project\person_code\project_self\14_traffic_sign_detect\data\traffic_sign_1\Annotations'


def count_labels(xml_dir):
    """Count label occurrences in all annotation files below ``xml_dir``.

    The directory is walked recursively.  In every ``.xml`` file each
    ``<object>`` element contributes one count for the text of its
    ``<name>`` child; objects without a ``<name>`` child are ignored.

    Args:
        xml_dir: Root directory that contains the annotation files.

    Returns:
        A tuple ``(label_counts, file_count)``: a dict mapping each label
        to the number of objects carrying it, and the number of ``.xml``
        files that were parsed.

    Raises:
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
    """
    label_counts = {}
    file_count = 0
    for root, _dirs, files in os.walk(xml_dir):
        for file in files:
            if os.path.splitext(file)[1].lower() != '.xml':
                continue
            tree = ET.parse(os.path.join(root, file))
            for obj in tree.findall('object'):
                name_node = obj.find('name')
                if name_node is None:
                    continue
                cls = name_node.text
                label_counts[cls] = label_counts.get(cls, 0) + 1
            file_count += 1
    return label_counts, file_count


if __name__ == '__main__':
    dic_class, xml_num = count_labels(xml_path)

    # Assign a running index to every label, in order of first appearance.
    new_dict = {key: index for index, key in enumerate(dic_class)}

    for key, value in new_dict.items():
        print(value, ': ', key)
    print('各类标签数量为:', dic_class)
    print('类别数量:', len(dic_class))
    print('总图片数量为:', xml_num)
    print('对应:', new_dict)


4. 修改xml文件中的某些值(打标签过程中,难免会有些标签标错,可以用程序直接修改)




import os
import xml.etree.ElementTree as ET


# Directory holding the .xml annotation files to be corrected
path_xml = r'\\SHARE\public\Time_Plus\traindata\bbox\dangang\20230420_zxl'
# Directory the corrected files are written to (same folder = overwrite)
save_path = r'\\SHARE\public\Time_Plus\traindata\bbox\dangang\20230420_zxl'
# Wrong label -> correct label
label_fixes = {'personf': 'person'}


def fix_labels(xml_dir, out_dir, fixes):
    """Rewrite wrong label names in all annotation files below ``xml_dir``.

    Every ``.xml`` file is parsed, the text of each ``<object>/<name>``
    element found in ``fixes`` is replaced by the mapped value, and the
    tree is written to the same relative location below ``out_dir``.
    Files without any wrong label are written as well, so ``out_dir``
    ends up with a complete copy of the annotations.

    Args:
        xml_dir: Root directory that is walked recursively for ``.xml``.
        out_dir: Root directory that receives the written files.
        fixes: Dict mapping a wrong label to its replacement.

    Returns:
        Number of ``<name>`` elements that were changed.

    Raises:
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
    """
    changed = 0
    for root, _dirs, files in os.walk(xml_dir):
        # Keep the directory layout of the input below out_dir.
        target_dir = os.path.normpath(
            os.path.join(out_dir, os.path.relpath(root, xml_dir)))
        for file in files:
            if os.path.splitext(file)[1].lower() != '.xml':
                continue
            tree = ET.parse(os.path.join(root, file))
            for obj in tree.findall('object'):
                name_node = obj.find('name')
                if name_node is None:
                    continue
                cls = name_node.text
                if cls in fixes:
                    name_node.text = fixes[cls]
                    print(file, cls, '错误标签')
                    changed += 1

            os.makedirs(target_dir, exist_ok=True)
            tree.write(os.path.join(target_dir, file),
                       encoding="utf-8", xml_declaration=True)
    return changed


if __name__ == '__main__':
    fix_labels(path_xml, save_path, label_fixes)

5. 将图片和标签分成JPEGImages和Annotations保存

import os
import shutil


# Sort images and label files into the VOC layout: .jpg files go to a
# "JPEGImages" folder, .xml / .json files go to an "Annotations" folder.
#
# path_ori is the main directory; it contains several dataset folders.
path_ori = r'\\SHARE\public\Time_Plus\traindata\bbox'

# Names of the folders created by this script
_OUTPUT_DIRS = ("JPEGImages", "Annotations")


def organize_voc_dirs(base_dir):
    """Move images and labels into ``JPEGImages`` / ``Annotations`` folders.

    Every entry of ``base_dir`` is walked recursively.  In each directory
    that contains files, the two folders are created next to those files
    and the ``.jpg`` files are moved into ``JPEGImages`` while ``.xml`` and
    ``.json`` files are moved into ``Annotations``.  Other files stay where
    they are.  Existing output folders are reused and never descended
    into, so the function can be run again on the same tree.

    Args:
        base_dir: Main directory whose sub-directories are processed.

    Returns:
        Number of files that were moved.

    Raises:
        FileNotFoundError: If ``base_dir`` does not exist.
    """
    moved = 0
    # Walk each dataset folder on its own.
    for entry in os.listdir(base_dir):
        for root, dirs, files in os.walk(os.path.join(base_dir, entry)):
            # Skip output folders from an earlier run; walking them would
            # nest a second JPEGImages/Annotations level inside them.
            dirs[:] = [d for d in dirs if d not in _OUTPUT_DIRS]
            if not files:
                continue
            imgs_dir = os.path.join(root, "JPEGImages")
            labs_dir = os.path.join(root, "Annotations")
            os.makedirs(imgs_dir, exist_ok=True)
            os.makedirs(labs_dir, exist_ok=True)

            for file in files:
                extension = os.path.splitext(file)[1].lower()
                if extension in ('.xml', '.json'):
                    target_dir = labs_dir
                elif extension == '.jpg':
                    target_dir = imgs_dir
                else:
                    continue
                shutil.move(os.path.join(root, file), target_dir)
                moved += 1
    return moved


if __name__ == '__main__':
    organize_voc_dirs(path_ori)


6. 分为 train 和 test文件夹

import os
import shutil
import random

# Main directory; each sub-directory is a dataset with the two folders
# "JPEGImages" and "Annotations".
path = r'D:\lg\BaiduSyncdisk\project\moving_object_tracker\data\traindata\traindata\1'

# Folder names that are never descended into while searching for datasets
_SKIP_DIRS = ("JPEGImages", "Annotations", "train", "test")


def _split_one(dataset_dir, scale):
    """Split one dataset folder; return ``(train_count, test_count)``."""
    imgs_dir = os.path.join(dataset_dir, "JPEGImages")
    labs_dir = os.path.join(dataset_dir, "Annotations")

    img_train = os.path.join(dataset_dir, "train", "JPEGImages")
    lab_train = os.path.join(dataset_dir, "train", "Annotations")
    img_val = os.path.join(dataset_dir, "test", "JPEGImages")
    lab_val = os.path.join(dataset_dir, "test", "Annotations")
    for out_dir in (img_train, img_val, lab_train, lab_val):
        os.makedirs(out_dir, exist_ok=True)

    # Sort first so the result depends only on the random state, not on
    # the order in which the file system lists the labels.
    labs_list = sorted(os.listdir(labs_dir))
    random.shuffle(labs_list)
    lens = len(labs_list)

    counts = [0, 0]
    for i, lab_name in enumerate(labs_list):
        file_name, _ext = os.path.splitext(lab_name)
        img_path = os.path.join(imgs_dir, file_name + '.jpg')
        lab_path = os.path.join(labs_dir, lab_name)
        if not os.path.exists(img_path):
            # Leave a label without image in place instead of aborting
            # with a half-finished split.
            print('skip (no image):', lab_path)
            continue
        if i < lens * scale:  # training part
            img_dst, lab_dst, slot = img_train, lab_train, 0
        else:
            img_dst, lab_dst, slot = img_val, lab_val, 1
        shutil.move(img_path, os.path.join(img_dst, file_name + ".jpg"))
        # Keep the label's own file name (and therefore its suffix).
        shutil.move(lab_path, os.path.join(lab_dst, lab_name))
        counts[slot] += 1

    # Remove the source folders once they are empty.
    for src_dir in (imgs_dir, labs_dir):
        if not os.listdir(src_dir):
            os.rmdir(src_dir)
    return counts[0], counts[1]


def split_train_test(base_dir, scale=0.9):
    """Split every dataset below ``base_dir`` into train and test parts.

    Each entry of ``base_dir`` is walked recursively.  A directory that
    contains both a ``JPEGImages`` and an ``Annotations`` folder is treated
    as a dataset: its label files are shuffled, the first ``scale`` share
    is moved to ``train/`` and the rest to ``test/`` (each with its own
    ``JPEGImages`` and ``Annotations`` folder), together with the ``.jpg``
    of the same base name.  The emptied source folders are removed.

    Args:
        base_dir: Main directory whose sub-directories are searched.
        scale: Share of the labels that goes into the training part.

    Returns:
        Dict mapping each processed dataset directory to its
        ``(train_count, test_count)`` tuple.

    Raises:
        FileNotFoundError: If ``base_dir`` does not exist.
    """
    results = {}
    for entry in os.listdir(base_dir):
        for root, dirs, _files in os.walk(os.path.join(base_dir, entry)):
            is_dataset = 'JPEGImages' in dirs and 'Annotations' in dirs
            # Neither the source nor the output folders hold datasets.
            dirs[:] = [d for d in dirs if d not in _SKIP_DIRS]
            if not is_dataset:
                continue
            print(root)
            results[root] = _split_one(root, scale)
    return results


if __name__ == '__main__':
    split_train_test(path, scale=0.9)

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Python图像识别

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值