提取xml文件中某个特征对应的图片

Aix959

已于 2023-04-07 09:52:57 修改

阅读量243

点赞数

文章标签： python 深度学习

于 2023-04-06 17:41:00 首次发布

本文链接：https://blog.csdn.net/weixin_71370467/article/details/129995208

版权

"""
 提取xml文件中某个特征对应的图片
    分标签写入txt
    分标签存图
"""
import os
import xml.etree.ElementTree as ET
import shutil


# Collect, for every requested label, the image paths recorded in the xml files
def find_label_path(path, label_list):
    """
    Scan a directory of xml annotation files and group image paths by label.

    Every ``<object>`` element whose ``<name>`` text is one of ``label_list``
    contributes the text of every ``<path>`` element found in the same
    document, so an image is listed once per matching object.

    :param path:        directory containing the xml files (a trailing
                        separator is optional)
    :param label_list:  labels to look for
    :return:            {label1: [image path, ...], label2: [image path, ...]}
    :raises xml.etree.ElementTree.ParseError: if an xml file is malformed
    """
    label_path_dict = {label: [] for label in label_list}  # label -> image paths
    # Sorted so the result does not depend on the arbitrary os.listdir order.
    for file_name in sorted(os.listdir(path)):
        # splitext copes with names that contain no dot or several dots,
        # which indexing into file_name.split('.') does not.
        if os.path.splitext(file_name)[1].lower() != '.xml':
            continue
        xml_file = os.path.join(path, file_name)
        if not os.path.isfile(xml_file):  # e.g. a directory named "x.xml"
            continue
        # Parsing by file name lets ElementTree open the file in binary mode,
        # honour the encoding declared in the document and close it afterwards.
        root = ET.parse(xml_file).getroot()
        # Empty <path/> elements carry no usable image path, so they are skipped.
        img_paths = [node.text for node in root.iter('path') if node.text]
        for obj in root.iter('object'):
            label = obj.findtext('name')  # None when <name> is missing
            if label in label_path_dict:
                label_path_dict[label].extend(img_paths)
    return label_path_dict


# Write the image paths of every label into <save_path>/<label>.txt
def run_txt(path, label_list, save_path):
    """
    Write one ``<label>.txt`` file per label with a ``label,image_path`` line
    for every image path found for that label.

    An existing txt file of a requested label is overwritten, so lines from a
    previous run never survive. No file is created for a label that has no
    image path and no existing txt file.

    :param path:        directory containing the xml files
    :param label_list:  labels to look for
    :param save_path:   directory receiving the txt files, relative or
                        absolute; created if it does not exist
    """
    label_path_dict = find_label_path(path, label_list)
    os.makedirs(save_path, exist_ok=True)
    for label, img_path_list in label_path_dict.items():
        # os.path.join keeps an absolute save_path intact; a relative one is
        # still resolved against the current working directory by open().
        txt_file = os.path.join(save_path, f'{label}.txt')
        if not img_path_list and not os.path.exists(txt_file):
            continue  # nothing to write and nothing stale to clear
        # Mode 'w' both clears the old content and writes the new lines, so
        # the file is opened once instead of once per line.
        with open(txt_file, 'w') as f:
            for img_path in img_path_list:
                f.write(f'{label},{img_path}\n')
                print(f'{label}中写入{img_path}')


# Copy the images of every label into <save_path>/<label>/
def run_dir(path, label_list, save_path):
    """
    Copy every image found for a label into a folder named after that label.

    A folder is created for every requested label, even when no image
    matches it. Images are copied, not moved, so the source files stay in
    place. Each distinct image path is copied once per label.

    :param path:        directory containing the xml files
    :param label_list:  labels to look for
    :param save_path:   directory receiving the per-label folders, relative
                        or absolute
    """
    cls_path_dict = find_label_path(path, label_list)
    for label, img_path_list in cls_path_dict.items():
        # os.path.join keeps an absolute save_path intact.
        label_dir = os.path.join(save_path, label)
        os.makedirs(label_dir, exist_ok=True)
        # A path may be listed once per matching object; dict.fromkeys drops
        # the repeats while keeping the original order.
        for img_path in dict.fromkeys(img_path_list):
            shutil.copy(img_path, label_dir)
            print(f'{label}中存入{img_path}')


# Print per-label counts and the images that carry more than one label
def find_dul_labels(path, label_list):
    """
    Print a summary of the labels found in the xml files.

    The summary lists the number of image-path entries per label, their
    total, the number of distinct image paths, and every image path that is
    associated with more than one distinct label.

    :param path:        directory containing the xml files
    :param label_list:  labels to look for
    """
    label_path_dict = find_label_path(path, label_list)
    labels_by_path = {}  # image path -> distinct labels seen for it
    print('**' * 40)
    print('Labels Numbers:')
    total = 0
    for label, img_path_list in label_path_dict.items():
        print(label, len(img_path_list))
        total += len(img_path_list)
        for img_path in img_path_list:
            labels = labels_by_path.setdefault(img_path, [])
            # The same path can be listed several times under one label;
            # recording the label once keeps such an image from being
            # reported as multi-label.
            if label not in labels:
                labels.append(label)
    print('Total Labels:', total)
    print('Actual XML Label Files:', len(labels_by_path))
    print('Images Dulplicate Labels:')
    for img_path, labels in labels_by_path.items():
        if len(labels) > 1:
            print(img_path, labels)


if __name__ == '__main__':
    # Example configuration for a script run.
    save_path = './divided_images'  # output directory for txt files and label folders
    label_list = ['aaa', 'bbb', 'ccc', 'ddd', 'eee']  # labels to extract
    path = './test/xml/'  # directory holding the xml files, written with a trailing '/'

    # The output directory has to exist before anything is written into it.
    os.makedirs(save_path, exist_ok=True)

    # First the txt listings, then the per-label image folders.
    for step in (run_txt, run_dir):
        step(path, label_list, save_path)

    # Finally print the label statistics.
    find_dul_labels(path, label_list)