"""
Extract the images that carry given labels from XML annotation files.

For every requested label the script can:
  * write the matching image paths to a per-label txt file,
  * copy the matching images into a per-label directory,
  * print per-label counts and the images that are listed more than once.
"""
import os
import xml.etree.ElementTree as ET
import shutil


def find_label_path(path, label_list):
    """
    Map each requested label to the image paths of the XML files containing it.

    :param path: directory holding the XML files (trailing slash optional)
    :param label_list: labels to look for
    :return: {label1: [img_path1, ...], label2: [img_path2, ...]}; one entry
        is appended per matching <object>, so a path may repeat
    """
    label_path_dict = {label: [] for label in label_list}

    # os.listdir order is unspecified; sort so the output is reproducible.
    for file_name in sorted(os.listdir(path)):
        # endswith() copes with names that have no dot or several dots.
        if not file_name.endswith('.xml'):
            continue

        # Passing a file name lets ElementTree open and close the file itself.
        root = ET.parse(os.path.join(path, file_name)).getroot()

        # The <path> entries are the same for every object of this file, so
        # collect them once; empty <path/> elements carry no usable location.
        img_paths = [node.text for node in root.iter('path') if node.text]

        for obj in root.iter('object'):
            # findtext() yields None instead of raising when <name> is absent.
            label = obj.findtext('name')
            if label in label_path_dict:
                label_path_dict[label].extend(img_paths)

    return label_path_dict


def run_txt(path, label_list, save_path):
    """
    Write the image paths of each label to ``<save_path>/<label>.txt``.

    Each line has the form ``label,img_path``. An existing txt file is
    overwritten; no file is created for a label without any image path
    unless one already exists (it is then emptied).

    :param path: directory holding the XML files
    :param label_list: labels to look for
    :param save_path: output directory (relative to the cwd, or absolute)
    """
    label_path_dict = find_label_path(path, label_list)

    for label, img_paths in label_path_dict.items():
        txt_file = os.path.join(save_path, f'{label}.txt')
        if not img_paths and not os.path.exists(txt_file):
            continue

        os.makedirs(save_path, exist_ok=True)
        # Open once per label in 'w' mode: truncates stale content and avoids
        # reopening the file for every single line.
        with open(txt_file, 'w') as f:
            for img_path in img_paths:
                f.write(f'{label},{img_path}' + '\n')
                print(f'{label}中写入{img_path}')


def run_dir(path, label_list, save_path):
    """
    Copy the images of each label into ``<save_path>/<label>/``.

    A directory is created for every label in ``label_list``, even when no
    image matches it.

    :param path: directory holding the XML files
    :param label_list: labels to look for
    :param save_path: output directory (relative to the cwd, or absolute)
    """
    cls_path_dict = find_label_path(path, label_list)

    for label, img_path_list in cls_path_dict.items():
        label_path = os.path.join(save_path, label)
        os.makedirs(label_path, exist_ok=True)
        for img_path in img_path_list:
            # Copy rather than move so the source data set stays intact.
            shutil.copy(img_path, label_path)
            print(f'{label}中存入{img_path}')


def find_dul_labels(path, label_list):
    """
    Print per-label counts and the images that are listed more than once.

    :param path: directory holding the XML files
    :param label_list: labels to look for
    """
    label_path_dict = find_label_path(path, label_list)
    labels_by_image = {}  # image path -> labels recorded for that image

    print('**' * 40)
    print('Labels Numbers:')
    total = 0
    for label, img_paths in label_path_dict.items():
        print(label, len(img_paths))
        total += len(img_paths)
        for img_path in img_paths:
            # A path that is already present has been recorded before.
            labels_by_image.setdefault(img_path, []).append(label)

    print('Total Labels:', total)
    print('Actual XML Label Files:', len(labels_by_image))
    print('Images Dulplicate Labels:')
    for img_path, labels in labels_by_image.items():
        if len(labels) > 1:
            print(img_path, labels)


if __name__ == '__main__':
    path = './test/xml/'  # directory holding the XML files
    label_list = ['aaa', 'bbb', 'ccc', 'ddd', 'eee']  # labels to extract
    save_path = './divided_images'  # output directory
    os.makedirs(save_path, exist_ok=True)
    run_txt(path, label_list, save_path)  # write paths to per-label txt files
    run_dir(path, label_list, save_path)  # copy images to per-label directories
    find_dul_labels(path, label_list)  # print statistics
提取xml文件中某个特征对应的图片
于 2023-04-06 17:41:00 首次发布