功能
1. 分类并统计
把图片分类到对应的类别文件夹(根据 XML 的标签名字自动生成)
统计各类别个数,输出,并以txt格式保存至VOC2007根目录下
* 图片多标签时,图片会被分到multi_targets文件夹下
2. 单类合并
把图片文件还回到 JPEGImages,并删除category文件夹
使用方法
改root:根目录的绝对路径
1. 分类并统计:
用 # 注释掉 def main 函数的 back_to_one_folder 那一行
2. 单类合并:
取消 def main 函数中 back_to_one_folder 那一行的注释。
【不要】动 classify_and_count 那一行
右键,运行(run)
代码
* 此代码遇到错误(XML 解析失败或找不到对应图片)时会直接跳过;实际应用中可视具体情况自行添加处理逻辑。如果标签都正确,则无需修改。
import os
import shutil
import xml.etree.ElementTree as ET
import argparse
# Default dataset root; used as the fallback value of the --root command-line option.
root = "path/to/root"
def main(root):
    """Run the classification step, and optionally the merge-back step.

    Args:
        root: Dataset root directory passed straight to ``classify_and_count``.
    """
    # Step 1: sort the images into per-class folders and write the count report.
    category_dir, image_dir = classify_and_count(root)
    # Step 2 (disabled by default): uncomment the call below to move the images
    # back into the image folder and delete the category folder.
    # back_to_one_folder(category_dir, image_dir)
def classify_and_count(root):
    """Move each annotated image into a per-class folder and write a count report.

    For every ``.xml`` file in ``<root>/Annotations`` the ``<object>/<name>``
    labels are read and the matching ``<stem>.jpg`` in ``<root>/JPEGImages`` is
    moved to ``<root>/JPEGImages/category/<label>``.  Images whose annotation
    holds more than one object label go to ``category/multi_targets`` instead.
    Per-class counts are printed and saved to ``<root>/classes_num.txt``.

    Annotations that cannot be parsed, that carry no usable label, or whose
    image is missing are skipped and are not counted.

    Args:
        root: Dataset root containing ``Annotations`` and ``JPEGImages``.

    Returns:
        Tuple ``(category_folder, images_folder)``.
    """
    labels_folder = os.path.join(root, 'Annotations')
    images_folder = os.path.join(root, 'JPEGImages')
    output_folder = os.path.join(images_folder, 'category')
    os.makedirs(output_folder, exist_ok=True)

    multi_targets_lines = []  # report lines describing multi-label images
    class_counts = {}  # label -> number of single-label images moved
    multi_num = 0

    # Sorted so the report order does not depend on the directory listing order.
    for label_file in sorted(os.listdir(labels_folder)):
        if not label_file.endswith(".xml"):
            continue

        try:
            tree_root = ET.parse(os.path.join(labels_folder, label_file)).getroot()
        except (ET.ParseError, FileNotFoundError) as err:
            print(f"Skipping {label_file}: cannot read annotation ({err})")
            continue

        # Only <object>/<name> holds a class label; a bare './/name' would also
        # pick up unrelated elements such as <owner><name> or <part><name>.
        labels = [(element.text or "").strip()
                  for element in tree_root.findall('.//object/name')]
        # Empty <name/> elements cannot be used as a folder name.
        labels = [label for label in labels if label]
        if not labels:
            continue

        img = os.path.splitext(label_file)[0] + '.jpg'
        source_path = os.path.join(images_folder, img)
        # Check before counting so a missing image does not inflate the totals
        # or leave an empty class folder behind.
        if not os.path.isfile(source_path):
            print(f"Skipping {label_file}: image not found ({source_path})")
            continue

        if len(labels) > 1:
            class_folder = os.path.join(output_folder, 'multi_targets')
        else:
            class_folder = os.path.join(output_folder, labels[0])
        os.makedirs(class_folder, exist_ok=True)
        shutil.move(source_path, os.path.join(class_folder, img))

        # Book-keeping happens only after the image has actually been moved.
        if len(labels) > 1:
            multi_num += 1
            for label in labels:
                multi_targets_lines.append("{:25s}: {:>25}".format(img, label))
            multi_targets_lines.append("\n")
        else:
            class_counts[labels[0]] = class_counts.get(labels[0], 0) + 1

    print("Images have been moved to the corresponding folders.")

    count_txt = os.path.join(root, 'classes_num.txt')
    # The report contains non-ASCII text, so the encoding is fixed explicitly
    # instead of relying on the platform default.
    with open(count_txt, "w", encoding="utf-8") as f:
        for label, count in class_counts.items():
            print("{:25s}: {:>5}个".format(label, count))
            f.write("{:25s}: {:>5}个\n".format(label, count))
        if multi_targets_lines:
            print("{:25s}: {:>5}个\n".format("multi_targets", multi_num))
            f.write("{:25s}: {:>5}个\n".format("multi_targets", multi_num))
            f.write("\n\n以下为多标签图片:\n")
            for line in multi_targets_lines:
                f.write(line + "\n")
    print(f"File {count_txt} has been created and written with content.")
    return output_folder, images_folder
def back_to_one_folder(category_folder, images_folder):
    """Move classified ``.jpg`` files back into one folder and clean up.

    Walks ``category_folder`` recursively and moves every ``.jpg`` file into
    ``images_folder``.  A file whose name already exists in ``images_folder``
    is left where it is.  ``category_folder`` is deleted only when no image
    had to be left behind, so skipped files are never destroyed.

    Args:
        category_folder: Folder holding the per-class sub-folders.
        images_folder: Destination folder for the images.
    """
    skipped = []  # images that could not be moved because of a name clash
    # os.walk yields (current dir, its sub-directories, its files).
    for category_path, _, files in os.walk(category_folder):
        for file in files:
            if not file.endswith('.jpg'):
                continue
            source_file_path = os.path.join(category_path, file)
            destination_file_path = os.path.join(images_folder, file)
            if os.path.exists(destination_file_path):
                print(f"文件已存在,跳过: {source_file_path}")
                skipped.append(source_file_path)
                continue
            shutil.move(source_file_path, destination_file_path)
            print(f"已移动文件: {source_file_path} 到 {destination_file_path}")

    if skipped:
        # Removing the tree now would silently delete the skipped images.
        print(f"{len(skipped)} image(s) were skipped; keeping {category_folder}.")
        return

    shutil.rmtree(category_folder)
    print("All classified images have been moved back.")
if __name__ == '__main__':
    # Command-line entry point: --root overrides the module-level default path.
    cli = argparse.ArgumentParser(description='Description of your script.')
    cli.add_argument('--root', type=str, default=root, help='Root path for processing.')
    # Parse the arguments and hand the chosen root to the main routine.
    main(cli.parse_args().root)