前言:使用 labelImg 标注图片会生成 xml(或 json)标注文件。如果标注时漏标了某些图片,图片集和 xml(json)文件就无法一一对应,因此需要把没有对应关系的文件(没有标注的图片、没有图片的 xml)删除掉。下面的代码只处理 .xml 标注文件。
预期结果
一、一个文件夹
图片和xml文件在同一个文件夹
# -*- coding: utf-8 -*-
import os
import sys
from PIL import Image
# Folder scanned by file_name(); in this single-folder variant it holds both
# the images and their .xml annotation files.
input_folder = r'G:\dataSets\211218\image_xmls' # source folder, contains .png images
# NOTE(review): output_folder is not referenced by the code visible here.
output_folder = r'G:\komla\DataSets\a' # output folder
# training_data=[]
# Original note: "in case the images are in png format; the code above can be
# removed before use" -- presumably a leftover from an earlier script; confirm.
path1 = input_folder
def file_name(file_dir):
    """Delete images and .xml annotations under *file_dir* that have no partner.

    The directory tree is walked recursively. An image (``.png`` / ``.jpg``)
    and an annotation (``.xml``) are considered a pair when their file names
    without extension are equal; extensions are compared case-insensitively.
    Every annotation without an image and every image without an annotation
    is removed from disk with ``os.remove``.

    Args:
        file_dir: Root folder containing both the images and the .xml files.

    Returns:
        A tuple ``(jpg_list, json_list)`` with the extension-less names of
        all images and all annotations found *before* anything was deleted,
        in walk order.

    Raises:
        OSError: If a file cannot be removed.
    """
    image_exts = ('.png', '.jpg')
    images = []       # (stem, full path) of every image found
    annotations = []  # (stem, full path) of every .xml found

    # Collect everything first so nothing is deleted while still walking.
    for root, _dirs, files in os.walk(file_dir):
        for file in files:
            stem, ext = os.path.splitext(file)
            # Keep the real path (folder + original extension): rebuilding it
            # later as file_dir/<stem>.jpg breaks for .png files and for
            # files located in sub-folders.
            entry = (stem, os.path.join(root, file))
            ext = ext.lower()
            if ext in image_exts:
                images.append(entry)
            elif ext == '.xml':
                annotations.append(entry)

    jpg_list = [stem for stem, _path in images]
    json_list = [stem for stem, _path in annotations]
    image_stems = set(jpg_list)
    xml_stems = set(json_list)

    # Annotations whose image is missing.
    diff = xml_stems - image_stems
    print(len(diff))
    for stem, path in annotations:
        if stem in diff:
            print("no jpg", os.path.basename(path))
            os.remove(path)

    # Images whose annotation is missing.
    diff2 = image_stems - xml_stems
    print(len(diff2))
    for stem, path in images:
        if stem in diff2:
            print("no json", os.path.basename(path))
            os.remove(path)

    return jpg_list, json_list
# Run the clean-up on input_folder (path1 is an alias for it).
# WARNING: file_name() deletes unmatched files from disk via os.remove.
file_name(path1)
二、两个文件夹
图片在一个文件夹,xml文件在另一个文件夹
# -*- coding: utf-8 -*-
import os
import sys
from PIL import Image
picture_folder = r'F:\test\dataSets\220308\images' # source folder, contains the images (.png / .jpg)
xml_folder = r'F:\test\dataSets\220308\xmls' # folder with the .xml annotation files (passed to file_name as xml_dir)
# training_data=[]
# Original note: "in case the images are in png format; the code above can be
# removed before use" -- presumably a leftover from an earlier script; confirm.
# NOTE(review): path1 is not referenced by the code visible here.
path1 = picture_folder
def file_name(picture_dir,xml_dir):
    """Delete images and .xml annotations that have no partner in the other folder.

    Both folders are walked recursively. An image (``.png`` / ``.jpg``) under
    *picture_dir* and an annotation (``.xml``) under *xml_dir* are considered
    a pair when their file names without extension are equal; extensions are
    compared case-insensitively. Every annotation without an image and every
    image without an annotation is removed from disk with ``os.remove``.

    Args:
        picture_dir: Root folder containing the images.
        xml_dir: Root folder containing the .xml annotation files.

    Returns:
        A tuple ``(jpg_list, xml_list)`` with the extension-less names of all
        images and all annotations found *before* anything was deleted, in
        walk order.

    Raises:
        OSError: If a file cannot be removed.
    """
    image_exts = ('.png', '.jpg')

    # Keep the real path (folder + original extension) of every file:
    # rebuilding it later as <dir>/<stem>.jpg breaks for .png files and for
    # files located in sub-folders.
    images = []  # (stem, full path) of every image found
    for root, _dirs, files in os.walk(picture_dir):
        for file in files:
            stem, ext = os.path.splitext(file)
            if ext.lower() in image_exts:
                images.append((stem, os.path.join(root, file)))

    annotations = []  # (stem, full path) of every .xml found
    for root, _dirs, files in os.walk(xml_dir):
        for file in files:
            stem, ext = os.path.splitext(file)
            if ext.lower() == '.xml':
                annotations.append((stem, os.path.join(root, file)))

    jpg_list = [stem for stem, _path in images]
    xml_list = [stem for stem, _path in annotations]
    image_stems = set(jpg_list)
    xml_stems = set(xml_list)

    # Annotations whose image is missing.
    xml_diff = xml_stems - image_stems
    print(len(xml_diff))
    for stem, path in annotations:
        if stem in xml_diff:
            print("no jpg", os.path.basename(path))
            os.remove(path)

    # Images whose annotation is missing.
    jpg_diff = image_stems - xml_stems
    print(len(jpg_diff))
    for stem, path in images:
        if stem in jpg_diff:
            print("no json", os.path.basename(path))
            os.remove(path)

    return jpg_list, xml_list
# Run the clean-up on the image folder and the annotation folder.
# WARNING: file_name() deletes unmatched files from disk via os.remove.
file_name(picture_folder,xml_folder)