你是否遇到过这样的情况:标注完图片数据后,发现xml文件比jpg文件少了几个,但面对成千上万张图片,又不知道究竟少了哪几个?下面这段简单的代码可以很好地解决这个问题:
基本思路是:把两个文件夹下的文件名(去掉扩展名)分别放到列表中,再对两个列表求差集,即可找出缺少对应文件的那些名字。
# -*- coding: utf-8 -*-
import os
# Directory passed to file_name() as the image folder.
path1 = "./train"
# Directory passed to file_name() as the XML folder.
path2 = "./train_xml"
def _collect_stems(directory):
    """Return the extension-less names of every file found under *directory*.

    The tree is walked recursively with ``os.walk``. One entry is produced
    per file, so the same stem can appear more than once (for example when
    two files differ only by extension or live in different sub-folders).
    """
    stems = []
    for _root, _dirs, filenames in os.walk(directory):
        stems.extend(os.path.splitext(filename)[0] for filename in filenames)
    return stems


def file_name(image_dir, xml_dir):
    """Report files that have no same-named counterpart in the other folder.

    Files are matched purely by their name without extension. The function
    prints, in this order:

    1. the number of files found under ``image_dir``;
    2. one ``no jpg <name>.xml`` line per stem present only under ``xml_dir``;
    3. the number of stems present only under ``image_dir``;
    4. one ``no xml <name>.jpg`` line per such stem.

    The ``.xml`` / ``.jpg`` suffixes in the report are fixed strings; they
    are appended to the stem regardless of the extension the file actually
    had on disk.

    Args:
        image_dir: Root folder that is scanned for image files.
        xml_dir: Root folder that is scanned for XML files.

    Returns:
        A tuple ``(missing_jpg, missing_xml)`` of two sorted lists of stems:
        stems found only under ``xml_dir`` and stems found only under
        ``image_dir`` respectively.
    """
    jpg_stems = _collect_stems(image_dir)
    xml_stems = _collect_stems(xml_dir)
    # Count every file seen, duplicates included, before de-duplicating.
    print(len(jpg_stems))

    jpg_set = set(jpg_stems)
    xml_set = set(xml_stems)

    # Sort both differences: iterating a set directly gives an arbitrary
    # order, which makes the report hard to compare between runs.
    missing_jpg = sorted(xml_set - jpg_set)
    for name in missing_jpg:
        print("no jpg", name + ".xml")

    missing_xml = sorted(jpg_set - xml_set)
    print(len(missing_xml))
    for name in missing_xml:
        print("no xml", name + ".jpg")

    return missing_jpg, missing_xml
# Run the comparison on the default folders when executed as a script.
if __name__ == "__main__":
    file_name(path1, path2)