文件夹里有很多图片,以及使用 labelme 对图片进行标注后生成的 json 文件。并不是所有的图片都进行了标注;有对应 json 文件的图片也可能没有目标,此时相应 json 文件内的 shapes 为空列表。需要筛选出所有有标注、且标注 json 内存在目标的图片,
并基于筛选的结果,把未被筛选出来的图片和对应的 json 文件删除,代码见下:
import os
import json
def check_annotation(json_path):
    """Report whether an annotation JSON file records at least one shape.

    Args:
        json_path: Path of the JSON annotation file to inspect.

    Returns:
        True when the top-level ``shapes`` entry is present and truthy
        (for example a non-empty list); False when it is missing or empty.
    """
    # NOTE(review): the file is read with the platform default encoding;
    # confirm this matches how the JSON files were written.
    with open(json_path, 'r') as handle:
        annotation = json.load(handle)
    shapes = annotation.get('shapes')
    # A missing key (None) and an empty list both mean "no targets".
    return True if shapes else False
def get_annotated_images(folder_path, image_extensions=('.jpg',)):
    """Collect images whose annotation JSON contains at least one shape.

    Walks ``folder_path`` recursively. For every ``*.json`` file it looks
    for an image stored next to it with the same name (minus ``.json``)
    and one of ``image_extensions``; the image is reported only when it
    exists and ``check_annotation`` accepts the JSON file.

    Args:
        folder_path: Root directory to scan.
        image_extensions: Image suffixes (including the leading dot) to try,
            in order of preference. Matching is exact, so ``'.JPG'`` must be
            listed explicitly if needed. A single string is accepted as a
            shorthand for a one-element tuple. Defaults to ``('.jpg',)``.

    Returns:
        A list of image paths (built from the walked directory and the file
        name), at most one per JSON file.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(image_extensions, str):
        image_extensions = (image_extensions,)

    annotated_images = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.json'):
                continue
            json_path = os.path.join(root, file)
            stem = os.path.splitext(json_path)[0]
            image_path = next(
                (stem + ext for ext in image_extensions
                 if os.path.exists(stem + ext)),
                None,
            )
            # Parse the JSON only once a matching image is known to exist.
            if image_path is not None and check_annotation(json_path):
                annotated_images.append(image_path)
    return annotated_images
def delete_unannotated_files(folder_path, annotated_images,
                             image_extensions=('.jpg',)):
    """Delete images and JSON files that are not in the keep list.

    Walks ``folder_path`` recursively and removes every file whose name ends
    with ``.json`` or one of ``image_extensions`` unless it belongs to an
    entry of ``annotated_images``. All other files are left untouched.

    Args:
        folder_path: Root directory to clean up.
        annotated_images: Images to keep. An entry that has a directory
            component (such as the paths returned by
            ``get_annotated_images``) is treated as an image path: its
            extension is dropped and only the files at exactly that
            location are kept. A bare entry without a directory component
            is treated as a file stem (file name without its final
            extension) and keeps matching files in any sub-directory.
        image_extensions: Image suffixes (including the leading dot) that
            are candidates for deletion. Matching is exact. A single string
            is accepted as a shorthand for a one-element tuple. Defaults to
            ``('.jpg',)``.

    Raises:
        OSError: If a file selected for deletion cannot be removed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(image_extensions, str):
        image_extensions = (image_extensions,)
    managed_suffixes = ('.json',) + tuple(image_extensions)

    # Sets give O(1) membership tests instead of scanning a list per file.
    kept_paths = set()
    kept_stems = set()
    for entry in annotated_images:
        if os.path.dirname(entry):
            # splitext only strips the final extension of the last path
            # component, so dots in directory names or earlier in the file
            # name are preserved.
            kept_paths.add(os.path.abspath(os.path.splitext(entry)[0]))
        else:
            kept_stems.add(entry)

    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.endswith(managed_suffixes):
                continue
            stem = os.path.splitext(file)[0]
            if stem in kept_stems:
                continue
            # abspath makes relative and absolute spellings comparable.
            if os.path.abspath(os.path.join(root, stem)) in kept_paths:
                continue
            os.remove(os.path.join(root, file))


if __name__ == "__main__":
    # Usage example
    folder_path = '/path/to/your/folder'
    annotated_images = get_annotated_images(folder_path)
    # Pass the full image paths so that files are matched by location;
    # reducing them to base names would conflate equally named files in
    # different sub-directories.
    delete_unannotated_files(folder_path, annotated_images)