# script by dapeng:760715590@qq.com
import argparse
import json
import os
import re
import xml.etree.ElementTree as ET
from typing import Dict, List
def get_label2id(labels_path: str) -> Dict[str, int]:
    """Build a label-name -> category-id mapping from a labels file.

    The file content is split on any whitespace, so every
    whitespace-separated token is treated as one label. Ids are assigned in
    file order and start at 1.

    Args:
        labels_path: Path to the text file listing the label names.

    Returns:
        Dict mapping each label to its 1-based id. If a label occurs more
        than once, the id of its last occurrence is kept.
    """
    with open(labels_path, 'r') as f:
        labels = f.read().split()
    return {label: label_id for label_id, label in enumerate(labels, start=1)}


def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '',
                 annpaths_list_path: str = None) -> List[str]:
    """Return the list of annotation file paths to convert.

    Two input modes are supported:

    * ``annpaths_list_path`` given: the file is read and each
      whitespace-separated token is returned as a path, unchanged.
    * otherwise: each whitespace-separated id in ``ann_ids_path`` is joined
      with ``ann_dir_path`` and the optional extension.

    Args:
        ann_dir_path: Directory holding the annotation files (ids mode).
        ann_ids_path: Text file listing annotation ids (ids mode).
        ext: File extension appended to every id, with or without a leading
            dot. An empty string appends nothing.
        annpaths_list_path: Text file listing annotation paths directly.
            Takes precedence over the ids mode when not None.

    Returns:
        List of annotation file paths, in file order.

    Raises:
        ValueError: If neither ``annpaths_list_path`` nor both
            ``ann_dir_path`` and ``ann_ids_path`` are provided.
    """
    # An explicit list of annotation paths wins over the ids list.
    if annpaths_list_path is not None:
        with open(annpaths_list_path, 'r') as f:
            return f.read().split()

    # Fail with a clear message instead of a TypeError from open(None).
    if ann_dir_path is None or ann_ids_path is None:
        raise ValueError(
            'Either annpaths_list_path, or both ann_dir_path and '
            'ann_ids_path, must be provided.')

    # Accept both 'xml' and '.xml' so the result never contains '..xml'.
    ext = ext.lstrip('.')
    suffix = '.' + ext if ext else ''
    with open(ann_ids_path, 'r') as f:
        ann_ids = f.read().split()
    return [os.path.join(ann_dir_path, ann_id + suffix) for ann_id in ann_ids]
def get_image_info(annotation_root, extract_num_from_imgid=True):
    """Extract the COCO ``images`` entry from a VOC annotation root element.

    The file name is the basename of ``<path>`` when that element exists,
    otherwise the text of ``<filename>`` as written. The image id is the
    file name without directory and extension.

    Args:
        annotation_root: Root element of a parsed VOC annotation XML.
        extract_num_from_imgid: When true, the id is converted to an int
            taken from the FIRST run of digits in the name. Note that a
            name such as ``2007_000027`` therefore yields ``2007``; pass
            False to keep the full string id.

    Returns:
        Dict with the keys ``file_name``, ``height``, ``width`` and ``id``.

    Raises:
        ValueError: If the annotation has no ``<path>``/``<filename>``, has
            no ``<size>`` element, or a numeric id is requested but the
            file name contains no digits.
    """
    path = annotation_root.findtext('path')
    if path is None:
        filename = annotation_root.findtext('filename')
    else:
        filename = os.path.basename(path)
    if filename is None:
        raise ValueError(
            'Annotation has neither a <path> nor a <filename> element')

    img_id = os.path.splitext(os.path.basename(filename))[0]
    if extract_num_from_imgid:
        first_digits = re.search(r'\d+', img_id)
        if first_digits is None:
            # Previously surfaced as a bare IndexError on findall(...)[0].
            raise ValueError(
                f'Cannot extract a numeric image id from {img_id!r}')
        img_id = int(first_digits.group())

    size = annotation_root.find('size')
    if size is None:
        raise ValueError(f'Annotation for {filename!r} has no <size> element')

    return {
        'file_name': filename,
        'height': int(size.findtext('height')),
        'width': int(size.findtext('width')),
        'id': img_id,
    }
def get_coco_annotation_from_obj(obj, label2id):
    """Convert one VOC ``<object>`` element into a COCO annotation dict.

    ``xmin`` and ``ymin`` are shifted down by 1 while ``xmax`` and ``ymax``
    are used as read (presumably converting VOC's 1-based inclusive corners
    to a 0-based origin -- confirm against the dataset). The bbox is
    emitted as ``[x, y, width, height]``.

    Args:
        obj: An ``<object>`` element from a VOC annotation XML.
        label2id: Mapping from label name to category id.

    Returns:
        Dict with the keys ``area``, ``iscrowd``, ``bbox``, ``category_id``,
        ``ignore`` and ``segmentation``. The caller adds ``image_id`` and
        ``id``.

    Raises:
        AssertionError: If the label is not in ``label2id`` or the box has
            a non-positive width or height.
        ValueError: If the object has no ``<bndbox>`` element.
    """
    label = obj.findtext('name')
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    # while callers still see the same exception type as before.
    if label not in label2id:
        raise AssertionError(f"Error: {label} is not in label2id !")
    category_id = label2id[label]

    bndbox = obj.find('bndbox')
    if bndbox is None:
        raise ValueError(f"Object {label!r} has no <bndbox> element")
    xmin = int(bndbox.findtext('xmin')) - 1
    ymin = int(bndbox.findtext('ymin')) - 1
    xmax = int(bndbox.findtext('xmax'))
    ymax = int(bndbox.findtext('ymax'))
    if not (xmax > xmin and ymax > ymin):
        raise AssertionError(
            f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}")

    o_width = xmax - xmin
    o_height = ymax - ymin
    return {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        'segmentation': [],  # This script is not for segmentation
    }
def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    """Convert VOC annotation XML files into a single COCO-style JSON file.

    Every XML contributes one ``images`` entry and one ``annotations`` entry
    per ``<object>``. Annotation ids are assigned sequentially starting at
    1 across all files. ``categories`` is built from ``label2id``.

    Args:
        annotation_paths: Paths of the VOC XML files to convert.
        label2id: Mapping from label name to category id.
        output_jsonpath: Destination JSON path. Its parent directory is
            created when missing.
        extract_num_from_imgid: Forwarded to ``get_image_info``.
    """
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # START_BOUNDING_BOX_ID, TODO input as args ?
    for a_path in annotation_paths:
        # Read annotation xml
        ann_root = ET.parse(a_path).getroot()

        img_info = get_image_info(annotation_root=ann_root,
                                  extract_num_from_imgid=extract_num_from_imgid)
        img_id = img_info['id']
        output_json_dict['images'].append(img_info)

        for obj in ann_root.findall('object'):
            ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            ann.update({'image_id': img_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id += 1

    output_json_dict['categories'].extend(
        {'supercategory': 'none', 'id': label_id, 'name': label}
        for label, label_id in label2id.items())

    # Create the directory of the requested output, not only the default
    # './output/annotations', so a custom --output path works as well.
    output_dir = os.path.dirname(output_jsonpath)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_jsonpath, 'w') as f:
        json.dump(output_json_dict, f)
    print('Convert successfully !')


def main():
    """Parse the command-line arguments and run the VOC -> COCO conversion."""
    parser = argparse.ArgumentParser(
        description='This script support converting voc format xmls to coco format json')
    parser.add_argument('--ann_dir', type=str, default='./Annotations')
    parser.add_argument('--ann_ids', type=str, default='./ImageSets/Main/test.txt')
    parser.add_argument('--ann_paths_list', type=str, default=None)
    parser.add_argument('--labels', type=str, default='./labels.txt')
    parser.add_argument('--output', type=str, default='./output/annotations/test.json')
    parser.add_argument('--ext', type=str, default='xml')
    args = parser.parse_args()

    label2id = get_label2id(labels_path=args.labels)
    ann_paths = get_annpaths(
        ann_dir_path=args.ann_dir,
        ann_ids_path=args.ann_ids,
        ext=args.ext,
        annpaths_list_path=args.ann_paths_list
    )
    convert_xmls_to_cocojson(
        annotation_paths=ann_paths,
        label2id=label2id,
        output_jsonpath=args.output,
        extract_num_from_imgid=True)


if __name__ == '__main__':
    # Kept for backward compatibility: the default output directory is
    # always created. exist_ok avoids the exists()/makedirs() race.
    os.makedirs('./output/annotations', exist_ok=True)
    main()
# 4. Copy the image files (拷贝图像文件)
# copy image script by dapeng:760715590@qq.com
# Copies the JPEG images listed in ImageSets/Main/{train,val,test}.* into
# ./output/{train,val,test}/.
import os
import shutil

images_file_path = './JPEGImages/'
split_data_file_path = './ImageSets/Main/'
new_images_file_path = './output/'

dst_train_Image = new_images_file_path + 'train/'
dst_val_Image = new_images_file_path + 'val/'
dst_test_Image = new_images_file_path + 'test/'

# Split name (split-file name without extension) -> destination directory.
_SPLIT_DESTINATIONS = {
    'train': dst_train_Image,
    'val': dst_val_Image,
    'test': dst_test_Image,
}

for _dst_dir in _SPLIT_DESTINATIONS.values():
    os.makedirs(_dst_dir, exist_ok=True)


def _copy_split_images(split_list_path, src_dir, dst_dir):
    """Copy '<id>.jpg' from src_dir to dst_dir for every id in the list file."""
    with open(split_list_path, 'r') as txt_file:
        for line in txt_file:
            image_id = line.strip()
            if not image_id:
                # A blank line would otherwise try to copy a bare '.jpg'.
                continue
            image_name = image_id + '.jpg'
            shutil.copyfile(os.path.join(src_dir, image_name),
                            os.path.join(dst_dir, image_name))


total_txt = os.listdir(split_data_file_path)
for i in total_txt:
    # splitext instead of i[:-4]: do not assume a 4-character extension.
    name = os.path.splitext(i)[0]
    dst_dir = _SPLIT_DESTINATIONS.get(name)
    if dst_dir is None:
        print("Error, Please check the file name of folder")
        continue
    _copy_split_images(os.path.join(split_data_file_path, i),
                       images_file_path, dst_dir)