"""
Created on 2/26
1. read csv,save as cocostyle dataset:
--dataset/
--trainset/
--image/
--***.jpg
***.jpg
...
--annotation/
--annotation.json
--validset/
--testset/
@author: Wu
"""
import json
import os
import numpy as np
import pandas as pd
import re
import SimpleITK as sitk
import cv2
from tqdm import tqdm
from glob import glob
import shutil
from PIL import Image
label_dict = {}
label_dict[1] = 'all_type'
'''
label_dict[1] = 'bone'
label_dict[2] = 'abdomen'
label_dict[3] = 'mediastinum'
label_dict[4] = 'liver'
label_dict[5] = 'lung'
label_dict[6] = 'kidney'
label_dict[7] = 'soft tissue'
label_dict[8] = 'pelvis'
'''
im_file_path = 'xxx/imgs/'
anns_path = 'xxx/DL_info.csv'
output_root = 'xxx/deeplesion_cocostyle/'
output_path = list()
output_path.append(os.path.join(output_root, 'trainset'))
output_path.append(os.path.join(output_root, 'validset'))
output_path.append(os.path.join(output_root, 'testset'))
def transform_deeplesion2coco(anns_all):
'''
input:
anns_all: the annotation provided (Dataframe).
'''
# 初始化dataset
Dataset = list() # Dataset[0]为training set 1为valid set 2为test set
for i in range(3):
dataset = dict()
dataset['images'] = []
dataset['type'] = 'instances'
dataset['annotations'] = []
dataset['categories'] = []
dataset['info'] = None
dataset['licenses'] = None
Dataset.append(dataset)
annotation_id = [0, 0, 0]
image_id = [0, 0, 0]
# add dataset['categories']
for category_id, category_name in label_dict.items():
category_item = dict()
category_item['supercategory'] = category_name
category_item['id'] = category_id
category_item['name'] = category_name
for dataset in Dataset:
dataset['categories'].append(category_item)
# 清空待处理目录
for path in output_path:
if os.path.exists(path):
shutil.rmtree(path)
os.makedirs(os.path.join(path,'image'))
os.makedirs(os.path.join(path, 'annotation'))
else:
os.makedirs(path)
os.makedirs(os.path.join(path,'image'))
os.makedirs(os.path.join(path, 'annotation'))
# using a list to choose the tail index of repeated annotations of the same image
# assume that multi-lesion of the same image has continuous index
multi_lesion_index = []
file_name = None
for index, row in anns_all.iterrows():
if file_name == row['File_name']:
multi_lesion_index.append(index)
file_name = row['File_name']
for index, row in anns_all.iterrows():
file_name = row['File_name']
datatype = row.Train_Val_Test - 1
img_path = row['File_name']
img = cv2.imread(os.path.join(im_file_path, img_path))
cv2.imwrite(os.path.join(output_path[datatype],'image', file_name[0:-3] + 'jpg'), img)
# add 'image'
if index not in multi_lesion_index:
image = dict()
image['id'] = image_id[datatype]
image_id[datatype] = image_id[datatype] + 1
image['file_name'] = file_name[0:-3] + 'jpg'
image['width'] = img.shape[1]
image['height'] = img.shape[0]
Dataset[datatype]['images'].append(image)
# add 'annotations'
annotation_item = dict()
bbox = [float(i) for i in row.Bounding_boxes.split(',')]
x1 = min(bbox[0], bbox[2])
y1 = min(bbox[1], bbox[3])
x2 = max(bbox[0], bbox[2])
y2 = max(bbox[1], bbox[3])
x = x1
y = y1
w = int(x2 - x1)
h = int(y2 - y1)
annotation_item['segmentation'] = [[x, y, x+w, y, x+w, y+h, x, y+h]]
annotation_item['image_id'] = image['id']
annotation_item['iscrowd'] = 0
annotation_item['bbox'] = [x, y, w, h]
annotation_item['area'] = w * h
annotation_item['id'] = annotation_id[datatype]
annotation_id[datatype] = annotation_id[datatype] + 1
annotation_item['category_id'] = 1
Dataset[datatype]['annotations'].append(annotation_item)
print(f'{index+1}/{anns_all.shape[0]}done!')
for i in range(3):
json.dump(Dataset[i], open(os.path.join(output_path[i] ,'annotation', 'annotation.json'), 'w'))
if __name__ == '__main__':
anns_all = pd.read_csv(anns_path)
transform_deeplesion2coco(anns_all)
# Converts the DeepLesion dataset into a COCO-format detection dataset.
# (Blog footer removed: "latest recommended article published 2022-02-06 00:52:06".)