# Convert the DeepLesion dataset into a COCO-format detection dataset; reference implementation below.

"""
Created on 2/26
1. read csv,save as cocostyle dataset:
--dataset/
  --trainset/
    --image/
      --***.jpg
        ***.jpg
        ...
    --annotation/
      --annotation.json
  --validset/
  --testset/
@author: Wu
"""
import json
import os
import numpy as np
import pandas as pd
import re
import SimpleITK as sitk
import cv2
from tqdm import tqdm
from glob import glob
import shutil
from PIL import Image

# Category id -> name mapping. All lesion types are collapsed into one
# "all_type" category; the original 8-type DeepLesion mapping is kept
# below (commented out) for reference.
label_dict = {1: 'all_type'}
# label_dict = {
#     1: 'bone',
#     2: 'abdomen',
#     3: 'mediastinum',
#     4: 'liver',
#     5: 'lung',
#     6: 'kidney',
#     7: 'soft tissue',
#     8: 'pelvis',
# }

# Input images, the official DL_info.csv annotation table, and the root
# of the COCO-style output tree.
im_file_path = 'xxx/imgs/'
anns_path = 'xxx/DL_info.csv'
output_root = 'xxx/deeplesion_cocostyle/'

# Output directories: index 0 = train, 1 = valid, 2 = test.
output_path = [os.path.join(output_root, split)
               for split in ('trainset', 'validset', 'testset')]

def transform_deeplesion2coco(anns_all):
    """Convert the DeepLesion annotation table into three COCO-style datasets.

    For every row: the source image is converted to jpg inside the split's
    ``image/`` directory, and one COCO annotation entry is produced. One
    ``annotation.json`` is written per split (train/valid/test).

    Parameters
    ----------
    anns_all : pandas.DataFrame
        The DeepLesion ``DL_info.csv`` table. Must contain the columns
        ``File_name``, ``Train_Val_Test`` (1/2/3) and ``Bounding_boxes``
        (comma-separated "x1, y1, x2, y2" string).
    """
    # One COCO skeleton per split: index 0 = train, 1 = valid, 2 = test.
    Dataset = []
    for _ in range(3):
        Dataset.append({
            'images': [],
            'type': 'instances',
            'annotations': [],
            'categories': [],
            'info': None,
            'licenses': None,
        })
    annotation_id = [0, 0, 0]
    image_id = [0, 0, 0]

    # Populate 'categories'. Build a fresh dict per split so the three
    # datasets do not alias (and cannot accidentally co-mutate) one object.
    for category_id, category_name in label_dict.items():
        for dataset in Dataset:
            dataset['categories'].append({
                'supercategory': category_name,
                'id': category_id,
                'name': category_name,
            })

    # Recreate the output directory tree from scratch.
    for path in output_path:
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(os.path.join(path, 'image'))
        os.makedirs(os.path.join(path, 'annotation'))

    # Rows whose File_name equals the previous row's are extra lesions of an
    # already-seen image (the CSV lists multi-lesion images on consecutive
    # rows), so they must not create a new 'images' entry. Use a set for O(1)
    # membership tests in the main loop.
    multi_lesion_index = set()
    prev_name = None
    for index, row in anns_all.iterrows():
        if prev_name == row['File_name']:
            multi_lesion_index.add(index)
        prev_name = row['File_name']

    # Track the id of the most recently registered image per split, so the
    # annotation's image_id does not depend on a loop-carried 'image' dict.
    last_image_id = [None, None, None]
    total = anns_all.shape[0]
    for index, row in anns_all.iterrows():
        file_name = row['File_name']
        datatype = row.Train_Val_Test - 1  # CSV uses 1/2/3 for train/valid/test

        # add 'image' — only once per image; the original re-read and
        # re-wrote the jpg for every lesion of a multi-lesion image.
        if index not in multi_lesion_index:
            src = os.path.join(im_file_path, file_name)
            img = cv2.imread(src)
            if img is None:
                raise FileNotFoundError(f'could not read image: {src}')
            # Assumes a 3-character source extension (e.g. ".png") — the
            # slice swaps it for "jpg".
            jpg_name = file_name[0:-3] + 'jpg'
            cv2.imwrite(os.path.join(output_path[datatype], 'image', jpg_name), img)
            image = {
                'id': image_id[datatype],
                'file_name': jpg_name,
                'width': img.shape[1],
                'height': img.shape[0],
            }
            image_id[datatype] += 1
            last_image_id[datatype] = image['id']
            Dataset[datatype]['images'].append(image)

        # add 'annotations' — normalize the box so (x1, y1) is top-left.
        bbox = [float(v) for v in row.Bounding_boxes.split(',')]
        x1 = min(bbox[0], bbox[2])
        y1 = min(bbox[1], bbox[3])
        x2 = max(bbox[0], bbox[2])
        y2 = max(bbox[1], bbox[3])
        w = int(x2 - x1)
        h = int(y2 - y1)
        annotation_item = {
            'segmentation': [[x1, y1, x1 + w, y1, x1 + w, y1 + h, x1, y1 + h]],
            'image_id': last_image_id[datatype],
            'iscrowd': 0,
            'bbox': [x1, y1, w, h],
            'area': w * h,
            'id': annotation_id[datatype],
            'category_id': 1,  # all lesions collapsed into a single category
        }
        annotation_id[datatype] += 1
        Dataset[datatype]['annotations'].append(annotation_item)

        print(f'{index + 1}/{total} done!')

    # Write one annotation.json per split; 'with' closes the file handle
    # (the original json.dump(..., open(...)) leaked it).
    for i in range(3):
        out_file = os.path.join(output_path[i], 'annotation', 'annotation.json')
        with open(out_file, 'w') as f:
            json.dump(Dataset[i], f)




if __name__ == '__main__':
    # Load the DeepLesion annotation table and run the COCO conversion.
    annotations = pd.read_csv(anns_path)
    transform_deeplesion2coco(annotations)
  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值