目标检测—将自己的标注文件转换为COCO格式

目录

COCO数据集格式简介:

images键,其全部的信息如下

annotations,全部信息如下

categories,全部信息如下

一个简单的例子

将自己的数据集转换为COCO格式:

定义一个MyData2COCO类

调用定义好的类实现转换

整个程序文件可以在此链接下载



COCO数据集格式简介:

COCO的文件夹主要包含标注文件夹和图片文件夹,格式如下

标注文件主要包括5个主keys,如下(官网:http://cocodataset.org/#format-data)

{
"info": info,                 # 可以设置为"mydata"
"images": [image],            # list的形式为所有图片的数据
"annotations": [annotation],  # list的形式,所有图片的标注信息
"categories": [category],     # 类别信息
"licenses": [license],        # 可以设置为['mylicenses']
}

images键,其全部的信息如下

image{
"id": int,                 
"width": int, 
"height": int, 
"file_name": str, 
"license": int, 
"flickr_url": str, 
"coco_url": str, 
"date_captured": datetime,
}

简化后的信息可以如下

image{
"id": int, 
"width": int, 
"height": int, 
"file_name": str, 
}

annotations,全部信息如下

annotation{
"id": int, 
"image_id": int, 
"category_id": int,               # 就是类别的编号,也就是label
"segmentation": RLE or [polygon], 
"area": float, 
"bbox": [x,y,width,height],       # 注意一下格式
"iscrowd": 0 or 1,
}

categories,全部信息如下

categories[{
"id": int,
"name": str, 
"supercategory": str,
}]

一个简单的例子

{
"info": 'coco',
"license": ['none'],
"images": [
{
"height": 224,
"width": 224,
"id": 0,
"file_name": 'figure1.jpg'
},
{
"height": 224,
"width": 224,
"id": 1,
"file_name": 'figure2.jpg'
}],
"annotations":[
{
"id": 0,
"image_id": 0,
"category_id": 10,
"segmentation": [[1, 1, 1, 1, 1, 1, 1, 1],],
"bbox": [0, 0, 1, 1],
"iscrowd": 0,
"area": 1
}],
"categories": [
{
"id": 0,
"name": "1",
"supercategory": "name"
},]
}

将自己的数据集转换为COCO格式:

原始数据格式如下

[
    {
        "name": "T2019_0.jpg",   # 图片名
        "category": 0,           # 该bbox对应的类别
        "bbox": {
            "x": 20642,
            "y": 20295,
            "w": 163,
            "h": 134
        }
    },]

定义一个MyData2COCO类

class MyData2COCO:
    # Outline of the converter's methods; the bodies are omitted in this listing.

    def __init__(self):                                     # set up empty containers and id counters

    def _categories(self, num_categories):                  # build the "categories" entries
      
    def _image(self, path, h, w):                           # build one "images" entry

    def _annotation(self, label, bbox):                     # build one "annotations" entry

    def to_coco(self, anno_file, img_dir, num_categories):  # run the conversion
  
    def save_coco_json(self, instance, save_path):          # write the result to a file
def __init__(self):
    """Start with empty COCO containers and both id counters at zero."""
    # Running ids: img_id numbers the images, ann_id numbers the annotations.
    self.img_id, self.ann_id = 0, 0
    # One independent list per COCO key that this converter fills in.
    self.images, self.annotations, self.categories = [], [], []
def _categories(self, num_categories):
    """Append one category record per class id in ``[0, num_categories)``.

    Args:
        num_categories: Total number of classes.

    Each record uses the class id as ``id``, its decimal string as ``name``
    and the fixed string ``'name'`` as ``supercategory``; adjust the last two
    to suit the dataset.
    """
    self.categories.extend(
        {'id': class_id, 'name': str(class_id), 'supercategory': 'name'}
        for class_id in range(num_categories)
    )
def _image(self, path, h, w):
    """Return the ``images`` record for one picture.

    Args:
        path: Path to the image file; only its base name is stored.
        h: Image height.
        w: Image width.

    Returns:
        dict with ``height``, ``width``, ``id`` (the current ``self.img_id``)
        and ``file_name``.
    """
    return {
        'height': h,
        'width': w,
        'id': self.img_id,
        'file_name': os.path.basename(path),
    }
def _annotation(self, label, bbox):
    """Return the ``annotations`` record for one bounding box.

    Args:
        label: Category id of the box.
        bbox: Mapping describing the box. When it has the keys ``x``, ``y``,
            ``w`` and ``h`` they are read by name; otherwise its values are
            taken positionally as ``[x, y, w, h]``.

    Returns:
        dict with ``id`` (current ``self.ann_id``), ``image_id`` (current
        ``self.img_id``), ``category_id``, ``segmentation`` (the box corners
        as a single flat polygon), ``bbox`` (``[x, y, w, h]``), ``iscrowd``
        (always 0) and ``area`` (``w * h``).
    """
    # Read by key so the result does not depend on the mapping's key order.
    if all(key in bbox for key in ('x', 'y', 'w', 'h')):
        box = [bbox['x'], bbox['y'], bbox['w'], bbox['h']]
    else:
        box = list(bbox.values())
    x, y, w, h = box[:4]

    # Corners in order: top-left, top-right, bottom-right, bottom-left.
    # The bottom-right x is x + w (not w alone), otherwise the polygon is
    # skewed for every box that does not start at x == 0.
    points = [[x, y], [x + w, y], [x + w, y + h], [x, y + h]]

    annotation = {}
    annotation['id'] = self.ann_id
    annotation['image_id'] = self.img_id
    annotation['category_id'] = label
    annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
    annotation['bbox'] = box
    annotation['iscrowd'] = 0
    annotation['area'] = w * h
    return annotation
def to_coco(self, anno_file, img_dir, num_categories):
    """Convert the custom annotation file into a COCO-style dictionary.

    Args:
        anno_file: Path to the JSON annotation file. Its records are read
            through the columns ``name`` (image file name), ``category``
            (label) and ``bbox``.
        img_dir: Directory holding the images of one split (COCO keeps
            train and valid images in separate folders).
        num_categories: Total number of bbox categories.

    Returns:
        dict with the keys ``info``, ``license``, ``images``,
        ``annotations`` and ``categories``.

    Files in ``img_dir`` that cannot be decoded as images are skipped with a
    warning. Images without any annotation are still listed under ``images``.
    """
    import warnings  # local import: this file's import section is not visible here

    self._categories(num_categories)  # fill in the category records first

    with open(anno_file, "r", encoding="utf-8") as f_json:
        all_anno_pd = pd.read_json(f_json)

    # Group the rows by image name once instead of filtering the whole table
    # again for every image.
    if "name" in all_anno_pd.columns:
        anno_by_name = {name: rows for name, rows in all_anno_pd.groupby("name")}
    else:
        anno_by_name = {}

    # Sorted so that image ids are reproducible between runs.
    for img_name in tqdm.tqdm(sorted(os.listdir(img_dir))):
        img_path = os.path.join(img_dir, img_name)

        # Read the image before touching the annotations, so a file that is
        # skipped leaves no annotations pointing at a missing image id.
        img = cv2.imread(img_path)
        if img is None:
            warnings.warn("skipping unreadable image file: {}".format(img_path))
            continue
        h, w = img.shape[:2]

        each_img_anno = anno_by_name.get(img_name)
        if each_img_anno is not None:
            bboxs = each_img_anno["bbox"].tolist()
            labels = each_img_anno["category"].tolist()
            for bbox, label in zip(bboxs, labels):
                self.annotations.append(self._annotation(label, bbox))
                self.ann_id += 1

        self.images.append(self._image(img_path, h, w))
        self.img_id += 1

    instance = {}
    instance['info'] = 'mydata2coco'
    # NOTE(review): the COCO format names this top-level key "licenses"; the
    # original key is kept so previously generated files stay comparable.
    instance['license'] = ['none']
    instance['images'] = self.images
    instance['annotations'] = self.annotations
    instance['categories'] = self.categories
    return instance
def save_coco_json(self, instance, save_path):
    """Write ``instance`` to ``save_path`` as JSON indented by one space.

    Args:
        instance: JSON-serialisable object, e.g. the dict from ``to_coco``.
        save_path: Destination file path; an existing file is overwritten.
    """
    dump_options = {'indent': 1, 'separators': (',', ': ')}
    with open(save_path, mode='w') as out_file:
        json.dump(instance, out_file, **dump_options)

调用定义好的类实现转换

if __name__ == '__main__':

    # Fill these in before running.
    train_imgdir = ""    # folder with the training images
    valid_imgdir = ""    # folder with the validation images
    anno_dir = ""        # path to the custom annotation JSON file
    save_path = ""       # output folder for the generated COCO files
    num_categories = 1   # placeholder: total number of bbox categories

    # Start from an empty output folder. WARNING: this deletes everything
    # already inside save_path.
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    # to_coco requires num_categories; leaving it out raises TypeError.
    fabric2coco_train = MyData2COCO()
    train_instance = fabric2coco_train.to_coco(anno_dir, train_imgdir, num_categories)
    save_path_name = os.path.join(save_path, 'anno_train.json')
    fabric2coco_train.save_coco_json(train_instance, save_path_name)

    # A fresh converter per split, so image and annotation ids restart at 0.
    fabric2coco_valid = MyData2COCO()
    valid_instance = fabric2coco_valid.to_coco(anno_dir, valid_imgdir, num_categories)
    save_path_name = os.path.join(save_path, 'anno_valid.json')
    fabric2coco_valid.save_coco_json(valid_instance, save_path_name)

整个程序文件可以在此链接下载

https://download.csdn.net/download/u011797832/11941496

  • 4
    点赞
  • 35
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值