对COCO格式数据集进行可视化

对COCO格式数据集进行可视化

有些 COCO 格式的数据集只提供图片和 train/val 的总 json 标注文件,很难直观地检查标注效果。本文的方法分三步拆解数据集:先把总 json 拆分为每张图片各自的 COCO 格式 json,再对单张图片的标注进行可视化,最后把每个 COCO 格式 json 转换为 labelme 格式 json。

拆解总json为每个图片的coco格式json

'''
从coco的标注文件里提取每张图片对应的json信息,并保存成新的json文件(以instance为例,其他的类似)。
修改处主要有总json文件路径,保存路径
'''
import matplotlib.pyplot as plt
import os, sys, zipfile
import urllib.request
import shutil
import numpy as np
import skimage.io as io
import pylab
import json
from pycocotools.coco import COCO

pylab.rcParams['figure.figsize'] = (8.0, 10.0)

# Combined COCO annotation file to split. Alternative inputs for other
# annotation types are kept below for reference.
# json_file = 'nyu_train.coco.json'
json_file = 'nyu_val.coco.json'
# json_file = '../../../coco dataset/annotations_trainval2017/instances_val2017.json'  # object instances
# json_file = './annotations/person_keypoints_val2017.json'  # object keypoints
# json_file = './annotations/captions_val2017.json'  # image captions

# Directory that receives one COCO-format JSON file per image. It is created
# up front so the first write cannot fail on a missing directory.
output_dir = './img_coco_jsons/val'
os.makedirs(output_dir, exist_ok=True)

with open(json_file, 'r', encoding='utf-8') as fp:
    data = json.load(fp)

# Sections shared by every per-image file; copied only when the combined
# file actually contains them.
shared_sections = {key: data[key] for key in ('info', 'licenses') if key in data}

# Group the annotations by image id in one pass so the loop below does not
# rescan the whole annotation list for every image.
anns_by_image = {}
for ann in data['annotations']:
    anns_by_image.setdefault(ann['image_id'], []).append(ann)

# The image record is taken straight from the parsed JSON; building a COCO
# index per image only to read back the same record is unnecessary.
for image in data['images']:
    data_2 = dict(shared_sections)
    data_2['images'] = [image]
    data_2['categories'] = data['categories']
    data_2['annotations'] = anns_by_image.get(image['id'], [])

    # Strip only the final extension so names containing extra dots stay
    # unique; any directory part of file_name is dropped.
    stem = os.path.splitext(os.path.basename(str(image['file_name'])))[0]
    with open(os.path.join(output_dir, stem + '.json'), 'w', encoding='utf-8') as fp:
        json.dump(data_2, fp, indent=4)

可视化每个图片的coco格式json

"""
coco格式的json文件可视化instance的mask
修改处主要有json路径,图片路径
"""
from __future__ import print_function
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import os, sys, zipfile
import urllib.request
import shutil
import numpy as np
import skimage.io as io
import pylab

pylab.rcParams['figure.figsize'] = (8.0, 10.0)

annFile = 'imgjsons/nyu_rgb_0003.json'  # path to the per-image COCO json
coco = COCO(annFile)

cats = coco.loadCats(coco.getCatIds())
# Categories without a 'supercategory' key are skipped instead of raising
# KeyError (custom datasets may omit it).
nms = {cat['supercategory'] for cat in cats if 'supercategory' in cat}

imgIds = coco.getImgIds()
if not imgIds:
    raise ValueError('no images found in %s' % annFile)
img = coco.loadImgs(imgIds[0])[0]
# dataType = './satisfied_images_train2017'
dataType = './NYUv2_SEMSEG/train/color'  # image directory
I = io.imread('%s/%s' % (dataType, img['file_name']))

# First figure: the raw image.
plt.axis('off')
plt.imshow(I)
plt.show()

# Second figure: the image with its instance annotations drawn on top.
# Collect each category id used by this image once.
catIds = sorted({
    ann['category_id']
    for ann in coco.dataset.get('annotations', [])
    if ann['image_id'] == img['id']
})

plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
plt.show()

将每个coco格式json转化为labelme格式json

"""
将用coco格式的json转化成labelme标注格式的json
修改处主要有参考labelme格式json路径,类别索引,图片路径,coco格式json路径,保存json路径
"""

import json
import cv2
import numpy as np
import os
import base64

def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 to produce a ``str``.
    """
    with open(image_path, 'rb') as image_file:
        encoded = base64.b64encode(image_file.read())
    return encoded.decode('utf-8')


# A labelme-format JSON file is used as a reference because much of its
# content is the same for every converted file and need not be modified.
def reference_labelme_json(ref_json_path='rgb0.json'):
    """Load the reference labelme JSON file.

    Args:
        ref_json_path: Path to an existing labelme-format JSON file.
            Defaults to ``'rgb0.json'`` in the current working directory.

    Returns:
        The parsed JSON content.
    """
    # The context manager closes the file even if parsing fails.
    with open(ref_json_path, encoding='utf-8') as fp:
        return json.load(fp)


def labelme_shapes(data, data_ref):
    """Convert the polygon annotations of a COCO dict into labelme shapes.

    Args:
        data: Parsed COCO-format JSON. Reads ``data['categories']`` (each
            entry has ``'id'`` and ``'name'``) and ``data['annotations']``
            (each entry has ``'category_id'`` and ``'segmentation'``).
        data_ref: Parsed reference labelme JSON. ``'shape_type'`` and
            ``'flags'`` of its first shape are applied to every output
            shape; ``'polygon'`` and ``{}`` are used when it has no shapes.

    Returns:
        A list of dicts with the keys ``'label'``, ``'points'``,
        ``'shape_type'`` and ``'flags'``, one per polygon.

    Raises:
        ValueError: If a polygon annotation references a category id that
            is not listed in ``data['categories']``.
    """
    # Build the id -> name lookup once. setdefault keeps the first entry
    # when a category id is listed more than once.
    category_names = {}
    for category in data['categories']:
        category_names.setdefault(category['id'], category['name'])

    ref_shapes = data_ref.get('shapes') or [{}]
    shape_type = ref_shapes[0].get('shape_type', 'polygon')
    flags = ref_shapes[0].get('flags', {})

    shapes = []
    for ann in data['annotations']:
        segmentation = ann['segmentation']
        # Only list-type segmentations (flat coordinate lists) are
        # converted; any other form, such as a dict, is skipped.
        if not isinstance(segmentation, list):
            continue

        category_id = ann['category_id']
        if category_id not in category_names:
            raise ValueError('annotation references unknown category id %r' % (category_id,))
        label = category_names[category_id]

        # An annotation may consist of several polygons; emit one shape per
        # polygon so no part of the instance is dropped.
        for polygon in segmentation:
            # Coordinates are stored flat as [x0, y0, x1, y1, ...]: even
            # indices are x values, odd indices are y values.
            points = [[x, y] for x, y in zip(polygon[::2], polygon[1::2])]
            if not points:
                continue
            shapes.append({
                'label': label,
                'points': points,
                'shape_type': shape_type,
                # Copied so the output shapes do not share one dict.
                'flags': dict(flags),
            })
    return shapes


def Coco2labelme(json_path, data_ref, image_dir='./NYUv2_SEMSEG/train/color/'):
    """Convert one per-image COCO JSON file into a labelme-format dict.

    Args:
        json_path: Path to a COCO-format JSON file; only its first image
            entry is used.
        data_ref: Parsed reference labelme JSON supplying ``'version'`` and
            ``'flags'`` (and the shape template used by ``labelme_shapes``).
        image_dir: Directory containing the image named by the JSON's
            ``file_name``. The default is the previously hard-coded path.
            NOTE(review): the default points at the train images while the
            script's ``root_dir`` points at the val JSONs - confirm they match.

    Returns:
        A dict with the labelme keys ``version``, ``flags``, ``shapes``,
        ``imagePath``, ``imageData`` (base64 of the image file),
        ``imageHeight`` and ``imageWidth``.

    Raises:
        ValueError: If the JSON file contains no image entry.
    """
    # Keep the file open only while parsing.
    with open(json_path, 'r', encoding='utf-8') as fp:
        data = json.load(fp)

    if not data.get('images'):
        raise ValueError('no image entry found in %s' % json_path)
    image_info = data['images'][0]
    file_name = image_info['file_name']

    return {
        'version': data_ref['version'],
        'flags': data_ref['flags'],
        'shapes': labelme_shapes(data, data_ref),
        'imagePath': file_name,
        # The image bytes are embedded in the output as base64.
        'imageData': image_to_base64(os.path.join(image_dir, file_name)),
        'imageHeight': image_info['height'],
        'imageWidth': image_info['width'],
    }


if __name__ == '__main__':
    # root_dir = './ROOT DIR'
    # Directory holding the per-image COCO-format JSON files.
    root_dir = './img_coco_jsons/val'
    # Directory that receives the converted labelme JSON files; created up
    # front so the first write cannot fail on a missing directory.
    output_dir = './labelmejsons'
    os.makedirs(output_dir, exist_ok=True)

    # Reference labelme JSON used as a template for the converted files.
    data_ref = reference_labelme_json()

    # Sorted so files are processed in a reproducible order.
    for json_name in sorted(os.listdir(root_dir)):
        if not json_name.endswith('.json'):
            continue
        print('当前文件: ', json_name)
        data_labelme = Coco2labelme(os.path.join(root_dir, json_name), data_ref)

        # Name the output after the image, stripping only the final
        # extension so names containing extra dots are not truncated.
        stem = os.path.splitext(os.path.basename(data_labelme['imagePath']))[0]
        with open(os.path.join(output_dir, stem + '.json'), 'w', encoding='utf-8') as fp:
            json.dump(data_labelme, fp, indent=4)

如果手头没有现成的 labelme 格式 json 可供参考,可以使用下面这个示例(其中 imageData 字段的 base64 内容已省略):

{
  "version": "5.0.1",
  "flags": {},
  "shapes": [
    {
      "label": "c",
      "points": [
        [
          532.4430379746835,
          49.227848101265806
        ],
        [
          534.9746835443037,
          94.79746835443038
        ],
        [
          540.0379746835442,
          101.12658227848101
        ],
        [
          510.9240506329114,
          153.0253164556962
        ],
        [
          510.9240506329114,
          358.08860759493666
        ],
        [
          522.3164556962025,
          379.60759493670884
        ],
        [
          624.8481012658227,
          375.8101265822785
        ],
        [
          634.9746835443037,
          366.9493670886076
        ],
        [
          638.7721518987341,
          166.94936708860757
        ],
        [
          627.379746835443,
          136.56962025316454
        ],
        [
          612.1898734177215,
          115.0506329113924
        ],
        [
          609.6582278481012,
          94.79746835443038
        ],
        [
          617.253164556962,
          84.67088607594937
        ],
        [
          615.9873417721519,
          54.29113924050631
        ],
        [
          607.126582278481,
          50.49367088607593
        ],
        [
          571.6835443037975,
          42.898734177215175
        ]
      ],
      "group_id": null,
      "shape_type": "polygon",
      "flags": {}
    },
    {
      "label": "b",
      "points": [
        [
          645.1012658227847,
          178.34177215189874
        ],
        [
          640.0379746835442,
          485.9367088607595
        ],
        [
          653.9620253164557,
          482.1392405063291
        ],
        [
          657.7594936708861,
          492.2658227848101
        ],
        [
          965.3544303797468,
          499.8607594936709
        ],
        [
          1062.8227848101264,
          502.3924050632911
        ],
        [
          1065.3544303797469,
          491.0
        ],
        [
          1080.5443037974683,
          501.126582278481
        ],
        [
          1084.3417721518988,
          182.1392405063291
        ],
        [
          1071.6835443037974,
          187.2025316455696
        ],
        [
          1067.886075949367,
          180.873417721519
        ],
        [
          666.620253164557,
          177.07594936708858
        ]
      ],
      "group_id": null,
      "shape_type": "polygon",
      "flags": {}
    }
  ],
  "imagePath": "rgb0.jpg",
  "imageData": "/9j/4AA.........pXHqf/Z",
  "imageHeight": 720,
  "imageWidth": 1280
}

处理后即可得到每张图片的 labelme 格式 json,其格式与自己使用 labelme 标注后导出的文件一致,可以继续进行其他后处理。

主要参考链接:
https://www.cnblogs.com/taotingz/p/11443209.html

  • 7
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值