上文博客已经把 COCO 数据集的整体结构介绍得很清楚。
下面针对人体关键点(person keypoints)标注数据的使用,详细介绍其文件结构:
{
"info": {
"description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
},
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
……
……
],
"images": [
{
"license": 4,
"file_name": "000000397133.jpg", # 图片名
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg", # coco 链接地址
"height": 427, # 高
"width": 640, # 宽
"date_captured": "2013-11-14 17:02:52", # 获取日期
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg", # flickr 链接地址
"id": 397133 # 图片ID(每张图片ID唯一)
}
],
"categories": [
{
"supercategory": "person", # 主类
"id": 1, # class id
"name": "person", # 子类(具体类别)
"keypoints": [ # 相比Object Instance多了这个字段
"nose",
"left_eye",
"right_eye",
"left_ear",
"right_ear",
"left_shoulder",
"right_shoulder",
"left_elbow",
"right_elbow",
"left_wrist",
"right_wrist",
"left_hip",
"right_hip",
"left_knee",
"right_knee",
"left_ankle",
"right_ankle"
],
"skeleton": [ # 骨架
[
16,14
],
[
14,12
],
……
……
[
5,7
]
]
}
],
"annotations": [
{
"segmentation": [
[
446.71,70.66, # 多边形(对象mask)第一个点 x,y
466.07,72.89,
471.28,78.85,
473.51,88.52,
473.51,98.2,
……
……
443.74,69.92
]
],
"num_keypoints": 13, # 关键点数
"area": 17376.91885,
"iscrowd": 0,
"keypoints": [
# v=0 表示这个关键点没有标注(这种情况下x=y=v=0)
# v=1 表示这个关键点标注了但是不可见(被遮挡了)
# v=2 表示这个关键点标注了同时也可见
433,94,2, # x,y,v
434,90,2,
0,0,0,
443,98,2,
0,0,0,
……
……
],
"image_id": 397133, # 对应的图片ID
"bbox": [
388.66,69.92,109.41,277.62 # [x,y,w,h] 对象定位框
],
"category_id": 1, # 类别id
"id": 200887 # 对象id(每个对象id都是唯一的,即不能出现重复)
},
……
……
]
}
使用cocoapi中的函数,进行可视化。
对COCO类中showAnns()可视化分割结果部分进行注释,即不展示分割注释的部分。
下面是一张图片标注信息的展示(两个person实例,手动打的换行方便观察):
[{'segmentation': [[140.25, 347.14, 149.92, 300.39, 154.76, 281.04, 167.66, 253.64, 216.02, 226.23, 245.04, 205.27, 267.61, 185.93, 274.06, 155.3, 277.28, 150.46, 283.73, 150.46, 288.56, 206.88, 286.95, 229.45, 259.55, 240.74, 261.16, 303.61, 254.71, 382.6, 241.81, 385.83, 228.92, 458.37, 225.69, 511.57, 257.93, 514.79, 253.1, 526.08, 199.9, 526.08, 203.12, 411.62, 183.78, 480.94, 183.78, 500.29, 193.45, 505.12, 196.68, 508.35, 193.45, 514.79, 166.05, 521.24, 154.76, 521.24, 161.21, 489, 159.6, 455.15, 164.43, 429.35, 169.27, 411.62, 154.76, 384.21]], 'num_keypoints': 10, 'area': 27560.57365, 'iscrowd': 0, 'keypoints': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180, 171, 1, 226, 171, 1, 0, 0, 0, 270, 221, 2, 0, 0, 0, 271, 162, 1, 184, 293, 2, 209, 291, 2, 182, 404, 2, 216, 420, 2, 165, 498, 2, 213, 505, 2], 'image_id': 512836, 'bbox': [140.25, 150.46, 148.31, 375.62], 'category_id': 1, 'id': 214281},
{'segmentation': [[64.36, 96.01, 70.44, 80.45, 89.58, 78.06, 97.35, 84.64, 100.94, 100.79, 96.16, 104.98, 103.93, 116.34, 113.5, 116.94, 112.9, 150.43, 103.93, 152.83, 87.18, 137.87, 106.92, 158.81, 108.12, 173.76, 112.9, 215.03, 115.3, 226.99, 80.6, 231.78, 82.4, 125.31, 62.06, 119.33]], 'num_keypoints': 11, 'area': 4663.56705, 'iscrowd': 0, 'keypoints': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, 101, 2, 61, 121, 1, 88, 123, 2, 0, 0, 0, 108, 146, 2, 0, 0, 0, 98, 115, 2, 65, 188, 1, 87, 188, 2, 77, 243, 1, 92, 244, 1, 78, 280, 1, 92, 286, 1], 'image_id': 512836, 'bbox': [62.06, 78.06, 53.24, 153.72], 'category_id': 1, 'id': 1243067}]
用下面的代码进行可视化(不可视化分割注释效果,如需可视化取消showAnns()内代码的注释):
代码如下,对函数功能及用法做了注释:
import skimage.io as io
import pylab
import time as time
import json
import numpy as np
from collections import defaultdict
import itertools
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
def _isArrayLike(obj):
return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
class COCO:
    """Trimmed-down COCO annotation helper for loading, querying and
    visualizing person-keypoint annotations (segmentation drawing is
    deliberately commented out in showAnns)."""

    def __init__(self, annotation_file=None):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file
        :return:
        """
        # Lookup tables; filled by createIndex() once a dataset is loaded.
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        if annotation_file is not None:
            print('loading annotations into memory...')
            tic = time.time()
            # Context manager closes the handle deterministically
            # (the original json.load(open(...)) leaked it).
            with open(annotation_file, 'r') as f:
                dataset = json.load(f)
            assert isinstance(dataset, dict), 'annotation file format {} not supported'.format(type(dataset))
            print('Done (t={:0.2f}s)'.format(time.time() - tic))
            self.dataset = dataset
            self.createIndex()

    def createIndex(self):
        """Build id -> object lookup tables from self.dataset:
        anns (ann id -> ann), imgs (img id -> img), cats (cat id -> cat),
        imgToAnns (img id -> [anns]), catToImgs (cat id -> [img ids])."""
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann
        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img
        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat
        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])
        print('index created!')
        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """
        Get cat ids that satisfy given filter conditions; an empty filter is skipped.
        :param catNms (str array) : get cats for given cat names
        :param supNms (str array) : get cats for given supercategory names
        :param catIds (int array) : get cats for given cat ids
        :return: ids (int array) : integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        # Successive filters narrow the category list; each empty filter is a no-op.
        # (For person_keypoints annotations there is only one category: person.)
        cats = self.dataset['categories']
        if len(catNms) != 0:
            cats = [cat for cat in cats if cat['name'] in catNms]
        if len(supNms) != 0:
            cats = [cat for cat in cats if cat['supercategory'] in supNms]
        if len(catIds) != 0:
            cats = [cat for cat in cats if cat['id'] in catIds]
        return [cat['id'] for cat in cats]

    def loadCats(self, ids=[]):
        """
        Load cats with the specified ids.
        :param ids (int array) : integer ids specifying cats
        :return: cats (object array) : loaded cat objects
        """
        if _isArrayLike(ids):
            return [self.cats[id] for id in ids]
        elif isinstance(ids, int):
            return [self.cats[ids]]

    def getImgIds(self, imgIds=[], catIds=[]):
        """
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (int array) : get imgs with all given cats
        :return: ids (int array) : integer array of img ids
        """
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        if len(imgIds) == len(catIds) == 0:
            ids = self.imgs.keys()
        else:
            ids = set(imgIds)
            for i, catId in enumerate(catIds):
                # If no imgIds were given, the first category seeds the set;
                # every further category intersects it ("imgs with ALL given cats").
                if i == 0 and len(ids) == 0:
                    ids = set(self.catToImgs[catId])
                else:
                    ids &= set(self.catToImgs[catId])
        return list(ids)

    def loadImgs(self, ids=[]):
        """
        Load imgs with the specified ids.
        :param ids (int array) : integer ids specifying img
        :return: imgs (object array) : loaded img objects
        """
        if _isArrayLike(ids):
            return [self.imgs[id] for id in ids]
        elif isinstance(ids, int):
            return [self.imgs[ids]]

    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds (int array) : get anns for given imgs
               catIds (int array) : get anns for given cats
               areaRng (float array) : get anns for given area range (e.g. [0 inf])
               iscrowd (boolean) : get anns for given crowd label (False or True)
        :return: ids (int array) : integer array of ann ids
        """
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        if len(imgIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            # Collect all anns belonging to the requested images.
            if len(imgIds) != 0:
                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            # Then narrow by category and by (exclusive) area range.
            anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds]
            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
        if iscrowd is not None:
            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        else:
            ids = [ann['id'] for ann in anns]
        return ids

    def loadAnns(self, ids=[]):
        """
        Load anns with the specified ids.
        :param ids (int array) : integer ids specifying anns
        :return: anns (object array) : loaded ann objects
        """
        if _isArrayLike(ids):
            return [self.anns[id] for id in ids]
        elif isinstance(ids, int):
            return [self.anns[ids]]

    def showAnns(self, anns):
        """
        Display the specified annotations on the current matplotlib axes.
        Keypoints are drawn (skeleton lines + points); segmentation drawing is
        intentionally commented out — uncomment the block below to restore it.
        :param anns (array of object): annotations to display
        :return: None
        """
        if len(anns) == 0:
            return 0
        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
            datasetType = 'instances'
        elif 'caption' in anns[0]:
            datasetType = 'captions'
        else:
            raise Exception('datasetType not supported')
        if datasetType == 'instances':
            ax = plt.gca()
            ax.set_autoscale_on(False)
            polygons = []
            color = []
            for ann in anns:
                # One random (bright-ish) color per annotation instance.
                c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
                # if 'segmentation' in ann:
                #     if type(ann['segmentation']) == list:
                #         # polygon
                #         for seg in ann['segmentation']:
                #             poly = np.array(seg).reshape((int(len(seg)/2), 2))
                #             polygons.append(Polygon(poly))
                #             color.append(c)
                #     else:
                #         # mask
                #         t = self.imgs[ann['image_id']]
                #         if type(ann['segmentation']['counts']) == list:
                #             rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                #         else:
                #             rle = [ann['segmentation']]
                #         m = maskUtils.decode(rle)
                #         img = np.ones( (m.shape[0], m.shape[1], 3) )
                #         if ann['iscrowd'] == 1:
                #             color_mask = np.array([2.0,166.0,101.0])/255
                #         if ann['iscrowd'] == 0:
                #             color_mask = np.random.random((1, 3)).tolist()[0]
                #         for i in range(3):
                #             img[:,:,i] = color_mask[i]
                #         ax.imshow(np.dstack( (img, m*0.5) ))
                if 'keypoints' in ann and isinstance(ann['keypoints'], list):
                    # turn skeleton into zero-based index
                    sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
                    kp = np.array(ann['keypoints'])
                    x = kp[0::3]
                    y = kp[1::3]
                    v = kp[2::3]  # visibility: 0=unlabeled, 1=labeled+occluded, 2=labeled+visible
                    for sk in sks:
                        if np.all(v[sk] > 0):
                            # Draw the skeleton edge between two labeled keypoints.
                            plt.plot(x[sk], y[sk], linewidth=1, color=c)
                    # Draw the keypoints: labeled ones get a black edge,
                    # visible ones are drawn fully in the instance color.
                    plt.plot(x[v>0], y[v>0], 'o', markersize=4, markerfacecolor=c, markeredgecolor='k', markeredgewidth=1)
                    plt.plot(x[v>1], y[v>1], 'o', markersize=4, markerfacecolor=c, markeredgecolor=c, markeredgewidth=1)
            p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
            ax.add_collection(p)
            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
            ax.add_collection(p)
        elif datasetType == 'captions':
            for ann in anns:
                print(ann['caption'])
# Demo driver: pick a random person image from COCO val2017 and draw its
# keypoint annotations on top of the image.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
# NOTE(review): hard-coded local paths — adjust to your own COCO layout.
annFile = '/home/data1/human-pose-estimation.pytorch/data/coco/annotations/person_keypoints_val2017.json'
img_prefix='/home/data1/human-pose-estimation.pytorch/data/coco/images/val2017'
# initialize COCO api for instance annotations
coco = COCO(annFile)
# getCatIds(catNms=[], supNms=[], catIds=[])
# filter category ids by category name, supercategory name, or category id
catIds = coco.getCatIds(catNms=['person'])
# getImgIds(imgIds=[], catIds=[])
# get image ids from image ids and/or category ids
imgIds = coco.getImgIds(catIds=catIds)
# imgIds = coco.getImgIds(imgIds=[1407])
# loadImgs(ids=[])
# with the image ids, loadImgs returns the full image records;
# here one image is picked at random from the list
img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]
I = io.imread('%s/%s' % (img_prefix, img['file_name']))
plt.imshow(I)
plt.axis('off')
ax = plt.gca()
# getAnnIds(imgIds=[], catIds=[], areaRng=[], iscrowd=None)
# get annotation ids filtered by image id, category id, area range and crowd flag
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
# loadAnns(ids=[])
# load the full annotation records for those annotation ids
anns = coco.loadAnns(annIds)
print('\n')
print(anns)
coco.showAnns(anns)
plt.imshow(I)
plt.axis('off')
plt.show()
其他图片的可视化效果示例: