制作自己的COCO关键点数据集

最新推荐文章于 2025-04-11 21:47:14 发布

baddeku

最新推荐文章于 2025-04-11 21:47:14 发布

阅读量5k

点赞数 11

分类专栏：姿态估计 COCO 关键点检测文章标签：深度学习计算机视觉 python

本文链接：https://blog.csdn.net/qq_30283085/article/details/112491704

版权

关键点检测同时被 3 个专栏收录

4 篇文章

订阅专栏

COCO

3 篇文章

订阅专栏

姿态估计

2 篇文章

订阅专栏

依据MS COCO数据集关键点的标准,制作自己的人体关键点数据集.

MS COCO的关键点标注等详细信息:https://blog.csdn.net/qq_30283085/article/details/107736828

标注软件:开源的labelme,包含标点和标框等功能.

从收集自己的图片,使用labelme标注,生成coco数据集格式的关键点是一个经历很多步骤的过程.

在我的项目中,因为目标的特殊性还加入了一些其他操作.数据大概包含23个类,每个类预测不同的关键点.

1.收集到的部分图片,其实际显示效果(方向/尺寸)与图片属性中记录的长宽不一致(常见原因可能是EXIF方向标记,待确认),导致神经网络训练时dataloader读取图像出错.

解决方法:cv2.imread和cv2.imwrite将图片重新读写一次.

import os
import cv2
# Re-encode every image in the class folders 1..23: each file is read with
# OpenCV and written back to the same path.
path = ''  # root folder that contains the sub-folders "1" .. "23"
for a in range(1, 24):
    dir_path = path + '/' + str(a)
    for item in os.listdir(dir_path):
        # The labelme annotation files sit next to the images; leave them alone.
        # splitext also copes with names that contain no dot at all.
        if os.path.splitext(item)[1].lower() == '.json':
            continue
        img_path = dir_path + '/' + item
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread yields None for anything it cannot decode (hidden
            # files, sub-folders, broken images); there is nothing to write back.
            print('{} could not be read, skipped'.format(img_path))
            continue
        cv2.imwrite(img_path, img)

2.为了降低图像采集的成本,一部分数据由已采集的数据水平翻转得到,这包含了图像的水平翻转和对应的json标注文件的标注信息随翻转进行坐标变换(这里的json由labelme标注生成)

(1)图片水平翻转

import cv2
import os

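# Flip each image horizontally and append 'toflip' to its original name; images that were themselves produced by flipping are not flipped a second time.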

def img_flip(a, b, data_path=''):
    """Write horizontally flipped copies of the images in folder ``a`` into folder ``b``.

    Each output file is named ``<stem>toflip<ext>``. Files whose stem already
    ends in ``toflip`` are themselves flip results and are not flipped again.
    JSON files, hidden files and files without an extension are ignored.

    Args:
        a: name (or number) of the source sub-folder under ``data_path``.
        b: name (or number) of the destination sub-folder under ``data_path``.
        data_path: root folder holding both sub-folders. Defaults to ``''``,
            the value that used to be hard-coded.

    Raises:
        FileNotFoundError: if the source or destination folder does not exist.
    """
    folder_path = data_path + '/' + str(a)
    result_path = data_path + '/' + str(b)
    # Fail early on a missing destination instead of flipping every image and
    # only then discovering that nothing could be written.
    if not os.path.isdir(result_path):
        raise FileNotFoundError('destination folder {} does not exist'.format(result_path))

    handled = 0
    not_handled = 0
    for file in os.listdir(folder_path):
        # splitext splits on the last dot only, so names with several dots
        # (or none) no longer break the unpacking.
        stem, ext = os.path.splitext(file)
        if file.startswith('.') or not ext or ext.lower() == '.json':
            continue
        if stem.endswith('toflip'):
            print('{} is not handled'.format(file))
            not_handled = not_handled + 1
            continue
        img = cv2.imread(folder_path + '/' + file)
        if img is None:
            # Unreadable file: report it instead of crashing inside cv2.flip.
            print('{} could not be read, not handled'.format(file))
            not_handled = not_handled + 1
            continue
        cv2.imwrite(result_path + '/' + stem + 'toflip' + ext, cv2.flip(img, 1))
        print('{} is handled'.format(file))
        handled = handled + 1
    print('{} imgs is not handled'.format(not_handled))
    print('{} imgs is handled'.format(handled))

if __name__ == '__main__':
    # (source folder, destination folder) pairs, processed in this order.
    # The reverse directions 9 -> 8 and 11 -> 10 were disabled in the
    # original script and are therefore not listed.
    flip_jobs = (
        (4, 5),
        (8, 9),
        (10, 11),
        (17, 16),
        (16, 17),
        (18, 19),
        (19, 18),
        (21, 20),
        (20, 21),
        (23, 22),
        (22, 23),
    )
    for src_dir, dst_dir in flip_jobs:
        print("img_flip({}, {})".format(src_dir, dst_dir))
        img_flip(src_dir, dst_dir)

(2)json文件内的标注信息随图片的水平翻转进行坐标变换

import numpy as np
import json
import glob
import codecs
import os

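# The images are flipped horizontally and saved with 'toflip' appended to their names, so the annotated points in the matching json file must be converted too: new_x = image_width - x - 1, y unchanged. The json file name follows the flipped image name.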

class MyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert NumPy values to built-in types; defer anything else to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # The base implementation raises TypeError for unsupported objects.
        return super().default(obj)

class json_flip(object):
    """Mirror the annotations of one labelme JSON file for a horizontally flipped image.

    Only ``point`` and ``rectangle`` shapes are carried over; shapes of any
    other type are left out of the result. The output keeps the keys
    ``shapes``, ``imagePath``, ``imageHeight`` and ``imageWidth``.

    The input is decoded as GBK, as in the original script.
    NOTE(review): confirm this matches the encoding the annotation files were
    written with if labels can contain non-ASCII text.
    """

    def __init__(self, jsonfile, save_path):
        """Remember the input file and where the flipped annotation is saved.

        Args:
            jsonfile: path of the labelme JSON file to read.
            save_path: path the flipped JSON file is written to.
        """
        self.shapes = []
        self.imagePath = ''
        self.imageHeight = 0
        self.imageWidth = 0
        self.jsonfile = jsonfile
        self.save_path = save_path  # where the flipped json is saved
        self.labelme = {}

    def json_flip(self):
        """Read the input file and build the flipped annotation in ``self.labelme``.

        Raises:
            KeyError: if a required key is missing from the input.
            IndexError: if a point shape has no point or a rectangle has fewer than two.
        """
        # Context manager so the file handle is closed even if parsing fails.
        with codecs.open(self.jsonfile, 'r', 'gbk') as fp:
            data = json.load(fp)

        # splitext only splits off the final extension, so names such as
        # "my.img.jpg" or paths such as "../imgs/a.jpg" keep their full stem.
        stem, ext = os.path.splitext(data["imagePath"])
        self.imagePath = stem + 'toflip' + ext
        self.imageHeight = data["imageHeight"]
        self.imageWidth = data["imageWidth"]

        # Start from an empty list so calling this method twice (e.g. directly
        # and then again through save_json) does not duplicate every shape.
        self.shapes = []
        points_per_type = {'point': 1, 'rectangle': 2}
        for shape in data["shapes"]:
            shape_type = shape['shape_type']
            if shape_type not in points_per_type:
                continue
            flipped = [self.point_flip(shape["points"][k])
                       for k in range(points_per_type[shape_type])]
            self.shapes.append({
                "shape_type": shape_type,
                "label": shape["label"],
                "points": flipped,
            })

        self.labelme = {
            "shapes": self.shapes,
            "imagePath": self.imagePath,
            "imageHeight": self.imageHeight,
            "imageWidth": self.imageWidth,
        }

    def point_flip(self, point):
        """Return ``point`` mirrored horizontally: x becomes ``imageWidth - x - 1``, y is kept."""
        return [self.imageWidth - point[0] - 1, point[1]]

    def save_json(self):
        """Build the flipped annotation and write it to ``self.save_path``."""
        self.json_flip()
        # indent=4 keeps the output readable; the with-block closes the file.
        with open(self.save_path, 'w') as fp:
            json.dump(self.labelme, fp, indent=4, cls=MyEncoder)


def dir_json_flip(json_path, a, b):
    """Flip every labelme JSON file of folder ``a`` and save the result in folder ``b``.

    Each output file is named ``<stem>toflip.json``. Files whose stem already
    ends in ``toflip`` belong to flipped images and are not converted again.
    Entries that are not ``.json`` files are ignored.

    Args:
        json_path: root folder that holds both sub-folders.
        a: name (or number) of the input sub-folder.
        b: name (or number) of the output sub-folder.
    """
    json_inputdir_path = json_path + '/' + str(a)
    json_outputdir_path = json_path + '/' + str(b)

    handled = 0
    not_handled = 0
    for file in os.listdir(json_inputdir_path):
        # splitext handles names without a dot (which used to raise
        # IndexError) and names with several dots (which used to be truncated).
        stem, ext = os.path.splitext(file)
        if ext != '.json':
            continue
        # Match the 'toflip' suffix itself; a name that merely contains the
        # letters "to" is not a flip result.
        if stem.endswith('toflip'):
            print('{} is not handled'.format(file))
            not_handled = not_handled + 1
            continue
        json_file_path = json_inputdir_path + '/' + file
        json_output_path = json_outputdir_path + '/' + stem + 'toflip' + ext
        json_flip(json_file_path, json_output_path).save_json()
        print('{} is handled'.format(file))
        handled = handled + 1
    print('{} jsons is not handled'.format(not_handled))
    print('{} jsons is handled\n'.format(handled))

if __name__ == '__main__':
    json_path = ''

    # (input folder, output folder) pairs, processed in this order.
    flip_jobs = (
        (4, 5),
        (8, 9),
        (10, 11),
        (17, 16),
        (16, 17),
        (18, 19),
        (19, 18),
        (21, 20),
        (20, 21),
        (23, 22),
        (22, 23),
    )
    for src_dir, dst_dir in flip_jobs:
        print("dir_json_flip({}, {})".format(src_dir, dst_dir))
        dir_json_flip(json_path, src_dir, dst_dir)

3.COCO数据集的图片名为12位数字(如00000000xxxx.jpg).标注文件中"images"里的"id"与"annotations"里的"image_id"相同,它们的值等于对应图片名所表示的数字.因此要先对自己收集的图片进行重命名(rename),同时让对应标注文件的名称与图片名称保持一致.

import os

# Rename every image / annotation pair in class folders 1..23 to a 12-digit,
# zero-padded running number, so the file name can serve as the COCO image id.
path = ''  # root folder that contains the sub-folders "1" .. "23"
b = 0      # next id; keeps counting across folders so ids never repeat
for a in range(1, 24):
    dir_path = path + '/' + str(a)
    # Sorted so the name -> id assignment is the same on every run; hidden
    # files and sub-folders are not part of the dataset.
    file_list = sorted(
        item for item in os.listdir(dir_path)
        if not item.startswith('.') and os.path.isfile(dir_path + '/' + item)
    )

    # One id per distinct stem, so an image and its json get the same number.
    # splitext keeps "a.b.jpg" (stem "a.b") apart from "a.jpg" (stem "a").
    new_names = {}
    for item in file_list:
        stem = os.path.splitext(item)[0]
        if stem not in new_names:
            new_names[stem] = '%012d' % b
            b = b + 1

    for item in file_list:
        stem, ext = os.path.splitext(item)
        # Every non-json file gets the '.jpg' suffix, as in the original
        # script; the later conversion step also writes '<id>.jpg' as file_name.
        new_ext = '.json' if ext == '.json' else '.jpg'
        src_path = dir_path + '/' + item
        result_path = dir_path + '/' + new_names[stem] + new_ext
        if src_path == result_path:
            continue
        if os.path.exists(result_path):
            # os.rename may silently replace an existing file; stop rather
            # than lose data.
            raise FileExistsError(
                'cannot rename {} to {}: target already exists'.format(src_path, result_path))
        os.rename(src_path, result_path)

4.关键的一步:将labelme生成标注文件转化为coco格式,并放在一个文件中,如:person_keypoints_train2017.json

具体通过三步进行

(1)读取每个图片的json文件信息,生成coco数据集格式的标注文件.每个类别文件夹下标注文件合成为一个包含多张图片的标注信息的标注文件.

import numpy as np
import json
import glob
import codecs

class MyEncoder(json.JSONEncoder):
    """JSON encoder with support for NumPy integers, floats and arrays."""

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy integers become ``int``, NumPy floats become ``float`` and
        arrays become nested lists; any other object is passed to the base
        class.
        """
        conversions = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in conversions:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)
    
class tococo(object):
    """Convert the labelme JSON files of one class folder into one COCO-style keypoint file.

    Every input file must be named ``<number>.json``; the number becomes the
    image id, the annotation id and (with ``.jpg`` appended) the image file
    name. Each image yields at most one annotation holding one bounding box
    and one shared keypoint vector of ``80 * 3`` values.

    Input files are decoded as GBK, as in the original script.
    """

    # Presumably the number of keypoints annotated for each of the 23 classes
    # (kept for reference; _IDX_NUM is its running sum).
    _CLASS_NUM = [27, 5, 10, 6, 6, 4, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    # Offset added to a shape's numeric label, indexed by class_id - 1, to get
    # its slot in the shared keypoint vector.
    _IDX_NUM = [0, 27, 32, 42, 48, 54, 58, 60, 63, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
    # Total number of keypoint slots (the sum of _CLASS_NUM).
    _NUM_KEYPOINTS = 80

    def __init__(self, jsonfile, save_path, a, image_id):
        """Store the conversion inputs.

        Args:
            jsonfile: list of labelme JSON file paths to convert.
            save_path: path of the COCO JSON file written by ``save_json``.
            a: 1-based class id of the folder being converted.
            image_id: starting value of the processed-file counter that
                ``save_json`` returns.
        """
        self.images = []
        self.categories = []
        self.annotations = []
        self.jsonfile = jsonfile
        self.save_path = save_path  # where the json is saved
        self.class_id = a           # class
        self.coco = {}
        self.image_id = image_id

    @staticmethod
    def _bbox_from_points(points):
        """Return ``[x_min, y_min, w, h]`` for two corner points, or None if they are malformed."""
        try:
            (x0, y0), (x1, y1) = points[0][:2], points[1][:2]
        except (IndexError, TypeError, ValueError):
            return None
        x_min, y_min = min(x0, x1), min(y0, y1)
        # Width and height include both border pixels, hence the + 1.
        return [x_min, y_min, max(x0, x1) - x_min + 1, max(y0, y1) - y_min + 1]

    def labelme_to_coco(self):
        """Read all input files and fill ``self.coco`` with images and annotations.

        A shape counts as the bounding box when its type is ``rectangle`` or
        its label is ``'90'`` or ``'99'``; every other shape is a keypoint
        whose numeric label selects its slot. An image without a usable
        bounding box is reported and gets no annotation. Previously such an
        image either crashed the run or silently reused the box size of the
        preceding image.

        Raises:
            ValueError: if a file name or a keypoint label is not numeric.
        """
        # Function-scope import: this script's import block does not provide os.
        import os

        images = []
        annotations = []
        for json_file in self.jsonfile:
            with codecs.open(json_file, 'r', 'gbk') as fp:
                data = json.load(fp)

            # Take the id from the file name itself instead of fixed character
            # offsets, so any directory prefix and any digit count work.
            stem = os.path.splitext(os.path.basename(json_file))[0]
            image_id = int(stem)
            images.append(self.get_images(stem + '.jpg', data["imageHeight"],
                                          data["imageWidth"], image_id))
            self.image_id = self.image_id + 1

            bbox = None
            num_keypoints = 0
            keypoints = [0] * 3 * self._NUM_KEYPOINTS
            for shape in data["shapes"]:
                if shape.get('shape_type') == 'rectangle' or shape["label"] in ('90', '99'):
                    box = self._bbox_from_points(shape["points"])
                    if box is None:
                        print('class: {}, image: {}'.format(self.class_id, image_id))
                    else:
                        bbox = box
                    continue

                idx = int(shape['label']) + self._IDX_NUM[self.class_id - 1]
                points = shape['points']
                if not 0 <= idx < self._NUM_KEYPOINTS or not points or len(points[0]) < 2:
                    print('class: {}, image: {}'.format(self.class_id, image_id))
                    continue
                # COCO keypoint triple: x, y, visibility (2 is always written here).
                keypoints[idx * 3:idx * 3 + 3] = [points[0][0], points[0][1], 2]
                num_keypoints = num_keypoints + 1

            if bbox is None:
                print('{}\\{} does not contain bbox\n'.format(self.class_id, json_file))
                continue

            annotations.append({
                'bbox': bbox,
                'segmentation': [[]],
                'num_keypoints': num_keypoints,
                'area': bbox[2] * bbox[3],
                'iscrowd': 0,
                'keypoints': keypoints,
                'image_id': image_id,   # id of the image this annotation belongs to
                'category_id': 1,
                'cls_id': self.class_id - 1,
                'id': image_id,         # annotation id
            })

        # Assign fresh lists so a second call does not duplicate the entries.
        self.images = images
        self.annotations = annotations
        self.coco["images"] = self.images
        self.coco["categories"] = self.categories
        self.coco["annotations"] = self.annotations

    def get_images(self, filename, height, width, image_id):
        """Return one entry for the COCO ``images`` list."""
        image = {}
        image["height"] = height
        image['width'] = width
        image["id"] = image_id
        image["file_name"] = filename
        return image

    def get_categories(self, name, class_id):
        """Return one entry for the COCO ``categories`` list."""
        category = {}
        category["supercategory"] = "Cancer"
        category['id'] = class_id
        category['name'] = name
        return category

    def get_annotations(self, points, image_id, ann_id, class_name):
        """Return a segmentation-style annotation built from a list of polygon points.

        Intended for segmentation data only; ``labelme_to_coco`` does not call
        it. ``class_name`` is accepted but not used.
        """
        annotation = {}
        mins = np.amin(points, axis=0)
        maxs = np.amax(points, axis=0)
        wh = maxs - mins
        x = mins[0]
        y = mins[1]
        w = wh[0]
        h = wh[1]
        area = w * h
        annotation['segmentation'] = [list(np.asarray(points).flatten())]
        annotation['iscrowd'] = 0
        annotation['image_id'] = image_id
        annotation['bbox'] = [x, y, w, h]
        annotation['area'] = area
        annotation['category_id'] = 1
        annotation['cls_id'] = self.class_id - 1
        annotation['id'] = ann_id
        return annotation

    def save_json(self):
        """Run the conversion, write the result to ``self.save_path`` and return the file counter."""
        self.labelme_to_coco()
        # indent=4 keeps the output readable; the with-block closes the file.
        with open(self.save_path, 'w') as fp:
            json.dump(self.coco, fp, indent=4, cls=MyEncoder)
        return self.image_id

# Convert each class folder of the training split into train_<class>.json.
print('Generate train json')
train_path = ''
image_id = 0  # data_train's ids are started from 0
for class_id in range(1, 24):
    json_path = '{}/{}/*.json'.format(train_path, class_id)
    result_path = '{}/train_{}.json'.format(train_path, class_id)
    labelme_json = glob.glob(json_path)
    # save_json hands back the running counter, which seeds the next folder.
    image_id = tococo(labelme_json, result_path, class_id, image_id).save_json()
print('{} is handled\n'.format(image_id))

# Same conversion for the validation split; its counter restarts at 0.
print('Generate val json')
val_path = ''
image_val_id = 0
for class_id in range(1, 24):
    json_path = '{}/{}/*.json'.format(val_path, class_id)
    result_path = '{}/val_{}.json'.format(val_path, class_id)
    labelme_json = glob.glob(json_path)
    image_val_id = tococo(labelme_json, result_path, class_id, image_val_id).save_json()
print('{} is handled\n'.format(image_val_id))

(2)不同文件夹的json合成一个json文件

import numpy as np
import json
import glob
import codecs

class MyEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays."""

    def default(self, obj):
        """Map NumPy integers, floats and arrays to ``int``, ``float`` and ``list``.

        Objects of any other type are handed to ``json.JSONEncoder.default``.
        """
        if isinstance(obj, (np.integer, np.floating)):
            return int(obj) if isinstance(obj, np.integer) else float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

class tococo(object):
    """Merge several COCO-style JSON files into a single one.

    The ``images`` and ``annotations`` lists of all input files are
    concatenated in input order. The ``categories`` list of the result is
    whatever was passed to the constructor (empty by default); the
    ``categories`` of the input files are not read.
    """

    def __init__(self, jsonfile, save_path, categories=None):
        """Store the merge inputs.

        Args:
            jsonfile: list of paths of the JSON files to merge.
            save_path: path of the merged JSON file written by ``save_json``.
            categories: optional list written as the ``categories`` entry of
                the merged file. Defaults to an empty list, which matches the
                previous behaviour.
        """
        self.images = []
        self.categories = [] if categories is None else list(categories)
        self.annotations = []
        self.jsonfile = jsonfile
        self.save_path = save_path  # where the json is saved
        self.coco = {}

    def labelme_to_coco(self):
        """Load every input file and build the merged structure in ``self.coco``."""
        images = []
        annotations = []
        for json_file in self.jsonfile:
            # Context manager so each input file is closed after it is parsed.
            with codecs.open(json_file, 'r', 'gbk') as fp:
                data = json.load(fp)
            images.extend(data["images"])
            annotations.extend(data["annotations"])

        # Assign fresh lists so calling this method twice does not duplicate
        # every image and annotation.
        self.images = images
        self.annotations = annotations
        self.coco["images"] = self.images
        self.coco["categories"] = self.categories
        self.coco["annotations"] = self.annotations

    def save_json(self):
        """Merge the inputs and write the result to ``self.save_path``."""
        self.labelme_to_coco()
        # indent=4 keeps the output readable; the with-block closes the file.
        with open(self.save_path, 'w') as fp:
            json.dump(self.coco, fp, indent=4, cls=MyEncoder)

# Merge train_1.json .. train_23.json into train.json.
train_path = ''
result_path = train_path + '/' + 'train.json'
labelme_json = [train_path + '/' + 'train_' + str(k) + '.json' for k in range(1, 24)]
c = tococo(labelme_json, result_path)
c.save_json()

# Merge val_1.json .. val_23.json into val.json.
val_path = ''
result_path = val_path + '/' + 'val.json'
labelme_json = [val_path + '/' + 'val_' + str(k) + '.json' for k in range(1, 24)]
c = tococo(labelme_json, result_path)
c.save_json()

(3)这里懒了一下,coco格式的标注文件中有以下信息,我是手动复制上去的

    "categories": [
        {
            "supercategory": "person", 
            "id": 1, 
            "name": "person", 
            "keypoints": [ 
            ],
            "skeleton": [
            ]
        }
    ],

5.将图片移动到../data/coco/images/文件夹下,并将上面生成的json分别重命名为person_keypoints_train2017.json与person_keypoints_val2017.json,放在../data/coco/annotations/文件夹下.

移动图片的代码(代码中使用的是shutil.copy,即复制而非剪切,原文件夹中的图片会保留)

import os
import shutil

train_path = ''
val_path = ''


result_train_path = '..../data/coco/images/train2017'
result_val_path = '..../data/coco/images/val2017'


def _copy_images(src_root, dst_dir):
    """Copy every non-JSON file from the class folders under ``src_root`` into ``dst_dir``.

    A class folder is a directory directly under ``src_root`` whose name is at
    most two characters long. Files are copied, not moved, and keep their names.
    """
    for entry in os.listdir(src_root):
        class_dir = src_root + '/' + entry
        # The isdir test keeps short-named plain files from being listed as
        # folders, which would raise NotADirectoryError.
        if len(entry) > 2 or not os.path.isdir(class_dir):
            continue
        for file in os.listdir(class_dir):
            # splitext also copes with names that contain no dot; hidden
            # files are not images and stay behind.
            if file.startswith('.') or os.path.splitext(file)[1] == '.json':
                continue
            shutil.copy(class_dir + '/' + file, dst_dir + '/' + file)


_copy_images(train_path, result_train_path)
_copy_images(val_path, result_val_path)