How to Convert Your Own Keypoint Dataset to COCO JSON Format


Tutorial

The code in this post is based on this one: Code
That article explains the COCO format very clearly, but its code walkthrough is thin and it never shows the original input format, so the later code does not line up with it. I have therefore modified and annotated several parts.
The current code is still somewhat redundant; I plan to clean it up later.

Data format

  1. Directory layout
    test1/anno_process
    ├── anno.txt (annotation file)
    ├── images (image folder)

  2. Annotation file format
    The file test1/anno_process/anno.txt above is the annotation file holding your own labels; its content looks like this:

000000009448.jpg 47,94,410,546 212,190,229,167,195,173,263,185,172,196,308,258,166,298,360,224,172,435,263,270,257,514,314,514,223,531,326,634,229,634,297,276,223,549,
000000016598.jpg 8,1,478,636 266,166,280,126,220,140,306,126,160,160,373,325,81,345,479,511,67,617,439,345,21,630,379,630,207,630,74,617,47,338,419,338,47,630,
000000054654.jpg 37,74,370,521 232,139,252,117,222,117,283,122,206,128,319,220,237,198,335,345,196,209,232,339,139,225,299,448,252,432,309,589,257,589,324,589,257,589,
000000054931.jpg 104,145,323,495 305,201,314,196,296,196,319,201,278,196,314,248,269,248,310,320,261,320,341,335,287,289,301,377,278,377,274,480,314,480,225,578,332,588,

Each line above is laid out as:

image_name x,y,width,height "nose"(x,y),"left_eye"(x,y),"right_eye"(x,y),"left_ear"(x,y),"right_ear"(x,y),"left_shoulder"(x,y),"right_shoulder"(x,y),"left_elbow"(x,y),"right_elbow"(x,y),"left_wrist"(x,y),"right_wrist"(x,y),"left_hip"(x,y),"right_hip"(x,y),"left_knee"(x,y),"right_knee"(x,y),"left_ankle"(x,y),"right_ankle"(x,y)
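To make the line format concrete, here is a small parsing sketch of my own (not part of the original script) applied to the first sample line above:

```python
def parse_anno_line(line):
    """Split one anno.txt line into (image name, [x, y, w, h], [(x, y), ...])."""
    name, bbox_str, kp_str = line.split()
    bbox = [int(v) for v in bbox_str.split(",")]
    nums = [int(v) for v in kp_str.split(",") if v]   # drop the empty field left by the trailing comma
    pairs = list(zip(nums[0::2], nums[1::2]))         # (x, y) keypoint pairs
    return name, bbox, pairs

name, bbox, pairs = parse_anno_line(
    "000000009448.jpg 47,94,410,546 "
    "212,190,229,167,195,173,263,185,172,196,308,258,166,298,360,224,"
    "172,435,263,270,257,514,314,514,223,531,326,634,229,634,297,276,223,549,"
)
```

This yields the image name, a 4-number bbox, and 17 keypoint pairs, matching the field description above.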

Code

Talk is cheap, show me the code!

The conversion script convet2coco(modify).py follows (already commented in detail, so it should be easy to follow):

# -*- coding: utf-8 -*-

"""
将数据转换成想要的 coco 数据的 json 格式
需要:文件名 标注框 关键点 类别

datasets process for object detection project.
for convert customer dataset format to coco data format,
"""

import traceback
import argparse
import datetime
import json
import cv2
import os

__CLASS__ = ['__background__', 'person']   # class list; the background class must come first.

def argparser():
    parser = argparse.ArgumentParser("define argument parser for pycococreator!")
    # root directory
    # parser.add_argument("-r", "--root_path", default="test1/ccpd_300x300", help="path of root directory")
    parser.add_argument("-r", "--root_path", default="test1/anno_process", help="path of root directory")

    # dataset splits: train, val, test
    parser.add_argument("-p", "--phase_folder", default=["test"], help="datasets path of [train, val, test]")

    # whether the annotations include keypoints
    parser.add_argument("-po", "--have_points", default=True, help="if have points we will deal with it!")

    # image folder name
    parser.add_argument("-im", "--images", default="images", help="folder of images")

    # annotation file name
    parser.add_argument("-anno", "--annotations", default="anno.txt", help="file of annotations")

    return parser.parse_args()

def MainProcessing(args):
    '''main process source code.'''
    annotations = {}                                                # annotations dictionary, which will dump to json format file.
    root_path = args.root_path                                      # "test1/anno_process"
    phase_folder = args.phase_folder                                # ["test"]
    images_folder = os.path.join(root_path, args.images)            # image folder: "test1/anno_process/images"
    anno_path = os.path.join(root_path, args.annotations)           # annotation file: "test1/anno_process/anno.txt"

    # coco annotations info.
    annotations["info"] = {
        "description": "customer dataset format convert to COCO format",
        "url": "http://cocodataset.org",
        "version": "1.0",
        "year": 2020,
        "contributor": "lqqq",
        "date_created": "2020"
    }
    # coco annotations licenses.
    annotations["licenses"] = [{
        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
        "id": 1,
        "name": "Apache License 2.0"
    }]
    # coco annotations categories.
    annotations["categories"] = []
    for cls, clsname in enumerate(__CLASS__):
        if clsname == '__background__':
            continue
        annotations["categories"].append(
            {
                "supercategory": "person",
                "id": cls,
                "name": clsname
            }
        )
        for catdict in annotations["categories"]:
            if "lpr" == catdict["name"] and args.have_points:
                catdict["keypoints"] = ["nose", "left_eye", "right_eye", "left_ear", "right_ear",
                                        "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
                                        "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee",
                                        "right_knee", "left_ankle", "right_ankle"]                          # 关键点
                catdict["skeleton"] = [[16, 14], [14, 12],[17, 15],[15, 13],[12, 13],[6, 12],
                                       [7, 13],[6, 7],[6, 8],[7, 9],[8, 10],[9, 11],[2, 3],
                                       [1, 2],[1, 3],[2, 4],[3, 5],[4, 6],[5, 7]]                           # 骨架

    for phase in phase_folder:
        annotations["images"] = []
        annotations["annotations"] = []

        if os.path.isfile(anno_path) and os.path.exists(images_folder):
            print("convert datasets {} to coco format!".format(phase))
            fd = open(anno_path, "r")
            # fd_w = open(filename_mapping_path, "w")
            step = 0
            for id, line in enumerate(fd.readlines()):
                if line:
                    label_info = line.split()

                    image_name = label_info[0]                              # image name
                    bbox = [int(x) for x in label_info[1].split(",")]       # bounding box
                    # cls = int(label_info[-1])                             # class id, 0 here

                    filename = os.path.join(images_folder, image_name)      # full image path
                    img = cv2.imread(filename)
                    height, width, _ = img.shape                            # read the image size
                    x1 = bbox[0]
                    y1 = bbox[1]                                            # top-left corner of the bbox (not its center)
                    # bw = bbox[2] - bbox[0]                                # width, if the bbox were stored as x1,y1,x2,y2
                    # bh = bbox[3] - bbox[1]                                # height, if the bbox were stored as x1,y1,x2,y2
                    bw = bbox[2]                                            # width w
                    bh = bbox[3]                                            # height h

                    # coco annotations images.
                    file_name = str(image_name)                                       # value of "file_name" under annotations["images"]

                    annotations["images"].append(
                        {
                            "license": 1,
                            "file_name": file_name,
                            "coco_url": "",
                            "height": height,
                            "width": width,
                            "date_captured": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                            "flickr_url": "",
                            "id": id
                        }
                    )
                    # coco annotations annotations.
                    annotations["annotations"].append(
                        {
                            "id": id,
                            "image_id": id,
                            "category_id": 1,           # 类别编号,对于人体关键点检测任务,他只有一个类别,所以恒为 1
                            "segmentation": [[]],
                            "area": 0,                  # 这个 area 是图像分割的东西,置为 0 就行
                            "bbox": [x1, y1, bw, bh],
                            "iscrowd": 0,               # 目标是否被遮盖,默认为0
                        }
                    )
                    if args.have_points:
                        v = 2                           # visibility flag: 0 = not labeled, 1 = labeled but not visible, 2 = labeled and visible
                        catdict = annotations["annotations"][id]
                        # NOTE: the original code compared against "lpr" here as
                        # well, which never matches __CLASS__; use "person".
                        if "person" == __CLASS__[catdict["category_id"]]:
                            # points = [int(p) for p in label_info[2].split(",")]
                            points = label_info[2].split(",")[:-1]                  # [:-1] drops the empty field left by the trailing comma
                            pp = []
                            for i in range(0, len(points), 2):
                                pp.append(int(points[i]))
                                pp.append(int(points[i + 1]))
                                pp.append(v)                                        # every keypoint is stored as (x, y, v)
                            catdict["keypoints"] = pp

                            catdict["num_keypoints"] = int(len(points) / 2)         # usually 17

                    step += 1
                    if step % 100 == 0:
                        print("processing {} ...".format(step))
            fd.close()
            # fd_w.close()
        else:
            print("WARNNING: file path incomplete, please check!")

        json_path = os.path.join(root_path, phase+".json")
        with open(json_path, "w") as f:
            json.dump(annotations, f)


if __name__ == "__main__":
    print("beginning to convert customer format to coco format!")
    args = argparser()
    try:
        MainProcessing(args)
    except Exception as e:
        traceback.print_exc()
    print("successful to convert customer format to coco format")

Results

The JSON it writes out is minified and hard to read directly; paste it into any online JSON formatter to inspect it. The final result is attached below:
test.json

{
	"info": {
		"description": "customer dataset format convert to COCO format",
		"url": "http://cocodataset.org",
		"version": "1.0",
		"year": 2020,
		"contributor": "lqqq",
		"date_created": "2020"
	},
	"licenses": [{
		"url": "https://www.apache.org/licenses/LICENSE-2.0.html",
		"id": 1,
		"name": "Apache License 2.0"
	}],
	"categories": [{
		"supercategory": "person",
		"id": 1,
		"name": "lpr",
		"keypoints": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"],
		"skeleton": [
			[16, 14],
			[14, 12],
			[17, 15],
			[15, 13],
			[12, 13],
			[6, 12],
			[7, 13],
			[6, 7],
			[6, 8],
			[7, 9],
			[8, 10],
			[9, 11],
			[2, 3],
			[1, 2],
			[1, 3],
			[2, 4],
			[3, 5],
			[4, 6],
			[5, 7]
		]
	}],
	"images": [{
		"license": 1,
		"file_name": "000000009448.jpg",
		"coco_url": "",
		"height": 640,
		"width": 551,
		"date_captured": "2020-11-19 20:29:12",
		"flickr_url": "",
		"id": 0
	}, {
		"license": 1,
		"file_name": "000000016598.jpg",
		"coco_url": "",
		"height": 640,
		"width": 478,
		"date_captured": "2020-11-19 20:29:12",
		"flickr_url": "",
		"id": 1
	}, {
		"license": 1,
		"file_name": "000000054654.jpg",
		"coco_url": "",
		"height": 640,
		"width": 407,
		"date_captured": "2020-11-19 20:29:12",
		"flickr_url": "",
		"id": 2
	}, {
		"license": 1,
		"file_name": "000000054931.jpg",
		"coco_url": "",
		"height": 640,
		"width": 427,
		"date_captured": "2020-11-19 20:29:12",
		"flickr_url": "",
		"id": 3
	}],
	"annotations": [{
		"id": 0,
		"image_id": 0,
		"category_id": 1,
		"segmentation": [
			[]
		],
		"area": 0,
		"bbox": [47, 94, 410, 546],
		"iscrowd": 0,
		"keypoints": [212, 190, 2, 229, 167, 2, 195, 173, 2, 263, 185, 2, 172, 196, 2, 308, 258, 2, 166, 298, 2, 360, 224, 2, 172, 435, 2, 263, 270, 2, 257, 514, 2, 314, 514, 2, 223, 531, 2, 326, 634, 2, 229, 634, 2, 297, 276, 2, 223, 549, 2],
		"num_keypoints": 17
	}, {
		"id": 1,
		"image_id": 1,
		"category_id": 1,
		"segmentation": [
			[]
		],
		"area": 0,
		"bbox": [8, 1, 478, 636],
		"iscrowd": 0,
		"keypoints": [266, 166, 2, 280, 126, 2, 220, 140, 2, 306, 126, 2, 160, 160, 2, 373, 325, 2, 81, 345, 2, 479, 511, 2, 67, 617, 2, 439, 345, 2, 21, 630, 2, 379, 630, 2, 207, 630, 2, 74, 617, 2, 47, 338, 2, 419, 338, 2, 47, 630, 2],
		"num_keypoints": 17
	}, {
		"id": 2,
		"image_id": 2,
		"category_id": 1,
		"segmentation": [
			[]
		],
		"area": 0,
		"bbox": [37, 74, 370, 521],
		"iscrowd": 0,
		"keypoints": [232, 139, 2, 252, 117, 2, 222, 117, 2, 283, 122, 2, 206, 128, 2, 319, 220, 2, 237, 198, 2, 335, 345, 2, 196, 209, 2, 232, 339, 2, 139, 225, 2, 299, 448, 2, 252, 432, 2, 309, 589, 2, 257, 589, 2, 324, 589, 2, 257, 589, 2],
		"num_keypoints": 17
	}, {
		"id": 3,
		"image_id": 3,
		"category_id": 1,
		"segmentation": [
			[]
		],
		"area": 0,
		"bbox": [104, 145, 323, 495],
		"iscrowd": 0,
		"keypoints": [305, 201, 2, 314, 196, 2, 296, 196, 2, 319, 201, 2, 278, 196, 2, 314, 248, 2, 269, 248, 2, 310, 320, 2, 261, 320, 2, 341, 335, 2, 287, 289, 2, 301, 377, 2, 278, 377, 2, 274, 480, 2, 314, 480, 2, 225, 578, 2, 332, 588, 2],
		"num_keypoints": 17
	}]
}
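If you prefer to stay offline, Python's standard library can also pretty-print the minified file; a minimal sketch (the path simply follows the article's layout):

```python
import json

def prettify(json_text):
    """Re-serialize a minified JSON string with 2-space indentation."""
    return json.dumps(json.loads(json_text), ensure_ascii=False, indent=2)

# e.g.:
# with open("test1/anno_process/test.json") as f:
#     print(prettify(f.read()))
```

Equivalently, `python -m json.tool test1/anno_process/test.json` prints an indented version straight from the command line.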