COCO - Common Objects in Context - 2017 Train/Val 解析为 Darknet-YOLO 训练数据 (backpack-handbag-suitcase)

最新推荐文章于 2024-04-16 22:56:38 发布

Yongqiang Cheng

最新推荐文章于 2024-04-16 22:56:38 发布

阅读量912

点赞数 1

分类专栏： COCO - Open Images 文章标签： COCO - Common Objects in Context 2017 Train/Val 解析 backpack - handbag - suitcase

世上没有白读的书，每一页都算数。

本文链接：https://blog.csdn.net/chengyq116/article/details/103448054

版权

COCO - Open Images 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

COCO - Common Objects in Context - 2017 Train/Val 解析为 Darknet - YOLO 训练数据 (backpack - handbag - suitcase)

category_id = 27 - backpack
category_id = 31 - handbag
category_id = 33 - suitcase

1. COCO 2017

2017 Train images [118K/18GB] - train2017.zip - 118287 images (jpgs) - 860001 annotations (bbox)
2017 Val images [5K/1GB] - val2017.zip - 5000 images (jpgs) - 36781 annotations (bbox)
2017 Train/Val annotations [241MB] - annotations_trainval2017.zip
captions_train2017.json + captions_val2017.json +
instances_train2017.json + instances_val2017.json +
person_keypoints_train2017.json + person_keypoints_val2017.json

strong@foreverstrong:~/data_update/COCO_2017$ ls -l
total 19927124
-rwxrwxrwx 1 strong strong   252907541 May  8  2018 annotations_trainval2017.zip
-rwxrwxrwx 1 strong strong 19336861798 May  8  2018 train2017.zip
-rwxrwxrwx 1 strong strong   815585330 May  8  2018 val2017.zip
strong@foreverstrong:~/data_update/COCO_2017$
strong@foreverstrong:~/data_update/COCO_2017$ ls -l
total 19927284
drwxrwxr-x 2 strong strong        4096 Dec  4 14:08 annotations
-rwxrwxrwx 1 strong strong   252907541 May  8  2018 annotations_trainval2017.zip
-rwxrwxrwx 1 strong strong 19336861798 May  8  2018 train2017.zip
drwxrwxr-x 2 strong strong      155648 Aug 31  2017 val2017
-rwxrwxrwx 1 strong strong   815585330 May  8  2018 val2017.zip
strong@foreverstrong:~/data_update/COCO_2017$ 
strong@foreverstrong:~/data_update/COCO_2017$ ls -l ./annotations
total 814884
-rw-rw-r-- 1 strong strong  91865115 Sep  1  2017 captions_train2017.json
-rw-rw-r-- 1 strong strong   3872473 Sep  1  2017 captions_val2017.json
-rw-rw-r-- 1 strong strong 469785474 Sep  1  2017 instances_train2017.json
-rw-rw-r-- 1 strong strong  19987840 Sep  1  2017 instances_val2017.json
-rw-rw-r-- 1 strong strong 238884731 Sep  1  2017 person_keypoints_train2017.json
-rw-rw-r-- 1 strong strong  10020657 Sep  1  2017 person_keypoints_val2017.json
strong@foreverstrong:~/data_update/COCO_2017$

2. Data format

将 JSON 文件 instances_val2017.json 拖放到浏览器查看。

在这里插入图片描述

All annotations share the same basic data structure below:
info:
licenses:
images:
annotations:
categories:

images 字段列表元素的数量等同于训练集 (或验证集) 中图片的数量。
annotations 字段列表元素的数量等同于训练集 (或验证集) 中 bounding box 的数量。
categories 字段列表元素的数量等同于类别的数量 (coco 为 80 类)。

2.1 info:

在这里插入图片描述

2.2 licenses:

在这里插入图片描述

2.3 images:

在这里插入图片描述

2.4 annotations:

在这里插入图片描述

2.5 categories:

在这里插入图片描述

category_id = 27 - backpack
category_id = 31 - handbag
category_id = 33 - suitcase

3. coco_2017_parser_and_selection.py

COCO - Common Objects in Context - 2017 Train/Val 解析为 Darknet-YOLO 训练数据 (backpack-handbag-suitcase)
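Darknet - YOLO label format: one text line per object, `<class_id> <x_center> <y_center> <width> <height>`, where the four box values are normalized by the image width and height.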

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Yongqiang Cheng

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import shutil
import subprocess

import cv2

# COCO category_id -> Darknet class index used in the label files:
#   27 (backpack) -> 0
#   31 (handbag)  -> 1
#   33 (suitcase) -> 2
labels = {
    27: 0,
    31: 1,
    33: 2,
}

# Source images and instance annotations of the two COCO 2017 splits.
image_val2017 = "/home/strong/data_update/COCO_2017/val2017"
annotation_val2017 = "/home/strong/data_update/COCO_2017/annotations/instances_val2017.json"

image_train2017 = "/home/strong/data_update/COCO_2017/train2017"
annotation_train2017 = "/home/strong/data_update/COCO_2017/annotations/instances_train2017.json"

# Output folders: copied images, Darknet label files, and preview images with
# the boxes drawn on them. The commented-out values are the val2017 variants.
# train_images = "/home/strong/data_update/COCO_2017/val2017_train/JPEGImages"
# train_labels = "/home/strong/data_update/COCO_2017/val2017_train/labels"
# images_labels_path = "/home/strong/data_update/COCO_2017/val2017_train/JPEGImages-labels"
train_images = "/home/strong/data_update/COCO_2017/train2017_train/JPEGImages"
train_labels = "/home/strong/data_update/COCO_2017/train2017_train/labels"
images_labels_path = "/home/strong/data_update/COCO_2017/train2017_train/JPEGImages-labels"

# Split that this run actually parses.
image_2017 = image_val2017
annotation_2017 = annotation_val2017

# Create every output folder that does not exist yet.
for output_dir in (train_images, train_labels, images_labels_path):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

# cv2.namedWindow("yongqiang", cv2.WINDOW_NORMAL)

# size = (img_width, img_height)
# box = (float(box_xmin_pixel), float(box_xmax_pixel), float(box_ymin_pixel), float(box_ymax_pixel))
def convert(size, box):
    """Turn a pixel-space box into a normalized Darknet-YOLO (x, y, w, h) tuple.

    Args:
        size: (img_width, img_height) in pixels.
        box: (xmin, xmax, ymin, ymax) in pixels; the two x values come first.

    Returns:
        (x_center, y_center, width, height); the x values are multiplied by
        1 / img_width and the y values by 1 / img_height.
    """
    # Reciprocals are computed first and multiplied in afterwards.
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]

    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    center_x = (xmin + xmax) / 2.0
    center_y = (ymin + ymax) / 2.0
    span_x = xmax - xmin
    span_y = ymax - ymin

    return (center_x * inv_width, center_y * inv_height, span_x * inv_width, span_y * inv_height)


def bbox_fix(box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel, img_file_name,
             image_width=None, image_height=None):
    """Report box edges that fall outside the image and clamp them inside it.

    Each edge is first checked against [0, width - 1] / [0, height - 1] and a
    diagnostic is printed when it is out of range. Each edge is then clamped
    to [1, width - 2] / [1, height - 2], printing a "bug fix!" message for
    every edge that is actually changed.

    Args:
        box_xmin_pixel: Left edge of the box, in pixels.
        box_xmax_pixel: Right edge of the box, in pixels.
        box_ymin_pixel: Top edge of the box, in pixels.
        box_ymax_pixel: Bottom edge of the box, in pixels.
        img_file_name: Image file name, printed after every message.
        image_width: Image width in pixels. When None, the module-level
            ``img_width`` is used, as the original five-argument call expects.
        image_height: Image height in pixels. When None, the module-level
            ``img_height`` is used.

    Returns:
        (box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel)
        after clamping.
    """
    # The globals are only read when no explicit size is passed, so the
    # function can also be used (and tested) without the surrounding script.
    width = img_width if image_width is None else image_width
    height = img_height if image_height is None else image_height
    max_x = width - 2
    max_y = height - 2

    def report(message):
        print(message)
        print(img_file_name)

    # Pass 1: diagnostics only, nothing is modified here.
    for edge_name, edge_value, extent in (
            ("box_xmin_pixel", box_xmin_pixel, width),
            ("box_xmax_pixel", box_xmax_pixel, width),
            ("box_ymin_pixel", box_ymin_pixel, height),
            ("box_ymax_pixel", box_ymax_pixel, height),
    ):
        if (edge_value < 0) or (edge_value > (extent - 1)):
            report(edge_name + " error!")

    # Pass 2: clamp. The min edges test the lower bound first and the max
    # edges test the upper bound first; that order is kept as is, because it
    # decides the result for degenerate images narrower than 3 pixels.
    if box_xmin_pixel < 1:
        box_xmin_pixel = 1
        report("box_xmin_pixel bug fix!")

    if box_xmin_pixel > max_x:
        box_xmin_pixel = max_x
        report("box_xmin_pixel bug fix!")

    if box_ymin_pixel < 1:
        box_ymin_pixel = 1
        report("box_ymin_pixel bug fix!")

    if box_ymin_pixel > max_y:
        box_ymin_pixel = max_y
        report("box_ymin_pixel bug fix!")

    if box_xmax_pixel > max_x:
        box_xmax_pixel = max_x
        report("box_xmax_pixel bug fix!")

    if box_xmax_pixel < 1:
        box_xmax_pixel = 1
        report("box_xmax_pixel bug fix!")

    if box_ymax_pixel > max_y:
        box_ymax_pixel = max_y
        report("box_ymax_pixel bug fix!")

    # Strict comparison like the other three lower-bound clamps, so a value
    # that is already exactly 1 is not reported as "fixed".
    if box_ymax_pixel < 1:
        box_ymax_pixel = 1
        report("box_ymax_pixel bug fix!")

    return box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel


# ---------------------------------------------------------------------------
# Main conversion: read the COCO instance annotations, keep the boxes of the
# categories listed in `labels`, write one Darknet-YOLO label file per image
# that has at least one usable box, copy that image to `train_images` and save
# a preview with the boxes drawn to `images_labels_path`.
# ---------------------------------------------------------------------------

# The file handle is only needed for json.load, so it is closed before the
# long image loop starts.
with open(annotation_2017) as f:
    annotation_file = json.load(f)

print("annotation_file.keys(): %s" % (annotation_file.keys()))
for section in ('licenses', 'info', 'categories', 'images', 'annotations'):
    print("type(annotation_file['%s']): %s" % (section, type(annotation_file[section])))

# Note that a single object (iscrowd=0) may require multiple polygons, for example if occluded.
# Crowd annotations (iscrowd=1) are used to label large groups of objects (e.g. a crowd of people).
# Selected annotations are grouped by image id once, so the image loop below
# does a dictionary lookup instead of rescanning every annotation per image.
annotations_by_image = {}
for annotation in annotation_file['annotations']:
    if 1 == annotation['iscrowd']:
        continue

    if annotation['category_id'] in labels:
        annotations_by_image.setdefault(int(annotation['image_id']), []).append(annotation)

# Upper bound on wget retries for a missing image, so a dead URL or a broken
# network cannot hang the script forever.
MAX_DOWNLOAD_ATTEMPTS = 5

for image_info in annotation_file['images']:
    img_file_name = image_info['file_name']
    img_file = image_2017 + '/' + img_file_name

    if not os.path.exists(img_file):
        # Argument list instead of a shell string: the URL is never parsed by a shell.
        wget_command = ["wget", "-P", image_2017 + "/", image_info['coco_url']]
        print("wget_command: %s" % (" ".join(wget_command)))

        for _ in range(MAX_DOWNLOAD_ATTEMPTS):
            status = subprocess.call(wget_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            if 0 == status:
                break
        else:
            print("download failed, skip: %s" % (img_file_name))
            continue

    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread reports unreadable or corrupt files by returning None.
        print("cv2.imread failed, skip: %s" % (img_file))
        continue

    imgcopy = img.copy()
    # NOTE: img_width / img_height must remain module-level names, because
    # bbox_fix() is called below without an explicit image size and reads them.
    img_height, img_width, img_channel = img.shape

    # splitext removes only the extension; str.strip('.jpg') would strip any
    # leading/trailing '.', 'j', 'p' or 'g' characters instead.
    image_stem = os.path.splitext(img_file_name)[0]
    image_name_txt = image_stem + '.txt'
    image_id_int = int(image_stem)

    # One "<class> <x> <y> <w> <h>" line per accepted box of this image.
    label_lines = []

    for annotation in annotations_by_image.get(image_id_int, ()):
        print(img_file_name)

        # "bbox": [xmin, ymin, width, height]
        box_xmin_pixel = float(annotation['bbox'][0])
        box_xmax_pixel = float(annotation['bbox'][0]) + float(annotation['bbox'][2])
        box_ymin_pixel = float(annotation['bbox'][1])
        box_ymax_pixel = float(annotation['bbox'][1]) + float(annotation['bbox'][3])

        box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel = bbox_fix(box_xmin_pixel,
                                                                                  box_xmax_pixel,
                                                                                  box_ymin_pixel,
                                                                                  box_ymax_pixel,
                                                                                  img_file_name)

        # Defensive re-check after clamping: skip the box if any edge is still
        # outside the image; only the first offending edge is reported.
        out_of_range = [
            edge_name for edge_name, edge_value, extent in (
                ("box_xmin_pixel", box_xmin_pixel, img_width),
                ("box_xmax_pixel", box_xmax_pixel, img_width),
                ("box_ymin_pixel", box_ymin_pixel, img_height),
                ("box_ymax_pixel", box_ymax_pixel, img_height),
            ) if (edge_value < 0) or (edge_value > (extent - 1))
        ]
        if out_of_range:
            print("%s continue error!" % (out_of_range[0]))
            print(img_file_name)
            continue

        box_w_pixel = box_xmax_pixel - box_xmin_pixel + 1
        box_h_pixel = box_ymax_pixel - box_ymin_pixel + 1

        # Boxes smaller than 16 pixels in either direction are dropped.
        if (box_w_pixel < 16) or (box_h_pixel < 16):
            print("box_w_pixel & box_h_pixel continue error!")
            print(img_file_name)
            continue

        label = labels[annotation['category_id']]

        b = (float(box_xmin_pixel), float(box_xmax_pixel), float(box_ymin_pixel), float(box_ymax_pixel))
        bb = convert((img_width, img_height), b)

        label_lines.append(str(label) + " " + " ".join([str(a) for a in bb]) + '\n')

        # Map the normalized box back to pixels for the preview drawing.
        start_x = int((bb[0] - bb[2] / 2.0) * img_width)
        start_y = int((bb[1] - bb[3] / 2.0) * img_height)
        end_x = int((bb[0] + bb[2] / 2.0) * img_width)
        end_y = int((bb[1] + bb[3] / 2.0) * img_height)

        cv2.rectangle(imgcopy, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        cv2.putText(imgcopy, str(label), (start_x + 2, start_y + 26), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                    2)

    cv2.putText(imgcopy, img_file_name, (16, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

    if label_lines:
        # Written once per image in 'w' mode, so running the script again
        # replaces the label file instead of appending duplicate boxes.
        with open("%s/%s" % (train_labels, image_name_txt), 'w') as train_label_txt:
            train_label_txt.writelines(label_lines)

        cv2.imshow("yongqiang", imgcopy)

        shutil.copy("%s" % (img_file), "%s/" % (train_images))
        cv2.imwrite("%s/%s" % (images_labels_path, img_file_name.strip()), imgcopy)

        keyboard = cv2.waitKey(5) & 0xFF
        # wait for ESC key to exit
        if keyboard == 27:
            break

    # Manual selection variant (not enabled): show the preview with
    # cv2.waitKey(0), exit on ESC, and do the shutil.copy / cv2.imwrite above
    # only when the 's' key is pressed.

cv2.destroyAllWindows()

if __name__ == '__main__':
    # When run as a script, report the folder that contains this file.
    script_path = os.path.abspath(__file__)
    current_directory = os.path.dirname(script_path)
    print("current_directory:", current_directory)

在这里插入图片描述