COCO - Common Objects in Context - 2017 Train/Val 解析为 Darknet - YOLO 训练数据 (backpack - handbag - suitcase)
category_id = 27 - backpack
category_id = 31 - handbag
category_id = 33 - suitcase
1. COCO 2017
2017 Train images [118K/18GB] - train2017.zip - 118287 images (jpgs) - 860001 annotations (bbox)
2017 Val images [5K/1GB] - val2017.zip - 5000 images (jpgs) - 36781 annotations (bbox)
2017 Train/Val annotations [241MB] - annotations_trainval2017.zip
captions_train2017.json + captions_val2017.json +
instances_train2017.json + instances_val2017.json +
person_keypoints_train2017.json + person_keypoints_val2017.json
strong@foreverstrong:~/data_update/COCO_2017$ ls -l
total 19927124
-rwxrwxrwx 1 strong strong 252907541 May 8 2018 annotations_trainval2017.zip
-rwxrwxrwx 1 strong strong 19336861798 May 8 2018 train2017.zip
-rwxrwxrwx 1 strong strong 815585330 May 8 2018 val2017.zip
strong@foreverstrong:~/data_update/COCO_2017$
strong@foreverstrong:~/data_update/COCO_2017$ ls -l
total 19927284
drwxrwxr-x 2 strong strong 4096 Dec 4 14:08 annotations
-rwxrwxrwx 1 strong strong 252907541 May 8 2018 annotations_trainval2017.zip
-rwxrwxrwx 1 strong strong 19336861798 May 8 2018 train2017.zip
drwxrwxr-x 2 strong strong 155648 Aug 31 2017 val2017
-rwxrwxrwx 1 strong strong 815585330 May 8 2018 val2017.zip
strong@foreverstrong:~/data_update/COCO_2017$
strong@foreverstrong:~/data_update/COCO_2017$ ls -l ./annotations
total 814884
-rw-rw-r-- 1 strong strong 91865115 Sep 1 2017 captions_train2017.json
-rw-rw-r-- 1 strong strong 3872473 Sep 1 2017 captions_val2017.json
-rw-rw-r-- 1 strong strong 469785474 Sep 1 2017 instances_train2017.json
-rw-rw-r-- 1 strong strong 19987840 Sep 1 2017 instances_val2017.json
-rw-rw-r-- 1 strong strong 238884731 Sep 1 2017 person_keypoints_train2017.json
-rw-rw-r-- 1 strong strong 10020657 Sep 1 2017 person_keypoints_val2017.json
strong@foreverstrong:~/data_update/COCO_2017$
2. Data format
将 JSON 文件 instances_val2017.json 拖放到浏览器查看。
All annotations share the same basic data structure below:
info:
licenses:
images:
annotations:
categories:
- images 字段列表元素的数量等同于训练集 (或验证集) 中图片的数量。
- annotations 字段列表元素的数量等同于训练集 (或验证集) 中 bounding box 的数量。
- categories 字段列表元素的数量等同于类别的数量 (COCO 共 80 类)。
2.1 info:
2.2 licenses:
2.3 images:
2.4 annotations:
2.5 categories:
category_id = 27 - backpack
category_id = 31 - handbag
category_id = 33 - suitcase
3. coco_2017_parser_and_selection.py
COCO - Common Objects in Context - 2017 Train/Val 解析为 Darknet-YOLO 训练数据 (backpack-handbag-suitcase)
Darknet - YOLO format.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Yongqiang Cheng
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import shutil
import subprocess
import cv2
# Mapping from COCO category_id to the class index written to the label files.
# category_id = 27 - backpack - 0
# category_id = 31 - handbag - 1
# category_id = 33 - suitcase - 2
labels = {27: 0, 31: 1, 33: 2}

# Source images and instance annotations of the COCO 2017 val / train splits.
image_val2017 = "/home/strong/data_update/COCO_2017/val2017"
annotation_val2017 = "/home/strong/data_update/COCO_2017/annotations/instances_val2017.json"
image_train2017 = "/home/strong/data_update/COCO_2017/train2017"
annotation_train2017 = "/home/strong/data_update/COCO_2017/annotations/instances_train2017.json"

# Output directories: copied images, label files, and preview images with the boxes drawn.
# train_images = "/home/strong/data_update/COCO_2017/val2017_train/JPEGImages"
# train_labels = "/home/strong/data_update/COCO_2017/val2017_train/labels"
train_images = "/home/strong/data_update/COCO_2017/train2017_train/JPEGImages"
train_labels = "/home/strong/data_update/COCO_2017/train2017_train/labels"
# images_labels_path = "/home/strong/data_update/COCO_2017/val2017_train/JPEGImages-labels"
images_labels_path = "/home/strong/data_update/COCO_2017/train2017_train/JPEGImages-labels"

# Split processed by this run.
# NOTE(review): the val2017 split is selected here while the output directories
# above point at train2017_train - confirm this combination is intended.
image_2017 = image_val2017
annotation_2017 = annotation_val2017

# exist_ok=True creates missing directories and tolerates existing ones without
# the check-then-create race of a separate os.path.exists() test.
for output_dir in (train_images, train_labels, images_labels_path):
    os.makedirs(output_dir, exist_ok=True)
# cv2.namedWindow("yongqiang", cv2.WINDOW_NORMAL)
# size = (img_width, img_height)
# box = (float(box_xmin_pixel), float(box_xmax_pixel), float(box_ymin_pixel), float(box_ymax_pixel))
def convert(size, box):
    """Convert a pixel box to a normalized (x_center, y_center, width, height) tuple.

    size: (img_width, img_height)
    box: (xmin, xmax, ymin, ymax) in pixels
    """
    # Reciprocals of the image dimensions; every value is scaled by multiplication.
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    center_x = (box[0] + box[1]) / 2.0 * scale_x
    center_y = (box[2] + box[3]) / 2.0 * scale_y
    norm_w = (box[1] - box[0]) * scale_x
    norm_h = (box[3] - box[2]) * scale_y
    return (center_x, center_y, norm_w, norm_h)
def bbox_fix(box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel, img_file_name,
             width=None, height=None):
    """Report out-of-image box edges and clamp every edge into [1, dimension - 2].

    Args:
        box_xmin_pixel, box_xmax_pixel: left / right box edges in pixels.
        box_ymin_pixel, box_ymax_pixel: top / bottom box edges in pixels.
        img_file_name: image name printed next to every diagnostic message.
        width, height: image dimensions in pixels. When omitted, the module-level
            ``img_width`` / ``img_height`` are used, so five-argument callers
            keep working.

    Returns:
        (box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel) after clamping.
        Values that were already in range are returned unchanged.
    """
    if width is None:
        width = img_width
    if height is None:
        height = img_height

    def report(message):
        print(message)
        print(img_file_name)

    # Diagnostics only: flag edges outside [0, dimension - 1] before anything is modified.
    for name, value, dimension in (
            ("box_xmin_pixel", box_xmin_pixel, width),
            ("box_xmax_pixel", box_xmax_pixel, width),
            ("box_ymin_pixel", box_ymin_pixel, height),
            ("box_ymax_pixel", box_ymax_pixel, height),
    ):
        if (value < 0) or (value > (dimension - 1)):
            report("%s error!" % name)

    x_limit = width - 2
    y_limit = height - 2

    # Minimum edges: lower bound first, then upper bound.
    if box_xmin_pixel < 1:
        box_xmin_pixel = 1
        report("box_xmin_pixel bug fix!")
    if box_xmin_pixel > x_limit:
        box_xmin_pixel = x_limit
        report("box_xmin_pixel bug fix!")

    if box_ymin_pixel < 1:
        box_ymin_pixel = 1
        report("box_ymin_pixel bug fix!")
    if box_ymin_pixel > y_limit:
        box_ymin_pixel = y_limit
        report("box_ymin_pixel bug fix!")

    # Maximum edges: upper bound first, then lower bound.
    if box_xmax_pixel > x_limit:
        box_xmax_pixel = x_limit
        report("box_xmax_pixel bug fix!")
    if box_xmax_pixel < 1:
        box_xmax_pixel = 1
        report("box_xmax_pixel bug fix!")

    if box_ymax_pixel > y_limit:
        box_ymax_pixel = y_limit
        report("box_ymax_pixel bug fix!")
    # Strict comparison, consistent with the xmax check: a value of exactly 1 needs no fix.
    if box_ymax_pixel < 1:
        box_ymax_pixel = 1
        report("box_ymax_pixel bug fix!")

    return box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel
# Upper bound on wget attempts per missing image; the image is skipped afterwards.
max_download_attempts = 5


def _download_image(image_url, target_dir):
    """Fetch image_url into target_dir with wget; return True on success."""
    # Argument list instead of a shell string, so the URL read from the JSON
    # file is never interpreted by a shell.
    wget_args = ["wget", "-P", target_dir + "/", image_url]
    print("wget_command: %s" % (" ".join(wget_args)))
    for _ in range(max_download_attempts):
        try:
            status = subprocess.call(wget_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except OSError as error:
            # wget could not be started at all; retrying cannot help.
            print("wget could not be started: %s" % (error))
            return False
        if 0 == status:
            return True
    return False


with open(annotation_2017) as f:
    annotation_file = json.load(f)

print("annotation_file.keys(): %s" % (annotation_file.keys()))
for section_name in ('licenses', 'info', 'categories', 'images', 'annotations'):
    print("type(annotation_file['%s']): %s" % (section_name, type(annotation_file[section_name])))

# Note that a single object (iscrowd=0) may require multiple polygons, for example if occluded.
# Crowd annotations (iscrowd=1) are used to label large groups of objects (e.g. a crowd of people).
annotation_list = []
for annotation in annotation_file['annotations']:
    if 1 == annotation['iscrowd']:
        continue
    # Keep only the categories that have a class index in `labels`.
    if annotation['category_id'] in labels:
        annotation_list.append(annotation)

# Group the selected annotations by image id once, so each image looks up its
# own boxes instead of rescanning the whole annotation list.
annotations_by_image = {}
for annotation in annotation_list:
    annotations_by_image.setdefault(int(annotation['image_id']), []).append(annotation)

for image_info in annotation_file['images']:
    img_file_name = image_info['file_name']
    # splitext removes exactly the extension; str.strip('.jpg') strips a character set.
    image_stem = os.path.splitext(img_file_name)[0]
    image_id_int = int(image_stem)

    # Images without a selected annotation produce no output, so they are
    # skipped before any download or decoding work.
    image_annotations = annotations_by_image.get(image_id_int)
    if not image_annotations:
        continue

    img_file = image_2017 + '/' + img_file_name
    if not os.path.exists(img_file):
        if not _download_image(image_info['coco_url'], image_2017):
            print("download failed, skip: %s" % (img_file_name))
            continue

    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread returns None for a missing or undecodable file.
        print("cv2.imread failed, skip: %s" % (img_file))
        continue

    imgcopy = img.copy()
    # Module-level names: bbox_fix() reads img_width / img_height when called with five arguments.
    img_height, img_width, img_channel = img.shape
    image_name_txt = image_stem + '.txt'

    label_lines = []
    for annotation in image_annotations:
        print(img_file_name)

        # "bbox": [xmin, ymin, width, height]
        box_xmin_pixel = float(annotation['bbox'][0])
        box_xmax_pixel = float(annotation['bbox'][0]) + float(annotation['bbox'][2])
        box_ymin_pixel = float(annotation['bbox'][1])
        box_ymax_pixel = float(annotation['bbox'][1]) + float(annotation['bbox'][3])

        box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel = bbox_fix(
            box_xmin_pixel, box_xmax_pixel, box_ymin_pixel, box_ymax_pixel, img_file_name)

        # Defensive re-check after clamping: drop the box at the first edge still outside the image.
        invalid_edge = None
        for edge_name, edge_value, edge_limit in (
                ("box_xmin_pixel", box_xmin_pixel, img_width - 1),
                ("box_xmax_pixel", box_xmax_pixel, img_width - 1),
                ("box_ymin_pixel", box_ymin_pixel, img_height - 1),
                ("box_ymax_pixel", box_ymax_pixel, img_height - 1),
        ):
            if (edge_value < 0) or (edge_value > edge_limit):
                invalid_edge = edge_name
                break
        if invalid_edge is not None:
            print("%s continue error!" % (invalid_edge))
            print(img_file_name)
            continue

        # Boxes smaller than 16 pixels in either direction are not used.
        box_w_pixel = box_xmax_pixel - box_xmin_pixel + 1
        box_h_pixel = box_ymax_pixel - box_ymin_pixel + 1
        if (box_w_pixel < 16) or (box_h_pixel < 16):
            print("box_w_pixel & box_h_pixel continue error!")
            print(img_file_name)
            continue

        label = labels[annotation['category_id']]
        b = (float(box_xmin_pixel), float(box_xmax_pixel), float(box_ymin_pixel), float(box_ymax_pixel))
        bb = convert((img_width, img_height), b)
        label_lines.append(str(label) + " " + " ".join([str(a) for a in bb]) + '\n')

        # Draw the box recovered from the normalized values to verify the conversion visually.
        start_x = int((bb[0] - bb[2] / 2.0) * img_width)
        start_y = int((bb[1] - bb[3] / 2.0) * img_height)
        end_x = int((bb[0] + bb[2] / 2.0) * img_width)
        end_y = int((bb[1] + bb[3] / 2.0) * img_height)

        cv2.rectangle(imgcopy, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        cv2.putText(imgcopy, str(label), (start_x + 2, start_y + 26), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                    2)

    # 1 when at least one box of this image was written to the label file.
    bag_flag = 1 if label_lines else 0

    if 1 == bag_flag:
        # Append mode, as before: lines are added to an existing label file.
        with open("%s/%s" % (train_labels, image_name_txt), 'a') as train_label_txt:
            train_label_txt.writelines(label_lines)

        cv2.putText(imgcopy, img_file_name, (16, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
        cv2.imshow("yongqiang", imgcopy)

        shutil.copy("%s" % (img_file), "%s/" % (train_images))

        # cv2.imwrite("%s/%s" % (train_images, img_file_name.strip()), img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
        cv2.imwrite("%s/%s" % (images_labels_path, img_file_name.strip()), imgcopy)

        keyboard = cv2.waitKey(5) & 0xFF
        # wait for ESC key to exit
        if keyboard == 27:
            break

    # Alternative interactive selection mode (replaces the `if 1 == bag_flag:` block above):
    # if 1 == bag_flag:
    #     cv2.imshow("yongqiang", imgcopy)
    #
    #     keyboard = cv2.waitKey(0) & 0xFF
    #     if keyboard == 27:  # wait for ESC key to exit
    #         break
    #     elif keyboard == ord('s'):  # wait for 's' key to save and exit
    #         shutil.copy("%s" % (img_file), "%s/" % (train_images))
    #
    #         # cv2.imwrite("%s/%s" % (train_images, img_file_name.strip()), img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    #         cv2.imwrite("%s/%s" % (images_labels_path, img_file_name.strip()), imgcopy)

cv2.destroyAllWindows()
if __name__ == '__main__':
    # Print the directory that contains this script.
    script_path = os.path.abspath(__file__)
    current_directory = os.path.dirname(script_path)
    print("current_directory:", current_directory)