Objects365是北京旷视科技有限公司与北京智源人工智能研究院共同发布的全球最大的目标检测数据集。该数据集总共包含63万张图像,覆盖365个类别,高达1000万框数,具有规模大、质量高、泛化能力强的特点,远超Pascal VOC、COCO等传统数据集。数据集包括人、衣物、居室、浴室、厨房、办公、电器、交通、食物、水果、蔬菜、动物、运动、乐器14个大类,平均每一类有大约26个小类。与COCO数据集相比,Objects365具有5倍的图像数量、4倍的类别数量、以及10倍以上标注框数量。Objects365数据集可以在网上搜索下载,其中Train数据集包含60万张图片,验证数据集包含3万张图片。
为了更高效地在TensorFlow下进行目标检测的模型训练,我把下载的Objects365数据制作成TFRecord格式的文件。以下代码把Train数据集转化为TFRecord格式,其中利用CPU的多个核心并行处理,以提高数据处理效率。代码如下:
#-*- encoding: utf-8 -*-
import tensorflow as tf
import cv2
import numpy as np
import os
from multiprocessing import Process, Queue
import sys
import time
import random
import math
import json
# Path of the annotation JSON and the folder the image files are read from.
annFile = 'Annotations/train/train.json'
image_path = 'train/'
# Number of worker processes the image list is split across.
cores = 8
# Maximum number of records written to one .tfrecord file before rotating.
max_num = 2000
# File-name prefix of the generated .tfrecord files.
prefix = 'train_'

# Parse the whole annotation file; the context manager closes the handle as
# soon as parsing is finished instead of leaking it for the process lifetime.
with open(annFile, encoding='utf-8') as fileobject:
    annJson = json.load(fileobject)

# Get the category names from the annotation file (category id -> name).
categories = {
    category['id']: category['name'] for category in annJson['categories']
}

# One record per image, keyed by image id. 'bbox' holds five parallel lists:
# [category ids, xmin, ymin, xmax, ymax].
images_infor = {}
for item in annJson['images']:
    images_infor[item['id']] = {
        'filename': item['file_name'],
        'height': item['height'],
        'width': item['width'],
        'bbox': [[], [], [], [], []],
    }
images_id = list(images_infor.keys())

# Get the total images number and how many images each worker handles.
# The last worker additionally receives the remainder of the division.
total = len(images_id)
each_process_files_num = total // cores
last_process_num = total - (cores - 1) * each_process_files_num

# Attach every non-crowd annotation to its image. The annotation 'bbox' is
# read as [x, y, width, height] and stored as integer corner coordinates
# (int() truncates the fractional part).
for ann in annJson['annotations']:
    if ann['iscrowd'] != 0:
        continue
    bbox = ann['bbox']
    image_boxes = images_infor[ann['image_id']]['bbox']
    image_boxes[0].append(ann['category_id'])
    image_boxes[1].append(int(bbox[0]))
    image_boxes[2].append(int(bbox[1]))
    image_boxes[3].append(int(bbox[0] + bbox[2]))
    image_boxes[4].append(int(bbox[1] + bbox[3]))
def make_example(image, height, width, bbox, filename):
    """Pack one image and its annotations into a ``tf.train.Example``.

    Args:
        image: Encoded image bytes, stored unchanged under ``'image'``.
        height: Image height, stored as a single int64.
        width: Image width, stored as a single int64.
        bbox: Indexable of five parallel integer sequences, in the order
            ``[labels, xmin, ymin, xmax, ymax]``.
        filename: File name as ``bytes``.

    Returns:
        A ``tf.train.Example``. The ``channels`` (3), ``colorspace``
        (``b'RGB'``) and ``img_format`` (``b'JPEG'``) features are constants;
        they are not derived from the image data.
    """
    def _bytes_feature(value):
        # Single bytes value wrapped in a one-element BytesList.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(values):
        # `values` is already a sequence of ints.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    feature_map = {
        'image': _bytes_feature(image),
        'height': _int64_feature([height]),
        'width': _int64_feature([width]),
        'channels': _int64_feature([3]),
        'colorspace': _bytes_feature(b'RGB'),
        'img_format': _bytes_feature(b'JPEG'),
        'label': _int64_feature(bbox[0]),
        'bbox_xmin': _int64_feature(bbox[1]),
        'bbox_xmax': _int64_feature(bbox[3]),
        'bbox_ymin': _int64_feature(bbox[2]),
        'bbox_ymax': _int64_feature(bbox[4]),
        'filename': _bytes_feature(filename),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def gen_tfrecord(corenum, targetfolder, queue):
    """Worker entry point: write this worker's slice of images to TFRecords.

    The slice starts at ``corenum * each_process_files_num`` in ``images_id``.
    Every worker handles ``each_process_files_num`` images, except the last
    one (``corenum == cores - 1``, using the module-level ``cores``), which
    handles ``last_process_num`` so the division remainder is not dropped.

    Output files are named ``targetfolder + prefix + <n> + ".tfrecord"`` and
    hold at most ``max_num`` records each. A new file is only opened when
    there is a record to put in it, so a worker never creates a trailing
    empty file whose number belongs to the next worker.

    Args:
        corenum: Zero-based index of this worker.
        targetfolder: Output path prefix (concatenated as-is, so it should
            end with a path separator).
        queue: Queue used to report progress to the parent as
            ``(pid, images_written)`` tuples: once at start, after every
            100 images, and once when the slice is finished.
    """
    pid = os.getpid()
    first = corenum * each_process_files_num
    if corenum == cores - 1:
        # The last worker also takes the images left over by total // cores.
        count = last_process_num
    else:
        count = each_process_files_num

    tfrecords_file_num = corenum * math.ceil(each_process_files_num / max_num)
    file_num = 0
    queue.put((pid, file_num))

    writer = None
    try:
        for i in range(first, first + count):
            if file_num % max_num == 0:
                # Current shard is full (or none is open yet): rotate lazily,
                # right before the next record is written.
                if writer is not None:
                    writer.close()
                    tfrecords_file_num += 1
                writer = tf.io.TFRecordWriter(
                    targetfolder + prefix + str(tfrecords_file_num) + ".tfrecord")

            record = images_infor[images_id[i]]
            with open(image_path + record['filename'], 'rb') as image_file:
                image_string = image_file.read()
            ex = make_example(image_string, record['height'], record['width'],
                              record['bbox'], record['filename'].encode())
            writer.write(ex.SerializeToString())
            file_num += 1

            # Report progress to the parent process every 100 records.
            if file_num % 100 == 0:
                queue.put((pid, file_num))
    finally:
        # Flush and close the open shard even if reading an image failed.
        if writer is not None:
            writer.close()
    queue.put((pid, file_num))
def process_in_queues(cores, targetfolder):
    """Run ``gen_tfrecord`` in ``cores`` processes and display their progress.

    One ``Queue`` is created per worker. Roughly every 0.5 seconds the parent
    drains all queues without blocking, keeps the newest count reported by
    each worker and prints a single ``PID<pid>:<done>/<expected>|...`` line
    terminated by a carriage return. The loop ends when every worker process
    has exited, so workers that finish at different times or send a different
    number of progress messages cannot stall the parent.

    Args:
        cores: Number of worker processes to start. NOTE(review): the
            per-worker slice sizes come from the module-level
            ``each_process_files_num`` / ``last_process_num``, so this should
            match the module-level ``cores`` - confirm at the call site.
        targetfolder: Output path prefix passed through to ``gen_tfrecord``.

    Returns:
        The number of images the workers reported as written. This equals the
        total number of images when every worker completed its slice.
    """
    # multiprocessing queues signal "nothing to read" with queue.Empty.
    from queue import Empty

    queues_list = [Queue() for _ in range(cores)]
    processes_list = [
        Process(target=gen_tfrecord, args=(i, targetfolder, queues_list[i]))
        for i in range(cores)
    ]
    for p in processes_list:
        p.start()

    expected = [each_process_files_num] * cores
    if expected:
        expected[-1] = last_process_num
    done = [0] * cores  # newest count reported by each worker

    try:
        while True:
            # Sample liveness BEFORE draining: a worker seen as exited here
            # has already flushed its final message, so the drain below is
            # guaranteed to pick it up.
            any_alive = any(p.is_alive() for p in processes_list)

            for i, worker_queue in enumerate(queues_list):
                try:
                    while True:
                        done[i] = worker_queue.get_nowait()[1]
                except Empty:
                    pass

            progress_str = ''.join(
                'PID' + str(p.pid) + ':' + str(done[i]) + '/' + str(expected[i]) + '|'
                for i, p in enumerate(processes_list)
            ) + '\r'
            print(progress_str, end='')

            if not any_alive:
                break
            time.sleep(0.5)
    except KeyboardInterrupt:
        # Ctrl-C: stop monitoring; the workers are stopped in `finally`.
        pass
    finally:
        # Never leave worker processes running behind the parent.
        for p in processes_list:
            if p.is_alive():
                p.terminate()
            p.join()
    return sum(done)
if __name__ == '__main__':
print('Start processing train data using %i CPU cores:'%cores)
starttime=time.time()
total_processed = process_in_queues(cores, targetfolder='train_tf/')
endtime=time.time()
print('\nProcess finish, total process %i images in %i seconds'%(total_processed, int(endtime-starttime)), end='')
在我的电脑上(8个核心),把43GB的训练集数据处理完毕用时约1100秒。处理后的数据可以用以下TensorFlow代码来读取(示例读取的是val_tf/目录下的验证集TFRecord文件,验证集需先按同样的方式转换):
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
import json
import tensorflow as tf
# Validation annotation file; only its 'categories' list is used here.
annFile = 'Annotations/val/val.json'
# The context manager closes the file as soon as the JSON is parsed.
with open(annFile, encoding='utf-8') as fileobject:
    val = json.load(fileobject)

# Box colours; a category picks one by `category id % 5`.
category_colors = ['#FF00FF', '#FF0000', '#A020F0', '#A020F0', '#FFFF00']

# Map category id -> category name, used to label the drawn boxes.
categories = {
    category['id']: category['name'] for category in val['categories']
}
def _parse_function(example_proto):
    """Decode one serialized ``tf.train.Example`` into model inputs.

    Args:
        example_proto: Scalar string tensor holding a serialized Example
            with the ``image``, ``label``, ``bbox_*`` and ``filename``
            features declared below.

    Returns:
        A dict with:
          * ``'images'``: the JPEG-decoded image (3 channels) as float32.
          * ``'bbox'``: float32 tensor with one row per box and the columns
            ``[xmin, ymin, xmax, ymax, label, mixup_weight]``; the weight
            column is all ones.
          * ``'image_id'``: int32 tensor of shape ``(1,)`` parsed from the
            file name.
    """
    def _bytes_scalar():
        return tf.io.FixedLenFeature([], tf.string, default_value="")

    def _int64_single(default):
        return tf.io.FixedLenFeature([1], tf.int64, default_value=[default])

    schema = {
        "image": _bytes_scalar(),
        "height": _int64_single(0),
        "width": _int64_single(0),
        "channels": _int64_single(3),
        "colorspace": _bytes_scalar(),
        "img_format": _bytes_scalar(),
        "label": tf.io.VarLenFeature(tf.int64),
        "bbox_xmin": tf.io.VarLenFeature(tf.int64),
        "bbox_xmax": tf.io.VarLenFeature(tf.int64),
        "bbox_ymin": tf.io.VarLenFeature(tf.int64),
        "bbox_ymax": tf.io.VarLenFeature(tf.int64),
        "filename": _bytes_scalar(),
    }
    parsed = tf.io.parse_single_example(example_proto, schema)

    def _as_row(key):
        # Values of a variable-length int64 feature -> float32 row (1, N).
        return tf.cast(tf.expand_dims(parsed[key].values, 0), tf.float32)

    label = _as_row("label")
    image_decoded = tf.cast(
        tf.image.decode_jpeg(parsed["image"], channels=3), dtype=tf.float32)

    # Get the coco image id as we need to use COCO API to evaluate: the 12
    # characters starting 16 from the end of the file name are parsed as an
    # integer. NOTE(review): assumes names end in a 12-digit id followed by a
    # 4-character extension such as ".jpg" - confirm against the dataset.
    id_digits = tf.strings.substr(parsed["filename"], -16, 12)
    image_id = tf.expand_dims(tf.strings.to_number(id_digits, tf.int32), 0)

    # Get the bbox: stack six (1, N) rows, then transpose to (N, 6).
    xmin = _as_row("bbox_xmin")
    xmax = _as_row("bbox_xmax")
    ymin = _as_row("bbox_ymin")
    ymax = _as_row("bbox_ymax")
    stacked = tf.concat(
        [xmin, ymin, xmax, ymax, label, tf.ones_like(xmin)], axis=0)
    boxes = tf.transpose(stacked, [1, 0])

    return {'images': image_decoded, 'bbox': boxes, 'image_id': image_id}
def input_fn():
    """Build the ``tf.data`` pipeline over the ``val_tf/*.tfrecord`` files.

    The matching files are interleaved as ``TFRecordDataset`` readers and
    every record is decoded with ``_parse_function``; both steps let tf.data
    pick the parallelism (``AUTOTUNE``).

    Returns:
        A dataset yielding the feature dicts produced by ``_parse_function``.
    """
    autotune = tf.data.experimental.AUTOTUNE
    shard_files = tf.data.Dataset.list_files("val_tf/*.tfrecord")
    records = shard_files.interleave(
        tf.data.TFRecordDataset, num_parallel_calls=autotune)
    return records.map(_parse_function, num_parallel_calls=autotune)
# Pull one decoded sample from the pipeline. The built-in next() is the
# supported way to advance a Python 3 iterator (instead of calling .next()).
train_data = iter(input_fn())
features = next(train_data)

img = features['images'].numpy() / 255.  # scale pixel values for imshow
bboxes = features['bbox'].numpy()        # rows: xmin, ymin, xmax, ymax, label, weight
img_id = features['image_id'].numpy()

fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(1, 1, 1)
linewidth = 2

# Draw every box with its category name on top of the image.
for box in bboxes:
    cat_id = int(box[4])
    label_name = categories[cat_id]
    label_color = category_colors[cat_id % 5]

    rect = patches.Rectangle(
        (int(box[0]), int(box[1])),
        int(box[2] - box[0]),
        int(box[3] - box[1]),
        fill=False,
        edgecolor=label_color,
        linewidth=linewidth)
    ax.add_patch(rect)

    # Put the caption above the box, unless the box touches the top edge of
    # the image; then place it just inside the box instead.
    if box[1] < 10:
        va = 'top'
        text_y_adjust = -linewidth
    else:
        va = 'bottom'
        text_y_adjust = linewidth
    ax.text(int(box[0]), int(box[1]) - text_y_adjust, label_name, style='normal',
            bbox={'facecolor': label_color, 'alpha': 0.5, 'pad': 0}, va=va)

ax.imshow(img)
plt.show()
以下是读取的图片: