前言
WIDER FACE是一个人脸数据集,COCO是一个目标检测数据集。目前有很多针对COCO数据集格式的代码,因此将WIDER FACE的标注文件转换成COCO标注格式后,可以很方便地应用现有代码。
下面给出两个数据集的相关网址:
WIDER FACE: A Face Detection Benchmark
COCO
准备工作
我们需要准备两样东西:图片和对应的标注文件,都可以在WIDER FACE网站上进行获取,如下图所示。
从上到下依次是训练、验证、测试集图片和标注文件,最后一个是提交格式示例,不需要管。
这一步,我下载了上面的三个文件夹,获得了所有的图片数据。而标注文件,我选择了从https://github.com/vitoralbiero/img2pose/wiki/Annotations获取。
原因是他提供了json格式的标注文件,读取起来更加方便顺手。
代码
"""
将WIDER FACE数据集标注文件格式 转换为 COCO数据集格式
"""
import os
import json
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
# Module-level accumulators: load_data() appends to both in lockstep and
# main() reads them back by shared index.
#   g_samples -- one image file path per annotated image
#   g_bboxes  -- the "bboxes" list taken from that image's annotation JSON
g_samples, g_bboxes = [], []
def load_data(json_list, dataset_path):
    """Load per-image annotation JSON files into the module-level lists.

    ``json_list`` is a text file read with pandas (space-delimited, no
    header); each entry is the path of one annotation JSON file. From every
    JSON file the ``"image_path"`` and ``"bboxes"`` values are read and
    appended to ``g_samples`` and ``g_bboxes`` respectively, so the two
    lists stay index-aligned.

    Args:
        json_list: Path of the text file listing the annotation JSON files.
        dataset_path: Directory joined in front of each ``"image_path"``,
            or ``None`` to use the stored path unchanged.

    Returns:
        None. Results are appended to ``g_samples`` / ``g_bboxes``; calling
        this function again appends further entries to the same lists.
    """
    image_paths = pd.read_csv(json_list, delimiter=" ", header=None)
    # squeeze() turns a list file with a single entry into a 0-d array,
    # which cannot be iterated; atleast_1d keeps that case working.
    image_paths = np.atleast_1d(np.asarray(image_paths).squeeze())

    print("Loading frames paths...")

    # Walk through the annotation JSON file of every image.
    for image_path in tqdm(image_paths):
        with open(image_path, encoding="utf-8") as f:
            image_json = json.load(f)

        # Path to the image as stored in the annotation file.
        img_path = image_json["image_path"]

        # Prefix the dataset directory when one is given. os.path.join
        # leaves an absolute img_path unchanged, so absolute paths survive.
        if dataset_path is not None:
            img_path = os.path.join(dataset_path, img_path)

        g_samples.append(img_path)
        g_bboxes.append(image_json["bboxes"])
def _xyxy_to_coco_bbox(raw_bbox):
    """Convert one ``[x1, y1, x2, y2, ...]`` box to COCO ``[x, y, w, h]``."""
    x1, y1, x2, y2 = (float(v) for v in raw_bbox[:4])
    # COCO stores the top-left corner plus width and height.
    return [x1, y1, x2 - x1, y2 - y1]


def main(
    json_list=r".\annotations\WIDER_train_annotations.txt",
    dataset_path=r".\datasets\WIDER_Face\WIDER_train\images",
    output_path="instances_train2017_without_trans.json",
):
    """Convert the WIDER FACE annotations to a COCO-style JSON file.

    Loads every annotation through ``load_data``, opens each image to read
    its size, converts each bounding box from corner format
    ``[x1, y1, x2, y2]`` to COCO ``[x, y, width, height]`` and writes the
    resulting dictionary to ``output_path``.

    Args:
        json_list: Text file listing the per-image annotation JSON files.
        dataset_path: Directory that contains the images.
        output_path: Destination of the generated COCO annotation file.
    """
    print("Loading dataset from %s" % json_list)
    load_data(json_list, dataset_path)

    res = {
        "info": {},
        # The COCO format defines "licenses" as a list of license records.
        "licenses": [],
        "images": [],
        "annotations": [],
        # Left empty on purpose; every annotation below uses category_id 1,
        # so add a matching entry if downstream tooling needs categories.
        "categories": [],
    }

    cnt = 0  # running annotation id, unique across the whole file
    for image_path, bboxes in tqdm(zip(g_samples, g_bboxes), total=len(g_samples)):
        # Strip directories written with either "\\" or "/" separators.
        file_name = image_path.split("\\")[-1].split("/")[-1]
        # The image id is the file name up to its first dot.
        image_id = file_name.split(".")[0]

        # Only the size is needed; close the file right away so that large
        # datasets do not exhaust the open-file limit.
        with Image.open(image_path) as img:
            w, h = img.size

        res["images"].append({
            "file_name": file_name,
            "height": h,
            "width": w,
            "date_captured": "2022-07-08 17:02:52",
            "id": image_id,
        })

        # bboxes looks like [[a, b, c, d, ...], [a, b, c, d, ...], ...]
        for raw_bbox in bboxes:
            x, y, bbox_w, bbox_h = _xyxy_to_coco_bbox(raw_bbox)
            res["annotations"].append({
                "segmentation": [],
                "area": bbox_w * bbox_h,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [x, y, bbox_w, bbox_h],
                "category_id": 1,
                "id": cnt,
            })
            cnt += 1

    with open(output_path, "w") as f:
        json.dump(res, f)


if __name__ == '__main__':
    main()
代码看起来很长,但其实很简单,重要的只有四行:
x = bbox[0] + bbox[2]
y = bbox[1] + bbox[3]
bbox_w = bbox[2] - bbox[0]
bbox_h = bbox[3] - bbox[1]
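这四行完成了bbox标注差异的转换。需要注意:COCO的bbox格式为[x, y, width, height],其中(x, y)是边界框左上角的坐标;当源标注为[x1, y1, x2, y2](左上角、右下角)格式时,应取x = x1、y = y1,宽高分别为x2 - x1、y2 - y1,而不是把两个角点的坐标相加。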
代码中categories列表的内容是空的[ ],可以换成附录中默认的内容,也可以自己仿照格式进行自定义。
由于WIDER FACE全是人脸,因此annotation中的category_id全是1,即默认person这个类别的id。
参考资料
http://shuoyang1213.me/WIDERFACE/
https://cocodataset.org/#format-data
MMDetection学习笔记-04自定义COCO数据集
【数据集转换】COCO数据集格式
SSD人脸检测实战:Wider Face数据集介绍、标注格式、下载等
附录
默认COCO的categories。
{
"supercategory": "person",
"id": 1,
"name": "person"
},
{
"supercategory": "vehicle",
"id": 2,
"name": "bicycle"
},
{
"supercategory": "vehicle",
"id": 3,
"name": "car"
},
{
"supercategory": "vehicle",
"id": 4,
"name": "motorcycle"
},
{
"supercategory": "vehicle",
"id": 5,
"name": "airplane"
},
{
"supercategory": "vehicle",
"id": 6,
"name": "bus"
},
{
"supercategory": "vehicle",
"id": 7,
"name": "train"
},
{
"supercategory": "vehicle",
"id": 8,
"name": "truck"
},
{
"supercategory": "vehicle",
"id": 9,
"name": "boat"
},
{
"supercategory": "outdoor",
"id": 10,
"name": "traffic light"
},
{
"supercategory": "outdoor",
"id": 11,
"name": "fire hydrant"
},
{
"supercategory": "outdoor",
"id": 13,
"name": "stop sign"
},
{
"supercategory": "outdoor",
"id": 14,
"name": "parking meter"
},
{
"supercategory": "outdoor",
"id": 15,
"name": "bench"
},
{
"supercategory": "animal",
"id": 16,
"name": "bird"
},
{
"supercategory": "animal",
"id": 17,
"name": "cat"
},
{
"supercategory": "animal",
"id": 18,
"name": "dog"
},
{
"supercategory": "animal",
"id": 19,
"name": "horse"
},
{
"supercategory": "animal",
"id": 20,
"name": "sheep"
},
{
"supercategory": "animal",
"id": 21,
"name": "cow"
},
{
"supercategory": "animal",
"id": 22,
"name": "elephant"
},
{
"supercategory": "animal",
"id": 23,
"name": "bear"
},
{
"supercategory": "animal",
"id": 24,
"name": "zebra"
},
{
"supercategory": "animal",
"id": 25,
"name": "giraffe"
},
{
"supercategory": "accessory",
"id": 27,
"name": "backpack"
},
{
"supercategory": "accessory",
"id": 28,
"name": "umbrella"
},
{
"supercategory": "accessory",
"id": 31,
"name": "handbag"
},
{
"supercategory": "accessory",
"id": 32,
"name": "tie"
},
{
"supercategory": "accessory",
"id": 33,
"name": "suitcase"
},
{
"supercategory": "sports",
"id": 34,
"name": "frisbee"
},
{
"supercategory": "sports",
"id": 35,
"name": "skis"
},
{
"supercategory": "sports",
"id": 36,
"name": "snowboard"
},
{
"supercategory": "sports",
"id": 37,
"name": "sports ball"
},
{
"supercategory": "sports",
"id": 38,
"name": "kite"
},
{
"supercategory": "sports",
"id": 39,
"name": "baseball bat"
},
{
"supercategory": "sports",
"id": 40,
"name": "baseball glove"
},
{
"supercategory": "sports",
"id": 41,
"name": "skateboard"
},
{
"supercategory": "sports",
"id": 42,
"name": "surfboard"
},
{
"supercategory": "sports",
"id": 43,
"name": "tennis racket"
},
{
"supercategory": "kitchen",
"id": 44,
"name": "bottle"
},
{
"supercategory": "kitchen",
"id": 46,
"name": "wine glass"
},
{
"supercategory": "kitchen",
"id": 47,
"name": "cup"
},
{
"supercategory": "kitchen",
"id": 48,
"name": "fork"
},
{
"supercategory": "kitchen",
"id": 49,
"name": "knife"
},
{
"supercategory": "kitchen",
"id": 50,
"name": "spoon"
},
{
"supercategory": "kitchen",
"id": 51,
"name": "bowl"
},
{
"supercategory": "food",
"id": 52,
"name": "banana"
},
{
"supercategory": "food",
"id": 53,
"name": "apple"
},
{
"supercategory": "food",
"id": 54,
"name": "sandwich"
},
{
"supercategory": "food",
"id": 55,
"name": "orange"
},
{
"supercategory": "food",
"id": 56,
"name": "broccoli"
},
{
"supercategory": "food",
"id": 57,
"name": "carrot"
},
{
"supercategory": "food",
"id": 58,
"name": "hot dog"
},
{
"supercategory": "food",
"id": 59,
"name": "pizza"
},
{
"supercategory": "food",
"id": 60,
"name": "donut"
},
{
"supercategory": "food",
"id": 61,
"name": "cake"
},
{
"supercategory": "furniture",
"id": 62,
"name": "chair"
},
{
"supercategory": "furniture",
"id": 63,
"name": "couch"
},
{
"supercategory": "furniture",
"id": 64,
"name": "potted plant"
},
{
"supercategory": "furniture",
"id": 65,
"name": "bed"
},
{
"supercategory": "furniture",
"id": 67,
"name": "dining table"
},
{
"supercategory": "furniture",
"id": 70,
"name": "toilet"
},
{
"supercategory": "electronic",
"id": 72,
"name": "tv"
},
{
"supercategory": "electronic",
"id": 73,
"name": "laptop"
},
{
"supercategory": "electronic",
"id": 74,
"name": "mouse"
},
{
"supercategory": "electronic",
"id": 75,
"name": "remote"
},
{
"supercategory": "electronic",
"id": 76,
"name": "keyboard"
},
{
"supercategory": "electronic",
"id": 77,
"name": "cell phone"
},
{
"supercategory": "appliance",
"id": 78,
"name": "microwave"
},
{
"supercategory": "appliance",
"id": 79,
"name": "oven"
},
{
"supercategory": "appliance",
"id": 80,
"name": "toaster"
},
{
"supercategory": "appliance",
"id": 81,
"name": "sink"
},
{
"supercategory": "appliance",
"id": 82,
"name": "refrigerator"
},
{
"supercategory": "indoor",
"id": 84,
"name": "book"
},
{
"supercategory": "indoor",
"id": 85,
"name": "clock"
},
{
"supercategory": "indoor",
"id": 86,
"name": "vase"
},
{
"supercategory": "indoor",
"id": 87,
"name": "scissors"
},
{
"supercategory": "indoor",
"id": 88,
"name": "teddy bear"
},
{
"supercategory": "indoor",
"id": 89,
"name": "hair drier"
},
{
"supercategory": "indoor",
"id": 90,
"name": "toothbrush"
}