人脸检测数据集构造(WIDER FACE转VOC格式)

最新推荐文章于 2023-01-31 14:06:01 发布

榨菜rose

最新推荐文章于 2023-01-31 14:06:01 发布

阅读量1.3k

点赞数 2

本文链接：https://blog.csdn.net/sheqianweilong/article/details/104161066

版权

一、下载WIDER FACE数据集

http://shuoyang1213.me/WIDERFACE/

选择下载标注信息、训练集、验证集、测试集四个压缩包：
下载文件位置截图
二、标注信息说明
解压缩后得到四个文件夹：
文件目录说明
标注信息在第一个文件夹中，其他的文件夹分别存放的是测试图片集、训练图片集、验证图片集。
进入标注信息文件夹：
标注信息目录说明
打开wider_face_train_bbx_gt.txt:
标注信息说明
可以看到第一行是图片的路径，第二行则是人脸的个数，然后接下来就是人脸的坐标信息，以此类推。比如说，第七行是0--Parade/0_Parade_marchingband_1_799.jpg，表示的是图片的路径；第八行的21表示该图有21张人脸；第九行到第二十九行则分别表示这21张人脸的坐标位置（每行前四个数依次为x、y、宽、高）。

三、VOC格式说明

VOC格式其实就是使用xml语言进行了一种描述，因此我们可以使用python脚本进行转换。
首先，将刚刚解压缩后的文件夹放到一个主目录下：
VOC格式目录结构
并在其目录下创建Annotations、ImageSets/Main以及JPEGImages等文件夹
新建一个dataset.py的文件：

import os,cv2,sys,shutil
from xml.dom.minidom import Document

def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, append it to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def writexml(filename, saveimg, bboxes, xmlpath):
    """Write a Pascal VOC style annotation file for one image.

    Args:
        filename: Image file name stored in the ``<filename>`` element.
        saveimg: Image array; only ``saveimg.shape`` is read
            (``shape[0]`` = height, ``shape[1]`` = width, optional
            ``shape[2]`` = channel count).
        bboxes: Iterable of ``(x, y, w, h)`` boxes. Each box is written as
            ``xmin = x``, ``ymin = y``, ``xmax = x + w``, ``ymax = y + h``.
        xmlpath: Destination path of the XML file (overwritten if present).
    """
    doc = Document()

    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'widerface')
    _append_text_element(doc, annotation, 'filename', filename)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'wider face Database')
    _append_text_element(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    _append_text_element(doc, owner, 'flickrid', 'yanyu')
    _append_text_element(doc, owner, 'name', 'yanyu')

    shape = saveimg.shape
    # A 2-D array (single-channel image) has no channel axis; report depth 1
    # instead of failing on shape[2].
    channels = shape[2] if len(shape) > 2 else 1

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', shape[1])
    _append_text_element(doc, size, 'height', shape[0])
    _append_text_element(doc, size, 'depth', channels)

    _append_text_element(doc, annotation, 'segmented', '0')

    for bbox in bboxes:
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', 'face')
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        # truncated/difficult are fixed values; they are not derived from the
        # source annotation.
        _append_text_element(doc, obj, 'truncated', '1')
        _append_text_element(doc, obj, 'difficult', '0')

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # Convert (x, y, w, h) to corner coordinates.
        _append_text_element(doc, bndbox, 'xmin', bbox[0])
        _append_text_element(doc, bndbox, 'ymin', bbox[1])
        _append_text_element(doc, bndbox, 'xmax', bbox[0] + bbox[2])
        _append_text_element(doc, bndbox, 'ymax', bbox[1] + bbox[3])

    # The XML declaration produced by toprettyxml() carries no encoding, so
    # the file must be UTF-8 regardless of the platform's default encoding.
    with open(xmlpath, "w", encoding="utf-8") as f:
        f.write(doc.toprettyxml(indent=''))

# Root directory of the dataset: the extracted WIDER FACE folders are read from
# here, and the VOC output (ImageSets/Main, JPEGImages, Annotations) is written
# under it as well.
rootdir = "./wider_face"

def _is_bbox_row(line):
    """Return True if *line* consists only of whitespace-separated integers."""
    tokens = line.split()
    return bool(tokens) and all(tok.lstrip("-").isdigit() for tok in tokens)


def convertimgset(img_set, min_face_size=40):
    """Convert one WIDER FACE split into VOC-style files under ``rootdir``.

    For every image that keeps at least one face box, the image is re-saved
    to ``JPEGImages``, an XML annotation is written to ``Annotations`` and
    the image stem is appended to ``ImageSets/Main/<img_set>.txt``.

    Args:
        img_set: Split name used to build the input paths, e.g. ``"train"``
            or ``"val"``.
        min_face_size: Boxes whose width or height is below this many pixels
            are dropped. Defaults to 40.
    """
    # Directory holding the images of this split.
    imgdir = rootdir + "/WIDER_" + img_set + "/images"
    # Ground-truth annotation file of this split.
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    listpath = rootdir + "/ImageSets/Main/" + img_set + ".txt"

    # cv2.imwrite does not create missing directories, so make sure the
    # output layout exists before anything is written.
    for subdir in ("ImageSets/Main", "JPEGImages", "Annotations"):
        os.makedirs(os.path.join(rootdir, subdir), exist_ok=True)

    index = 0

    with open(gtfilepath, 'r') as gtfiles, open(listpath, 'w') as fwrite:
        while True:
            # Relative image path; strip() keeps the last character intact
            # even when the final line has no trailing newline.
            filename = gtfiles.readline().strip()
            if not filename:
                break

            # Number of annotated faces for this image.
            numbbox = int(gtfiles.readline())
            print(numbbox)

            if numbbox == 0:
                # The ground-truth file may carry one placeholder box row
                # after a zero count. Consume it if present so it is not
                # mistaken for the next image path; otherwise rewind.
                pos = gtfiles.tell()
                if not _is_bbox_row(gtfiles.readline()):
                    gtfiles.seek(pos)

            # Read every box row so the parser stays aligned with the file,
            # keeping only boxes that are large enough.
            bboxes = []
            for _ in range(numbbox):
                fields = gtfiles.readline().split()
                x, y, w, h = (int(value) for value in fields[:4])
                if w < min_face_size or h < min_face_size:
                    continue
                bboxes.append((x, y, w, h))

            if not bboxes:
                print("no face")
                continue

            imgpath = imgdir + "/" + filename
            img = cv2.imread(imgpath)
            # cv2.imread returns None when the file is missing or unreadable.
            if img is None:
                print("cannot read image ", imgpath)
                continue

            # Flatten "<event>/<name>.jpg" into a single file name.
            filename = filename.replace("/", "_")
            stem = os.path.splitext(filename)[0]

            cv2.imwrite("{}/JPEGImages/{}".format(rootdir, filename), img)

            fwrite.write(stem + "\n")

            xmlpath = "{}/Annotations/{}.xml".format(rootdir, stem)
            # Generate the XML annotation file.
            writexml(filename, img, bboxes, xmlpath)

            print("success number is ", index)
            index += 1

if __name__ == "__main__":
    # Convert the two annotated splits, training set first.
    for split_name in ("train", "val"):
        convertimgset(split_name)

    # Rename the generated image lists: train -> trainval, val -> test.
    main_dir = rootdir + "/ImageSets/Main/"
    for src_name, dst_name in (("train.txt", "trainval.txt"), ("val.txt", "test.txt")):
        shutil.move(main_dir + src_name, main_dir + dst_name)

榨菜rose

关注

2
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
人脸检测数据集构造(WIDER FACE转VOC格式)

一、下载WIDER FACE数据集http://shuoyang1213.me/WIDERFACE/选择下载标注信息、训练集、验证集、测试集四个压缩包：二、标注信息说明解压缩后得到四个文件夹：标注信息在第一个文件夹中，其他的文件夹分别存放的是测试图片集、训练图片集、验证图片集。进入标注信息文件夹：打开wider_face_train_bbx_gt.txt:可以看到第一行是...
复制链接

扫一扫