[人脸数据集] 数据集处理：将MAFA人脸数据集的.mat标注格式转为.xml格式

最新推荐文章于 2021-12-06 18:00:32 发布

小新GSUNG0222

最新推荐文章于 2021-12-06 18:00:32 发布

阅读量3.9k

点赞数 15

分类专栏： python 数据集 　文章标签： 深度学习 python

本文链接：https://blog.csdn.net/qq_36848732/article/details/106220352

版权

python 同时被 2 个专栏收录

4 篇文章 1 订阅

订阅专栏

数据集

4 篇文章 0 订阅

订阅专栏

一. 数据集分析

1.1解压从官方下载的MAFA数据集后，文件结构：

（官方链接已失效，博主分享出来链接：https://pan.baidu.com/s/1HaBc5ybGG7X429Nzz-2CUg
提取码：j58x）

train_images/

test_images/

LabelTrainAll.mat

LabelTestAll.mat

readme-test.txt

readme-train.txt

可以先看看这两个 txt 文件，它们解释了标注规范。重点！这个数据集的训练集只标注了有遮挡的人脸，没有遮挡的人脸没有标出；测试集中，则将人脸分成了 { 没遮挡的脸、遮挡的脸、invalid 脸 } 三类。

二. mat 2 xml

.mat的数据标注格式用起来总会不爽，改成常用的xml非常重要。

先把装xml的文件夹建好，

test_xml/， train_xml/

train 和 test的数据集标注略有不同，作者的txt文件也解释得相当清楚了，所以直接把代码拿出来看吧

import scipy.io as sio
import numpy as np
from PIL import Image


# Translates a face-type code into the class name written to the XML.
# The lookup key is str(label), so both the integer spelling ("1") and the
# float spelling ("1.0") of every code are listed.
label_map = {
    '1': 'face_mask',
    '1.0': 'face_mask',
    '2': 'face',
    '2.0': 'face',
    '3': 'invalid',
    '3.0': 'invalid',
}

def testMat2xml():
    """Convert the MAFA test annotations from .mat into one XML file per image.

    Reads ``LabelTestAll.mat`` (key ``LabelTest``), opens every referenced
    image under ``test_images/`` to obtain its size, and writes
    ``test_xml/<image stem>.xml``. Each annotated box contributes one
    ``<object>`` element: the first five columns of a box row are read as
    ``x, y, w, h, label``, the box is written as corner coordinates
    (``x``, ``y``, ``x + w``, ``y + h``) and the label is translated through
    the module-level ``label_map``.

    The output folder is created if it does not exist yet.

    Raises:
        KeyError: if a label value has no entry in ``label_map``.
    """
    import os  # local import: only this function's path handling needs it

    xmlflie = 'test_xml/'
    imgflie = 'test_images/'
    load_path = 'LabelTestAll.mat'
    dict_keys_data = 'LabelTest'

    # Make sure the destination exists so the first open() cannot fail on it.
    os.makedirs(xmlflie, exist_ok=True)

    # Keys in the loaded dict: '__header__', '__version__', '__globals__',
    # 'LabelTest'. Row 0 of the 'LabelTest' entry holds one record per image.
    records = sio.loadmat(load_path)[dict_keys_data][0]

    header_template = (
        '<annotation>\n'
        '\t<folder>MAFA/{folder}</folder>\n'
        '\t<filename>{filename}</filename>\n'
        '\t<size>\n'
        '\t\t<width>{width}</width>\n'
        '\t\t<height>{height}</height>\n'
        '\t\t<depth>3</depth>\n'
        '\t</size>\n'
    )
    object_template = (
        '\t<object>\n'
        '\t\t<name>{name}</name>\n'
        '\t\t<bndbox>\n'
        '\t\t\t<xmin>{xmin}</xmin>\n'
        '\t\t\t<ymin>{ymin}</ymin>\n'
        '\t\t\t<xmax>{xmax}</xmax>\n'
        '\t\t\t<ymax>{ymax}</ymax>\n'
        '\t\t</bndbox>\n'
        '\t</object>\n'
    )

    for record in records:
        imgName = record[0][0]  # e.g. test_00004934.jpg

        # Only the size is needed; the context manager releases the image
        # file handle right away instead of leaking one per image.
        with Image.open(imgflie + imgName) as img:
            imgWidth, imgHeight = img.size

        # splitext drops only the final extension, so names containing extra
        # dots (or no dot at all) still yield a sensible stem.
        xml_path = xmlflie + os.path.splitext(imgName)[0] + '.xml'

        # 'with' guarantees the XML file is closed even if a write or a
        # label lookup raises part-way through.
        with open(xml_path, 'w', encoding='utf-8') as xml_file:
            xml_file.write(header_template.format(
                folder=imgflie,
                filename=imgName,
                width=imgWidth,
                height=imgHeight,
            ))

            # record[2] holds one row per bounding box annotated in the image.
            for box in record[2]:
                x, y, w, h, label = box[0:5]
                xml_file.write(object_template.format(
                    name=label_map[str(label)],
                    xmin=x,
                    ymin=y,
                    xmax=x + w,
                    ymax=y + h,
                ))

            xml_file.write('</annotation>\n')

def trainMat2xml():
    """Convert the MAFA train annotations from .mat into one XML file per image.

    Reads ``LabelTrainAll.mat`` (key ``label_train``), opens every referenced
    image under ``train_images/`` to obtain its size, and writes
    ``train_xml/<image stem>.xml``. For each annotated row three ``<object>``
    elements are written:

    * ``face/maskface`` -- ``(x, y, w, h)`` written as corners
      ``(x, y, x + w, y + h)``;
    * ``2eyes`` -- ``(x1, y1, x2, y2)`` written as-is as
      ``xmin, ymin, xmax, ymax``;
    * ``occluder`` -- ``(x3, y3, w3, h3)`` shifted by the face origin, i.e.
      ``(x + x3, y + y3, x + x3 + w3, y + y3 + h3)``.

    The output folder is created if it does not exist yet.
    """
    import os  # local import: only this function's path handling needs it

    xmlflie = 'train_xml/'
    imgflie = 'train_images/'
    load_path = 'LabelTrainAll.mat'
    dict_keys_data = 'label_train'

    # Make sure the destination exists so the first open() cannot fail on it.
    os.makedirs(xmlflie, exist_ok=True)

    # Row 0 of the 'label_train' entry holds one record per image.
    records = sio.loadmat(load_path)[dict_keys_data][0]

    header_template = (
        '<annotation>\n'
        '\t<folder>MAFA/{folder}</folder>\n'
        '\t<filename>{filename}</filename>\n'
        '\t<size>\n'
        '\t\t<width>{width}</width>\n'
        '\t\t<height>{height}</height>\n'
        '\t\t<depth>3</depth>\n'
        '\t</size>\n'
    )
    object_template = (
        '\t<object>\n'
        '\t\t<name>{name}</name>\n'
        '\t\t<bndbox>\n'
        '\t\t\t<xmin>{xmin}</xmin>\n'
        '\t\t\t<ymin>{ymin}</ymin>\n'
        '\t\t\t<xmax>{xmax}</xmax>\n'
        '\t\t\t<ymax>{ymax}</ymax>\n'
        '\t\t</bndbox>\n'
        '\t</object>\n'
    )

    for record in records:
        imgName = record[1][0]  # e.g. train_0000xxxx.jpg

        # Only the size is needed; the context manager releases the image
        # file handle right away instead of leaking one per image.
        with Image.open(imgflie + imgName) as img:
            imgWidth, imgHeight = img.size

        # splitext drops only the final extension, so names containing extra
        # dots (or no dot at all) still yield a sensible stem.
        xml_path = xmlflie + os.path.splitext(imgName)[0] + '.xml'

        # 'with' guarantees the XML file is closed even if a write raises.
        with open(xml_path, 'w', encoding='utf-8') as xml_file:
            xml_file.write(header_template.format(
                folder=imgflie,
                filename=imgName,
                width=imgWidth,
                height=imgHeight,
            ))

            # record[2] holds one row per annotated face. Only the first 12
            # columns are written out; the following columns (occ_type,
            # occ_degree, gender, race, orientation, x4, y4, w4, h4) are
            # not used by this conversion.
            for box in record[2]:
                x, y, w, h, x1, y1, x2, y2, x3, y3, w3, h3 = box[0:12]

                # NOTE(review): the occluder box is offset by the face origin
                # here, presumably because it is stored relative to the face
                # box -- confirm against readme-train.txt.
                objects = (
                    ('face/maskface', x, y, x + w, y + h),
                    ('2eyes', x1, y1, x2, y2),
                    ('occluder', x + x3, y + y3, x + x3 + w3, y + y3 + h3),
                )
                for name, xmin, ymin, xmax, ymax in objects:
                    xml_file.write(object_template.format(
                        name=name,
                        xmin=xmin,
                        ymin=ymin,
                        xmax=xmax,
                        ymax=ymax,
                    ))

            xml_file.write('</annotation>\n')



if __name__ == '__main__':
    # Script entry point: convert the test split first, then the train split.
    for convert in (testMat2xml, trainMat2xml):
        convert()

原创小脚本，希望对你有所帮助！

(•̀ᴗ•́)و ̑̑点赞

小新GSUNG0222

关注

15
点赞
踩
23

收藏

觉得还不错? 一键收藏
18
评论
[人脸数据集] 数据集处理：将MAFA人脸数据集的.mat标注格式转为.xml格式

一. 数据集分析1.1解压从官方下载的MAFA数据集后，文件结构：train_images/test_images/LabelTrainAll.matLabelTestAll.matreadme-test.txtreadme-train.txt可用看看这个txt文件，解释了标注规范，重点！这个数据集的训练集标注出了有遮挡的人类面部，而没有遮挡的面部没有标出，测试集中，将面部分成了 { 没遮挡的脸、遮挡的脸、 invalid 脸}二. mat 2 xmltrain 和
复制链接

扫一扫