一. 数据集分析
1.1解压从官方下载的MAFA数据集后,文件结构:
(官方链接已失效,这里是博主分享的网盘链接:https://pan.baidu.com/s/1HaBc5ybGG7X429Nzz-2CUg
提取码:j58x)
train_images/
test_images/
LabelTrainAll.mat
LabelTestAll.mat
readme-test.txt
readme-train.txt
可以看看这两个 txt 文件,它们解释了标注规范。重点:这个数据集的训练集只标注了有遮挡的人脸,没有遮挡的人脸没有标出;测试集中,则将人脸分成了 {没遮挡的脸、遮挡的脸、invalid 脸} 三类。
二. mat 2 xml
.mat的数据标注格式用起来总会不爽,改成常用的xml非常重要。
先把存放 xml 的文件夹建好:
test_xml/, train_xml/
train 和 test 的数据集标注略有不同,作者的 txt 文件也解释得相当清楚了,所以直接把代码拿出来看吧:
import scipy.io as sio
import numpy as np
from PIL import Image
# Maps the stringified class id found in the test annotations to the class
# name written into the XML. Each id is registered under both its integer
# spelling ('1') and its float spelling ('1.0') because the lookup key is
# produced with str(label), whose text depends on the numeric type that was
# loaded from the .mat file.
label_map = {}
for _class_id, _class_name in ((1, 'face_mask'), (2, 'face'), (3, 'invalid')):
    label_map[str(_class_id)] = _class_name
    label_map[str(float(_class_id))] = _class_name
def testMat2xml():
    """Convert the MAFA test annotations to one VOC-style XML file per image.

    Loads ``LabelTestAll.mat`` from the current working directory and walks
    the records stored under its ``LabelTest`` key. For each record the image
    in ``test_images/`` is opened to read its size, and
    ``test_xml/<image stem>.xml`` is written with one ``<object>`` per
    annotated box. The ``test_xml/`` directory must already exist.

    Raises:
        KeyError: if a box carries a label whose string form is not a key of
            ``label_map``. No XML file is written for that image.
    """
    xml_dir = 'test_xml/'
    img_dir = 'test_images/'
    records = sio.loadmat('LabelTestAll.mat')['LabelTest'][0]

    for record in records:
        img_name = record[0][0]  # e.g. test_00004934.jpg

        # Only the size is needed; the context manager releases the image
        # file handle immediately instead of leaking one per image.
        with Image.open(img_dir + img_name) as img:
            img_width, img_height = img.size

        parts = [
            '<annotation>\n',
            ' <folder>' + 'MAFA/' + img_dir + '</folder>\n',
            ' <filename>' + img_name + '</filename>\n',
            ' <size>\n',
            ' <width>' + str(img_width) + '</width>\n',
            ' <height>' + str(img_height) + '</height>\n',
            ' <depth>' + str(3) + '</depth>\n',
            ' </size>\n',
        ]

        # record[2] holds one row per annotated box; the first five values
        # are the box origin, its width/height and the class label.
        for row in record[2]:
            x, y, w, h, label = row[0:5]
            parts.extend([
                ' <object>\n',
                ' <name>' + label_map[str(label)] + '</name>\n',
                ' <bndbox>\n',
                ' <xmin>' + str(x) + '</xmin>\n',
                ' <ymin>' + str(y) + '</ymin>\n',
                ' <xmax>' + str(x + w) + '</xmax>\n',
                ' <ymax>' + str(y + h) + '</ymax>\n',
                ' </bndbox>\n',
                ' </object>\n',
            ])
        parts.append('</annotation>\n')

        # Strip only the final extension so names containing extra dots keep
        # their full stem.
        stem = img_name.rsplit('.', 1)[0]

        # The document is fully built before the file is opened, so a failure
        # above never leaves a truncated XML file behind, and the handle is
        # always closed.
        with open(xml_dir + stem + '.xml', 'w', encoding='utf-8') as xml_file:
            xml_file.write(''.join(parts))
def _train_object_xml(name, xmin, ymin, xmax, ymax):
    """Return the text of one VOC ``<object>`` element for the given box."""
    return ''.join([
        ' <object>\n',
        ' <name>' + name + '</name>\n',
        ' <bndbox>\n',
        ' <xmin>' + str(xmin) + '</xmin>\n',
        ' <ymin>' + str(ymin) + '</ymin>\n',
        ' <xmax>' + str(xmax) + '</xmax>\n',
        ' <ymax>' + str(ymax) + '</ymax>\n',
        ' </bndbox>\n',
        ' </object>\n',
    ])


def trainMat2xml(verbose=True):
    """Convert the MAFA training annotations to one VOC-style XML per image.

    Loads ``LabelTrainAll.mat`` from the current working directory and walks
    the records stored under its ``label_train`` key. For each record the
    image in ``train_images/`` is opened to read its size, and
    ``train_xml/<image stem>.xml`` is written. Every annotation row yields
    three ``<object>`` entries: ``face/maskface``, ``2eyes`` and
    ``occluder``. The ``train_xml/`` directory must already exist.

    Args:
        verbose: when true (the default, matching the historical behaviour)
            the raw fields of every annotation row are printed to stdout.
            Pass ``False`` to convert silently.

    Raises:
        ValueError: if an annotation row does not provide exactly 21 values.
    """
    xml_dir = 'train_xml/'
    img_dir = 'train_images/'
    records = sio.loadmat('LabelTrainAll.mat')['label_train'][0]

    for record in records:
        img_name = record[1][0]  # e.g. train_0000xxxx.jpg

        # Only the size is needed; the context manager releases the image
        # file handle immediately instead of leaking one per image.
        with Image.open(img_dir + img_name) as img:
            img_width, img_height = img.size

        parts = [
            '<annotation>\n',
            ' <folder>' + 'MAFA/' + img_dir + '</folder>\n',
            ' <filename>' + img_name + '</filename>\n',
            ' <size>\n',
            ' <width>' + str(img_width) + '</width>\n',
            ' <height>' + str(img_height) + '</height>\n',
            ' <depth>' + str(3) + '</depth>\n',
            ' </size>\n',
        ]

        for row in record[2]:
            # Field layout of one training row. The names follow how the
            # values are used below; NOTE(review): confirm the exact meaning
            # of each field against readme-train.txt.
            (x, y, w, h,
             x1, y1, x2, y2,
             x3, y3, w3, h3,
             occ_type, occ_degree, gender, race, orientation,
             x4, y4, w4, h4) = row[0:21]

            if verbose:
                print(x, y, w, h)
                print(x1, y1, x2, y2)
                print(x3, y3, w3, h3)
                print(occ_type, occ_degree, gender, race, orientation)
                print(x4, y4, w4, h4)

            # Face box: origin plus width/height.
            parts.append(_train_object_xml('face/maskface', x, y, x + w, y + h))
            # The (x1, y1, x2, y2) values are written out directly as the
            # corners of the '2eyes' entry.
            parts.append(_train_object_xml('2eyes', x1, y1, x2, y2))
            # The occluder origin is offset by the face-box origin, i.e. it
            # is treated as relative to the face box.
            parts.append(_train_object_xml(
                'occluder', x + x3, y + y3, x + x3 + w3, y + y3 + h3))
        parts.append('</annotation>\n')

        # Strip only the final extension so names containing extra dots keep
        # their full stem.
        stem = img_name.rsplit('.', 1)[0]

        # The document is fully built before the file is opened, so a failure
        # above never leaves a truncated XML file behind, and the handle is
        # always closed.
        with open(xml_dir + stem + '.xml', 'w', encoding='utf-8') as xml_file:
            xml_file.write(''.join(parts))
if __name__ == '__main__':
    # Script entry point: convert the test split first, then the train split.
    for convert in (testMat2xml, trainMat2xml):
        convert()
原创小脚本,希望对你有所帮助!
(•̀ᴗ•́)و ̑̑点赞