一. 数据集分析
1.1 解压 WIDER FACE 数据集和官方的标注文件
我将数据集文件结构整理为:
(1) wider_face/ 为官方的数据集:
(2) wider_face_split/ 为官方的标注内容:
(3) wider_xml/ 为自己创建的目录,用于存放转化后的 xml 文件:
二. txt 2 xml
官方的标注示例(每张图片依次为: 图片相对路径、人脸框数量、每个人脸框一行,格式为 x y w h blur expression illumination invalid occlusion pose):
0--Parade/0_Parade_marchingband_1_849.jpg
1
449 330 122 149 0 0 0 0 0 0
0--Parade/0_Parade_Parade_0_904.jpg
1
361 98 263 339 0 0 0 0 0 0
0--Parade/0_Parade_marchingband_1_799.jpg
21
78 221 7 8 2 0 0 0 0 0
78 238 14 17 2 0 0 0 0 0
113 212 11 15 2 0 0 0 0 0
134 260 15 15 2 0 0 0 0 0
163 250 14 17 2 0 0 0 0 0
201 218 10 12 2 0 0 0 0 0
182 266 15 17 2 0 0 0 0 0
245 279 18 15 2 0 0 0 0 0
304 265 16 17 2 0 0 0 2 1
328 295 16 20 2 0 0 0 0 0
389 281 17 19 2 0 0 0 2 0
406 293 21 21 2 0 1 0 0 0
436 290 22 17 2 0 0 0 0 0
522 328 21 18 2 0 1 0 0 0
643 320 23 22 2 0 0 0 0 0
653 224 17 25 2 0 0 0 0 0
793 337 23 30 2 0 0 0 0 0
535 311 16 17 2 0 0 0 1 0
29 220 11 15 2 0 0 0 0 0
3 232 11 15 2 0 0 0 2 0
20 215 12 16 2 0 0 0 2 0
txt 转 xml 的转化代码 txt2xml.py:
import scipy.io as sio
import numpy as np
from PIL import Image
# Convert the WIDER FACE ground-truth text file into one XML annotation file
# per image.
#
# The ground-truth file is a sequence of records:
#   <relative image path ending in .jpg>
#   <number of boxes>
#   <one line per box: x y w h followed by extra attribute columns>
# Only x, y, w, h are used; boxes are written as xmin/ymin/xmax/ymax.

# Which split to convert: 'train' or 'val'.
arg = 'val'

if arg == 'train':
    gtfile = "./wider_face_split/wider_face_train_bbx_gt.txt"
    im_folder = "./wider_face/WIDER_train/images/"
    xmlflie = "./wider_xml/WIDER_train_xml/"
    NoLabelXmlPath = "./wider_xml/WIDER_train_nolabel_xml/"
elif arg == 'val':
    gtfile = "./wider_face_split/wider_face_val_bbx_gt.txt"
    im_folder = "./wider_face/WIDER_val/images/"
    xmlflie = "./wider_xml/WIDER_val_xml/"
    NoLabelXmlPath = "./wider_xml/WIDER_val_nolabel_xml/"
else:
    # Fail early with a clear message instead of a NameError on gtfile below.
    raise ValueError("arg must be 'train' or 'val', got %r" % (arg,))

with open(gtfile, "r") as gt:
    jpg_num = 0                 # number of image records processed
    picWithoutLabel = 0         # number of images whose box count is 0
    picWithoutLabelName = []    # relative paths of those images

    while True:
        gt_con = gt.readline()
        print(gt_con)

        # Every record starts with an image path line; anything else
        # (including the empty string at end of file) ends the conversion.
        if not gt_con.endswith('.jpg\n'):
            print('bbbbb')
            break

        print('aaaaa')
        jpg_num += 1
        print('jpg_num:', jpg_num)

        # Strip only the trailing newline so that dots elsewhere in the path
        # cannot confuse the parsing.
        rel_path = gt_con[:-1]
        xml_name = rel_path[:-len('.jpg')].rsplit('/', 1)[-1] + '.xml'

        # Only the image dimensions are needed; close the image right away.
        with Image.open(im_folder + rel_path) as img:
            imgWidth, imgHeight = img.size
        print(imgWidth, imgHeight)

        bbox_num = gt.readline()
        print('bbox_num:', bbox_num)
        box_count = int(bbox_num)

        # Decide the output directory BEFORE opening the file, so an
        # unlabeled image never leaves an empty, unclosed XML file in the
        # labeled-output directory.
        out_dir = xmlflie
        if box_count == 0:
            # Images without annotations are collected separately.
            picWithoutLabel += 1
            print('picWithoutLabel:', picWithoutLabel, rel_path)
            picWithoutLabelName.append(rel_path)
            print('picWithoutLabelName:', picWithoutLabelName)
            # One box line is still consumed for such records (and written
            # out as an object), which keeps the reader aligned with the
            # next image path line.
            box_count = 1
            out_dir = NoLabelXmlPath

        with open(out_dir + xml_name, 'w') as xml_file:
            xml_file.write('<annotation>\n')
            xml_file.write(' <folder>' + 'wider_face/' + im_folder + '</folder>\n')
            xml_file.write(' <filename>' + rel_path + '</filename>\n')
            xml_file.write(' <size>\n')
            xml_file.write(' <width>' + str(imgWidth) + '</width>\n')
            xml_file.write(' <height>' + str(imgHeight) + '</height>\n')
            xml_file.write(' <depth>' + str(3) + '</depth>\n')
            xml_file.write(' </size>\n')

            for _ in range(box_count):
                bbox_mess = gt.readline().split(' ')
                print('bbox_mess:', bbox_mess)
                # Parse the four geometry columns once, before writing, so a
                # malformed line cannot produce a half-written <object>.
                x, y, w, h = (int(value) for value in bbox_mess[:4])
                xml_file.write(' <object>\n')
                xml_file.write(' <name>' + 'face' + '</name>\n')
                xml_file.write(' <bndbox>\n')
                xml_file.write(' <xmin>' + str(x) + '</xmin>\n')
                xml_file.write(' <ymin>' + str(y) + '</ymin>\n')
                xml_file.write(' <xmax>' + str(x + w) + '</xmax>\n')
                xml_file.write(' <ymax>' + str(y + h) + '</ymax>\n')
                xml_file.write(' </bndbox>\n')
                xml_file.write(' </object>\n')

            xml_file.write('</annotation>\n')
原创小脚本,希望能对你有所帮助!
(•̀ᴗ•́)و ̑̑点赞