WiderPerson转YOLO格式
WiderPerson介绍
WiderPerson论文引用格式:
Zhang S, Xie Y, Wan J, et al. Widerperson: A diverse dataset for dense pedestrian detection in the wild[J]. IEEE Transactions on Multimedia, 2019, 22(2): 380-393.
WiderPerson是面向拥挤场景的户外行人检测基准数据集。
数据集共13382张图像,包含约40万个带有各种遮挡情况的标注。
WiderPerson数据集包含行人、骑自行车的人、部分可见人、人群和忽略区域5种标注
该数据集已经划分好train(8000张)、val(1000张)、test(4382张),其中test无标注文件
(注意:000040.jpg.txt文件乱码,训练时可删除)
原论文中数据信息
下载后的文件样式
这里我的路径是F:\WiderPerson(请注意!)
如果大家的路径和我的不一样,请将下面代码中的F:\WiderPerson替换成自己的路径;
注意只替换到这一级目录,F:\WiderPerson后面的子路径不要改动,对应的文件夹会由代码自动生成
Annotations
Annotations中文件名类似:000041.jpg.txt,共9000条信息
除去乱码文件000040.jpg.txt后,共8999条信息(其中训练集7999条)
每个文件的第一行是该张图片中的标注数量
从第二行起,每行对应一个标注:【所属类别 xmin ymin xmax ymax】
将标签可视化在图片上代码:
代码来源
注意:由于000040文件是问题文件,需要先将Annotations、Images以及train.txt中对应的条目删除,再运行代码
import os
import cv2
if __name__ == '__main__':
    # Draw the class-1 (pedestrian) and class-3 (partially visible person)
    # boxes of every training image and show the images one at a time.
    # Dataset root: replace with wherever you stored WiderPerson.
    root = 'F:\\WiderPerson'
    path = os.path.join(root, 'train.txt')
    with open(path, 'r') as f:
        # One image id per line; blank lines are ignored.
        img_ids = [x.strip() for x in f.read().splitlines() if x.strip()]

    for img_id in img_ids:  # e.g. '000040'
        img_path = os.path.join(root, 'Images', img_id + '.jpg')
        print(img_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising, so check before using the image.
            print('skipping %s: image could not be read' % img_path)
            continue

        # Annotation files are named '<id>.jpg.txt'.
        label_path = os.path.join(root, 'Annotations', img_id + '.jpg.txt')
        print(label_path)
        try:
            with open(label_path) as file:
                # First line holds the number of annotations; parsing it also
                # rejects garbled files early.
                int(file.readline())
                for line in file:
                    fields = line.split()
                    if not fields:
                        continue
                    cls = int(fields[0])
                    print(cls)
                    # class_label 1: pedestrians
                    # class_label 2: riders
                    # class_label 3: partially-visible persons
                    # class_label 4: ignore regions (fake people, e.g. in pictures)
                    # class_label 5: crowd (one large box covering a crowd)
                    if cls == 1 or cls == 3:
                        xmin, ymin, xmax, ymax = (
                            int(float(v)) for v in fields[1:5]
                        )
                        img = cv2.rectangle(
                            img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2
                        )
        except (UnicodeDecodeError, ValueError) as err:
            # A corrupted annotation file should not abort the whole run.
            print('skipping %s: unreadable annotation (%s)' % (label_path, err))
            continue

        cv2.imshow('result', img)
        # Wait for a key press before moving on to the next image.
        cv2.waitKey(0)
WiderPerson转YOLO
数据集下载地址
由于WiderPerson数据集内包含五种类别,可取自己所需类别进行转换
此处只留下第一种类别
转格式以及选取类别
import os
from PIL import Image
import shutil
# coding=utf-8
def check_charset(file_path):
    """Guess the text encoding of a file from its first four bytes.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value chardet reports under the 'encoding' key for those bytes.
    """
    # chardet is imported lazily so it is only required when this is called.
    import chardet

    with open(file_path, "rb") as handle:
        head = handle.read(4)
    return chardet.detect(head)['encoding']
def convert(size, box0, box1, box2, box3):
    """Turn a corner-format box into a normalised centre-format box.

    Args:
        size: (width, height) of the image.
        box0: xmin of the box, in pixels.
        box1: ymin of the box, in pixels.
        box2: xmax of the box, in pixels.
        box3: ymax of the box, in pixels.

    Returns:
        Tuple (x_center, y_center, width, height), each scaled by the
        reciprocal of the corresponding image dimension.
    """
    inv_w, inv_h = 1. / size[0], 1. / size[1]
    center_x = (box0 + box2) / 2
    center_y = (box1 + box3) / 2
    span_x = box2 - box0
    span_y = box3 - box1
    return (center_x * inv_w, center_y * inv_h, span_x * inv_w, span_y * inv_h)
if __name__ == '__main__':
    # Convert one WiderPerson split to YOLO-style label files, keeping only
    # class 1 (pedestrians), and move the split's images next to them.
    # Dataset root: replace with wherever you stored WiderPerson; keep the
    # sub-paths below unchanged.
    root = 'F:\\WiderPerson'
    # Split to convert; change to 'train' and run again for the training set.
    split = 'val'
    outpath_txt = os.path.join(root, 'WiderPerson', 'label', split)
    outpath_jpg = os.path.join(root, 'WiderPerson', 'images', split)
    # exist_ok lets the script be re-run after an interrupted conversion.
    os.makedirs(outpath_txt, exist_ok=True)
    os.makedirs(outpath_jpg, exist_ok=True)

    path = os.path.join(root, split + '.txt')
    with open(path, 'r') as f:
        # One image id per line; blank lines are ignored.
        img_ids = [x.strip() for x in f.read().splitlines() if x.strip()]

    for img_id in img_ids:  # e.g. '000040'
        img_path = os.path.join(root, 'Images', img_id + '.jpg')
        if not os.path.isfile(img_path):
            # Typically an image already moved by an earlier, partial run.
            print('skipping %s: image not found' % img_path)
            continue
        with Image.open(img_path) as Img:
            img_size = Img.size  # (width, height)

        # Annotation files are named '<id>.jpg.txt'.
        label_path = os.path.join(root, 'Annotations', img_id + '.jpg.txt')
        outpath = os.path.join(outpath_txt, img_id + '.txt')

        rows = []
        try:
            with open(label_path, encoding=check_charset(label_path)) as file:
                # First line holds the number of annotations; parsing it also
                # rejects garbled files early.
                int(file.readline())
                for line in file:
                    fields = line.split()
                    if not fields:
                        continue
                    cls = int(fields[0])
                    # To keep partially visible persons too, use:
                    # if cls not in (1, 3):
                    if cls != 1:
                        continue
                    xmin, ymin, xmax, ymax = (float(v) for v in fields[1:5])
                    print(img_size[0], img_size[1], xmin, ymin, xmax, ymax)
                    bb = convert(img_size, xmin, ymin, xmax, ymax)
                    rows.append('1' + ' ' + ' '.join(str(a) for a in bb) + '\n')
        except (UnicodeDecodeError, ValueError) as err:
            # A corrupted annotation file should not abort the run and leave
            # the dataset half-moved; leave its image where it is.
            print('skipping %s: unreadable annotation (%s)' % (label_path, err))
            continue

        with open(outpath, 'w') as outfile:
            outfile.writelines(rows)
        # To keep the original files, copy instead of moving:
        # shutil.copy(img_path, os.path.join(outpath_jpg, img_id + '.jpg'))
        shutil.move(img_path, os.path.join(outpath_jpg, img_id + '.jpg'))
运行完毕后:
运行后将代码27行、29行、34行中val替换成train 再运行一遍!
此处时间较长,耐心等候下啦!
运行完毕后,打开F:\WiderPerson\Images文件夹,里面剩余的4382张图片即测试集,
可以放到F:\WiderPerson\WiderPerson\images\test(需自行创建)里面去(其实也可以不要)
关于txt文件的处理:(直接运行)
import os
# Paths: `label` holds the intermediate files written by the conversion
# script, `labels` receives the final YOLO label files.
otxt_path = "F:\\WiderPerson\\WiderPerson\\label\\val"
ntxt_path = "F:\\WiderPerson\\WiderPerson\\labels\\val"
# exist_ok so that re-running the script does not fail on an existing folder.
os.makedirs(ntxt_path, exist_ok=True)

for root, dirs, files in os.walk(otxt_path):
    for i in files:
        otxt = os.path.join(root, i)
        ntxt = os.path.join(ntxt_path, i)
        filer = []
        with open(otxt, 'r', encoding='utf-8') as src:
            for line in src:
                fields = line.split()
                if not fields:
                    continue
                # Raises ValueError if a line has fewer than five fields.
                x, y, w, h = fields[1:5]
                # To shift several classes (1, 2, 3 -> 0, 1, 2) instead of
                # merging them into one, use:
                # cls = '%s' % (int(fields[0]) - 1)
                cls = '0'
                filer.append(' '.join((cls, x, y, w, h)) + '\n')
        # 'w' rather than 'a': re-running must not duplicate the labels.
        # An input file without labels still produces an empty output file.
        with open(ntxt, 'w') as dst:
            dst.writelines(filer)
注意代码注释掉的第30行:
cls = '%s'%(int(cls[0])-1) + " " + cls[1]+ " " + cls[2]+ " " + cls[3]+ " " + cls[4]
如果你的类别留的是1、2、3,这种多类别,此行代码是将其变为 0、1、2
主要针对YOLO算法的标签要从0开始的这个规定
而如果你需要将1、2、3类别都当做是一类
那么不用解开注释,这段代码也相当于是归类处理了
最终得到:
将val换成train再来一遍!!
OK! 此时,文件夹label就可以删除了
生成yolo格式的train、val.txt 里面存储图片路径
我们还需要最后一步!
# Write one image-list file per split (train.txt / val.txt) next to the
# label folders; each line is the path of one image of that split.
sets = ['train', 'val']
labels_path = "F:\\WiderPerson\\WiderPerson\\labels"
txt_path = "F:\\WiderPerson\\WiderPerson\\labels"
# Placeholder prefix: replace it with the images folder of the final project,
# e.g. G:\\yolov5-main\\datasets\\images
image_root = '最终项目的位置'
for image_set in sets:
    # Image ids are the label file names up to the first dot; sorted because
    # os.listdir returns entries in arbitrary order.
    image_i = sorted(
        image_ids.split(".")[0]
        for image_ids in os.listdir(os.path.join(labels_path, image_set))
    )
    # 'w' rather than 'a': re-running must not duplicate the entries, and the
    # context manager closes the file even if writing fails.
    with open(os.path.join(txt_path, '%s.txt' % image_set), 'w') as list_file:
        for c_id in image_i:
            print(c_id)
            list_file.write(image_root + '/%s/%s.jpg\n' % (image_set, c_id))
将F:\WiderPerson\WiderPerson文件夹命名为datasets,放置在项目中。