一、下载BDD100K数据集
数据集下载地址:http://bdd-data.berkeley.edu
二、json格式转yolo的txt格式
下面是一个bdd100k到yolov5的标注转换代码。其中把"car"、"bus"、"truck"这三个类合并为一类(类别id为1),"pedestrian"、"other person"、"rider"合并为另一类(类别id为0),并过滤掉了晚上的数据。
训练集:
import re
import os
import json
def search_file(data_dir, pattern=r'\.jpg$'):
    """Yield the absolute path of every file under ``data_dir`` that matches.

    The directory tree is walked recursively with ``os.walk``.  ``pattern``
    is a regular expression searched (case-insensitively) in the bare file
    name, not in the full path.

    Args:
        data_dir: Directory to walk; it is made absolute first.
        pattern: Regular expression applied to each file name.

    Yields:
        Absolute paths of the matching files, in ``os.walk`` order.
    """
    # Compile once instead of handing the pattern to re.search per file.
    matcher = re.compile(pattern, re.I)
    root_dir = os.path.abspath(data_dir)
    for root, _dirs, files in os.walk(root_dir):
        for f in files:
            if matcher.search(f):
                yield os.path.join(root, f)
class Bdd2yolov5:
    """Convert BDD100K detection labels (JSON) into YOLOv5 txt label files.

    Person-like categories ("pedestrian", "other person", "rider") map to
    class id 0 and vehicle categories ("car", "bus", "truck") map to class
    id 1; every other category is ignored.  Frames tagged as night-time, and
    frames that end up without any usable box, get no label file and (by
    default) have their image deleted from ``image_dir``.
    """

    def __init__(self, image_dir=r"D:\Download\bdd100k\images\100k\train",
                 label_dir="./train/train", remove_images=True):
        """Set up the category mapping and the directories to work on.

        Args:
            image_dir: Directory holding the images named in the JSON file.
            label_dir: Directory that receives the generated txt files.
            remove_images: When True, delete the image of every frame that
                is filtered out or has no selected box.  This is
                destructive; pass False to keep the images untouched.
        """
        # Image size used to normalise box coordinates.
        self.bdd100k_width = 1280
        self.bdd100k_height = 720
        self.select_categorys = ["pedestrian", "other person", "rider", "car", "bus", "truck"]
        self.cat2id = {
            "pedestrian": 0,
            "other person": 0,
            "rider": 0,
            "car": 1,
            "bus": 1,
            "truck": 1,
        }
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.remove_images = remove_images

    def _filter_by_attr(self, attr=None):
        """Return True when the frame must be dropped (night-time image)."""
        if not attr:
            return False
        # Filter out images taken at night; a missing key means "keep".
        return attr.get('timeofday') == 'night'

    def _remove_image(self, image_name):
        """Delete ``image_name`` from ``image_dir`` if removal is enabled."""
        if not self.remove_images:
            return
        img_path = os.path.join(self.image_dir, image_name)
        if os.path.exists(img_path):
            os.remove(img_path)

    def _label_to_line(self, label, dw, dh):
        """Return the YOLO line for one label dict, or None to skip it."""
        category = label.get("category")
        if category not in self.select_categorys:
            return None
        box = label.get("box2d")
        if not box:
            # Labels without a 2D box cannot be expressed as a YOLO box.
            return None
        idx = self.cat2id[category]
        cx = (box["x1"] + box["x2"]) / 2.0
        cy = (box["y1"] + box["y2"]) / 2.0
        w = box["x2"] - box["x1"]
        h = box["y2"] - box["y1"]
        if w <= 0 or h <= 0:
            return None
        # Normalise by the image size.
        cx, cy, w, h = cx * dw, cy * dh, w * dw, h * dh
        return f"{idx} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n"

    def bdd2yolov5(self, path):
        """Convert the BDD100K JSON label file at ``path``.

        For each frame one ``<image stem>.txt`` file is written to
        ``label_dir`` with a ``class cx cy w h`` line per selected box
        (coordinates normalised to [0, 1] by the configured image size).
        Night-time frames and frames without a selected box produce no
        label file and their image is removed (see ``remove_images``).

        Args:
            path: Path of a JSON file containing a list of frame dicts.
        """
        with open(path, encoding="utf-8") as fp:
            frames = json.load(fp)

        if self.label_dir:
            os.makedirs(self.label_dir, exist_ok=True)

        dw = 1.0 / self.bdd100k_width
        dh = 1.0 / self.bdd100k_height

        for frame in frames:
            image_name = frame["name"]
            if self._filter_by_attr(frame.get('attributes')):
                self._remove_image(image_name)
                continue

            # Frames may lack "labels" entirely; treat that as "no boxes".
            lines = []
            for label in frame.get("labels") or []:
                line = self._label_to_line(label, dw, dh)
                if line is not None:
                    lines.append(line)

            if lines:
                # Swap only the extension; str.replace("jpg", "txt") would
                # also rewrite a "jpg" occurring inside the file stem.
                label_name = os.path.splitext(image_name)[0] + ".txt"
                yolo_txt = os.path.join(self.label_dir, label_name)
                with open(yolo_txt, 'w', encoding="utf-8") as fp2:
                    fp2.writelines(lines)
            else:
                self._remove_image(image_name)
if __name__ == "__main__":
    # Convert every BDD100K JSON label file found below ./train.
    converter = Bdd2yolov5()
    json_paths = search_file("./train", r"\.json$")
    for json_path in json_paths:
        converter.bdd2yolov5(json_path)
如果图片文件数量和标签文件数量不匹配,可以用下面的脚本删掉没有对应标签文件的多余图片:
import os
def search_file(data_dir, pattern=r'\.jpg$'):
    """Yield the bare name of every file under ``data_dir`` matching ``pattern``.

    The tree is walked recursively with ``os.walk``; only the file name
    (without its directory) is yielded.  ``pattern`` is a regular expression
    searched case-insensitively in the file name.

    Args:
        data_dir: Directory to walk; it is made absolute first.
        pattern: Regular expression applied to each file name.

    Yields:
        Names of the matching files.
    """
    # Local import: this script's import section only brings in ``os``.
    import re

    # Previously ``pattern`` was ignored and every file was yielded, so a
    # caller that deletes unmatched entries would also delete non-images.
    matcher = re.compile(pattern, re.I)
    root_dir = os.path.abspath(data_dir)
    for _root, _dirs, files in os.walk(root_dir):
        for f in files:
            if matcher.search(f):
                yield f
# Delete every .jpg in the image directory that has no matching .txt label
# file in the label directory.  This is destructive: back up the images first.
bdd_label_dir = r"D:\Download\bdd100k\images\100k\train"
label_dir = r"D:\Download\bdd100k\labels\det_20\train\train"
for path in search_file(bdd_label_dir, r"\.jpg$"):
    stem, ext = os.path.splitext(path)
    if ext.lower() != ".jpg":
        # Never touch files that are not JPEG images.
        continue
    # Swap only the extension; str.replace("jpg", "txt") would also rewrite
    # a "jpg" occurring inside the file stem.
    txt_path = os.path.join(label_dir, stem + ".txt")
    jpg_path = os.path.join(bdd_label_dir, path)
    # search_file yields bare names, so an image found in a sub-directory
    # does not exist at jpg_path; skip it instead of raising.
    if not os.path.exists(txt_path) and os.path.exists(jpg_path):
        os.remove(jpg_path)
验证集同理,改下文件路径即可
处理完之后: