VisDrone是天津大学举办的比赛,应该有好几届了。这是我第一次参加这种竞赛,由于时间与毕业季冲突了一部分,小组成员也都有些其他事情要忙,导致真正的用于比赛的时间只有不到四天。我们选择的赛道是目标检测,使用的模型是yolov8,因为时间紧迫,所以对模型一点改进都没有,调参也都不会,所以最终要求就是有个结果正常完赛就行。然后是一些竞赛小白的踩坑与过程记录吧。
写在前头的小建议:
- 不要在自己的电脑上将文件传来传去,使用一台可以远程的机子
- 代码与超参数做好版本保护
- 对每一版结果做好备注,以免混淆
代码获取
git clone https://github.com/ultralytics/ultralytics.git
目录结构
注意dataset位置要放正确,需要跑的脚本都在detect目录下,参数及其他配置在cfg目录。
├─.github
├─datasets
│ └─VisDrone2019
│ ├─VisDrone2019-DET-test-dev
│ │ ├─annotations
│ │ ├─images
│ │ └─labels
│ ├─VisDrone2019-DET-train
│ │ ├─annotations
│ │ ├─images
│ │ └─labels
│ └─VisDrone2019-DET-val
│ ├─annotations
│ ├─images
│ └─labels
├─docker
├─docs
├─examples
├─tests
└─ultralytics
├─datasets
│ ├─datasets
│ ├─VisDrone.yaml
└─yolo
├─cfg
└─v8
└─detect
数据集
VisDrone官方提供的数据集标注格式为
[x, y, w, h] 类型,需要将其转换为 YOLO 数据格式。我们的代码直接参考了这位大佬的实现,并在此基础上继续实现 YOLO 格式转 VisDrone。具体代码如下:
VisDrone.yaml
# Ultralytics YOLO 🚀, AGPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Example usage: yolo train data=VisDrone.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── VisDrone  ← downloads here (2.3 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: F:\yolov8\datasets\VisDrone2019  # dataset root dir
train: VisDrone2019-DET-train/images  # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images  # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images  # test images (optional) 1610 images

# Classes
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import os
  from pathlib import Path

  from ultralytics.yolo.utils.downloads import download


  def visdrone2yolo(dir):
      """Convert VisDrone annotations under *dir* into YOLO-format label files."""
      from PIL import Image
      from tqdm import tqdm

      def convert_box(size, box):
          # Convert VisDrone box (x_min, y_min, w, h in pixels) to normalized YOLO xywh (center-based)
          dw = 1. / size[0]
          dh = 1. / size[1]
          return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

      (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
      pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
      for f in pbar:
          # Image size is needed to normalize pixel coordinates
          img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
          lines = []
          with open(f, 'r') as file:  # read annotation.txt
              for row in [x.split(',') for x in file.read().strip().splitlines()]:
                  if row[4] == '0':  # VisDrone 'ignored regions' class 0
                      continue
                  cls = int(row[5]) - 1  # VisDrone categories are 1-based; YOLO is 0-based
                  box = convert_box(img_size, tuple(map(int, row[:4])))
                  lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
              with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl:
                  fl.writelines(lines)  # write label.txt


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
  download(urls, dir=dir, curl=True, threads=4)

  # Convert
  for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
      visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
VisDrone2Yolo.py
from PIL import Image
from tqdm import tqdm
from pathlib import Path
import os


def visdrone2yolo(dir):
    """Convert VisDrone annotation files under *dir* into YOLO-format labels.

    Reads ``dir/annotations/*.txt`` (VisDrone ``x,y,w,h,score,category,...``
    pixel rows) and writes one normalized ``cls cx cy w h`` line per object to
    ``dir/labels/<same name>.txt``. Ignored regions (category 0) are dropped.
    NOTE: the parameter shadows the builtin ``dir()``; name kept for
    backward compatibility with existing callers.
    """

    def convert_box(size, box):
        # Convert a VisDrone (x_min, y_min, w, h) pixel box to a normalized
        # YOLO (cx, cy, w, h) box, given the image (width, height).
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
    pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
    for f in pbar:
        # Image size is needed to normalize the pixel coordinates.
        img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
        lines = []
        with open(f, 'r') as file:  # read annotation.txt
            for row in [x.split(',') for x in file.read().strip().splitlines()]:
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1  # VisDrone categories are 1-based; YOLO is 0-based
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
        with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
            fl.writelines(lines)  # write label.txt


dir = Path('./visdrone/exp928/')  # VisDrone2019 root inside the datasets folder
# Convert every split from VisDrone annotations to YOLO labels.
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)
Yolo2VisDrone.py
from PIL import Image
from tqdm import tqdm
from pathlib import Path
import os


def yolo2visdrone(dir):
    """Convert YOLO prediction labels back into the VisDrone submission format.

    Reads ``dir/labels/*.txt`` lines of ``cls cx cy w h conf`` (normalized,
    as produced by predict with ``save_txt=True`` and ``save_conf=True``) and
    writes ``x,y,w,h,conf,cls,occlusion,truncation`` pixel rows into
    ``dir/post_labels/``. Images are expected directly under *dir*
    (``dir/<name>.jpg``) so their size can be used to de-normalize boxes.
    NOTE: the parameter shadows the builtin ``dir()``; name kept for
    backward compatibility with existing callers.
    """

    def convert_box(size, box):
        # Normalized YOLO (cx, cy, w, h) -> VisDrone pixel (x_min, y_min, w, h).
        return (int((box[0] - box[2] / 2) * size[0]),
                int((box[1] - box[3] / 2) * size[1]),
                int(box[2] * size[0]),
                int(box[3] * size[1]))

    (dir / 'post_labels').mkdir(parents=True, exist_ok=True)
    pbar = tqdm((dir / 'labels').glob('*.txt'), desc=f'Converting {dir}')
    for f in pbar:
        img_size = Image.open((dir / f.name).with_suffix('.jpg')).size
        lines = []
        with open(f, 'r') as file:
            for row in [x.split(' ') for x in file.read().strip().splitlines()]:
                cls = int(row[0]) + 1  # YOLO classes are 0-based; VisDrone is 1-based
                box = convert_box(img_size, tuple(map(float, row[1:5])))
                conf = float(row[5])  # requires save_conf=True at predict time
                occ = -1  # occlusion unknown for predictions
                jieduan = -1  # truncation unknown for predictions
                lines.append(f"{','.join(f'{x}' for x in box)},{conf:.4f},{cls},{occ},{jieduan}\n")
        # Opening with mode 'w' creates the file even when there are no
        # detections, so an empty result still yields the required (empty)
        # submission file — no separate empty-file branch is needed.
        with open(str(f).replace(os.sep + 'labels' + os.sep, os.sep + 'post_labels' + os.sep), 'w') as fl:
            fl.writelines(lines)


dir = Path('./visdrone/exp928/')  # directory holding the predicted labels
yolo2visdrone(dir)
实验部分
train.py
需要注意数据集位置的问题,一定严格按照要求放置数据及位置
由于时间不足,我们直接在yolov8官方提供的yolov8n.pt
的基础上进行改进,继续训练100个epoch,没有改变默认参数配置。显卡使用的是2080Ti,大概需要4.5小时。我们最终提交的也是这版结果,后续有继续尝试改善,但无果。
在此基础上修改部分参数,继续训练300个epoch,但效果更差,出现了过拟合。
我们也尝试将数据集进行切割以改善小目标的检测效果,但在训练过程中直接早停,从验证集来看效果更差了,这可能与数据集切割是否正确有关系,也可能是其他参数的设置,或者是因为我们从头训练,刚开始没有早停的必要。
注意如果是windows,需要将default.yaml
中的workers
设置为0
predict.py
default.yaml
需要将与结果保存相关的参数设置为True
# Prediction settings --------------------------------------------------------------------------------------------------
source: # path of the dataset to run prediction on
save_txt: True # (bool) save results as .txt file
save_conf: True # (bool) save results with confidence scores
- 修改engine的
results.py
的save_txt
函数以正常保存没有检测结果的空文件
def save_txt(self, txt_file, save_conf=False):
    """Append detection/classification results to *txt_file* in YOLO text format.

    Modified from the stock ultralytics ``Results.save_txt``: the file is
    always opened (append mode), even when there are no results, so images
    with zero detections still produce an (empty) label file — required by
    the VisDrone submission format.

    Args:
        txt_file: path of the .txt file to append results to.
        save_conf (bool): if True, append the confidence score to each line.
    """
    boxes = self.boxes
    masks = self.masks
    probs = self.probs
    kpts = self.keypoints
    texts = []
    if probs is not None:
        # Classify: one "conf name" line per top-5 class.
        for j in probs.top5:
            texts.append(f'{probs.data[j]:.2f} {self.names[j]}')
    elif boxes:
        # Detect/segment/pose
        for j, d in enumerate(boxes):
            c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
            line = (c, *d.xywhn.view(-1))
            if masks:
                seg = masks[j].xyn[0].copy().reshape(-1)  # reversed mask.xyn, (n,2) to (n*2)
                line = (c, *seg)
            if kpts is not None:
                kpt = kpts[j].xyn.reshape(-1).tolist()
                line += (*kpt, )
            line += (conf, ) * save_conf + (() if id is None else (id, ))
            texts.append(('%g ' * len(line)).rstrip() % line)
    # Always write (even when texts is empty) so an empty file is created.
    with open(txt_file, 'a') as f:
        f.writelines(text + '\n' for text in texts)
提交与结果
- 使用
Yolo2VisDrone.py
对predict
结果进行后处理以符合提交格式 - 提交需要写
description.doc
文档,主要是算法描述与团队信息
最终结果惨不忍睹😂
总结or碎碎念
- 去雾、去雨、色彩增强
雾、雨等自然环境对图片质量会产生显著影响,可以在预处理时增加去雾去雨操作 - 目标过多、存在大量重叠区域(密集)
当目标数量较少时,效果还是可以的,但是人一多就会特别多的重叠,也很难区分效果好不好
- 类别区分度
大致过了一些检测效果图发现,自行车和摩托车、三轮车等都很难区分(我自己也不是很能分得清),如何改善也没个想法QAQ
最终还是第一版的结果最好,不太理解但也接受了。在看检测结果时还发现存在将背包也检测成行人,在暗环境中无法检测到的问题,继续学习与改进吧。