项目开发流程
1.数据集
-
1.1.数据集选择:使用百度飞桨(PaddlePaddle)AI Studio上的数据集,数据集地址:https://aistudio.baidu.com/datasetdetail/105521
-
1.2.数据集包含:Fatigue.zip,压缩包包含:Annotations和JPEGImages,分别是标注文件和图片文件
-
1.3.数据集说明:图片2915张,标注文件2914个,图片尺寸为640*480,标签格式为xml,包含类别、坐标、宽高等信息
-
1.4.数据平衡:数据集类别'closed_eye'与'open_eye'这一对比较平衡,'closed_mouth'与'open_mouth'这一对不平衡。
-
1.5.数据集预处理:数据集预处理代码在train3.ipynb中,大致流程:
-
1.5.1.压缩包读取并解压
-
1.5.2. 将数据集中的'JPEGImages'文件夹重命名为'images',以适配YOLOv8的要求
-
1.5.3. 删除没有标签的图片,最终仅删除一张图片
-
1.5.4. 转换VOC格式数据为YOLO格式
-
1.5.5. 绘制数据集饼图,可以看到'closed_mouth'与'open_mouth'这一对不平衡
-
1.5.6. 8:2分割训练集和测试集
-
1.5.7. 将数据集路径写入文本文件train_images.txt、val_images.txt、train_labels.txt、val_labels.txt
-
1.5.8. 生成Fatigue_det_data.yaml文件
-
1.5.9. 数据增强:训练时使用参数mosaic=0.7, mixup=0.3, copy_paste=0.2进行数据增强
与使用YOLOv8默认数据增强参数的对比图
-
2、模型
-
2.1. 模型选择:使用YOLOv8模型
-
2.2. 选择原因:YOLOv8模型具有丰富的功能,便于未来扩展安全驾驶相关的其他任务,比如是否一边开车一边玩手机、是否酒驾、是否系安全带等危险驾驶行为。并且YOLOv8具有高精度和实时检测的特点。它能够在较短时间内处理大量图像数据,非常适合需要快速响应的疲劳驾驶检测。
-
2.3. yolov8模型参数配置
- 'train': 'paths/train_images.txt',
- 'val': 'paths/val_images.txt',
- 'nc': 4,
- 'names': ['closed_eye', 'open_eye', 'closed_mouth', 'open_mouth']
-
2.4. 加载yolov8模型:model = YOLO("yolov8n.yaml")
3、训练参数设置
- epochs=30
- imgsz=640
- batch=16
- workers=2
- mosaic=0.7
- mixup=0.3
- copy_paste=0.2
4、模型训练
- 4.1. 使用趋动云最低配置算力规格:B1.small
- 4.2. 将train3.ipynb挂载到趋动云上,进入开发环境调试运行
- 4.3. 用时18分钟
- train3.ipynb代码
import os
import zipfile

# Skip extraction when a 'dataset' entry already exists in the working directory.
if not os.path.exists('dataset'):
    # zipfile replaces the notebook-only `!unzip` shell escape, so the same
    # code also runs as a plain Python script and needs no `unzip` binary.
    with zipfile.ZipFile('/gemini/data-1/Fatigue.zip') as archive:
        archive.extractall('.')
def rename_folder(dataset_path, old_name, new_name):
    """Rename the sub-folder ``old_name`` of ``dataset_path`` to ``new_name``.

    Prints a confirmation after renaming, or a notice when ``old_name`` is
    not present under ``dataset_path`` (in which case nothing is changed).

    Args:
        dataset_path: Directory that contains the folder to rename.
        old_name: Current folder name.
        new_name: Folder name to rename to.
    """
    source = os.path.join(dataset_path, old_name)
    if not os.path.exists(source):
        print(f"Folder '{old_name}' does not exist.")
        return
    os.rename(source, os.path.join(dataset_path, new_name))
    print(f"Folder '{old_name}' has been renamed to '{new_name}'.")
# Root folder of the dataset (trailing slash kept: later code concatenates
# sub-paths onto this string).
dataset_path = 'dataset/'
# Rename 'JPEGImages' to 'images' inside the dataset root.
rename_folder(dataset_path, old_name='JPEGImages', new_name='images')
from ultralytics import YOLO
from glob import glob
from sklearn.model_selection import train_test_split
import yaml
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# error — confirm it is still needed in the target environment.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Collect every image and annotation path.
image_files = glob(os.path.join(dataset_path, 'images', '*.jpg'))
annotation_files = glob(os.path.join(dataset_path, 'Annotations', '*.xml'))
print(len(image_files))

# Delete images that have no annotation file.
# Matching is done on the file name stem, so dots or the word "images"
# elsewhere in the path cannot produce a wrong annotation name, and the set
# makes each lookup O(1) instead of scanning the whole annotation list.
annotated_stems = {
    os.path.splitext(os.path.basename(path))[0] for path in annotation_files
}
for pic in image_files:
    if os.path.splitext(os.path.basename(pic))[0] not in annotated_stems:
        os.remove(pic)

# Re-scan both folders and report the sizes after the clean-up.
image_files = glob(os.path.join(dataset_path, 'images', '*.jpg'))
annotation_files = glob(os.path.join(dataset_path, 'Annotations', '*.xml'))
print(len(image_files), len(annotation_files))
# 转换VOC格式数据为YOLO格式
import xml.etree.ElementTree as ET
# Class names handled by the VOC-to-YOLO conversion.
classes = ['closed_eye', 'open_eye', 'closed_mouth', 'open_mouth']
# One object counter per class, all starting at zero.
nums = [0 for _ in classes]
def convert(size, box):
    """Turn a pixel bounding box into normalised (x, y, w, h) values.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)``.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and the box size, with x and w
        multiplied by ``1 / image_width`` and y and h by ``1 / image_height``.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    # The centre is shifted by one pixel before it is scaled.
    center_x = (x_min + x_max) / 2.0 - 1
    center_y = (y_min + y_max) / 2.0 - 1
    return (
        center_x * scale_x,
        center_y * scale_y,
        (x_max - x_min) * scale_x,
        (y_max - y_min) * scale_y,
    )
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO label text file.

    Reads ``Annotations/<image_id>.xml`` under ``dataset_path`` and writes
    ``labels/<image_id>.txt`` with one ``class_id x y w h`` line per kept
    object. Objects whose class is not in ``classes`` or whose ``difficult``
    flag is 1 are skipped. Every kept object also increments the matching
    entry of the module-level ``nums`` counter.

    Args:
        image_id: File name stem shared by the annotation and label files.
    """
    xml_path = dataset_path + 'Annotations/%s.xml' % (image_id)
    label_path = dataset_path + 'labels/%s.txt' % (image_id)

    # Parsing from the path lets ElementTree open and close the file itself.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed.
    with open(label_path, 'w') as out_file:
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag is treated as "not difficult".
            difficult = int(obj.findtext('difficult') or 0)
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            nums[cls_id] += 1
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Create the label output folder on first use.
labels_dir = dataset_path + 'labels/'
if not os.path.exists(labels_dir):
    os.makedirs(labels_dir)

# Convert every XML annotation; the file name stem is the image id.
for filename in annotation_files:
    if not filename.endswith('.xml'):
        continue
    image_id = os.path.splitext(os.path.basename(filename))[0]
    convert_annotation(image_id)
# Class distribution of the dataset.
import matplotlib.pyplot as plt

print(classes, nums)
# Wedge sizes are the per-class object counts.
sizes = nums
# Draw the pie chart on an 8x8 inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(sizes, labels=classes, autopct='%1.1f%%', startangle=140)
ax.axis('equal')  # equal aspect ratio keeps the pie circular
ax.set_title('数据集分布')
plt.show()
# 80/20 train/validation split.
# glob returns paths in arbitrary order, so two independent listings are not
# guaranteed to line up. Sorting the images makes the split reproducible for
# the fixed random_state, and deriving each label path from its image name
# keeps every image paired with its own label file.
image_files = sorted(image_files)
labels_files = [
    os.path.join(dataset_path, 'labels',
                 os.path.splitext(os.path.basename(image))[0] + '.txt')
    for image in image_files
]
train_images, val_images, train_labels, val_labels = train_test_split(
    image_files, labels_files, test_size=0.2, random_state=42)
# Write the split file lists to text files under 'datasets/paths/'.
# A dedicated name is used for this output root so the dataset root stored in
# `dataset_path` is not overwritten; earlier steps that read `dataset_path`
# therefore keep working when they are run again.
paths_root = 'datasets/'
os.makedirs(os.path.join(paths_root, 'paths'), exist_ok=True)


def write_file_list(file_list, filename):
    """Write each entry of ``file_list`` on its own line.

    Args:
        file_list: Iterable of paths to write.
        filename: Name of the text file created inside ``datasets/paths/``.
    """
    target = os.path.join(paths_root, 'paths', filename)
    with open(target, 'w') as f:
        f.writelines(f"{file}\n" for file in file_list)


write_file_list(train_images, 'train_images.txt')
write_file_list(val_images, 'val_images.txt')
write_file_list(train_labels, 'train_labels.txt')
write_file_list(val_labels, 'val_labels.txt')
# Generate the Fatigue_det_data.yaml dataset description.
# The class count and names are taken from `classes`, the same list that
# defines the class ids written to the label files, so the two cannot drift
# apart.
data = {
    'train': 'paths/train_images.txt',
    'val': 'paths/val_images.txt',
    'nc': len(classes),
    'names': list(classes),
}
with open('Fatigue_det_data.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)
# Build the model from the yolov8n.yaml definition.
model = YOLO("yolov8n.yaml")

# Tuning plan 2: more data augmentation, more epochs, larger batch.
train_args = dict(
    data="Fatigue_det_data.yaml",
    epochs=30,
    imgsz=640,
    batch=16,
    workers=2,
    mosaic=0.7,       # mosaic augmentation
    mixup=0.3,        # mixup augmentation
    copy_paste=0.2,   # copy-paste augmentation
)
results = model.train(**train_args)
5、训练结果
6、项目开源体验
我在趋动云社区发布了一个开源项目,一起来体验下吧
疲劳驾驶模型训练v3.2:https://open.virtaicloud.com/web/project/detail/471879287792889856
7、模型推理
使用训练好的best.pt模型推理,摄像头实时捕捉
7.1. 图片演示
7.2. 摄像头演示
运行fatigue_driving\inference.py
# 1. Import the YOLO class and OpenCV.
from ultralytics import YOLO
import cv2

# 2. Load the trained weights.
model = YOLO('best.pt')

# Open the camera at device index 0.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped silently.
    print('无法打开摄像头')

try:
    while cap.isOpened():
        # 3. Read one frame; returns (read status, image data).
        status, frame = cap.read()
        if not status:
            print('读取失败')
            break
        # 4. Run the model on the frame.
        results = model(frame)
        # 5. Draw the predictions of the first result.
        img = results[0].plot()
        # 6. Show the annotated frame.
        cv2.imshow('frame', img)
        # Wait about 1/24 s per frame. Masking with 0xFF keeps only the low
        # byte of the key code, so Esc (27) is detected even when the
        # platform sets higher bits.
        if cv2.waitKey(int(1000 / 24)) & 0xFF == 27:
            break
finally:
    # Release the camera and close the windows even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()