一、前言
安全帽佩戴检测是工业安全中的一个重要课题。通过计算机视觉技术,尤其是YOLO(You Only Look Once)系列等深度学习模型,可以实现实时、高效的安全帽佩戴检测。YOLOv5是YOLO系列中较为成熟的版本,兼顾了检测精度和速度。本文把安全帽检测模型部署在边缘计算设备(如Jetson nano)上,并集成到智能监控设备中,通过蓝牙串口将没有正确佩戴安全帽的工人照片发送到电脑端。
二、数据集训练
1、数据标注
将采集到的图片数据集进行数据标注,共有5975张图片,其中有3163张戴安全帽的样本,2812张不戴安全帽的样本。数据标注后以VOC格式存储在labels文件夹中。
dataset
train
images
labels
labels.cache
val
images
labels
labels.cache
2、数据训练
在yolov5/data文件夹里面,新建一个hat.yaml文件(可以先新建一个记事本文件,再改名为hat.yaml),内容如下:
train: ../hatdataset/train/images/
val: ../hatdataset/val/images/
# test: ../dataset/test/images/
# Classes
names:
  0: hat
  1: nohat
修改yolov5/models/yolov5n.yaml文件,将nc的值改为2。相比于Yolov5s训练网络,选用Yolov5n作为训练,v5n网络结构模型小,专为 Nano 设备(如 NVIDIA Jetson Nano)进行优化。YOLOv5n 在保持较快速度的同时,提供适用于边缘设备的准确度。
YOLOv5s与YOLOv5n对比:
模型 | layers | parameters | gradients | FLOPs | 模型大小 | 平均检测时间 |
v5s | 157 | 7015519 | 7015519 | 15.8G | 14039KB | 220ms |
v5n | 157 | 1867405 | 1867405 | 4.5G | 3737KB | 100ms |
注:平均检测时间为Jetson nano检测一张640x480图片的平均时间。
YOLOv5s与YOLOv5n训练结果对比:
模型 | P | R | mAP50 | mAP50-95 |
v5n | 0.92 | 0.862 | 0.912 | 0.591 |
v5s | 0.912 | 0.826 | 0.879 | 0.547 |
使用v5s训练结果:
使用v5n训练结果:
对比可得:v5s收敛速度较快,如果想在较短的时间内训练出精度较高的模型,可以使用v5s进行训练;但是我们要把模型部署在边缘计算设备上,设备的性能有一定的局限性。由训练结果可知,相比于v5s模型,v5n可以在损失约4%精度的情况下,得到约120%的检测速度提升。
3、训练过程
在yolov5打开终端,输入:
conda activate yolov5
python train.py --img 640 --batch 4 --epoch 100 --data data/hat.yaml --cfg models/yolov5n.yaml --weights weights/yolov5n.pt
结果输出:
三、模型部署
1、转化为onnx模型
将模型转化为onnx模型部署于Jetson nano,虽然没什么速度提升,但是可以减小代码量,提高移植性。
python export.py --weights best.pt --include onnx --opset 11
2、环境安装
安装torch可以看我另一篇传送门。
安装完torch后,可以直接pip install onnxruntime-gpu
3、模型推理
将模型传输到Jetson nano后,创建python文件,下面展示了从图像读取、预处理、推理、非极大抑制到结果绘制的完整流程。关键点包括使用ONNX Runtime进行模型推理、图像的预处理(归一化和格式转换),以及后处理(非极大抑制和绘制检测框)。每一步都保证了图像数据正确传递和处理。推理过程:
# -*- coding: UTF-8 -*-
import cv2
import numpy as np
import onnxruntime
# Class names; a predicted class id is used as an index into this list.
CLASSES = ['hat', 'nohat']

# Drawing colour per class name, handed to the OpenCV drawing calls.
colors = dict(hat=(255, 0, 255), nohat=(111, 255, 0))
class YOLOV5():
    """Thin wrapper around an ONNX Runtime session for an exported model."""

    def __init__(self, onnxpath, providers=None):
        """Create the inference session and cache its input/output names.

        Args:
            onnxpath: Path of the ``.onnx`` model file.
            providers: Optional ordered list of ONNX Runtime execution
                providers. Defaults to CUDA first with a CPU fallback, so the
                same script still has a provider to use on a machine without
                a usable CUDA provider.
        """
        if providers is None:
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        self.onnx_session = onnxruntime.InferenceSession(
            onnxpath, providers=list(providers))
        self.input_name = self.get_input_name()
        self.output_name = self.get_output_name()

    # -------------------------------------------------------
    # Names of the model's input and output tensors
    # -------------------------------------------------------
    def get_input_name(self):
        """Return the names of all session inputs as a list."""
        return [node.name for node in self.onnx_session.get_inputs()]

    def get_output_name(self):
        """Return the names of all session outputs as a list."""
        return [node.name for node in self.onnx_session.get_outputs()]

    # -------------------------------------------------------
    # Input feed
    # -------------------------------------------------------
    def get_input_feed(self, img_tensor):
        """Map every cached input name to the same ``img_tensor``."""
        return {name: img_tensor for name in self.input_name}

    # -------------------------------------------------------
    # 1. resize the frame to 640x640
    # 2. BGR -> RGB and HWC -> CHW
    # 3. scale pixel values to [0, 1]
    # 4. add the batch dimension
    # 5. run the ONNX session
    # -------------------------------------------------------
    def inference(self, frame):
        """Run the model on one frame.

        Args:
            frame: Image array as read by cv2 (height, width, channels).

        Returns:
            ``(pred, img_o)``: the first output of the session and an
            untouched copy of ``frame``.
        """
        img_o = frame.copy()
        or_img = cv2.resize(frame, (640, 640))
        # Reverse the channel axis and move it first; the result is a strided
        # view, so make it contiguous float32 before handing it to the runtime.
        img = or_img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0
        img = np.expand_dims(img, axis=0)
        input_feed = self.get_input_feed(img)
        pred = self.onnx_session.run(None, input_feed)[0]
        return pred, img_o
# dets: array of shape [x, 6]; the columns are x1, y1, x2, y2, score, class
# thresh: IoU threshold
def nms(dets, thresh):
    """Greedy non-maximum suppression.

    Args:
        dets: 2-D array whose first five columns are x1, y1, x2, y2, score.
        thresh: A box is dropped when its IoU with an already kept box is
            greater than this value.

    Returns:
        List of row indices into ``dets`` that survive, highest score first.
    """
    left, top = dets[:, 0], dets[:, 1]
    right, bottom = dets[:, 2], dets[:, 3]
    conf = dets[:, 4]

    # Box areas (with the +1 pixel convention) and score-descending order.
    box_area = (bottom - top + 1) * (right - left + 1)
    order = conf.argsort()[::-1]

    kept = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        kept.append(best)

        # Intersection rectangle of the best box with every remaining box;
        # width/height clamp to 0 when the boxes do not overlap.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0, ix2 - ix1 + 1)
        inter_h = np.maximum(0, iy2 - iy1 + 1)
        inter = inter_w * inter_h

        # Keep only the remaining boxes whose IoU with the best box is
        # at most the threshold.
        iou = inter / (box_area[best] + box_area[rest] - inter)
        order = rest[iou <= thresh]
    return kept
def xywh2xyxy(x):
    """Convert rows from [x, y, w, h] to [x1, y1, x2, y2].

    The first four columns are replaced by ``x - w/2, y - h/2, x + w/2,
    y + h/2``; any further columns are copied unchanged. The input array is
    not modified.
    """
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w
    out[:, 1] = x[:, 1] - half_h
    out[:, 2] = x[:, 0] + half_w
    out[:, 3] = x[:, 1] + half_h
    return out
def filter_box(org_box, conf_thres, iou_thres):  # drop the useless boxes
    """Filter raw model output by confidence, then run per-class NMS.

    Args:
        org_box: Raw prediction array. Along the last axis, columns 0-3 are
            the box as [x, y, w, h], column 4 is the confidence compared
            against ``conf_thres``, and columns 5+ are per-class scores.
        conf_thres: Rows whose column 4 is not greater than this are dropped.
        iou_thres: IoU threshold forwarded to ``nms``.

    Returns:
        Array of surviving rows [x1, y1, x2, y2, score, class], grouped by
        class id; an empty array when nothing passes the threshold.
    """
    # Flatten to (num_candidates, row_len). Unlike np.squeeze, reshape keeps
    # the row axis even when the model returns a single candidate.
    org_box = np.asarray(org_box)
    org_box = org_box.reshape(-1, org_box.shape[-1])
    box = org_box[org_box[:, 4] > conf_thres]
    if box.shape[0] == 0:
        return np.array([])

    # Class of each remaining row = index of its largest class score.
    cls = np.argmax(box[:, 5:], axis=1)

    # -------------------------------------------------------
    # Handle each class separately:
    # 1. overwrite column 5 with the class index
    # 2. convert xywh -> xyxy
    # 3. run NMS to get the indices of the boxes to keep
    # 4. collect those boxes
    # -------------------------------------------------------
    output = []
    for curr_cls in np.unique(cls):
        curr_cls_box = box[cls == curr_cls, :6].copy()
        curr_cls_box[:, 5] = curr_cls
        curr_cls_box = xywh2xyxy(curr_cls_box)
        for k in nms(curr_cls_box, iou_thres):
            output.append(curr_cls_box[k])
    return np.array(output)
def draw(image, box_data):
    """Draw every detection in ``box_data`` onto ``image`` in place.

    Args:
        image: Frame to draw on; its height and width are read from
            ``image.shape``.
        box_data: Array of rows [x1, y1, x2, y2, score, class], or an empty
            sequence when nothing was detected.
    """
    if len(box_data) == 0:
        # Nothing was detected in this frame.
        print("No objects detected.")
        return

    # Integer coordinates are needed for drawing.
    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)

    # Box coordinates are divided by 640 and multiplied by the frame size,
    # i.e. they are rescaled from a 640x640 space to the frame.
    frame_h, frame_w = image.shape[0], image.shape[1]
    scale_x = frame_w / 640
    scale_y = frame_h / 640

    for (x1, y1, x2, y2), score, cl in zip(boxes, scores, classes):
        label = CLASSES[cl]
        print('class: {}, score: {}'.format(label, score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x1, y1, x2, y2))
        corner_a = (int(x1 * scale_x), int(y1 * scale_y))
        corner_b = (int(x2 * scale_x), int(y2 * scale_y))
        cv2.rectangle(image, corner_a, corner_b, colors[label], 2)
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    corner_a,
                    cv2.FONT_HERSHEY_COMPLEX,
                    0.6, colors[label], 2)
运行结果:
四、蓝牙串口连接
1、硬件连接
因为是蓝牙串口,蓝牙5v连接5v,GND连接GND,TX连接RX,RX连接TX。连接PIN4,6,8,10口。
2、发送逻辑
2.1 检测端逻辑
1、在检测函数添加,如果检测到有人戴了安全帽,则将这一帧检测后的图片保存
2、命名为./img/当前时间.jpg
2.2 发送端逻辑
1、安装串口通信库函数
2、打开串口,设置波特率为115200(本文HC-05模块所用的波特率);按8N1帧格式每字节占10位计算,理论上每秒最多可以发送约11.5KB数据
3、使用多线程,读取最新的一张图片,先发送图片大小;如果这张图片已经发送而又没有新图片,则等待
4、以字节方式读取图片,然后发送
2.3 接收端逻辑
1、连接蓝牙
2、打开串口,设置波特率,连接
3、如果收到图片信息,则保存为当前时间.jpg
3、Jetson nano端
修改检测函数:
def draw(image, box_data):
    """Draw every detection onto ``image`` and save the annotated frame.

    When ``box_data`` is non-empty, the boxes and labels are drawn in place
    and the frame is written to ``./img/<YYYYmmddHHMMSS>.jpg``.

    Args:
        image: Frame to draw on; its height and width are read from
            ``image.shape``.
        box_data: Array of rows [x1, y1, x2, y2, score, class], or an empty
            sequence when nothing was detected.
    """
    # Imported here because the detection script's import block does not
    # include these modules.
    import os
    from datetime import datetime

    if len(box_data) == 0:
        # Nothing was detected in this frame.
        print("No objects detected.")
        return

    # Integer coordinates are needed for drawing.
    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)

    # Box coordinates are rescaled from a 640x640 space to the frame size.
    img_height_o = image.shape[0]
    img_width_o = image.shape[1]
    x_ratio = img_width_o / 640
    y_ratio = img_height_o / 640

    for (x1, y1, x2, y2), score, cl in zip(boxes, scores, classes):
        label = CLASSES[cl]
        print('class: {}, score: {}'.format(label, score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x1, y1, x2, y2))
        corner_a = (int(x1 * x_ratio), int(y1 * y_ratio))
        corner_b = (int(x2 * x_ratio), int(y2 * y_ratio))
        cv2.rectangle(image, corner_a, corner_b, colors[label], 2)
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    corner_a,
                    cv2.FONT_HERSHEY_COMPLEX,
                    0.6, colors[label], 2)

    # Save once per frame, after all boxes are drawn. "%d" (not "%%d", which
    # emits a literal "%d") puts the day of month into the file name.
    # NOTE(review): the frame is saved for ANY detection, including 'hat';
    # confirm whether only 'nohat' frames should be saved.
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    os.makedirs("./img", exist_ok=True)  # imwrite does not create directories
    filename = f"./img/{current_time}.jpg"
    cv2.imwrite(filename, image)
发送函数。先以二进制方式读取图片文件,先发送图片大小,再发送图片数据:
打开串口权限:sudo chmod 777 /dev/ttyTHS1
import threading
import time
import os
import serial
from PIL import Image
def send_image(ser, image_path):
    """Send one image file over the serial port.

    The payload is preceded by a header line holding the byte count as
    decimal text followed by a newline. Serial errors are printed, not raised.

    Args:
        ser: Serial port object providing ``write``.
        image_path: Path of the file to send.
    """
    try:
        started = time.time()
        with open(image_path, 'rb') as fh:
            payload = fh.read()
        n_bytes = len(payload)
        header = f"{n_bytes}\n".encode('utf-8')
        ser.write(header)
        ser.write(payload)
        finished = time.time()
        print(image_path)
        print(f"Image data of size {n_bytes} bytes sent in {finished - started} seconds.")
    except serial.SerialException as e:
        print(f"Error: {e}")
def monitor_directory(directory, ser, poll_interval=1, stop_event=None):
    """Poll ``directory`` and send the most recently modified ``.jpg``.

    Each new latest file is resized to 400x300 in place and then passed to
    ``send_image``. Errors in one cycle are printed and the loop continues.

    Args:
        directory: Folder to watch.
        ser: Serial port object forwarded to ``send_image``.
        poll_interval: Seconds to wait between polls.
        stop_event: Optional object with ``is_set()`` (e.g. a
            ``threading.Event``); the loop ends once it is set. With the
            default ``None`` the loop runs forever, as before.
    """
    last_file = None
    while stop_event is None or not stop_event.is_set():
        try:
            jpg_files = [f for f in os.listdir(directory) if f.endswith('.jpg')]
            if jpg_files:
                latest_file = max(
                    jpg_files,
                    key=lambda f: os.path.getmtime(os.path.join(directory, f)))
                if latest_file != last_file:
                    image_path = os.path.join(directory, latest_file)
                    # LANCZOS is the same filter ANTIALIAS aliased; the old
                    # name no longer exists in recent Pillow releases.
                    with Image.open(image_path) as img:
                        resized = img.resize((400, 300), Image.LANCZOS)
                    resized.save(image_path)
                    send_image(ser, image_path)
                    # Remember the file only after the whole cycle succeeded,
                    # so a failed attempt is retried on the next poll.
                    last_file = latest_file
        except Exception as e:
            print(f"Error: {e}")
        # Always wait between polls, including when the folder is empty,
        # so the loop never spins at full CPU.
        time.sleep(poll_interval)
if __name__ == "__main__":
    # Open the serial port and give it a moment before using it.
    port_settings = {"port": "/dev/ttyTHS1", "baudrate": 115200}
    ser = serial.Serial(**port_settings)
    time.sleep(1)

    # Watch the "img" folder on a worker thread and block until it ends.
    worker = threading.Thread(target=monitor_directory, args=("img", ser))
    worker.start()
    worker.join()
运行函数要sudo python main.py。
五、PC端接收
电脑打开蓝牙连接,选择你的蓝牙型号,连接。
此时,蓝牙模块上的灯不闪就说明已经连接上了。
1、使用ttk创建交互界面
该界面有串口打开,串口选择,图像读取,接收功能。
def __init__(self, root):
    """Build the serial-port window and start the background receiver.

    Args:
        root: Tk root window that hosts all widgets.
    """
    pad = {'padx': 10, 'pady': 10}  # grid padding shared by every widget

    self.root = root
    self.root.title("蓝牙收发")
    self.com_port = tk.StringVar()
    self.baudrate = tk.IntVar(value=9600)
    self.serial_connection = None
    self.img_dir = './img'

    # --- COM port row: port list, search button, open button ---
    self.frame = ttk.LabelFrame(root, text="Select COM Port")
    self.frame.grid(column=0, row=0, **pad)
    self.combobox = ttk.Combobox(self.frame, textvariable=self.com_port)
    self.combobox.grid(column=0, row=0, **pad)
    self.search_button = ttk.Button(
        self.frame, text="Search COM Ports", command=self.search_com_ports)
    self.search_button.grid(column=1, row=0, **pad)
    self.open_button = ttk.Button(
        self.frame, text="Open Port", command=self.open_port)
    self.open_button.grid(column=2, row=0, **pad)

    # --- Baud rate row: label, selector (first entry preselected) ---
    self.baudrate_label = ttk.Label(self.frame, text="Select Baudrate:")
    self.baudrate_label.grid(column=0, row=1, **pad)
    self.baudrate_combobox = ttk.Combobox(
        self.frame, textvariable=self.baudrate, values=[9600, 115200])
    self.baudrate_combobox.grid(column=1, row=1, **pad)
    self.baudrate_combobox.current(0)

    # --- Close button, on the baud rate row ---
    self.close_button = ttk.Button(
        self.frame, text="Close Port", command=self.close_port)
    self.close_button.grid(column=2, row=1, **pad)

    # --- Send area: text entry and send button ---
    self.send_frame = ttk.LabelFrame(root, text="Send Data")
    self.send_frame.grid(column=0, row=2, **pad)
    self.send_entry = ttk.Entry(self.send_frame, width=30)
    self.send_entry.grid(column=0, row=0, **pad)
    self.send_button = ttk.Button(
        self.send_frame, text="Send", command=self.send_data)
    self.send_button.grid(column=1, row=0, **pad)

    # No text widget is created for received data; the receiver runs on
    # its own thread, started here.
    self.running = True
    self.receive_thread = threading.Thread(target=self.receive_data)
    self.receive_thread.start()
    self.current_image_name = None

    # Image display panel.
    self.show_image_frame()
2、解析二进制图片数据
def receive_data(self):
    """Receive size-prefixed images from the serial port and save them.

    Runs until ``self.running`` becomes false or a serial error occurs.
    Each image arrives as a line of decimal digits (the byte count)
    followed by that many raw bytes, and is written to
    ``<self.img_dir>/<YYYYmmddHHMMSS>.jpg``.
    """
    while self.running:
        port = self.serial_connection
        if not (port and port.is_open):
            # No open port yet: wait briefly instead of spinning the CPU.
            time.sleep(0.1)
            continue
        try:
            # Header line with the image size. Undecodable bytes (e.g. when
            # reading starts in the middle of an image) are ignored so they
            # cannot raise and end the thread.
            image_size_str = port.readline().decode('utf-8', errors='ignore').strip()
            if not image_size_str.isdigit():
                continue  # not a size header; keep waiting
            image_size = int(image_size_str)
            print(f"Expected image size: {image_size} bytes")

            # Image payload; an empty read means the port timed out.
            image_data = bytearray()
            while len(image_data) < image_size:
                packet = port.read(image_size - len(image_data))
                if not packet:
                    print("Error: Image data reception incomplete.")
                    break
                image_data.extend(packet)

            if len(image_data) == image_size:
                # Save under the current timestamp.
                current_time = datetime.now().strftime("%Y%m%d%H%M%S")
                os.makedirs(self.img_dir, exist_ok=True)
                image_filename = os.path.join(self.img_dir, f"{current_time}.jpg")
                with open(image_filename, 'wb') as img_file:
                    img_file.write(image_data)
                self.receive_frame = f"Image received and saved as {image_filename}"
                print(f"Image received and saved as {image_filename}")
            else:
                print("Image reception was not completed successfully.")
        except serial.SerialException as e:
            print(f"Error: {e}")
            break  # leave the loop on a serial error
3、实现效果
4、接收端代码
import tkinter as tk
from tkinter import ttk, scrolledtext
import serial
import serial.tools.list_ports
import threading
from datetime import datetime
import time
import os
import tkinter as tk
from tkinter import ttk
from tkinter import Label
from PIL import Image, ImageTk
import os
import time
import threading
class SerialGUI:
    """Tk window that receives images over a serial port and shows the newest.

    A background thread reads size-prefixed images from the port and saves
    them into ``img_dir``; the GUI polls that folder and displays the most
    recently modified ``.jpg``.
    """

    def __init__(self, root):
        """Build the widgets and start the background receiver.

        Args:
            root: Tk root window that hosts all widgets.
        """
        pad = {'padx': 10, 'pady': 10}  # grid padding shared by the widgets

        self.root = root
        self.root.title("蓝牙收发")
        self.com_port = tk.StringVar()
        self.baudrate = tk.IntVar(value=9600)
        self.serial_connection = None
        self.img_dir = './img'
        # Both the receiver and the viewer use this folder; create it up
        # front so neither fails on a missing directory.
        os.makedirs(self.img_dir, exist_ok=True)

        # COM port selection frame
        self.frame = ttk.LabelFrame(root, text="Select COM Port")
        self.frame.grid(column=0, row=0, **pad)
        self.combobox = ttk.Combobox(self.frame, textvariable=self.com_port)
        self.combobox.grid(column=0, row=0, **pad)
        self.search_button = ttk.Button(
            self.frame, text="Search COM Ports", command=self.search_com_ports)
        self.search_button.grid(column=1, row=0, **pad)
        self.open_button = ttk.Button(
            self.frame, text="Open Port", command=self.open_port)
        self.open_button.grid(column=2, row=0, **pad)

        # Baudrate selection (first entry preselected)
        self.baudrate_label = ttk.Label(self.frame, text="Select Baudrate:")
        self.baudrate_label.grid(column=0, row=1, **pad)
        self.baudrate_combobox = ttk.Combobox(
            self.frame, textvariable=self.baudrate, values=[9600, 115200])
        self.baudrate_combobox.grid(column=1, row=1, **pad)
        self.baudrate_combobox.current(0)

        # Close port button
        self.close_button = ttk.Button(
            self.frame, text="Close Port", command=self.close_port)
        self.close_button.grid(column=2, row=1, **pad)

        # Sending frame
        self.send_frame = ttk.LabelFrame(root, text="Send Data")
        self.send_frame.grid(column=0, row=2, **pad)
        self.send_entry = ttk.Entry(self.send_frame, width=30)
        self.send_entry.grid(column=0, row=0, **pad)
        self.send_button = ttk.Button(
            self.send_frame, text="Send", command=self.send_data)
        self.send_button.grid(column=1, row=0, **pad)

        # Start receiving data in a separate thread
        self.running = True
        self.receive_thread = threading.Thread(target=self.receive_data)
        self.receive_thread.start()
        self.current_image_name = None

        # Show image frame
        self.show_image_frame()

    def show_image_frame(self):
        """Create the image panel and start polling ``img_dir`` every 500 ms."""
        self.image_frame = ttk.LabelFrame(
            self.root, text="Image Display", width=640, height=480)
        self.image_frame.grid(column=1, row=0, rowspan=4, padx=10, pady=10)
        self.current_image_name = None  # name of the image currently shown

        def update_image():
            """Show the newest .jpg if it changed, then reschedule itself."""
            try:
                jpg_files = [f for f in os.listdir(self.img_dir) if f.endswith('.jpg')]
                if jpg_files:
                    new_image_name = max(
                        jpg_files,
                        key=lambda f: os.path.getmtime(os.path.join(self.img_dir, f)))
                    if new_image_name != self.current_image_name:
                        image_path = os.path.join(self.img_dir, new_image_name)
                        with Image.open(image_path) as image:
                            image.thumbnail((640, 480))  # fit the panel
                            photo = ImageTk.PhotoImage(image)
                        if hasattr(self, 'image_label'):
                            self.image_label.config(image=photo)
                        else:
                            self.image_label = ttk.Label(self.image_frame, image=photo)
                            self.image_label.grid(column=0, row=0, padx=10, pady=10)
                        # Keep a reference to avoid garbage collection.
                        self.image_label.image = photo
                        # Record the name only after a successful load, so a
                        # file that was still being written is retried.
                        self.current_image_name = new_image_name
            except Exception as e:
                print(f"Error accessing image directory: {e}")
            # Schedule on the root window: the label does not exist until
            # the first image has been shown.
            self.root.after(500, update_image)

        # Start updating the displayed image
        update_image()

    def search_com_ports(self):
        """Fill the port combobox with the detected serial ports."""
        com_ports = [port.device for port in serial.tools.list_ports.comports()]
        self.combobox['values'] = com_ports
        if com_ports:
            self.combobox.current(0)

    def open_port(self):
        """Open the selected port at the selected baud rate (1 s timeout)."""
        com_port = self.com_port.get()
        baudrate = self.baudrate.get()
        if com_port:
            try:
                self.serial_connection = serial.Serial(com_port, baudrate, timeout=1)
                print(f"Opened port {com_port} at {baudrate} baud.")
            except serial.SerialException as e:
                print(f"Failed to open port {com_port}: {e}")

    def close_port(self):
        """Close the serial port if it is open."""
        if self.serial_connection and self.serial_connection.is_open:
            self.serial_connection.close()
            print("Serial port closed.")
        else:
            print("Serial port is not open or already closed.")

    def send_data(self):
        """Send the text in the entry box over the port, UTF-8 encoded."""
        if self.serial_connection and self.serial_connection.is_open:
            data = self.send_entry.get()
            self.serial_connection.write(data.encode('utf-8'))
            print(f"Sent: {data}")
        else:
            print("Serial port is not open.")

    def receive_data(self):
        """Receive size-prefixed images from the port and save them.

        Runs until ``self.running`` becomes false or a serial error occurs.
        Each image arrives as a line of decimal digits (the byte count)
        followed by that many raw bytes, and is written to
        ``<img_dir>/<YYYYmmddHHMMSS>.jpg``.
        """
        while self.running:
            port = self.serial_connection
            if not (port and port.is_open):
                # No open port yet: wait briefly instead of spinning the CPU.
                time.sleep(0.1)
                continue
            try:
                # Header line with the image size. Undecodable bytes are
                # ignored so they cannot raise and end the thread.
                image_size_str = port.readline().decode('utf-8', errors='ignore').strip()
                if not image_size_str.isdigit():
                    continue  # not a size header; keep waiting
                image_size = int(image_size_str)
                print(f"Expected image size: {image_size} bytes")

                # Image payload; an empty read means the port timed out.
                image_data = bytearray()
                while len(image_data) < image_size:
                    packet = port.read(image_size - len(image_data))
                    if not packet:
                        print("Error: Image data reception incomplete.")
                        break
                    image_data.extend(packet)

                if len(image_data) == image_size:
                    # Save under the current timestamp.
                    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
                    os.makedirs(self.img_dir, exist_ok=True)
                    image_filename = os.path.join(self.img_dir, f"{current_time}.jpg")
                    with open(image_filename, 'wb') as img_file:
                        img_file.write(image_data)
                    self.receive_frame = f"Image received and saved as {image_filename}"
                    print(f"Image received and saved as {image_filename}")
                else:
                    print("Image reception was not completed successfully.")
            except serial.SerialException as e:
                print(f"Error: {e}")
                break  # leave the loop on a serial error

    def close(self):
        """Stop the receiver loop, close the port and destroy the window."""
        self.running = False
        if self.serial_connection and self.serial_connection.is_open:
            self.serial_connection.close()
        self.root.destroy()
if __name__ == "__main__":
    # Create the Tk root, attach the GUI, and route the window-close button
    # through the GUI's own shutdown method before entering the event loop.
    window = tk.Tk()
    app = SerialGUI(window)
    window.protocol("WM_DELETE_WINDOW", app.close)
    window.mainloop()
六、应用场景
1、监控摄像头抓拍
将jetson nano与esp32连接,可以将抓拍数据上传到云端,然后使用网络查看。
2、工地检测没有戴安全帽就发出警告
与单片机连接,jetson nano检测,单片机执行,减少jetson cpu占用时间,提高检测效率。
部署车牌检测:传送门