The effect is shown in the video below:
YOLOv8 GitHub repo: GitHub - ultralytics/ultralytics: NEW - YOLOv8 🚀 in PyTorch > ONNX > OpenVINO > CoreML > TFLite
Without further ado, here is the code (before running it, place the script inside your project directory and make sure the dependencies are installed, e.g. pip install ultralytics opencv-python):
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from collections import defaultdict

class VehicleTracker:
    def __init__(self, model_path, video_path, output_path):
        self.model = YOLO(model_path)
        self.cap = cv2.VideoCapture(video_path)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.size = (int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.fNUMS = self.cap.get(cv2.CAP_PROP_FRAME_COUNT)
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.videoWriter = cv2.VideoWriter(output_path, self.fourcc, self.fps, self.size)
        self.track_history = defaultdict(list)
        self.vehicle_in = 0
        self.vehicle_out = 0
    def box_label(self, image, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        # Make sure the box coordinates are plain Python integers
        if isinstance(box, torch.Tensor):
            box = box.cpu().numpy().astype(int).tolist()
        else:
            box = [int(b) for b in box]
        # Top-left and bottom-right corners of the target rectangle
        p1, p2 = (box[0], box[1]), (box[2], box[3])
        # Draw the bounding box
        cv2.rectangle(image, p1, p2, color, thickness=1, lineType=cv2.LINE_AA)
        if label:
            # Measure the label text so we can draw a filled background behind it
            w, h = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, thickness=1)[0]
            # Keep the label inside the image: draw above the box only if there is room
            outside = p1[1] - h >= 3
            p2_text = (p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3)
            cv2.rectangle(image, p1, p2_text, color, -1)  # filled background
            # Draw the label text
            cv2.putText(image,
                        label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        txt_color,
                        thickness=1)
    def process_frame(self, frame):
        # COCO class indices for the vehicle types we want to count,
        # each mapped to a label and a box color
        vehicle_classes = {2: ('car', (0, 255, 22)),
                           3: ('motor', (186, 55, 2)),
                           5: ('bus', (132, 161, 255)),
                           7: ('truck', (19, 222, 24))}
        # The counting line is the horizontal line at y = size[1] - 400
        line_y = self.size[1] - 400
        results = self.model.track(frame, conf=0.3, persist=True)
        if results is None or len(results) == 0 or results[0].boxes.id is None:
            return
        track_ids = results[0].boxes.id.int().cpu().tolist()
        for track_id, box in zip(track_ids, results[0].boxes.data):
            cls_id = int(box[-1])
            if cls_id not in vehicle_classes:
                continue
            name, color = vehicle_classes[cls_id]
            # Draw the bounding box for this target
            self.box_label(frame, box, '#' + str(track_id) + ' ' + name, color)
            # Center point (x, y) of the bounding box
            x1, y1, x2, y2 = box[:4]
            x = (x1 + x2) / 2
            y = (y1 + y2) / 2
            # Fetch this ID's history of center points; when the ID appears
            # for the first time, the defaultdict creates a fresh list
            track = self.track_history[track_id]
            track.append((float(x), float(y)))  # append the current center point
            # We need at least two points before we can compare positions across frames
            if len(track) > 1:
                _, prev_y = track[-2]  # y coordinate in the previous frame
                # Previous frame above the line, current frame below: moving downwards
                if prev_y < line_y and y >= line_y:
                    self.vehicle_out += 1  # increment the "out" count
                # Previous frame below the line, current frame above: moving upwards
                if prev_y > line_y and y <= line_y:
                    self.vehicle_in += 1  # increment the "in" count
        # Draw the counting line
        cv2.line(frame, (30, line_y), (self.size[0] - 30, line_y), color=(222, 33, 189),
                 thickness=2,
                 lineType=cv2.LINE_4)
        # Show the running in/out counts
        cv2.putText(frame, 'Car Up : ' + str(self.vehicle_in), (595, self.size[1] - 410),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 60, 255), 2)
        cv2.putText(frame, 'Car Down : ' + str(self.vehicle_out), (573, self.size[1] - 370),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (123, 90, 55), 2)
        cv2.putText(frame, "HY.", (25, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 44, 255), 2)
        cv2.imshow("YOLOv8 Tracking", frame)  # display the annotated frame
        self.videoWriter.write(frame)  # write it to the output video
        if cv2.waitKey(1) & 0xFF == ord('s'):  # press 's' to save a snapshot
            cv2.imwrite('yolo_carCounting.jpg', frame)
    def run(self):
        while self.cap.isOpened():
            success, frame = self.cap.read()
            if success:
                self.process_frame(frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):  # press 'q' to quit
                    break
            else:
                break
        self.release_resources()

    def release_resources(self):
        self.cap.release()
        self.videoWriter.release()
        cv2.destroyAllWindows()
# Entry point
def main():
    model_path = 'yolov8n.pt'
    # video_path = 0  # uncomment to use the webcam instead
    video_path = r"E:\SelfLearning\CV\Carpalte\car.mp4"
    output_path = "./counting.mp4"
    tracker = VehicleTracker(model_path, video_path, output_path)
    tracker.run()

if __name__ == "__main__":
    main()
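A quick aside: the class indices used above (2, 3, 5, 7) come from the COCO label map that ships with the pretrained weights. If you want to confirm them yourself, a one-off check like this works (model.names is the Ultralytics index-to-name map):

from ultralytics import YOLO

names = YOLO('yolov8n.pt').names
print({i: names[i] for i in (2, 3, 5, 7)})
# prints: {2: 'car', 3: 'motorcycle', 5: 'bus', 7: 'truck'}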
You could also integrate these features into a Qt interface; for many students that alone might just about take care of a graduation project!
The result looks roughly like this:
The Qt interface code is as follows:
from PySide6 import QtWidgets, QtCore, QtGui
import cv2, os, time
from threading import Thread

# Silence the debug output that YOLO prints on every inference;
# this must be set before ultralytics is imported
os.environ['YOLO_VERBOSE'] = 'False'
from ultralytics import YOLO
class MWindow(QtWidgets.QMainWindow):
    def __init__(self):
        super().__init__()
        # Build the UI
        self.setupUI()
        self.camBtn.clicked.connect(self.startCamera)
        self.stopBtn.clicked.connect(self.stop)
        # Timer that paces how often a video frame is grabbed and displayed
        self.timer_camera = QtCore.QTimer()
        # On each timeout, call self.show_camera
        self.timer_camera.timeout.connect(self.show_camera)
        # Load the YOLO nano model; the first load is slow, around 20 seconds
        self.model = YOLO('yolov8n.pt')
        # Queue of frames waiting to be processed; holds at most one frame
        self.frameToAnalyze = []
        # Start the frame-analysis worker thread
        Thread(target=self.frameAnalyzeThreadFunc, daemon=True).start()
    def setupUI(self):
        self.resize(1200, 800)
        self.setWindowTitle('YOLO-Qt demo')
        # Central widget
        centralWidget = QtWidgets.QWidget(self)
        self.setCentralWidget(centralWidget)
        # Main layout inside the central widget
        mainLayout = QtWidgets.QVBoxLayout(centralWidget)
        # Upper half of the window: the two video panes
        topLayout = QtWidgets.QHBoxLayout()
        self.label_ori_video = QtWidgets.QLabel(self)
        self.label_treated = QtWidgets.QLabel(self)
        self.label_ori_video.setMinimumSize(520, 400)
        self.label_treated.setMinimumSize(520, 400)
        self.label_ori_video.setStyleSheet('border:1px solid #D7E2F9;')
        self.label_treated.setStyleSheet('border:1px solid #D7E2F9;')
        topLayout.addWidget(self.label_ori_video)
        topLayout.addWidget(self.label_treated)
        mainLayout.addLayout(topLayout)
        # Lower half of the window: log box and buttons
        groupBox = QtWidgets.QGroupBox(self)
        bottomLayout = QtWidgets.QHBoxLayout(groupBox)
        self.textLog = QtWidgets.QTextBrowser()
        bottomLayout.addWidget(self.textLog)
        mainLayout.addWidget(groupBox)
        btnLayout = QtWidgets.QVBoxLayout()
        self.videoBtn = QtWidgets.QPushButton('🎞️ Video file')  # not connected here; see the chooseVideo sketch below
        self.camBtn = QtWidgets.QPushButton('📹 Camera')
        self.stopBtn = QtWidgets.QPushButton('🛑 Stop')
        btnLayout.addWidget(self.videoBtn)
        btnLayout.addWidget(self.camBtn)
        btnLayout.addWidget(self.stopBtn)
        bottomLayout.addLayout(btnLayout)
    def startCamera(self):
        # See https://docs.opencv.org/3.4/dd/d43/tutorial_py_video_display.html
        # On Windows, passing cv2.CAP_DSHOW makes opening the camera much faster;
        # on Linux/Mac, use V4L, FFMPEG or GSTREAMER instead
        self.cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        if not self.cap.isOpened():
            print("Could not open the camera")
            return
        if not self.timer_camera.isActive():  # start the timer if it is not running yet
            self.timer_camera.start(10)
    def show_camera(self):
        ret, frame = self.cap.read()  # grab a frame from the video stream
        if not ret:
            return
        # Resize the captured frame for display
        frame = cv2.resize(frame, (520, 400))
        # Convert to RGB; OpenCV stores images as BGR
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        qImage = QtGui.QImage(frame.data, frame.shape[1], frame.shape[0],
                              QtGui.QImage.Format_RGB888)  # wrap as a QImage
        # Show the QImage in the original-video label
        self.label_ori_video.setPixmap(QtGui.QPixmap.fromImage(qImage))
        # Queue the frame for analysis if no frame is currently pending
        if not self.frameToAnalyze:
            self.frameToAnalyze.append(frame)
    def frameAnalyzeThreadFunc(self):
        while True:
            if not self.frameToAnalyze:
                time.sleep(0.01)
                continue
            start_time = time.time()
            frame = self.frameToAnalyze.pop(0)
            results = self.model(frame)[0]
            # Extract the detections and log them in the text box
            boxes = results.boxes.xyxy.cpu().numpy()                # box coordinates
            classes = results.boxes.cls.cpu().numpy().astype(int)   # class indices
            scores = results.boxes.conf.cpu().numpy()               # confidences
            detections = []
            for i, box in enumerate(boxes):
                if scores[i] > 0.5:  # only log detections with confidence above 0.5
                    cls_name = results.names[classes[i]]
                    detections.append(f"{cls_name}: {scores[i]:.2f}")
            current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            fps = 1 / (time.time() - start_time)
            self.textLog.append(f"Time: {current_time}, FPS: {fps:.2f}, Detections: {', '.join(detections)}")
            img = results.plot(line_width=1)
            qImage = QtGui.QImage(img.data, img.shape[1], img.shape[0],
                                  QtGui.QImage.Format_RGB888)  # wrap as a QImage
            # Show the annotated QImage in the processed-video label.
            # Note: strictly speaking Qt widgets should only be touched from the
            # GUI thread; see the signal/slot sketch after this code
            self.label_treated.setPixmap(QtGui.QPixmap.fromImage(qImage))
            time.sleep(0.5)
    def stop(self):
        self.timer_camera.stop()  # stop the display timer
        if hasattr(self, 'cap'):  # guard: the camera may never have been started
            self.cap.release()  # release the video stream
        self.label_ori_video.clear()  # clear both video panes
        self.label_treated.clear()
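    # Hypothetical sketch (not in the original code): the 'Video file' button is
    # created in setupUI but never connected. A minimal handler could look like
    # this; to enable it, add
    #     self.videoBtn.clicked.connect(self.chooseVideo)
    # in __init__.
    def chooseVideo(self):
        path, _ = QtWidgets.QFileDialog.getOpenFileName(
            self, 'Choose a video file', '.', 'Video files (*.mp4 *.avi)')
        if path:
            self.cap = cv2.VideoCapture(path)
            if not self.timer_camera.isActive():
                self.timer_camera.start(30)  # roughly 30 ms per displayed frame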
app = QtWidgets.QApplication()
window = MWindow()
window.show()
app.exec()
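One caveat about the code above: label_treated.setPixmap(...) is called from the worker thread, and Qt widgets are not thread-safe; updates should go through the GUI thread. The usual fix is to emit a signal from the worker and let Qt queue the slot call onto the GUI thread. A minimal, self-contained sketch of that pattern (hypothetical names, assuming PySide6):

import sys, time
from threading import Thread
from PySide6 import QtWidgets, QtCore

class Demo(QtWidgets.QLabel):
    # Cross-thread channel: emitting from the worker thread automatically
    # queues the connected slot onto the GUI thread
    textReady = QtCore.Signal(str)

    def __init__(self):
        super().__init__('waiting...')
        self.textReady.connect(self.setText)  # slot runs in the GUI thread
        Thread(target=self.work, daemon=True).start()

    def work(self):
        for i in range(10):
            time.sleep(0.5)
            self.textReady.emit(f'frame {i}')  # safe: only emits a signal

app = QtWidgets.QApplication(sys.argv)
w = Demo()
w.show()
app.exec()

In the tracker app, frameAnalyzeThreadFunc would emit the finished QImage through such a signal instead of calling setPixmap directly.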
If you need design or innovation points for a project, here are a few suggestions:
1. Tie the system to a concrete application scenario and a specific set of detection targets
2. Train the detection model on a dataset you annotate yourself (see the fine-tuning sketch below)
3. Follow recent network-architecture papers, swap a new architecture in for the original one, and evaluate the effect
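For suggestion 2, Ultralytics exposes training directly on the YOLO object. A minimal sketch, where my_vehicles.yaml stands in for your own dataset config in the Ultralytics YOLO format:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # start from the pretrained nano weights
model.train(data='my_vehicles.yaml', epochs=100, imgsz=640)  # fine-tune on your data
metrics = model.val()  # evaluate on the validation split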