【深度学习之YOLOv8】视频流推断

春马与夏

已于 2023-07-22 13:34:26 修改

阅读量1.8k

点赞数 3

文章标签：深度学习、人工智能

于 2023-07-21 17:06:47 首次发布

本文链接：https://blog.csdn.net/qq_43376286/article/details/131856021

版权

该代码实现了一个利用YOLOv8模型进行屏幕图像捕捉和物体检测的程序。通过ScreenCapture类进行屏幕截图，然后用YOLO模型进行预测，显示检测到的物体及其类别。程序使用了mss库进行无GUI截图，cv2和PIL库处理图像，以及ultralytics的YOLO接口。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

官方V8模型下载

需要准备两个东西

ScreenCapture屏幕图像类

import cv2
import mss
import numpy as np


class ScreenCapture:
    """
    Grab a screen rectangle centered on the screen and return it as a BGR array.

    parameters
    ----------
      screen_resolution : Tuple[int, int]
        Screen size in pixels, given as (width, height).
      capture_region : Tuple[float, float]
        Fraction of the screen to capture along x and y. (1.0, 1.0) captures the
        whole screen; smaller values shrink the captured area, which always stays
        centered on the screen center.
      window_name : str
        Name of the display window.
      exit_code : int
        Key code that closes the window (the ASCII value of the key); ESC by default.
    """

    def __init__(self, screen_resolution=(1920, 1080), capture_region=(0.5, 0.5), window_name='test', exit_code=0x1B):
        # mss handle used by grab_screen_mss to take the screenshots.
        self.screen_capture = mss.mss()

        # Full screen size and the requested capture fractions.
        self.screen_width = screen_resolution[0]
        self.screen_height = screen_resolution[1]
        self.capture_region = capture_region
        frac_x = capture_region[0]
        frac_y = capture_region[1]

        # Screen center in pixels.
        half_width = self.screen_width // 2
        half_height = self.screen_height // 2
        self.screen_center_x = half_width
        self.screen_center_y = half_height

        # Size of the captured rectangle.
        self.capture_width = int(self.screen_width * frac_x)
        self.capture_height = int(self.screen_height * frac_y)

        # Top-left corner of the captured rectangle, chosen so the rectangle
        # stays centered: half the screen size times the uncaptured fraction.
        self.capture_left = int(half_width * (1. - frac_x))
        self.capture_top = int(half_height * (1. - frac_y))

        # The display window is one third of the screen in each direction.
        self.display_window_width = self.screen_width // 3
        self.display_window_height = self.screen_height // 3

        # Default region passed to mss when grabbing.
        self.monitor_settings = dict(
            left=self.capture_left,
            top=self.capture_top,
            width=self.capture_width,
            height=self.capture_height,
        )

        self.window_name = window_name
        self.exit_code = exit_code
        self.img = None

    def grab_screen_mss(self, monitor=None):
        """
        Take one screenshot and return it as a 3-channel BGR array.

        parameters
        ----------
          monitor : dict or None
            Region to grab, passed straight to mss; when None, the region
            computed in __init__ (self.monitor_settings) is used.
        """
        region = self.monitor_settings if monitor is None else monitor
        screenshot = self.screen_capture.grab(region)
        # The screenshot is converted to an array and its alpha channel is
        # dropped (BGRA -> BGR).
        frame_bgra = np.array(screenshot)
        return cv2.cvtColor(frame_bgra, cv2.COLOR_BGRA2BGR)

YOLO模型根据图片的数组推断

import cv2
import os
import numpy as np
from ultralytics import YOLO
from PIL import Image, ImageDraw, ImageFont
from ScreenCapture import ScreenCapture
def run(sc, model_path=r'D:\GitProjects\ultralytics\runs\models\yolov8l.pt',
        font_path=r'D:\GitProjects\ultralytics\runs\SIMSUN.ttf', conf=0.75, iou=0.75):
    """Capture the screen in a loop, run YOLO detection and display the annotated frames.

    Each frame is grabbed through ``sc.grab_screen_mss()``, passed to the model,
    annotated with one rectangle and one class-index label per detection, and
    shown in the window named ``sc.window_name``. The loop ends when the key
    whose code equals ``sc.exit_code`` is pressed; the windows are then
    destroyed and the function returns.

    Parameters
    ----------
    sc : ScreenCapture
        Screen grabber; this function reads its ``window_name``,
        ``display_window_width``, ``display_window_height`` and ``exit_code``
        attributes and calls its ``grab_screen_mss()`` method.
    model_path : str
        Path of the YOLO weights file to load.
    font_path : str
        Path of the TrueType font used for the labels.
    conf : float
        Confidence threshold forwarded to ``model.predict``.
    iou : float
        IoU threshold forwarded to ``model.predict``.
    """
    # Load the model and the label font once, before the loop.
    model = YOLO(model_path)
    # NOTE(review): the encoding value ' utf-8' (with a leading space) is kept
    # byte-for-byte from the original; confirm it is what Pillow expects.
    font_style = ImageFont.truetype(font_path, 48, encoding=' utf-8')

    # Create and size the window once. Doing this on every frame is redundant
    # and would undo any manual resize of the WINDOW_NORMAL window.
    cv2.namedWindow(sc.window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(sc.window_name, sc.display_window_width, sc.display_window_height)

    while True:
        # The grabbed frame is in OpenCV's BGR channel order.
        frame_bgr = np.asarray(sc.grab_screen_mss(), dtype=np.uint8)

        # Give the model the numpy frame rather than a PIL image: per the
        # Ultralytics docs numpy inputs are read as BGR and PIL inputs as RGB,
        # so wrapping this BGR buffer in a PIL image swaps red and blue for
        # the model.
        results = model.predict(source=frame_bgr, conf=conf, iou=iou)

        # PIL is used only as a drawing surface (it can render the TrueType
        # font). The buffer stays in BGR order, so (0, 0, 255) is red once the
        # frame is shown by cv2.imshow.
        img = Image.fromarray(frame_bgr)
        draw = ImageDraw.Draw(img)

        for result in results:
            boxes_xyxy = result.boxes.xyxy.tolist()
            boxes_cls = result.boxes.cls.tolist()
            for box_xyxy, box_cls in zip(boxes_xyxy, boxes_cls):
                draw.rectangle(box_xyxy, outline=(0, 0, 255), width=5)
                # Label: the class index, drawn 20 px above the box's top-left corner.
                draw.text((int(box_xyxy[0]), int(box_xyxy[1]) - 20), str(int(box_cls)), (0, 0, 255),
                          font=font_style)

        cv2.imshow(sc.window_name, np.array(img))
        if cv2.waitKey(1) & 0xFF == sc.exit_code:  # ESC with the default exit_code
            cv2.destroyAllWindows()
            # Return instead of os._exit(0) so the interpreter can shut down
            # normally (buffers flushed, cleanup handlers run).
            return
if __name__ == '__main__':
    # Script entry point: build a grabber whose capture fractions are both 1
    # and hand it to the detection loop.
    full_screen_capture = ScreenCapture(capture_region=(1, 1))
    run(full_screen_capture)