基于Yolov5调用摄像头，能选择类名，控制台输出类名及目标坐标值的源代码(含注释)

最新推荐文章于 2024-08-30 10:12:56 发布

不良帅.

最新推荐文章于 2024-08-30 10:12:56 发布

阅读量298

点赞数 15

文章标签： YOLO

本文链接：https://blog.csdn.net/m0_75116965/article/details/140755283

版权

基于Yolov5调用摄像头，能选择类名，控制台输出类名及目标坐标值的源代码(含注释)

温馨提示：

需要从官网下载或者自行训练好您所需要的yolo模型pt文件，并将该文件的路径设置到config.py的WEIGHTS参数中
在Obj_detect.py的get_model()函数中，我默认使用select_device('')自动选择设备（装了CUDA时会使用GPU）；如需用CPU运行，将其改为select_device('cpu')即可
本文及基于yolo模型的使用和优化将持续更新，感谢大家关注！

源码及介绍

核心检测算法模块

Obj_detect.py是用于进行目标检测的核心文件，主要包含：

get_model():用于加载模型，进行一系列初始化操作
pred_img(img0,class_name):用于对传入的图片进行目标检测(这里调用摄像头相当于传入每一帧的图像)，同时根据所给类名进行选择目标检测

下面是源代码:

#Obj_detect.py
import cv2
import torch
import numpy as np

from NewTools.config import *
from models.experimental import attempt_load
from utils.augmentations import letterbox
from utils.general import check_img_size, increment_path, non_max_suppression, scale_coords, xyxy2xywh
from utils.plots import Annotator, colors
from utils.torch_utils import select_device


# Cache of the loaded (model, device, half, stride, names) bundle, so the
# weights are read from disk only once per process.
_MODEL_CACHE = None


def get_model():
    """Load the detection model described by ``WEIGHTS`` and return it.

    The first call selects the device and loads the weights. Later calls
    return the same cached bundle, which matters because ``pred_img`` asks
    for the model on every frame.

    Returns:
        A tuple ``(model, device, half, stride, names)``:
        the loaded model, the device chosen by ``select_device``, whether the
        model was converted to FP16, the maximum model stride as an int, and
        the class names stored on the model.
    """
    global _MODEL_CACHE
    if _MODEL_CACHE is not None:
        return _MODEL_CACHE

    # An empty string lets select_device choose the device automatically;
    # pass 'cpu' to force CPU inference.
    device = select_device('')

    # FP16 is switched off here. Set request_half to True to enable it; half
    # precision is only supported on CUDA, so it stays False on CPU.
    request_half = False
    half = request_half and device.type != 'cpu'

    # WEIGHTS is the path of the model file to load.
    if 'torchscript' in WEIGHTS:
        # Load the TorchScript archive straight onto the selected device.
        model = torch.jit.load(WEIGHTS, map_location=device)
    else:
        model = attempt_load(WEIGHTS, device=device)

    stride = int(model.stride.max())  # model stride
    # Class names live on the wrapped module when the model is wrapped.
    names = model.module.names if hasattr(model, 'module') else model.names
    if half:
        model.half()  # to FP16

    _MODEL_CACHE = (model, device, half, stride, names)
    return _MODEL_CACHE

# Image processing
@torch.no_grad()  # inference only, no gradient tracking
def pred_img(img0, class_name):
    """Detect objects of one class in a single image and draw their boxes.

    Args:
        img0: Input image as a numpy array in HWC layout with BGR channel
            order (for example one webcam frame). It is not modified.
        class_name: Name of the class to keep; detections of every other
            class are skipped.

    Returns:
        A tuple ``(im0, xywh_list)``: ``im0`` is the annotated copy of
        ``img0``, and ``xywh_list`` holds one normalized
        ``[x_center, y_center, width, height]`` list per kept detection.

    Side effects:
        Prints a dict with class, coordinates and score for every kept
        detection.
    """
    model, device, half, stride, names = get_model()
    imgsz = check_img_size(IMGSZ, s=stride)  # check image size

    # Padded resize
    img = letterbox(img0, imgsz, stride=stride, auto=True)[0]

    # Convert
    img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img = np.ascontiguousarray(img)

    # Cast to the model's precision BEFORE scaling. Dividing the uint8 numpy
    # array by 255.0 would produce float64, which does not match the model's
    # float32 / float16 weights.
    img_tensor = torch.from_numpy(img).to(device)
    img_tensor = img_tensor.half() if half else img_tensor.float()
    img_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img_tensor.ndim == 3:
        img_tensor = img_tensor[None]  # expand for batch dim

    # Run the model and keep the prediction tensor.
    pred = model(img_tensor, augment=False, visualize=False)[0]

    # NMS
    pred = non_max_suppression(pred, CONF_THRES, IOU_THRES, None, False, max_det=1000)
    # Only one image is passed per call, so only the first result is needed.
    det = pred[0]

    im0 = img0.copy()
    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

    # The annotator draws boxes (and optional labels) onto im0.
    annotator = Annotator(im0, line_width=LINE_THICKNESS, example=str(names))

    # Normalized centre/size coordinates of every kept detection.
    xywh_list = []

    if len(det):
        # Rescale boxes from the network input size to the im0 size.
        det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], im0.shape).round()

        for *xyxy, conf, cls in reversed(det):
            c = int(cls)  # integer class index

            # Keep only the class requested by the caller.
            if names[c] != class_name:
                continue

            # x, y are the box centre; w, h are the box width and height,
            # all divided by the image width/height.
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            xywh_list.append(xywh)

            dic = {
                'class': f'{names[c]}',  # class name of the detection
                'location1': torch.tensor(xyxy).tolist(),  # top-left / bottom-right corners
                'location2': list(xywh),  # centre, width, height
                'score': round(float(conf) * 100, 2)  # confidence as a percentage
            }
            print(dic)

            # Build the label text according to the HIDE_* settings, then draw.
            label = None if HIDE_LABELS else (names[c] if HIDE_CONF else f'{names[c]} {conf:.2f}')
            annotator.box_label(xyxy, label, color=colors(c, True))

    im0 = annotator.result()

    return im0, xywh_list

参数模块

config.py用于设置目标检测yolo模型的一些必要参数

下面是源代码：

# config.py
# Settings shared by the YOLOv5 detection scripts.

# Path of the model weights (.pt) file to load.
WEIGHTS = 'F:/yolov5 exp/yolov5/yolov5/yolov5s.pt'
# Requested inference image size.
IMGSZ = [640, 640]

# Name of the class to detect; detections of other classes are skipped.
DETECT_CLASS = 'person'

# Confidence threshold passed to non-max suppression.
CONF_THRES = 0.3
# IoU threshold passed to non-max suppression.
IOU_THRES = 0.45

# Line width of the drawn boxes.
LINE_THICKNESS = 3

# HIDE_CONF hides the confidence and HIDE_LABELS hides the whole label
# (class name) on the drawn boxes. Both are True here, so boxes are drawn
# without any text; set them to False to show the label and confidence.
HIDE_CONF = True
HIDE_LABELS = True

调用摄像头主函数模块

webcam_screen.py是主程序文件，用于调用摄像头，并利用yolo模型对每一帧图像进行目标识别

下面是源代码：

#webcam_screen.py
import mss
import numpy as np
import cv2
import win32gui,win32con
from Obj_detect import pred_img #导入
from config import *

screen_width = 1920  # screen width in pixels
screen_height = 1080  # screen height in pixels
# Size of the preview window: one fifth of the screen in each direction.
RESIZE_WIN_WIDTH, RESIZE_WIN_HEIGHT = screen_width // 5, screen_height // 5
window_name = 'Webcam screen'
ESC_KEY = 27  # key code of the ESC key


def _pin_window_topmost(title):
    """Keep the window titled ``title`` above other windows.

    Returns True when the window was found and pinned, False when no window
    with that title exists yet.
    """
    hwnd = win32gui.FindWindow(None, title)
    if not hwnd:
        return False
    win32gui.ShowWindow(hwnd, win32con.SW_SHOWNORMAL)
    win32gui.SetWindowPos(
        hwnd, win32con.HWND_TOPMOST, 0, 0, 0, 0,
        win32con.SWP_NOMOVE | win32con.SWP_NOACTIVATE | win32con.SWP_SHOWWINDOW | win32con.SWP_NOSIZE,
    )
    return True


def main():
    """Read webcam frames, run detection on each one and show the result.

    The loop ends when a frame cannot be read or when ESC is pressed. The
    camera and the preview window are released in both cases.
    """
    # 0 selects the first camera; use another index for other cameras.
    cap = cv2.VideoCapture(0)
    pinned = False
    try:
        # Create and size the window once instead of on every frame.
        # cv2.WINDOW_NORMAL scales the image to the window size.
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, RESIZE_WIN_WIDTH, RESIZE_WIN_HEIGHT)

        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法从摄像头获取图像")
                break

            # Drop the alpha channel only when the frame really has one.
            if frame.ndim == 3 and frame.shape[2] == 4:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)

            # aims receives the detection result (xywh_list).
            frame, aims = pred_img(frame, DETECT_CLASS)

            cv2.imshow(window_name, frame)
            # Pin the window on top once it exists.
            if not pinned:
                pinned = _pin_window_topmost(window_name)

            if cv2.waitKey(1) & 0xFF == ESC_KEY:  # ESC closes the window
                print('结束进程中...')
                break
    finally:
        # Always free the camera and close the window, whatever ended the loop.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()