2023.5--YOLOV5 版本6.2简化推理代码！打开即用！

本文链接：https://blog.csdn.net/weixin_62894060/article/details/130665208

这是一篇基于YOLOv5的对象检测代码的介绍。该代码是由Python编写的，用于管理和操作YOLOv5模型。这个类库的主要功能是提供一个方便的接口用于加载训练好的模型，处理输入的图像，并进行推理。此外，它还可以将检测结果绘制到原始图像上，以便于进行可视化。

首先，我们需要导入一些必要的库，包括OpenCV，Torch，以及YOLOv5的相关模块。这些库用于图像处理，深度学习模型操作，以及一些工具函数。

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import glob
import sys
from pathlib import Path
import os

# Resolve this script's location and make its directory importable, so that the
# `models` / `utils` packages imported below are found regardless of the cwd.
FILE = Path(__file__).resolve()
ROOT = FILE.parent  # directory holding this file (the YOLOv5 root directory)
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.append(str(ROOT))  # make the YOLOv5 root importable
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # re-express ROOT relative to the cwd
import cv2
import torch
import torch.backends.cudnn as cudnn
from models.common import DetectMultiBackend
from utils.general import (check_img_size, non_max_suppression, scale_coords)
from utils.torch_utils import select_device
import numpy as np
from utils.augmentations import letterbox
from time import sleep

在Yolov5Manager类的初始化函数中，我们设置了模型的参数，包括权重文件的路径、标签名、图像大小、置信度阈值、IOU阈值、设备类型，以及是否使用半精度（FP16）计算。然后，我们使用YOLOv5原版的DetectMultiBackend类来加载模型，并根据所选择的设备（CPU或GPU）以及计算精度来设置模型。

class Yolov5Manager(object):
    """Convenience wrapper that loads a YOLOv5 model and runs detection on OpenCV images."""

    def __init__(self, weights=r'', names=None, imgsz=(640, 640), conf_thres=0.3,
                 half=True, iou_thres=0.2,
                 device='0',
                 dnn=False, data=None):
        """Load the model and prepare it for inference.

        Args:
            weights: weights path, forwarded to ``DetectMultiBackend``.
            names: class labels indexed by class id. When ``None`` or empty, the
                names stored in the loaded model are used instead.
            imgsz: inference size, either a two-element sequence or a single int
                (used for both dimensions).
            conf_thres: confidence threshold forwarded to ``non_max_suppression``.
            half: request FP16 inference. It is switched off automatically when
                the backend is not pt/jit/onnx/engine or the device is the CPU.
            iou_thres: IoU threshold forwarded to ``non_max_suppression``.
            device: device specifier forwarded to ``select_device``.
            dnn: forwarded unchanged to ``DetectMultiBackend``.
            data: forwarded unchanged to ``DetectMultiBackend``.
        """
        self.names = names
        self.half = half
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        self.device = select_device(device)
        self.model = DetectMultiBackend(weights, device=self.device, dnn=dnn, data=data)
        self.stride = self.model.stride
        # Backend flags reported by the loaded model (e.g. pt=True for a .pt checkpoint).
        pt, jit, onnx, engine = self.model.pt, self.model.jit, self.model.onnx, self.model.engine
        if self.names is None or len(self.names) == 0:
            self.names = self.model.names

        # Accept a bare int so that the `*self.imgsz` unpacking below always works.
        if isinstance(imgsz, int):
            imgsz = (imgsz, imgsz)
        # check_img_size may adjust the requested size (presumably to a multiple of
        # the stride — confirm against utils.general); keep the returned value.
        self.imgsz = check_img_size(imgsz, s=self.stride)
        self.auto = True  # forwarded as letterbox(auto=...) in inference_image
        # FP16 supported on limited backends with CUDA
        self.half &= (pt or jit or onnx or engine) and self.device.type != 'cpu'
        if pt or jit:
            if self.half:
                self.model.model.half()
            else:
                self.model.model.float()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        # Warm up with the validated size, i.e. the size inference_image actually uses.
        # NOTE(review): the `half=` keyword of warmup() differs between YOLOv5
        # releases — confirm against the installed models/common.py.
        self.model.warmup(imgsz=(1, 3, *self.imgsz), half=self.half)  # warmup

我们还定义了一个内部函数__draw_image，它接受一个OpenCV格式的图像，一个表示检测框位置的列表，以及一些可选的参数，如标签，线条宽度，和颜色。这个函数会在图像上绘制检测框和标签。

    def __draw_image(self, opencv_img, box, label='', line_width=None, box_color=(255, 255, 255),
                     txt_box_color=(200, 200, 200),
                     txt_color=(0, 0, 255)):
        """Draw one bounding box, and optionally its label, onto ``opencv_img``.

        Args:
            opencv_img: image array to draw on; the same object is returned.
            box: sequence whose first four items are x1, y1, x2, y2.
            label: text to render; only the part before the first comma is drawn.
            line_width: box line thickness; derived from the image shape when falsy.
            box_color: colour of the box outline.
            txt_box_color: colour of the filled rectangle behind the label.
            txt_color: colour of the label text.

        Returns:
            ``opencv_img`` with the box (and label) drawn on it.
        """
        lw = line_width or max(round(sum(opencv_img.shape) / 2 * 0.005), 2)  # line width
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))

        cv2.rectangle(opencv_img, p1, p2, box_color, thickness=lw, lineType=cv2.LINE_AA)
        if label:
            # Truncate BEFORE measuring, so the background matches the drawn text.
            label = label.split(',')[0]
        if label:
            tf = max(lw - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=lw / 4, thickness=tf)[0]  # text width, height
            outside = p1[1] - h - 1 >= 0  # label fits above the box
            p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
            cv2.rectangle(opencv_img, p1, p2, txt_box_color, -1, cv2.LINE_AA)  # filled background
            cv2.putText(opencv_img, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 4, txt_color,
                        thickness=tf, lineType=cv2.LINE_AA)
        return opencv_img

    def draw_image(self, result_list, opencv_img):
        """Draw every detection in ``result_list`` onto ``opencv_img`` and return it.

        This public method is called by ``start_camera``, ``start_video`` and
        ``start_video_and_save``; its signature follows those call sites.

        Args:
            result_list: rows of ``[name, conf, x1, y1, x2, y2]``, the format
                returned by ``inference_image``.
            opencv_img: image array to annotate.

        Returns:
            The annotated image (same object as ``opencv_img``).
        """
        for name, conf, *box in result_list:
            self.__draw_image(opencv_img, box, label='{} {:.2f}'.format(name, conf))
        return opencv_img

    def imshow(self, opencv_img, result_list, window_name='result', wait_ms=0):
        """Annotate ``opencv_img`` with ``result_list`` and show it in a window.

        The argument order (image first) follows the call in the ``__main__``
        example of this file.

        Args:
            opencv_img: image array to annotate and display.
            result_list: detections in the format returned by ``inference_image``.
            window_name: title of the display window.
            wait_ms: value passed to ``cv2.waitKey`` after showing the image.

        Returns:
            The annotated image.
        """
        annotated = self.draw_image(result_list, opencv_img)
        cv2.imshow(window_name, annotated)
        cv2.waitKey(wait_ms)
        return annotated

此外，inference_image函数接受一个OpenCV格式的图像，并将其预处理为模型可以接受的格式（letterbox缩放、BGR转RGB、HWC转CHW、归一化到0~1），然后进行推理。接着，它调用non_max_suppression函数来进行非极大值抑制，把检测框坐标换算回原图尺寸，最后返回检测结果列表，其中每一项的格式为[类别名, 置信度, x1, y1, x2, y2]。

    def inference_image(self, opencv_img):
        """Run detection on a single image.

        Args:
            opencv_img: image array; the transpose/flip below treats it as
                height x width x channel with BGR channel order.

        Returns:
            A list with one ``[name, conf, x1, y1, x2, y2]`` entry per detection:
            ``name`` is looked up in ``self.names``, ``conf`` is rounded to two
            decimals, and the box corners are ints in ``opencv_img`` coordinates.
        """
        img = letterbox(opencv_img, self.imgsz, stride=self.stride, auto=self.auto)[0]
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255  # 0 - 255 to 0.0 - 1.0
        if img.ndim == 3:
            img = img[None]  # expand for batch dim

        # Disable autograd here as well, so that direct callers (not only the
        # decorated start_* loops) do not record gradients during inference.
        with torch.no_grad():
            pred = self.model(img, augment=False, visualize=False)
            pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, False, max_det=100)

        result_list = []
        for det in pred:  # per image
            if not len(det):
                continue
            # Rescale boxes from the network input size to the original image size.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], opencv_img.shape).round()
            for *xyxy, conf, cls in reversed(det):
                result_list.append(
                    [self.names[int(cls)], round(float(conf), 2), int(xyxy[0]), int(xyxy[1]), int(xyxy[2]),
                     int(xyxy[3])])
        return result_list

我们还提供了一些实用的函数，如start_camera、start_video和start_video_and_save。前两个函数分别从摄像头和视频文件中逐帧读取图像，进行推理并把结果绘制后显示出来；start_video_and_save在此基础上还会把绘制后的每一帧写入一个新的视频文件。在显示窗口中按q键可以提前退出。

    @torch.no_grad()
    def start_camera(self, camera_index=0):
        """Run detection on frames from a camera and display them until 'q' is pressed.

        Args:
            camera_index: value passed to ``cv2.VideoCapture`` to select the camera.

        The loop also ends when a frame cannot be read.
        """
        cap = cv2.VideoCapture(camera_index)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                result_list = self.inference_image(frame)
                frame = self.draw_image(result_list, frame)
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the device and close windows even if inference or drawing raised.
            cap.release()
            cv2.destroyAllWindows()

    @torch.no_grad()
    def start_video(self, video_file):
        """Run detection on every frame of a video file and display the result.

        Args:
            video_file: value passed to ``cv2.VideoCapture`` to open the video.

        Playback stops when a frame cannot be read or when 'q' is pressed.
        """
        cap = cv2.VideoCapture(video_file)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print('ret is False')
                    break
                result_list = self.inference_image(frame)
                frame = self.draw_image(result_list, frame)
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the file handle and close windows even if a frame failed to process.
            cap.release()
            cv2.destroyAllWindows()

    @torch.no_grad()
    def start_video_and_save(self, video_file, save_file, show=True):
        """Run detection on a video file and write the annotated frames to a new video.

        Args:
            video_file: value passed to ``cv2.VideoCapture`` to open the input.
            save_file: output path passed to ``cv2.VideoWriter`` (XVID fourcc).
            show: when true, also display each annotated frame; pressing 'q'
                in the window stops early.
        """
        cap = cv2.VideoCapture(video_file)
        out = None
        try:
            # Keep the frame rate unrounded: truncating e.g. 29.97 to 29 would
            # change the playback speed of the saved video.
            frame_fps = cap.get(cv2.CAP_PROP_FPS)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            print("video fps={},width={},height={}".format(frame_fps, frame_width, frame_height))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(save_file, fourcc, frame_fps, (frame_width, frame_height))
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("read over or error!")
                    break
                result_list = self.inference_image(frame)
                frame = self.draw_image(result_list, frame)
                out.write(frame)
                if show:
                    cv2.imshow("result", frame)
                    # Key presses are only delivered while a window exists.
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            # Always finalise the output file and release the input, even on error.
            if out is not None:
                out.release()
            cap.release()
            cv2.destroyAllWindows()

load_labels函数可以从一个文本文件中加载标签名。

    @classmethod
    def load_labels(cls, name_file):
        with open(name_file, 'r') as f:
            lines = f.read().rstrip('\n').split('\n')
        return lines

最后，在主程序中，我们实例化了一个Yolov5Manager对象，并使用它来进行实际的检测任务。下面的示例读取一张图片，进行推理，然后显示并打印检测结果。同样地，我们也可以调用start_camera从摄像头中读取图像并实时进行检测和绘图，或者调用start_video_and_save从视频文件中读取图像，进行检测和绘图，并将结果保存为一个新的视频文件。

if __name__ == '__main__':
    # Example: detect objects in a single image, show the result and print it.
    infer = Yolov5Manager(weights=r'yolov5s.pt', conf_thres=0.3, half=True,
                          iou_thres=0.2, device='0')
    img_path = r'cccccc.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    with torch.no_grad():  # inference only; no gradients needed
        result_list = infer.inference_image(img)
    infer.imshow(img, result_list)
    print(result_list)

这个代码库提供了一个非常方便的接口，使得我们可以轻松地使用YOLOv5模型进行对象检测。我们可以通过修改和扩展这个代码库来满足我们的特定需求。

在此，我想推荐大家加入我们的YOLO目标检测交流学习群。群号是732818397。在这个群里，我们可以一起学习和探讨关于YOLO目标检测的各种问题和挑战。无论你是初学者还是有经验的专业人士，我们都欢迎你的加入。希望我们能在学习和交流的过程中共同进步，共同提高。期待在群里遇见你。