【目标检测】基于OpenCV与darknet-YOLOv3实时目标检测程序自用指南

最新推荐文章于 2024-06-22 10:48:33 发布

Mr小米周

最新推荐文章于 2024-06-22 10:48:33 发布

阅读量1.4k

点赞数 3

分类专栏：计算机视觉　文章标签：python opencv 计算机视觉 深度学习

本文链接：https://blog.csdn.net/qq_45552341/article/details/109005342

版权

计算机视觉专栏收录该内容

17 篇文章 3 订阅

订阅专栏

基于OpenCV与darknet-YOLOv3实时目标检测

写在最前面
1.环境配置
2.运行realtime_detection.py
3.运行detection_save.py
4.完整代码
- 目录结构如下图所示

写在最前面

本文使用的yolov3权重文件为作者利用自己小数据集自己训练的模型，可识别垃圾桶、落地花坛、电动栅栏、木箱，共四类目标。数据集图片太少，模型性能较差，仅能初步满足检测需求，且采用opencv调用摄像头的方法延迟高，实时性差，有待后续优化。
学习路长啊，一点一点摸索吧，小白开始耍了，哈哈哈！

1.环境配置

Python >=3.7（建议使用Anaconda,配置环境）
Numpy
Opencv（安装参考链接：https://www.jianshu.com/p/5ab15165f78b）

2.运行realtime_detection.py

（1）使用摄像头进行检测

配置好环境后，即可直接运行realtime_detection.py，默认直接调用摄像头，开始检测，摄像头画面显示如下图所示。
运行效果图

（2）不使用摄像头，仅检测存储的视频

将capture = cv2.VideoCapture(0)注释掉，在该行代码前添加 # 即可，修改成如下所示。

# 读入待检测的图像
# 0是代表摄像头编号，只有一个的话默认为0
# capture = cv2.VideoCapture(0)

将 # capture = cv2.VideoCapture('./test_video/0031.mp4') 这一行取消注释：删除行首的 #，注意 # 后面紧跟的那个空格也需一并删除，否则该行缩进会多出一格，Python 会报 IndentationError。
将地址./test_video/0031.mp4替换为需要检测的视频地址即可。
修改成如下所示。

# 读取录制好的视频
capture = cv2.VideoCapture('./test_video/0031.mp4')

（3）调用IP摄像头进行检测

注释掉以下两句代码

capture = cv2.VideoCapture(0)
capture = cv2.VideoCapture('./test_video/0031.mp4')

取消注释以下两行代码（删除行首的 # 及其后的一个空格），即可使用IP摄像头

ip_camera_url = 'http://admin:admin@192.168.253.7:8097'
capture = cv2.VideoCapture(ip_camera_url)

使用opencv调用IP摄像头的具体方法参考链接：
https://blog.csdn.net/urnotY/article/details/108454247

3.运行detection_save.py

detection_save.py文件仅增加了将检测结果保存下来的功能，其他功能与realtime_detection.py相同，根据是否需要保存检测结果的视频文件自行选择。文件保存在output文件夹下。

4.完整代码

realtime_detection.py完整代码如下：

import numpy as np
import cv2
import os
import time


def video_demo():
    """Run YOLOv3 detection on a video stream and display annotated frames.

    Loads the Darknet model files from ``./yolov3``, reads frames from the
    selected video source (camera 0 by default), draws the boxes that survive
    non-maximum suppression and shows each frame in a window named "Image".
    The loop ends when ESC is pressed or the source stops delivering frames.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # Paths of the trained model; absolute or relative paths both work.
    weightsPath = "./yolov3/object_18900.weights"
    configPath = "./yolov3/object.cfg"
    labelsPath = "./yolov3/object.names"

    # Paths of the official model
    # weightsPath = "./yolov3/yolov3.weights"
    # configPath = "./yolov3/yolov3.cfg"
    # labelsPath = "./yolov3/coco.names"

    # Detection thresholds.
    conf_threshold = 0.5       # minimum class score for a candidate box
    nms_score_threshold = 0.2  # score threshold passed to NMSBoxes
    nms_iou_threshold = 0.3    # overlap threshold passed to NMSBoxes

    # Class names (one per line) and one random colour per class.
    with open(labelsPath) as label_file:
        LABELS = label_file.read().strip().split("\n")
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

    # Names of the YOLO output layers. They never change between frames, so
    # resolve them once. getUnconnectedOutLayers() returns an Nx1 array in
    # older OpenCV releases and a flat array in newer ones; flattening
    # handles both.
    layer_names = net.getLayerNames()
    out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_layer_ids]

    # Open the video source.
    # 0 is the camera index; with a single camera it is 0 by default.
    capture = cv2.VideoCapture(0)

    # Read a recorded video instead
    # capture = cv2.VideoCapture('./test_video/0031.mp4')

    # Use a stream from an "ip camera" app
    # URL format: http://user:password@ip-address:port/
    # ip_camera_url = 'http://admin:admin@192.168.137.136:8081'
    # Create the VideoCapture from the URL
    # capture = cv2.VideoCapture(ip_camera_url)

    try:
        if not capture.isOpened():
            raise RuntimeError("Unable to open the video source")

        while True:
            ref, image = capture.read()
            # read() reports failure at the end of a file or when the camera
            # drops out; stop instead of crashing on a missing frame.
            if not ref or image is None:
                break

            boxes = []
            confidences = []
            classIDs = []
            (H, W) = image.shape[:2]

            # Build a blob from the frame and run it through the network to
            # obtain bounding boxes and their class scores.
            blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(ln)

            # Loop over each output layer ...
            for output in layerOutputs:
                # ... and over each detection in it.
                for detection in output:
                    scores = detection[5:]
                    classID = int(np.argmax(scores))
                    confidence = scores[classID]
                    # Drop detections with a low score.
                    if confidence > conf_threshold:
                        # The first four values are centre x/y, width and
                        # height, scaled here to the frame size.
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")
                        # Top-left corner of the box.
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            # Non-maximum suppression to remove overlapping boxes.
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, nms_score_threshold, nms_iou_threshold)
            if len(idxs) > 0:
                for i in np.array(idxs, dtype=int).flatten():
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])
                    # Draw the box and its label on the frame.
                    color = [int(c) for c in COLORS[classIDs[i]]]
                    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                    cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.imshow("Image", image)
            # Show the frame for 30 ms; leave the loop when ESC is pressed.
            c = cv2.waitKey(30) & 0xff
            if c == 27:
                break
    finally:
        # Always free the source and close the window, whichever way the
        # loop ended.
        capture.release()
        cv2.destroyAllWindows()

# Script entry point: start the detection loop.
video_demo()

detection_save.py完整代码如下：

import numpy as np
import cv2
import os
import time


def video_demo():
    """Run YOLOv3 detection on a video stream, display it and save it.

    Same detection loop as the display-only script, with one addition: every
    annotated frame is also written to ``output/MySaveVideo-<timestamp>.avi``.
    The loop ends when ESC is pressed or the source stops delivering frames.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # Paths of the trained model; absolute or relative paths both work.
    weightsPath = "./yolov3/object_18900.weights"
    configPath = "./yolov3/object.cfg"
    labelsPath = "./yolov3/object.names"

    # Paths of the official model
    # weightsPath = "./yolov3/yolov3.weights"
    # configPath = "./yolov3/yolov3.cfg"
    # labelsPath = "./yolov3/coco.names"

    # Detection thresholds.
    conf_threshold = 0.5       # minimum class score for a candidate box
    nms_score_threshold = 0.2  # score threshold passed to NMSBoxes
    nms_iou_threshold = 0.3    # overlap threshold passed to NMSBoxes

    # Class names (one per line) and one random colour per class.
    with open(labelsPath) as label_file:
        LABELS = label_file.read().strip().split("\n")
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

    # Names of the YOLO output layers. They never change between frames, so
    # resolve them once. getUnconnectedOutLayers() returns an Nx1 array in
    # older OpenCV releases and a flat array in newer ones; flattening
    # handles both.
    layer_names = net.getLayerNames()
    out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_layer_ids]

    # Open the video source.
    # 0 is the camera index; with a single camera it is 0 by default.
    capture = cv2.VideoCapture(0)

    # # Read a recorded video instead
    # capture = cv2.VideoCapture('./test_video/0031.mp4')

    # Use a stream from an "ip camera" app
    # URL format: http://user:password@ip-address:port/
    # ip_camera_url = 'http://admin:admin@192.168.253.7:8097'
    # # Create the VideoCapture from the URL
    # capture = cv2.VideoCapture(ip_camera_url)

    videowrite = None
    try:
        if not capture.isOpened():
            raise RuntimeError("Unable to open the video source")

        # Frame rate of the saved video.
        fps = 30

        # Frame size as reported by the source.
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        video_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        # VideoWriter does not create missing directories; without this the
        # writer fails to open and nothing is saved.
        os.makedirs('output', exist_ok=True)
        videowrite = cv2.VideoWriter('output/MySaveVideo-' + video_time + '.avi', cv2.VideoWriter_fourcc('I', '4', '2', '0'), fps, size)
        if not videowrite.isOpened():
            # Keep the live display running, but say that nothing is saved.
            print("Warning: could not open the output video file; frames will not be saved")

        while True:
            ref, image = capture.read()
            # read() reports failure at the end of a file or when the camera
            # drops out; stop instead of crashing on a missing frame.
            if not ref or image is None:
                break

            boxes = []
            confidences = []
            classIDs = []
            (H, W) = image.shape[:2]

            # Build a blob from the frame and run it through the network to
            # obtain bounding boxes and their class scores.
            blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(ln)

            # Loop over each output layer ...
            for output in layerOutputs:
                # ... and over each detection in it.
                for detection in output:
                    scores = detection[5:]
                    classID = int(np.argmax(scores))
                    confidence = scores[classID]
                    # Drop detections with a low score.
                    if confidence > conf_threshold:
                        # The first four values are centre x/y, width and
                        # height, scaled here to the frame size.
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")
                        # Top-left corner of the box.
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            # Non-maximum suppression to remove overlapping boxes.
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, nms_score_threshold, nms_iou_threshold)
            if len(idxs) > 0:
                for i in np.array(idxs, dtype=int).flatten():
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])
                    # Draw the box and its label on the frame.
                    color = [int(c) for c in COLORS[classIDs[i]]]
                    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                    cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            videowrite.write(image)
            cv2.imshow("Image", image)

            # Show the frame for 30 ms; leave the loop when ESC is pressed.
            c = cv2.waitKey(30) & 0xff
            if c == 27:
                break
    finally:
        # Release the writer so the AVI file is finalised, then free the
        # source and close the window, whichever way the loop ended.
        if videowrite is not None:
            videowrite.release()
        capture.release()
        cv2.destroyAllWindows()

# Script entry point: start the detection-and-save loop.
video_demo()