机器视觉实用工具集NO.10——使用深度学习模型（yolo3）实现物体检测工具

最新推荐文章于 2023-11-30 00:30:00 发布

JAMES费

最新推荐文章于 2023-11-30 00:30:00 发布

阅读量3.1k

点赞数

分类专栏：机器视觉工具集　文章标签：opencv 计算机视觉 python

未经同意，不得转载

本文链接：https://blog.csdn.net/kanbide/article/details/123363518

版权

OpenCV YOLOv3 物体检测 Python 深度学习

关键词由CSDN通过智能技术生成

机器视觉工具集专栏收录该内容

21 篇文章 12 订阅

订阅专栏

前言

OpenCV 3.3版本之后提供了对主流深度学习主干网络框架的加载支持。opencv给我们提供了一个应用成熟深度学习模型的便捷工具。
YOLO是一个优秀的物体及人体检测深度神经网络模型，可以通过opencv快速部署检测应用。

opencv+YOLO3实现物体检测

YOLO3里面训练好的能够检测的物体有80种，涵盖了人、鸟、车、沙发等日常动物、物品，且相对来说实时性比较高。opencv+yolo3部署一个物体检测应用比较简洁，100多行代码就可以实现。
在这里插入图片描述

python源码

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 15 22:50:59 2021
@author: JAMES FEI
Copyright (C) 2021 FEI PANFENG, All rights reserved.
THIS SOFTWARE, INCLUDING DOCUMENTATION, IS PROTECTED BY COPYRIGHT CONTROLLED 
BY FEI PANFENG ALL RIGHTS ARE RESERVED.
"""
import numpy as np
import cv2
import base64


def file2base64(img):
    """Return the Base64 encoding of *img* as a text string.

    Args:
        img: A bytes-like object (for example the encoded buffer of an
            image file).

    Returns:
        The Base64 representation of *img*, decoded to ``str`` as UTF-8.
    """
    encoded_bytes = base64.b64encode(img)
    return str(encoded_bytes, 'utf-8')

# base64 to numpy array with opencv
def bas642mat_cv(base64_encode):
    """Decode a Base64-encoded image file into an OpenCV image array.

    Args:
        base64_encode: Base64 text (or bytes) holding the contents of an
            encoded image file such as a JPEG.

    Returns:
        The decoded image as returned by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR`` (a 3-channel BGR array), or ``None`` when
        OpenCV cannot decode the buffer.
    """
    raw_bytes = base64.b64decode(base64_encode)
    img_array = np.frombuffer(raw_bytes, np.uint8)
    # imdecode takes an IMREAD_* flag. The previous code passed
    # cv2.COLOR_BGR2RGB, which is a cvtColor conversion code and only worked
    # because its numeric value happens to be a valid IMREAD flag.
    # IMREAD_COLOR states the intent: always produce a 3-channel BGR image.
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    return img
# 画图

class Img_Detor:
    """YOLOv3 object detector running on OpenCV's DNN module.

    The network is loaded once in ``__init__``; ``YOLO`` then runs detection
    on a Base64-encoded image and returns the boxes kept after non-maximum
    suppression.
    """

    # Loaded network and the names of its output layers (set in __init__).
    net=None
    output_layer_names=None
    # Class names indexed by the class ID predicted by the network.
    lables=["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign",
            "parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella",
            "handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard",
            "tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli",
            "carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse",
            "remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors",
            "teddy bear","hair drier","toothbrush"]

    # NOTE(review): class-level list shared by all instances; not used
    # anywhere in this class.
    jobs=[]

    def __init__(self, **kwargs):
        """Load the YOLOv3 network.

        Keyword Args:
            config_path: Path to the Darknet ``.cfg`` file
                (default ``"yolov3.cfg"``).
            weights_path: Path to the Darknet ``.weights`` file
                (default ``"yolov3.weights"``).
        """
        # Attribute name (including its spelling) kept for compatibility.
        self.mathods=dir(self)

        config_path = kwargs.get("config_path", "yolov3.cfg")
        weights_path = kwargs.get("weights_path", "yolov3.weights")
        self.net, self.output_layer_names = self.load_network(config_path, weights_path)
        print("初始化YOLO")

    @staticmethod
    def _select_output_names(layer_names, out_layer_ids):
        """Map 1-based output layer IDs to their layer names.

        ``out_layer_ids`` may be a flat sequence or a column of one-element
        rows; both layouts are flattened before indexing.
        """
        flat_ids = np.asarray(out_layer_ids).flatten()
        return [layer_names[int(i) - 1] for i in flat_ids]

    def load_network(self,config_path, weights_path):
        """Read a Darknet model and determine its output layer names.

        Args:
            config_path: Path to the Darknet configuration file.
            weights_path: Path to the Darknet weights file.

        Returns:
            Tuple ``(net, output_layer_names)``.
        """
        net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
        # Depending on the OpenCV version, getUnconnectedOutLayers() returns
        # either an Nx1 array or a flat array; the helper handles both
        # (indexing ``i[0]`` fails on the flat form).
        output_layer_names = self._select_output_names(
            net.getLayerNames(), net.getUnconnectedOutLayers())
        return net, output_layer_names

    def YOLO(self,strimg,confident=0.5,overlap=0.5):
        """Detect objects in a Base64-encoded image.

        Args:
            strimg: Base64-encoded contents of an image file.
            confident: Minimum class score for a detection to be kept; also
                passed to ``cv2.dnn.NMSBoxes`` as its score threshold.
            overlap: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            A list with one entry per kept detection, each of the form
            ``[(x, y), (w, h), label, confidence]`` where ``(x, y)`` is the
            top-left corner in pixels of the input image. The list is empty
            when nothing is detected or the image cannot be decoded.
        """
        img = bas642mat_cv(strimg)  # decode into an image matrix
        if img is None:
            # Undecodable input: report "no detections" rather than failing
            # on ``None.shape`` below.
            return []
        blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        layer_outputs = self.net.forward(self.output_layer_names)
        boxes, confidences, class_IDs = [], [], []
        H, W = img.shape[:2]
        # The first four values of a detection are scaled by the image size
        # and read as centre x, centre y, width, height.
        scale = np.array([W, H, W, H])
        for output in layer_outputs:
            for detection in output:
                scores = detection[5:]
                classID = int(np.argmax(scores))
                confidence = float(scores[classID])
                # Use the caller's threshold; this was hard-coded to 0.5,
                # which silently ignored any lower ``confident`` value.
                if confidence > confident:
                    box = detection[0:4] * scale
                    centerX, centerY, width, height = box.astype("int")
                    x, y = int(centerX - (width / 2)), int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(confidence)
                    class_IDs.append(classID)
        if not boxes:
            return []
        indices = cv2.dnn.NMSBoxes(boxes, confidences,confident,overlap)
        BOXS=[]
        if len(indices) > 0:
            # np.asarray makes this work whether NMSBoxes returned an
            # ndarray (flat or Nx1) or a plain sequence.
            for i in np.asarray(indices).flatten():
                i = int(i)
                x, y, w, h = boxes[i]
                BOXS.append([(x,y),(w,h),self.lables[class_IDs[i]],confidences[i]])
        return BOXS

if __name__ == '__main__':
    # Webcam demo: grab frames from camera 0, run detection on a
    # JPEG-compressed copy of each frame, and draw the results on the
    # original frame until ESC is pressed.
    a=Img_Detor()
    # JPEG quality used for the copy that is passed to the detector.
    encode_param=[int(cv2.IMWRITE_JPEG_QUALITY),15]
    cap=cv2.VideoCapture(0)
    try:
        while True:
            sucess,img=cap.read()
            if not sucess or img is None:
                # Camera unavailable or stream ended: stop instead of
                # passing an empty frame to imencode.
                break
            result, imgencode = cv2.imencode('.jpg', img, encode_param)
            if result:
                strimg=file2base64(imgencode)
                box=a.YOLO(strimg)
                if isinstance(box, list):
                    for b in box:
                        (x,y)=b[0]
                        (w,h)=b[1]
                        text = "{}: {:.4f}".format(b[2], b[3])
                        cv2.putText(img, text, (x, y - 5), cv2.FONT_ITALIC, 0.5, [0, 255, 0], 2)
                        cv2.rectangle(img, (x, y), (x + w, y + h), (255,255,0), 2)
                    cv2.imshow("mini_eye",img)

            k=cv2.waitKey(1)
            if k == 27:
                # ESC key ends the capture loop.
                break
    finally:
        # Always free the camera and close the preview window, including
        # when an exception interrupts the loop.
        cap.release()
        cv2.destroyAllWindows()