yolov3-object80 目标检测标出图片中的物品类别

最新推荐文章于 2024-04-22 22:12:43 发布

贝叶斯巴达

最新推荐文章于 2024-04-22 22:12:43 发布

阅读量365

点赞数

文章标签：python、opencv、计算机视觉、目标检测

本文链接：https://blog.csdn.net/qq_41900846/article/details/129482870

版权

完整项目链接：

https://download.csdn.net/download/qq_41900846/87567935

使用 yolov3-object80 模型（即识别 coco.names 中 80 个类别的 YOLOv3）对图片进行目标检测，框出图片中的物体，并把每张图片检测到的类别 id 和类别名称写入 tsv 文件。

import cv2
import numpy as np

import os
import glob
from tqdm import tqdm

# Dependency hint:
#   pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
# A camera or video source could be opened instead with, e.g.:
#   cap = cv2.VideoCapture(r"./data/test.mp4")

# A detection is kept only if its best class score is above this value.
confThreshold = 0.5
# Overlap threshold passed to cv2.dnn.NMSBoxes (non-maximum suppression).
nmsThreshold = 0.2
# Width and height of the image blob fed to the network.
inpWidth, inpHeight = 320, 320

# coco.names holds one class name per line; the line index is the class id.
classesFile = r"coco.names"
# Placeholder, replaced by the file contents right below.
classNames = []
with open(classesFile, "rt") as f:
    classNames = f.read().splitlines()
# Echo the loaded class names.
print(classNames)

# YOLOv3 network definition (.cfg) and trained weights (.weights).
modelConfiguration, modelWeights = "yolov3.cfg", "yolov3.weights"
# Build the Darknet model with OpenCV's DNN module.
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
# Use OpenCV's own DNN backend ...
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
# ... with the CPU as the computation target.
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# YOLOv3 post-processing: filter detections, suppress overlaps, draw boxes.
def findObjects(outputs, img):
    """Convert raw network outputs into the detected class ids and names.

    Each detection row whose best class score exceeds ``confThreshold`` is
    turned into a pixel-space box. Overlapping boxes are pruned with
    ``cv2.dnn.NMSBoxes`` (using ``confThreshold`` and ``nmsThreshold``), and
    every surviving box is drawn onto ``img`` in place.

    Args:
        outputs: Iterable of detection arrays, one per output layer, as
            produced by ``net.forward``. Each row is read as
            ``[cx, cy, w, h, <skipped>, score_0, score_1, ...]``; the four box
            values are scaled by the image width/height.
        img: Image array that is drawn on. Only ``img.shape[:2]``
            (height, width) is used for scaling.

    Returns:
        A ``(tagid_lst, tagname)`` tuple: the class ids (plain ``int``) of the
        boxes kept after suppression, and the matching capitalized names
        looked up in ``classNames``.
    """
    # Only height and width are needed; slicing also accepts 2-D images.
    hT, wT = img.shape[:2]
    bbox = []      # [x, y, w, h] boxes in pixels (top-left corner + size)
    classIds = []  # best class index per kept detection
    confs = []     # score of that best class
    for output in outputs:
        for det in output:
            scores = det[5:]
            # Store builtin int/float rather than NumPy scalars: the caller
            # writes str(list) to a file, and NumPy >= 2 would render the
            # scalars as "np.int64(3)" instead of "3".
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence > confThreshold:
                # Box size, then centre -> top-left corner, all in pixels.
                w, h = int(det[2] * wT), int(det[3] * hT)
                x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(confidence)

    # Nothing passed the score threshold: nothing to suppress or draw.
    if not bbox:
        return [], []

    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    # Depending on the OpenCV version the result is a flat array, an Nx1
    # array, or an empty tuple; normalise all of them to a flat list of ints.
    kept = np.asarray(indices, dtype=int).reshape(-1).tolist()

    tagid_lst = []
    tagname = []
    for i in kept:
        x, y, w, h = bbox[i]
        # Mark the detection on the image (magenta, 2 px); no label is drawn.
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        tagid_lst.append(classIds[i])
        tagname.append(classNames[classIds[i]].capitalize())
    return tagid_lst, tagname



def _detect_tags(input_path, output_names):
    """Run the detector on one image file and return ``findObjects``' result.

    Args:
        input_path: Path of the image to read.
        output_names: Names of the network output layers to evaluate.

    Returns:
        The ``(tagid_lst, tagname)`` tuple produced by ``findObjects``.

    Raises:
        ValueError: If no frame could be read from ``input_path``.
    """
    cap = cv2.VideoCapture(input_path)
    try:
        success, frame = cap.read()
    finally:
        # Release the capture handle for every file, including failures.
        cap.release()
    if not success or frame is None:
        raise ValueError(f"cannot read image: {input_path}")
    # Positional arguments: scale factor 1/255, network input size,
    # mean [0, 0, 0], swapRB=True, crop=False.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], True, False)
    net.setInput(blob)
    outputs = net.forward(output_names)
    return findObjects(outputs, frame)


def img_type(img_txt=None):
    """Tag every image matched by a glob pattern and write the result as TSV.

    For each ``(img_path, txt_path)`` job, every file matched by the glob
    pattern ``img_path`` gets one line in ``txt_path``:
    ``<file name>\\t<list of class ids>\\t<list of class names>``.
    Empty files and images that fail during detection are written with empty
    lists; failures are counted and the count is printed once per job.

    Args:
        img_txt: Optional list of ``(glob_pattern, output_tsv_path)`` tuples.
            When omitted, the built-in placeholder job is used, which must be
            edited to point at real paths.
    """
    if img_txt is None:
        img_txt = [

            (
                "/你的图片路径/*.jpg",
                "/你的图片名+图片类别标签+图片类别id/image_yolov3_80.tsv",
            ),
        ]
    # The output layers do not change between images, so resolve them once.
    # This call also avoids indexing getUnconnectedOutLayers() by hand, whose
    # return shape differs between OpenCV versions.
    outputNames = net.getUnconnectedOutLayersNames()
    for img_path, txt_path in img_txt:
        img_lst = glob.glob(img_path)
        broken_img_num = 0
        # The context manager closes the file even if the loop raises; UTF-8
        # keeps non-ASCII image names writable regardless of the locale.
        with open(txt_path, 'w', encoding='utf-8') as f:
            for input_path in tqdm(img_lst, total=len(img_lst)):
                img_name = os.path.basename(input_path)
                tagid_lst = []
                tagname = []
                # Zero-byte files are skipped and written with empty lists.
                if os.path.getsize(input_path):
                    try:
                        tagid_lst, tagname = _detect_tags(input_path, outputNames)
                    except Exception:
                        # Best effort: one bad image must not stop the batch.
                        broken_img_num += 1
                f.write(f'{img_name}\t{str(tagid_lst)}\t{str(tagname)}\n')
                # Flushed per row, presumably so partial results survive an
                # interrupted run.
                f.flush()
        print(f'总共损坏图像={str(broken_img_num)}')


# Run the batch tagging job only when this file is executed as a script.
if __name__ == '__main__':
    img_type()

使用模型：

yolov3

类别文档：coco.names（每行一个类别名称，共 80 行；行的序号即类别 id，从 0 开始）

person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush