mediapipe 物体识别(object detection)

鬼谷子2008

已于 2024-07-28 08:37:46 修改

阅读量168

点赞数 1

分类专栏： AI 文章标签：笔记人工智能

于 2024-07-23 07:16:35 首次发布

本文链接：https://blog.csdn.net/guiguzi20080808/article/details/140593960

版权

AI 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

mediapipe 套件分为三部分
1、Tasks分为视觉vision、文本text、听觉audio
2、Model Maker 制作自己的专用模型
3、Studio 基于web 的工具
参考链接
https://ai.google.dev/edge/mediapipe/solutions/guide
1.1 物体检测(object detect)Task
参考链接https://ai.google.dev/edge/mediapipe/solutions/vision/object_detector/python
下载模型（model）：官方提供三种基于 COCO 数据集训练的模型
1、EfficientDet-Lite0，输入尺寸 320*320，推荐
2、EfficientDet-Lite2，输入尺寸 448*448，精度更高但较慢
3、SSD MobileNetV2，输入尺寸 256*256，速度快但精度较差
构建物体识别的基本步骤：
step 1、资源导入
step 2、构建检测器 detector
读取模型 base option
具体视觉、听觉、文本 option
生成检测器 detector
step 3、读取待识别的图像、音频、视频、文本文件
step 4、检测物体，获得结果result
step 5、将结果展现出来：box（识别出的边框）、name（物体的名称）、score（判断的置信度分值）

检测静态图片

# step 1: 导入必要的模块
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2 as cv
# step 2: load the model file and configure the detector
base_options = python.BaseOptions(
    # EfficientDet-Lite0 model file downloaded from the official MediaPipe website
    model_asset_path='efficientdet_lite0.tflite'
)
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    max_results=5,
    score_threshold=0.5,
    running_mode=vision.RunningMode.IMAGE
)

# step 3: load the image that will be analysed
image_file = 'object/h.jpg'     # image file
image = cv.imread(image_file)
if image is None:
    # cv.imread reports an unreadable/missing file by returning None, not by raising
    raise FileNotFoundError(f'cannot read image file: {image_file}')
# OpenCV decodes images as BGR, while ImageFormat.SRGB expects RGB channel order,
# so convert before handing the pixels to MediaPipe.
image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
img = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)
# alternative: img = mp.Image.create_from_file(image_file)  # load directly as a MediaPipe image

# step 4: run detection; the context manager closes the detector afterwards
with vision.ObjectDetector.create_from_options(options) as detector:
    detection_result = detector.detect(img)

# step 5: draw the bounding box, name and score of every detected object
cv.namedWindow('image', cv.WINDOW_NORMAL)
for detection in detection_result.detections:
    box = detection.bounding_box
    start_point = (box.origin_x, box.origin_y)      # top-left corner
    end_point = (box.origin_x + box.width, box.origin_y + box.height)      # bottom-right corner
    cv.rectangle(image, start_point, end_point, (0, 255, 0), 2)

    top_category = detection.categories[0]
    label = top_category.category_name
    name_point = (box.origin_x, box.origin_y - 10)      # just above the box
    cv.putText(image, label, name_point, cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 225), 1)

    score_point = (box.origin_x, box.origin_y + 60)
    cv.putText(image, str(top_category.score), score_point, cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 225), 1)

# the annotations were drawn on the original BGR array, which is what imshow expects
cv.imshow('image', image)
cv.waitKey(0)
cv.destroyWindow('image')

检测视频

# step 1: 导入必要的模块
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2 as cv
# step 2: load the trained model and configure the detector
base_options = python.BaseOptions(model_asset_path='efficientdet_lite0.tflite')
options = vision.ObjectDetectorOptions(base_options=base_options,
                                       running_mode=vision.RunningMode.VIDEO,
                                       score_threshold=0.5,
                                       max_results=2)

# step 3: open the video
video = cv.VideoCapture('object/dog.mp4')
fps = video.get(cv.CAP_PROP_FPS)        # needed to derive a timestamp for every frame
if not fps > 0:
    # some containers report 0 (or NaN) FPS; fall back so the division below is safe
    fps = 30.0
break_time_ms = 1000 / fps              # duration of one frame, loop-invariant
frame_index = 0

# the context manager closes the detector when the loop ends
with vision.ObjectDetector.create_from_options(options) as detector:
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            frame_index += 1
            frame_time_stamp_ms = frame_index * break_time_ms
            # OpenCV frames are BGR, while ImageFormat.SRGB expects RGB channel order
            frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

            # step 4: run detection for this frame
            result = detector.detect_for_video(mp_image, int(frame_time_stamp_ms))

            # step 5: draw every detection (the loop body is skipped when there are none)
            for detection in result.detections:
                box = detection.bounding_box
                rect_start_point = (int(box.origin_x), int(box.origin_y))
                rect_end_point = (int(box.origin_x + box.width), int(box.origin_y + box.height))
                cv.rectangle(frame, rect_start_point, rect_end_point, (0, 255, 0), 2)
                top_category = detection.categories[0]
                if top_category.score > 0.6:      # only label the more confident detections
                    cv.putText(frame, top_category.category_name, rect_start_point,
                               cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            cv.imshow('frame', frame)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # release the capture handle and the window even if the loop raised
        video.release()
        cv.destroyAllWindows()

实时视频流stream

#step 1: 导入资源
import cv2 as cv
import numpy
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import time

# step 2: define the result callback
# Module-level list used to hand results from the callback to the main loop.
detection_result_list = []


def visualize_result(
    result: vision.ObjectDetectorResult,
    out_image: mp.Image,
    timestamp_ms: int,
) -> None:
    """Record one detection result passed to the callback.

    Stores ``timestamp_ms`` on ``result``, appends ``result`` to the
    module-level ``detection_result_list`` and prints that list.
    ``out_image`` is accepted but not used.
    """
    result.timestamp_ms = timestamp_ms
    detection_result_list.append(result)
    print(detection_result_list)


# step 3: configure the detector
base_options = python.BaseOptions(model_asset_path='efficientdet_lite0.tflite')
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    score_threshold=0.5,
    running_mode=vision.RunningMode.LIVE_STREAM,
    result_callback=visualize_result,
    max_results=5
)

# step 4: read frames from the camera
cap = cv.VideoCapture(0)

# the context manager closes the detector when the loop ends
with vision.ObjectDetector.create_from_options(options) as detector:
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            cv.flip(frame, 1, frame)    # mirror horizontally, in place

            # OpenCV frames are BGR, while ImageFormat.SRGB expects RGB channel order
            frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            out_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
            # Use the monotonic clock for the timestamp: webcams frequently report
            # 0 for CAP_PROP_FPS, which would make an FPS-derived timestamp divide by zero.
            timestamp_ms = time.monotonic_ns() // 1_000_000
            # Asynchronous call: the result is handed to visualize_result, which
            # appends it to detection_result_list.
            detector.detect_async(out_image, timestamp_ms)

            # step 5: draw the oldest pending result on the current frame
            if detection_result_list:
                for detection in detection_result_list[0].detections:
                    bbox = detection.bounding_box
                    start_point = (int(bbox.origin_x), int(bbox.origin_y))
                    end_point = (int(bbox.origin_x + bbox.width), int(bbox.origin_y + bbox.height))
                    cv.rectangle(frame, start_point, end_point, (0, 255, 0), 2)
                    # label text
                    label = detection.categories[0].category_name
                    cv.putText(frame, label, (start_point[0] + 5, start_point[1] + 20),
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 2)
                    # score of THIS detection (maximum is 1)
                    score = detection.categories[0].score
                    cv.putText(frame, str(score), (start_point[0] + 5, start_point[1] + 50),
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 2)
                # empty the list so it does not grow without bound
                detection_result_list.clear()

            cv.imshow('camera', frame)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # release the camera and the window even if the loop raised
        cap.release()
        cv.destroyAllWindows()


参考案例
https://github.com/google-ai-edge/mediapipe-samples/blob/main/examples/object_detection/python/object_detector_live_stream/detect.py