Mediapipe classifier 分类识别

鬼谷子2008

已于 2024-08-02 13:53:01 修改

阅读量148

点赞数 2

文章标签： opencv 人工智能

于 2024-07-30 09:40:38 首次发布

本文链接：https://blog.csdn.net/guiguzi20080808/article/details/140788572

版权

Mediapipe classifier 分类识别

对图片、视频、实时视频流中的图像进行分类识别

对图片分类

```python
#step 1 导入资源
import cv2 as cv
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

#step 2 创建分类器
base_options = python.BaseOptions(
    model_asset_path='efficientnet_lite0.tflite' # 在kaggle下载的模型
    )
options = vision.ImageClassifierOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.IMAGE,
    max_results=1
)
with vision.ImageClassifier.create_from_options(options) as classifier:
#step 3 导入图像
    img = cv.imread('object/elephant.jfif')  # 图片
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_rgb)
#step 4 获取识别结果
    classifier_result = classifier.classify(mp_image)
#step 5 输出识别结果
    name = classifier_result.classifications[0].categories[0].category_name
    score = classifier_result.classifications[0].categories[0].score
    cv.putText(
        img,
        name,
        (0,20),
        cv.FONT_HERSHEY_SIMPLEX,
        0.5,
        (0, 0, 225),
        2)
    cv.putText(
        img,
        str(score),
        (0,40),
        cv.FONT_HERSHEY_SIMPLEX,
        0.5,
        (0, 0, 225),
        2)
    cv.imshow('image', img)
    cv.waitKey(0)
```![在这里插入图片描述](https://i-blog.csdnimg.cn/direct/13a5a866e4714916a19eec230d15e5af.png)

对视频分类

```python
#step 1 导入资源
import mediapipe as mp
import cv2 as cv
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import time

#step 2 创建分类器
base_options = python.BaseOptions(model_asset_path='efficientnet_lite0.tflite') # 在kaggle下载的模型
options = vision.ImageClassifierOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.VIDEO,
    max_results=1,
    score_threshold=0.5)
classifier = vision.ImageClassifier.create_from_options(options)

#step 3 读取视频
cv.namedWindow('win', cv.WINDOW_NORMAL)
cap = cv.VideoCapture('object/elephants.mp4')
frame_stamp_ms = 0
frame_index = 0
#get fps
fps = cap.get(cv.CAP_PROP_FPS)
while True:
    # read the frames from the video
    success, frame = cap.read()
    if not success:
        break
    frame_index += 1
    # calculate the timestamp
    frame_stamp_ms = int(frame_index * 1000 / fps)
    # convert the frame to RGB
    rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    # create a mediapipe image object
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
    #step 4 获得分类结果
    classification_result = classifier.classify_for_video(mp_image, frame_stamp_ms)
    # extract the result
    print(classification_result)
    #step 5 将结果输出
    if len(classification_result.classifications) > 0:
        for result in classification_result.classifications[0].categories:
            name = result.category_name
            score = result.score
            print(f"name: {name}", f"score:{score} ")
            # draw the result on the frame
            cv.putText(frame, f"{name} {score}", (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

    #show the frame
    cv.imshow('win', frame)
    # break the loop
    if cv.waitKey(10) & 0xFF == ord('q'):
        break

对视频流分类识别

# step 1 导入资源
import cv2 as cv
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

#step 2 回调函数
def print_result(result: vision.ImageClassifierResult, output_image: mp.Image, timestamp_ms: int):
    if  result.classifications[0].categories:
        for category in result.classifications[0].categories:
            label = category.category_name
            score = category.score
            name = category.display_name
            print(f"Label: {label}")
            print(f"Score: {score}")
            print(f"Name: {name}")
"""
class ClassificationResult:
  Contains the classification results of a model.

  Attributes:
    classifications: A list of `Classifications` objects, each for a head of the
      model.
    timestamp_ms: The optional timestamp (in milliseconds) of the start of the
      chunk of data corresponding to these results. This is only used for
      classification on time series (e.g. audio classification). In these use
      cases, the amount of data to process might exceed the maximum size that
      the model can process: to solve this, the input data is split into
      multiple chunks starting at different timestamps.
 

Classifications:
 Attributes:
    categories: The array of predicted categories, usually sorted by descending
      scores (e.g. from high to low probability).
    head_index: The index of the classifier head these categories refer to. This
      is useful for multi-head models.
    head_name: The name of the classifier head, which is the corresponding
      tensor metadata name.
"""
#step 3 创建分类器classifier
base_options = python.BaseOptions(
    model_asset_path='efficientnet_lite0.tflite') # 在kaggle下载的模型
options = vision.ImageClassifierOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.LIVE_STREAM,
    max_results=5,
    result_callback=print_result
)
with vision.ImageClassifier.create_from_options(options) as classifier:
#step 4 读取视频流stream
    cap = cv.VideoCapture(0)
    frame_index = 0
    fps = cap.get(cv.CAP_PROP_FPS)
    # print(f"Frames per second using video.get(cv2.CAP_PROP_FPS) : {fps}")
    while cap.isOpened():
        ret, frame = cap.read()
        # flip frame
        frame = cv.flip(frame, 1)
        if not ret:
            break
        frame_index += 1
        frame_timestamp_ms = int(1000 * frame_index / fps)
        frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
#step 5 获得识别结果，通过classifier调用回调函数print_result，获得result其中包含识别的分类名称等
        classifier.classify_async(mp_image, frame_timestamp_ms)
        cv.imshow('frame', frame)
        if cv.waitKey(5) & 0xFF == ord('q'):
            break
    cap.release()
    cv.destroyAllWindows()