1.摘要
MediaPipe 人脸检测是一种超快的人脸检测解决方案,具有 6 个landmarks和多人脸支持。它基于 BlazeFace,这是一种轻量级且性能良好的人脸检测器,专为移动 GPU 推理量身定制。
检测器的超实时性能使其能够应用于任何需要准确的面部感兴趣区域作为其他特定任务模型输入的实时取景器体验,例如 3D 面部关键点或几何估计(例如 MediaPipe Face Mesh)、面部特征或表情分类以及面部区域分割等。BlazeFace 使用了一个受 MobileNetV1/V2 启发但又与之不同的轻量级特征提取网络;其锚点方案由 Single Shot MultiBox Detector (SSD) 改进而来且对 GPU 更友好,并使用一种改进的平局消解策略(an improved tie resolution strategy)替代了非极大值抑制。
2.解决方案的API
2.1参数配置
MODEL_SELECTION
:索引为0或1的整数。使用0选择短距离模型,最适合距离相机2米以内的人脸;使用1选择全距离模型,最适合距离相机5米以内的人脸。对于全距离选项,采用稀疏模型来提高推理速度。如果未指定,默认为0。注意:在较新版本的 MediaPipe 中,此参数可能已被弃用。
MIN_DETECTION_CONFIDENCE
:来自人脸检测模型的最小置信值 ([0.0, 1.0]),以便将检测视为成功。默认为 0.5。
2.2输出
DETECTIONS
:检测到的人脸的集合,其中每个人脸都表示为一个检测原始消息,其中包含一个边界框和 6 个关键点(右眼、左眼、鼻尖、嘴巴中心、右耳和左耳)。边界框由xmin和width(由图像宽度归一化为[0.0,1.0])以及ymin和height(由图像高度归一化为[0.0,1.0])组成。每个关键点由 x 和 y 组成,分别通过图像宽度和高度归一化为 [0.0, 1.0]。
3.Python API解决方案
支持配置选项:
- model_selection
- min_detection_confidence
(1)对于图片序列
# python3.6.5 mediapipe=0.8.3
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Static-image mode: run face detection over a fixed list of image files.
IMAGE_FILES = ["trump.jpg"]
with mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection:
    for idx, file in enumerate(IMAGE_FILES):
        image = cv2.imread(file)
        # MediaPipe expects RGB input, while OpenCV loads images as BGR.
        results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Nothing to draw when no face was found in this image.
        if not results.detections:
            continue
        annotated_image = image.copy()
        for detection in results.detections:
            """The enum type of the six face detection key points.
            RIGHT_EYE = 0
            LEFT_EYE = 1
            NOSE_TIP = 2
            MOUTH_CENTER = 3
            RIGHT_EAR_TRAGION = 4
            LEFT_EAR_TRAGION = 5
            """
            print('Nose tip:')
            print(mp_face_detection.get_key_point(
                detection, mp_face_detection.FaceKeyPoint.NOSE_TIP))
            # Nose tip:
            # x: 0.3519737124443054
            # y: 0.4148605167865753
            mp_drawing.draw_detection(annotated_image, detection)
        cv2.imwrite('annotated_image' + str(idx) + '.png', annotated_image)
(2)对于视频
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Video-file input.
cap = cv2.VideoCapture("1.mp4")
with mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # If reading from a live camera, use 'continue' instead of 'break'.
            break
        # Mirror the frame for a selfie-style view and convert BGR -> RGB.
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        # Optionally mark the frame read-only so MediaPipe can pass it by reference.
        image.flags.writeable = False
        results = face_detection.process(image)
        # Draw the face-detection annotations back onto a writable BGR frame.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.detections:
            for detection in results.detections:
                mp_drawing.draw_detection(image, detection)
        cv2.imshow('MediaPipe Face Detection', image)
        # ESC (key code 27) exits the playback loop.
        if cv2.waitKey(5) & 0xFF == 27:
            break
cap.release()
(3)模块化
import cv2
import mediapipe as mp
import time
class FaceDetector():
    """Thin wrapper around MediaPipe's face-detection solution.

    Detects faces in BGR frames and optionally draws a corner-accented
    bounding box plus the detection confidence as a percentage.
    """

    def __init__(self, minDetectionCon=0.5):
        # Minimum confidence for a detection to be reported as a face.
        self.minDetectionCon = minDetectionCon
        self.mpFaceDetection = mp.solutions.face_detection
        self.mpDraw = mp.solutions.drawing_utils
        self.faceDetection = self.mpFaceDetection.FaceDetection(self.minDetectionCon)

    def findFaces(self, img, draw=True):
        """Detect faces in a BGR image.

        Returns (img, bboxs) where each bboxs entry is
        [index, (x, y, w, h) in pixels, score].
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.faceDetection.process(imgRGB)
        bboxs = []
        if self.results.detections:
            # Frame dimensions are invariant across detections; hoist them.
            frame_h, frame_w, _ = img.shape
            for det_idx, detection in enumerate(self.results.detections):
                rel = detection.location_data.relative_bounding_box
                # Convert the normalized box to absolute pixel coordinates.
                bbox = (int(rel.xmin * frame_w), int(rel.ymin * frame_h),
                        int(rel.width * frame_w), int(rel.height * frame_h))
                bboxs.append([det_idx, bbox, detection.score])
                if draw:
                    img = self.fancyDraw(img, bbox)
                    cv2.putText(img, f'{int(detection.score[0] * 100)}%',
                                (bbox[0], bbox[1] - 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)
        return img, bboxs

    def fancyDraw(self, img, bbox, l=30, t=5, rt=1):
        """Draw a thin rectangle with thick L-shaped accents at each corner."""
        x, y, w, h = bbox
        x1, y1 = x + w, y + h
        cv2.rectangle(img, bbox, (255, 0, 255), rt)
        # Each entry: (corner point, horizontal accent end, vertical accent end).
        corners = (
            ((x, y), (x + l, y), (x, y + l)),        # top-left
            ((x1, y), (x1 - l, y), (x1, y + l)),     # top-right
            ((x, y1), (x + l, y1), (x, y1 - l)),     # bottom-left
            ((x1, y1), (x1 - l, y1), (x1, y1 - l)),  # bottom-right
        )
        for corner, h_end, v_end in corners:
            cv2.line(img, corner, h_end, (255, 0, 255), t)
            cv2.line(img, corner, v_end, (255, 0, 255), t)
        return img
def main():
    """Demo driver: run FaceDetector on a video file with an FPS overlay.

    Fixes over the original: the key returned by cv2.waitKey is actually
    checked (ESC exits), the capture and windows are released on every
    exit path, and a zero time delta can no longer raise ZeroDivisionError.
    """
    cap = cv2.VideoCapture("1.mp4")
    pTime = 0
    detector = FaceDetector()
    try:
        while True:
            success, img = cap.read()
            if not success:
                # End of stream (or read error): stop instead of spinning.
                break
            img, bboxs = detector.findFaces(img)
            print(bboxs)
            cTime = time.time()
            dt = cTime - pTime
            # Guard: two consecutive reads can share a timestamp on fast sources.
            fps = 1 / dt if dt > 0 else 0
            pTime = cTime
            cv2.putText(img, f'FPS: {int(fps)}', (20, 70),
                        cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 2)
            cv2.imshow("Image", img)
            # ESC (key code 27) quits early; original discarded the key.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Release the capture device and close windows even on error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
参考目录
https://google.github.io/mediapipe/solutions/face_detection.html