基于Mediapipe的Python手势识别项目(附项目源码)

该文介绍了一个使用Mediapipe库在普通CPU环境下进行手势识别的Python项目。核心代码涉及OpenCV、Numpy和Mediapipe的集成,用于计算FPS、定义参数和处理图像。通过计算关键手部landmarks,确定手部中心坐标,进而绘制外接矩形和进行手势识别。文章还提供了一个基础框架,适用于扩展为更复杂的手势识别应用或游戏。
摘要由CSDN通过智能技术生成

识别效果

如下所示,可以在普通的CPU环境下直接进行识别:

核心代码

导入库 主要需要的库包括mediapipe、numpy、opencv-python

import copy
import argparse

import cv2 as cv
import numpy as np
import mediapipe as mp


from collections import deque
import cv2 as cv

1.计算左上角FPS的方法,根据当前的运行速度来计算FPS

class CvFpsCalc:
    """Frames-per-second meter driven by OpenCV's tick counter.

    Every call to :meth:`get` records the time elapsed since the previous
    call (or since construction, for the first call) and reports the FPS
    implied by the average of the most recent ``buffer_len`` intervals.
    """

    def __init__(self, buffer_len=1):
        # Tick value of the previous measurement.
        self._start_tick = cv.getTickCount()
        # Multiplier converting a tick difference into milliseconds.
        self._freq = 1000.0 / cv.getTickFrequency()
        # Bounded history of frame intervals, in milliseconds.
        self._difftimes = deque(maxlen=buffer_len)

    def get(self):
        """Record the interval since the last call and return the FPS.

        Returns:
            The averaged frames-per-second value, rounded to two decimals.
        """
        now = cv.getTickCount()
        elapsed_ms = (now - self._start_tick) * self._freq
        self._start_tick = now
        self._difftimes.append(elapsed_ms)

        mean_interval_ms = sum(self._difftimes) / len(self._difftimes)
        return round(1000.0 / mean_interval_ms, 2)

2.定义参数,参数部分可以保留默认值

def get_args():
    """Parse the command-line options of the hand-tracking demo.

    Returns:
        argparse.Namespace with the attributes ``device``, ``width``,
        ``height``, ``model_complexity``, ``max_num_hands``,
        ``min_detection_confidence``, ``min_tracking_confidence``,
        ``use_brect`` and ``plot_world_landmark``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    parser.add_argument("--model_complexity",
                        help='model_complexity(0,1(default))',
                        type=int,
                        default=1)

    parser.add_argument("--max_num_hands", type=int, default=2)
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.7)
    # The confidence is a fraction (default 0.5), so it must be parsed as a
    # float; parsing it as int rejects values such as "0.6".
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,
                        default=0.5)

    parser.add_argument('--use_brect', action='store_true')
    parser.add_argument('--plot_world_landmark', action='store_true')

    args = parser.parse_args()

    return args

3.定义主函数,调用Mediapipe,这里调用Mediapipe获得了识别到的手部信息

def main():
    """Run the webcam hand-tracking demo until ESC is pressed or capture ends.

    Reads the settings from ``get_args()``, opens the capture device, runs
    MediaPipe Hands on every mirrored frame and shows the annotated frame
    with an FPS counter in an OpenCV window.  The capture device, the
    MediaPipe ``Hands`` object and the OpenCV windows are released even when
    an exception interrupts the loop.
    """
    # Argument parsing #########################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    model_complexity = args.model_complexity

    max_num_hands = args.max_num_hands
    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    use_brect = args.use_brect
    plot_world_landmark = args.plot_world_landmark

    cap = cv.VideoCapture(cap_device)
    hands = None
    try:
        # Request the capture frame size
        cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

        # Set up MediaPipe Hands
        mp_hands = mp.solutions.hands
        hands = mp_hands.Hands(
            model_complexity=model_complexity,
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

        # FPS measurement, averaged over the last 10 frames
        cvFpsCalc = CvFpsCalc(buffer_len=10)

        # Optional 3D plot of the world landmarks
        if plot_world_landmark:
            # Imported lazily so matplotlib is only needed with this option
            import matplotlib.pyplot as plt

            fig = plt.figure()
            r_ax = fig.add_subplot(121, projection="3d")
            l_ax = fig.add_subplot(122, projection="3d")
            fig.subplots_adjust(left=0.0, right=1, bottom=0, top=1)

        while True:
            display_fps = cvFpsCalc.get()

            # Grab the next frame; stop when the capture yields nothing
            ret, image = cap.read()
            if not ret:
                break
            image = cv.flip(image, 1)  # mirror the frame horizontally
            debug_image = copy.deepcopy(image)

            # Run detection on an RGB copy (the captured frame is BGR)
            image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
            results = hands.process(image)

            # Drawing ##########################################################
            if results.multi_hand_landmarks is not None:
                for hand_landmarks, handedness in zip(
                        results.multi_hand_landmarks,
                        results.multi_handedness):
                    # Palm centroid
                    cx, cy = calc_palm_moment(debug_image, hand_landmarks)
                    # Bounding rectangle (helper defined elsewhere in the file)
                    brect = calc_bounding_rect(debug_image, hand_landmarks)
                    # Draw landmarks and, if enabled, the rectangle
                    debug_image = draw_landmarks(debug_image, cx, cy,
                                                 hand_landmarks, handedness)
                    debug_image = draw_bounding_rect(use_brect, debug_image,
                                                     brect)

            cv.putText(debug_image, "FPS:" + str(display_fps), (10, 30),
                       cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2,
                       cv.LINE_AA)

            if plot_world_landmark:
                if results.multi_hand_world_landmarks is not None:
                    plot_world_landmarks(
                        plt,
                        [r_ax, l_ax],
                        results.multi_hand_world_landmarks,
                        results.multi_handedness,
                    )

            # Quit on ESC
            key = cv.waitKey(1)
            if key == 27:  # ESC
                break

            # Show the annotated frame #########################################
            cv.imshow('MediaPipe Hand Demo', debug_image)
    finally:
        # Always free the MediaPipe object, the camera and the windows
        if hands is not None:
            hands.close()
        cap.release()
        cv.destroyAllWindows()

4.计算手掌中心位置,这里主要通过几个手部关键点的位置,取得了手部的中心坐标

def calc_palm_moment(image, landmarks):
    """Return the pixel coordinates of the palm centroid.

    The landmarks with indices 0, 1, 5, 9, 13 and 17 are converted from
    normalized coordinates to pixels and passed to ``cv.moments``; the
    centroid is derived from the resulting moments.

    Args:
        image: Array whose ``shape[0]`` / ``shape[1]`` are the frame height
            and width in pixels.
        landmarks: Object exposing a ``landmark`` sequence whose items have
            normalized ``x`` and ``y`` attributes.

    Returns:
        Tuple ``(cx, cy)`` of ints; ``(0, 0)`` when the zeroth moment is 0.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Palm outline per the MediaPipe hand model: wrist (0), thumb base (1)
    # and the base joints of the index (5), middle (9), ring (13) and
    # pinky (17) fingers.
    palm_indices = {0, 1, 5, 9, 13, 17}

    palm_points = []
    for index, landmark in enumerate(landmarks.landmark):
        if index not in palm_indices:
            continue
        # Scale to pixels and clamp to the last valid column/row
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        palm_points.append((landmark_x, landmark_y))

    # cv.moments accepts int32/float32 point arrays; NumPy's default integer
    # is platform dependent (often int64), so the dtype is fixed explicitly.
    # reshape keeps the (N, 2) layout even when no point was collected.
    palm_array = np.array(palm_points, dtype=np.int32).reshape(-1, 2)

    M = cv.moments(palm_array)
    cx, cy = 0, 0
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

    return cx, cy

5.画外接矩形框

def draw_bounding_rect(use_brect, image, brect):
    """Optionally draw a green bounding rectangle onto ``image``.

    Args:
        use_brect: When falsy, nothing is drawn.
        image: Frame that is drawn on in place.
        brect: Indexable holding the two opposite corners as
            ``[x1, y1, x2, y2]``.

    Returns:
        The same ``image`` object that was passed in.
    """
    if not use_brect:
        return image

    first_corner = (brect[0], brect[1])
    opposite_corner = (brect[2], brect[3])
    # Green outline, 2 px thick
    cv.rectangle(image, first_corner, opposite_corner, (0, 255, 0), 2)
    return image

接下来是一些根据手指的坐标画图的代码等

 主要都是根据这张图上的坐标 进行画线和连接。

这是一个非常好的基础项目,可以在它的基础上完成手势的识别,包装成大作业和毕设。因为调用Mediapipe后,会返回上图中每个关节点的(x, y)坐标,只需要进一步写一些判断逻辑,就可以判断具体的手势动作是什么了~

根据本博客的Mediapipe手势识别,打造了更进一步手势识别小游戏,见评论区

需要源码的(5r)可私信我yangsober@163.com

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值