Recognition Results
As shown below, recognition can be run directly in an ordinary CPU environment:
Core Code
Importing the libraries: the main packages needed are mediapipe, numpy and opencv-python.
import argparse
import copy
from collections import deque

import cv2 as cv
import numpy as np
import mediapipe as mp
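If these packages are not installed yet, they can be added with pip; below is a minimal sanity check (the post does not pin any package versions, so none are assumed here):

# Install (run in a shell, not in Python):
#   pip install mediapipe opencv-python numpy
# Quick check that the three dependencies import correctly:
import mediapipe, cv2, numpy
print(mediapipe.__version__, cv2.__version__, numpy.__version__)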
1. The helper that computes the FPS shown in the top-left corner; the FPS is derived from the current processing speed.
class CvFpsCalc(object):
    def __init__(self, buffer_len=1):
        self._start_tick = cv.getTickCount()
        self._freq = 1000.0 / cv.getTickFrequency()  # milliseconds per tick
        self._difftimes = deque(maxlen=buffer_len)

    def get(self):
        current_tick = cv.getTickCount()
        different_time = (current_tick - self._start_tick) * self._freq  # elapsed ms since the last call
        self._start_tick = current_tick

        self._difftimes.append(different_time)

        fps = 1000.0 / (sum(self._difftimes) / len(self._difftimes))  # average over the buffer
        fps_rounded = round(fps, 2)

        return fps_rounded
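CvFpsCalc keeps the intervals between consecutive get() calls in a fixed-length deque and averages them, so the displayed value is a smoothed FPS over the last buffer_len frames. A standalone usage sketch (the per-frame processing here is only a placeholder):

fps_calc = CvFpsCalc(buffer_len=10)
for _ in range(30):
    # ... read and process one frame here ...
    fps = fps_calc.get()  # smoothed FPS over up to the last 10 frames
print("FPS:", fps)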
2. Define the command-line arguments; the defaults can simply be kept.
def get_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=960)
    parser.add_argument("--height", help='cap height', type=int, default=540)

    parser.add_argument("--model_complexity",
                        help='model_complexity(0,1(default))',
                        type=int,
                        default=1)
    parser.add_argument("--max_num_hands", type=int, default=2)
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.7)
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,  # a confidence in [0, 1], so float rather than int
                        default=0.5)

    parser.add_argument('--use_brect', action='store_true')
    parser.add_argument('--plot_world_landmark', action='store_true')

    args = parser.parse_args()

    return args
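Note that use_brect and plot_world_landmark are store_true flags, so they default to False and must be passed explicitly. A small sketch of how the flags behave (the script name demo.py is just a placeholder):

import sys

# Simulate passing command-line flags; "demo.py" is a hypothetical script name.
sys.argv = ["demo.py", "--use_brect", "--max_num_hands", "1", "--width", "1280"]
args = get_args()
print(args.use_brect, args.max_num_hands, args.width)  # True 1 1280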
3. Define the main function; this is where MediaPipe is called to obtain the detected hand information.
def main():
    # Argument parsing #######################################################
    args = get_args()

    cap_device = args.device
    cap_width = args.width
    cap_height = args.height

    model_complexity = args.model_complexity
    max_num_hands = args.max_num_hands
    min_detection_confidence = args.min_detection_confidence
    min_tracking_confidence = args.min_tracking_confidence

    use_brect = args.use_brect
    plot_world_landmark = args.plot_world_landmark

    # Set up the camera and frame size
    cap = cv.VideoCapture(cap_device)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

    # Set up MediaPipe Hands
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        model_complexity=model_complexity,
        max_num_hands=max_num_hands,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

    # FPS measurement
    cvFpsCalc = CvFpsCalc(buffer_len=10)

    # 3D world-landmark plot
    if plot_world_landmark:
        import matplotlib.pyplot as plt

        fig = plt.figure()
        r_ax = fig.add_subplot(121, projection="3d")
        l_ax = fig.add_subplot(122, projection="3d")
        fig.subplots_adjust(left=0.0, right=1, bottom=0, top=1)

    while True:
        display_fps = cvFpsCalc.get()

        # Read one frame
        ret, image = cap.read()
        if not ret:
            break
        image = cv.flip(image, 1)  # mirror display
        debug_image = copy.deepcopy(image)

        # Run hand detection
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        results = hands.process(image)

        # Drawing #############################################################
        if results.multi_hand_landmarks is not None:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                                  results.multi_handedness):
                # Palm centroid
                cx, cy = calc_palm_moment(debug_image, hand_landmarks)
                # Bounding rectangle
                brect = calc_bounding_rect(debug_image, hand_landmarks)
                # Draw
                debug_image = draw_landmarks(debug_image, cx, cy,
                                             hand_landmarks, handedness)
                debug_image = draw_bounding_rect(use_brect, debug_image, brect)

        cv.putText(debug_image, "FPS:" + str(display_fps), (10, 30),
                   cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, cv.LINE_AA)

        # World-landmark plot
        if plot_world_landmark:
            if results.multi_hand_world_landmarks is not None:
                plot_world_landmarks(
                    plt,
                    [r_ax, l_ax],
                    results.multi_hand_world_landmarks,
                    results.multi_handedness,
                )

        # Press ESC to quit
        key = cv.waitKey(1)
        if key == 27:  # ESC
            break

        # Show the frame ######################################################
        cv.imshow('MediaPipe Hand Demo', debug_image)

    cap.release()
    cv.destroyAllWindows()
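The post does not show the script entry point; assuming the code above is saved as a single script, the standard guard is needed so that main() actually runs:

if __name__ == '__main__':
    main()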
4. Get the hand position: the center coordinates of the hand are computed here from the positions of several hand keypoints.
def calc_palm_moment(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    palm_array = np.empty((0, 2), int)

    for index, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)

        landmark_point = [np.array((landmark_x, landmark_y))]

        if index == 0:  # wrist
            palm_array = np.append(palm_array, landmark_point, axis=0)
        if index == 1:  # base of the thumb
            palm_array = np.append(palm_array, landmark_point, axis=0)
        if index == 5:  # base of the index finger
            palm_array = np.append(palm_array, landmark_point, axis=0)
        if index == 9:  # base of the middle finger
            palm_array = np.append(palm_array, landmark_point, axis=0)
        if index == 13:  # base of the ring finger
            palm_array = np.append(palm_array, landmark_point, axis=0)
        if index == 17:  # base of the little finger
            palm_array = np.append(palm_array, landmark_point, axis=0)

    M = cv.moments(palm_array)
    cx, cy = 0, 0
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

    return cx, cy
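calc_palm_moment collects the wrist and the bases of the five fingers into a small polygon and uses cv.moments to compute its centroid, which is later drawn as the palm center. A quick sanity check of that centroid calculation on hypothetical points (int32 is used explicitly to keep OpenCV happy):

pts = np.array([[100, 200], [140, 200], [120, 260]], dtype=np.int32)
M = cv.moments(pts)
cx, cy = int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])
print(cx, cy)  # 120 220 -- the centroid of this triangle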
5. Draw the bounding rectangle
def draw_bounding_rect(use_brect, image, brect):
    if use_brect:
        # bounding rectangle
        cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[3]),
                     (0, 255, 0), 2)

    return image
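The calc_bounding_rect helper called in main() is not included in this excerpt. A minimal sketch that is consistent with how brect is consumed above (it must return [x_min, y_min, x_max, y_max] in pixel coordinates) might look like this:

def calc_bounding_rect(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    points = []
    for landmark in landmarks.landmark:
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        points.append((landmark_x, landmark_y))

    # Axis-aligned rectangle around all 21 hand landmarks
    x, y, w, h = cv.boundingRect(np.array(points, dtype=np.int32))

    return [x, y, x + w, y + h]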
What follows in the original post is the drawing code based on the finger coordinates.
The lines and connections are all drawn according to the landmark coordinates shown in this figure.
This is a very good base project: you can build gesture recognition on top of it and package it into a course assignment or graduation project. Because the MediaPipe call returns the (x, y) coordinates of every joint shown in the figure above, you only need to write some additional judgment logic to decide what the specific gesture is, as in the sketch below.
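As one concrete example of such judgment logic, the sketch below counts how many of the four non-thumb fingers are extended by comparing each fingertip with its PIP joint. The landmark indices follow MediaPipe's hand model, but the comparison itself is only a rough starting point and is not part of the original post:

def count_extended_fingers(hand_landmarks):
    # MediaPipe hand landmark indices: tip / PIP joint for index, middle, ring, pinky
    finger_tips = [8, 12, 16, 20]
    finger_pips = [6, 10, 14, 18]
    lm = hand_landmarks.landmark
    count = 0
    for tip, pip in zip(finger_tips, finger_pips):
        # In image coordinates y grows downwards, so an extended finger
        # has its tip above (smaller y than) its PIP joint.
        if lm[tip].y < lm[pip].y:
            count += 1
    return count

Calling count_extended_fingers(hand_landmarks) inside the loop in main() and mapping the count 0-4 to gestures such as a fist or an open palm is one possible starting point; the thumb is ignored here because it usually needs an x-axis comparison that depends on handedness.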
Building on the MediaPipe hand recognition in this post, I made a more advanced gesture-recognition mini-game; see the comments section.
If you need the source code (5r), you can message me at yangsober@163.com.