import cv2
import mediapipe as mp
import cmath
import time
import numpy as np
import math
# Label shown on screen, indexed by the number of extended fingers counted
# across all detected hands in the current frame (0-10).
gesture = ["none","one","two","three","four","five","six","seven","eight","nine","ten"]
flag = 0   # extended-finger count; reset at the start of every frame
pTime = 0  # timestamp of the previous frame, used for the FPS readout
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.75)
cap = cv2.VideoCapture(0)
# Last fingertip position of the stroke being drawn (None = no stroke yet)
prev_x, prev_y = None, None
# Frame width and height as reported by the capture device
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Drawing board with a black background. It is blended onto every frame, so
# it must have the frame's size; fall back to 640x480 when the device
# reports no size (cap.get returns 0).
canvas = np.zeros((height or 480, width or 640, 3), dtype=np.uint8)
# Whether the draggable square is drawn on the current frame
rect = False
# Initial geometry of the draggable square: top-left corner and size
x = 100
y = 100
w = 100
h = 100
# Offset between the grabbing fingertip and the square's top-left corner,
# captured at the moment the square is grabbed
L1 = 0
L2 = 0
# True while the square is being held by a pinch
on_square = False
square_color = (0, 255, 0)
def _squared_distance(a, b):
    """Return the squared 2-D distance between two landmarks (uses .x/.y)."""
    return (a.x - b.x) ** 2 + (a.y - b.y) ** 2


# Main loop: grab a frame, count extended fingers, then act on the gesture:
#   five  -> draw on the canvas with the middle fingertip
#   three -> clear the canvas
#   two   -> show a square that can be dragged by pinching index and middle
# The loop ends when Enter is pressed or the camera stops delivering frames.
try:
    while True:
        # Finger count is accumulated over every hand detected in this frame
        flag = 0
        ret, frame = cap.read()
        if not ret or frame is None:
            # No frame available (camera missing or stream ended): stop
            # instead of passing None to OpenCV.
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # The camera image is mirrored, so flip it horizontally
        # (skip the flip if your camera is not mirrored).
        frame = cv2.flip(frame, 1)
        results = hands.process(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        frame_h, frame_w = frame.shape[:2]
        # addWeighted needs both images to have the same size, so make sure
        # the canvas matches what the camera actually delivers.
        if canvas.shape != frame.shape:
            canvas = np.zeros_like(frame)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                lm = hand_landmarks.landmark
                # Squared distances between key points decide whether a
                # finger is extended. The landmark 0 -> 5 distance is the
                # per-hand reference, so the thresholds scale with the hand.
                base = _squared_distance(lm[0], lm[5]) / 0.6
                for tip_id in (8, 12, 16, 20):
                    if _squared_distance(lm[0], lm[tip_id]) > base:
                        flag += 1
                # Landmark 4 is compared against landmark 5 rather than 0
                if _squared_distance(lm[5], lm[4]) > base * 0.2:
                    flag += 1
                flag = min(flag, 10)
                if flag == 5:
                    # Pixel position of the middle fingertip. Kept in its
                    # own variables so the square's x/y are left untouched.
                    tip = lm[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
                    tip_x = int(tip.x * frame_w)
                    tip_y = int(tip.y * frame_h)
                    # Mark the fingertip on the live frame
                    cv2.circle(frame, (tip_x, tip_y), 5, (0, 0, 255), -1)
                    # Extend the stroke on the canvas
                    if prev_x is not None and prev_y is not None:
                        cv2.line(canvas, (prev_x, prev_y), (tip_x, tip_y), (0, 255, 0), 3)
                    # Remember this point as the start of the next segment
                    prev_x, prev_y = tip_x, tip_y
                if flag == 3:
                    # Wipe the canvas and forget the current stroke
                    canvas = np.zeros_like(frame)
                    prev_x, prev_y = None, None
                if flag == 2:
                    rect = True
                    # Index fingertip
                    index_finger_x = int(lm[8].x * frame_w)
                    index_finger_y = int(lm[8].y * frame_h)
                    # Middle fingertip
                    middle_finger_x = int(lm[12].x * frame_w)
                    middle_finger_y = int(lm[12].y * frame_h)
                    # Distance between the two fingertips, in pixels
                    finger_distance = math.hypot(
                        middle_finger_x - index_finger_x,
                        middle_finger_y - index_finger_y)
                    if finger_distance < 60:
                        # Fingers pinched together: grab the square if the
                        # index fingertip lies inside it.
                        if x < index_finger_x < x + w and y < index_finger_y < y + h:
                            if not on_square:
                                # Keep the grab offset so the square does
                                # not jump to the fingertip.
                                L1 = index_finger_x - x
                                L2 = index_finger_y - y
                                square_color = (255, 0, 255)
                                on_square = True
                    else:
                        # Fingers apart: release the square
                        on_square = False
                        square_color = (0, 255, 0)
                    # Move the square along with the fingertip while held
                    if on_square:
                        x = index_finger_x - L1
                        y = index_finger_y - L2
                if flag != 2:
                    rect = False
                # Draw the hand landmarks and their connections
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())
        if rect:
            cv2.rectangle(frame, (x, y), (x + w, y + h), square_color, -1)
        # Overlay the drawing canvas on the camera image
        frame = cv2.addWeighted(frame, 1, canvas, 0.5, 0)
        cv2.putText(frame, gesture[flag], (50, 50), 0, 1.3, (0, 0, 255), 3)
        cTime = time.time()
        elapsed = cTime - pTime
        # Two frames can share a timestamp on coarse clocks; avoid dividing by 0
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(frame, f'FPS: {int(fps)}', (480, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 124, 90), 1)
        cv2.imshow('MediaPipe', frame)
        # 13 is the key code of Enter
        if cv2.waitKey(1) & 0xFF == 13:
            break
finally:
    # Release the camera, the MediaPipe graph and the window even when the
    # loop is left through an exception (e.g. Ctrl+C).
    cap.release()
    hands.close()
    cv2.destroyAllWindows()