Rule-Based Gesture Recognition

Using an object-detection network such as YOLOv8 to recognize gestures directly requires annotating a large dataset, and the results are still not great. This article instead builds on MediaPipe, an open-source hand keypoint library, and recognizes gestures by applying a set of hand-crafted rules (a "strategy") to the detected keypoints.

Currently supported gestures:
1 2 3 3_variant 4 5 6 7 8 9 0 fist good bad ok agree love rubbish despise wish swordfinger Vulcan_salute stop Orchid1 Orchid2 catch 6+1 under_control provocation seduce
That is 30 gestures in total, plus an extra label, unknown, for anything that is not recognized.
Some example results first:
[Example detection screenshots of the supported gestures]

Now for the code.
Dependencies to install:

pip install numpy
pip install opencv-python
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple mediapipe
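
A quick sanity check that the dependencies import correctly (the printed versions are just whatever pip resolved; nothing specific is required):

import cv2
import mediapipe
import numpy
print(cv2.__version__, mediapipe.__version__, numpy.__version__)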

Create a file named utils.py with the following contents:

import numpy as np
import itertools

# Rough check whether a point lies inside a triangle
# (a loose area-based heuristic: area(PAB) + area(PAC) < area(ABC))
def piont_in_triangle(P, A, B, C):
    P = project_point_on_plane(P, A, B, C)
    # compare the areas of PAB and PAC against the area of ABC
    return np.linalg.norm(np.cross(P-A, P-B)) + np.linalg.norm(np.cross(P-A, P-C)) < np.linalg.norm(np.cross(C-A, C-B))

# Check whether a line segment crosses the triangle defined by three points
def segments_cross_planes(p1, p2, A, B, C):
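    # The intersection of line p1p2 with plane ABC is found by solving a 3x3 linear system
    # via Cramer's rule: the first row is the plane equation n_vec . X = n_vec . A, and the
    # other two rows describe the line by eliminating the parameter from its xy and xz
    # projections.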

    n_vec = np.cross(A - B, A - C)
    e1_c, e2_c, e3_c = np.dot(n_vec, A), p1[0]*p2[1]-p2[0]*p1[1], p1[0]*p2[2]-p2[0]*p1[2]
    mat1_d = np.array([[n_vec[0],       n_vec[1],       n_vec[2]], 
                        [p2[1]-p1[1],   p1[0]-p2[0],    0], 
                        [p2[2]-p1[2],   0,              p1[0]-p2[0]]])
    
    d_d = np.linalg.det(mat1_d)
    if d_d == 0:
        return False
    mat1_x = np.array([[e1_c,           n_vec[1],       n_vec[2]], 
                        [e2_c,          p1[0]-p2[0],    0], 
                        [e3_c,          0,              p1[0]-p2[0]]])
    
    mat1_y = np.array([[n_vec[0],       e1_c,           n_vec[2]], 
                        [p2[1]-p1[1],   e2_c,           0], 
                        [p2[2]-p1[2],   e3_c,           p1[0]-p2[0]]])
    
    mat1_z = np.array([[n_vec[0],       n_vec[1],       e1_c], 
                        [p2[1]-p1[1],   p1[0]-p2[0],    e2_c], 
                        [p2[2]-p1[2],   0,              e3_c]])
    
    x = np.linalg.det(mat1_x) / d_d
    y = np.linalg.det(mat1_y) / d_d
    z = np.linalg.det(mat1_z) / d_d
    P = np.array([x, y, z])

    return piont_in_triangle(P, A, B, C) and np.dot(P - p1, P - p2)<0

# Angle between two vectors, in degrees
def angle_between_vectors(v1, v2):
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)

    if norm_v1 * norm_v2 == 0:
        return 0
    
    cos_angle = dot_product / (norm_v1 * norm_v2)
    cos_angle = np.clip(cos_angle, -1, 1)
    angle_rad = np.arccos(cos_angle)
    angle_deg = np.degrees(angle_rad)
    return angle_deg
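
# Example with made-up vectors: angle_between_vectors(np.array([1, 0, 0]), np.array([0, 1, 0]))
# returns 90.0; the 40-degree thresholds in process_finger below compare against values like this.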

# Project a point onto the plane defined by three other points
def project_point_on_plane(P, A, B, C):
    # plane normal
    v1 = B - A
    v2 = C - A
    normal = np.cross(v1, v2)
    if np.linalg.norm(normal) == 0:
        return P
    normal = normal / np.linalg.norm(normal)  # normalize to a unit vector

    # signed distance of P from the plane along the (unit) normal
    v = P - A
    distance = np.dot(v, normal) / np.linalg.norm(normal)**2

    # projected point
    projection = P - distance * normal
    return projection

# Distance from a point to the plane defined by three other points
def distance_point_plane(P, A, B, C):
    p2 = project_point_on_plane(P, A, B, C)
    return np.linalg.norm(p2 - P) 

# Distance from a point to the line through two points
def distance_point_line(P, A, B):
    if P.shape[0] == 2:
        P = np.append(P, [0])
    if A.shape[0] == 2:
        A = np.append(A, [0])
    if B.shape[0] == 2:
        B = np.append(B, [0])

    if np.linalg.norm(A - B) == 0:
        return np.linalg.norm(P - B)
    elif np.linalg.norm(P - A) == 0 or np.linalg.norm(P - B) == 0:
        return 0
    else:
        return abs(np.linalg.norm(np.cross(P - A, P - B)) / np.linalg.norm(A - B))

# For the two lines defined by four points, find where their common perpendicular meets each line,
# and check whether both intersection points lie inside their respective segments
def intersection_in_line_segment(p1, p2, p3, p4):
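    # The common perpendicular of the two lines has direction n_ln12 = ln1 x ln2. Its foot on
    # line p3p4 is where that line meets the plane through p2 spanned by ln1 and n_ln12
    # (normal n_p12); the foot on line p1p2 is found symmetrically. Each intersection is
    # solved as a 3x3 system with Cramer's rule, as in segments_cross_planes above.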

    ln1, ln2 = p2 - p1, p4 - p3
    n_ln12 = np.cross(ln1, ln2)
    n_p12, n_p34 = np.cross(n_ln12, ln1), np.cross(n_ln12, ln2)
    # print(n_ln12)
    # print(n_p12)
    # intersection point on line p3p4
    # right-hand-side constants of the linear system
    e1_c, e2_c, e3_c = np.dot(n_p12, p2), p3[0]*p4[1]-p4[0]*p3[1], p3[0]*p4[2]-p4[0]*p3[2]
    mat2_d = np.array([[n_p12[0],       n_p12[1],       n_p12[2]], 
                        [p4[1]-p3[1],   p3[0]-p4[0],    0], 
                        [p4[2]-p3[2],   0,              p3[0]-p4[0]]])
    
    mat2_x = np.array([[e1_c,           n_p12[1],       n_p12[2]], 
                        [e2_c,          p3[0]-p4[0],    0], 
                        [e3_c,          0,              p3[0]-p4[0]]])
    
    mat2_y = np.array([[n_p12[0],       e1_c,           n_p12[2]], 
                        [p4[1]-p3[1],   e2_c,           0], 
                        [p4[2]-p3[2],   e3_c,           p3[0]-p4[0]]])
    
    mat2_z = np.array([[n_p12[0],       n_p12[1],       e1_c], 
                        [p4[1]-p3[1],   p3[0]-p4[0],    e2_c], 
                        [p4[2]-p3[2],   0,              e3_c]])
    
    d2_d = np.linalg.det(mat2_d)
    if d2_d == 0:
        return False
    x2 = np.linalg.det(mat2_x) / d2_d
    y2 = np.linalg.det(mat2_y) / d2_d
    z2 = np.linalg.det(mat2_z) / d2_d

    inter2 = np.array([x2, y2, z2])


    # intersection point on line p1p2
    # right-hand-side constants of the linear system
    e1_c, e2_c, e3_c = np.dot(n_p34, p3), p1[0]*p2[1]-p2[0]*p1[1], p1[0]*p2[2]-p2[0]*p1[2]
    mat1_d = np.array([[n_p34[0],       n_p34[1],       n_p34[2]], 
                        [p2[1]-p1[1],   p1[0]-p2[0],    0], 
                        [p2[2]-p1[2],   0,              p1[0]-p2[0]]])
    
    mat1_x = np.array([[e1_c,           n_p34[1],       n_p34[2]], 
                        [e2_c,          p1[0]-p2[0],    0], 
                        [e3_c,          0,              p1[0]-p2[0]]])
    
    mat1_y = np.array([[n_p34[0],       e1_c,           n_p34[2]], 
                        [p2[1]-p1[1],   e2_c,           0], 
                        [p2[2]-p1[2],   e3_c,           p1[0]-p2[0]]])
    
    mat1_z = np.array([[n_p34[0],       n_p34[1],       e1_c], 
                        [p2[1]-p1[1],   p1[0]-p2[0],    e2_c], 
                        [p2[2]-p1[2],   0,              e3_c]])
    
    d1_d = np.linalg.det(mat1_d)
    if d1_d == 0:
        return False
    x1 = np.linalg.det(mat1_x) / d1_d
    y1 = np.linalg.det(mat1_y) / d1_d
    z1 = np.linalg.det(mat1_z) / d1_d
    inter1 = np.array([x1, y1, z1])

    # print(inter1)
    # print(inter2)

    inter_in_line1 = np.dot(p1 - inter1, p2 - inter1)
    inter_in_line2 = np.dot(p3 - inter2, p4 - inter2)
    return (inter_in_line1 < 0 and inter_in_line2 < 0)

# State of a single finger
def process_finger(P0, P1, P2, P3):
    # arguments are the wrist followed by the finger joints, from base to tip
    arg1 = angle_between_vectors(P1 - P0, P2 - P1)
    arg2 = angle_between_vectors(P2 - P1, P3 - P2)
    if (arg1 < 40 and arg2 < 40) or arg1 + arg2 < 50:
        return 0        # straight
    elif arg1 < 40 and arg2 >= 40:
        return 1        # half bent
    elif (arg1 >= 40 and arg2 >= 40):
        return 2        # fully bent
    elif arg1 >= 40 and arg2 < 40:
        return -2       # base joint bent, upper part straight
    else:
        return -1       # undefined / odd pose

# Classify the whole hand and return the gesture name
def process_hand(hand_cds, details=False):

    # state of each individual finger (thumb, index, middle, ring, little)
    hand_state = [process_finger(hand_cds[0], hand_cds[2], hand_cds[3], hand_cds[4])
                ,process_finger(hand_cds[0], hand_cds[5], hand_cds[6], hand_cds[7])
                ,process_finger(hand_cds[0], hand_cds[9], hand_cds[10], hand_cds[11])
                ,process_finger(hand_cds[0], hand_cds[13], hand_cds[14], hand_cds[15])
                ,process_finger(hand_cds[0], hand_cds[17], hand_cds[18], hand_cds[19])]

    # average bone length of the hand
    ids = [(0, 1), (1, 2), (2, 3), (3, 4),  # thumb
            (5, 6), (6, 7), (7, 8),          # index finger
            (9, 10), (10, 11), (11, 12),     # middle finger
            (13, 14), (14, 15), (15, 16),    # ring finger
            (17, 18), (18, 19), (19, 20),       # little finger
            (0, 5),(0, 9), (0, 13), (0, 17)]   
    mean_bone_length = 0
    for id in ids:
        mean_bone_length += np.linalg.norm(hand_cds[id[0]] - hand_cds[id[1]])
    mean_bone_length /= len(ids)
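    # distances below are divided by mean_bone_length, so the thresholds are insensitive to
    # how large the hand appears in the frame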

    # minimum distance between the thumb tip and the joints of the other fingers
    other_joints = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
    thumb_dis, thumb_dis_2d = [], []
    for joint in other_joints:
        thumb_dis.append(np.linalg.norm(hand_cds[4] - hand_cds[joint]))  
        thumb_dis_2d.append(np.linalg.norm(hand_cds[4,:2] - hand_cds[joint,:2]))  
    min_thumb_dis = min(thumb_dis) / mean_bone_length
    min_thumb_dis_2d = min(thumb_dis_2d) / mean_bone_length

    # hand direction: angle between the combined wrist-to-knuckle vector and the image's upward direction
    hand_vec = hand_cds[2] + hand_cds[5] + hand_cds[9] + hand_cds[13] + hand_cds[17] - 5 * hand_cds[0]
    hand_angle = angle_between_vectors(hand_vec, np.array([0, -1, 0]))

    # palm normal and its angle to the z axis
    hand_normal =  np.cross(hand_cds[5] - hand_cds[0], hand_cds[13] - hand_cds[0]) + \
                    np.cross(hand_cds[17] - hand_cds[0], hand_cds[9] - hand_cds[0])


    hand_phi = angle_between_vectors(hand_normal, np.array([0, 0, 1]))

    # The "7" check is the trickiest: thumb/index/middle tips close together (ring and little bent),
    # or the first four tips close (little bent), or all five tips close; precompute the pairwise
    # fingertip distances for these cases
    dis_m = [[], [], []]
    fingertips = [4, 8, 12, 16, 20]
    for i in range(3):
        for pair in itertools.combinations(fingertips[:3+i], 2):
            dis_m[i].append(np.linalg.norm(hand_cds[pair[0],:2] - hand_cds[pair[1],:2]))

    # gesture rules
    gesture_name = "unknown"
    if (segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[5], hand_cds[6], hand_cds[9]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[6], hand_cds[9], hand_cds[10]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[9], hand_cds[10], hand_cds[13]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[10], hand_cds[13], hand_cds[14]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[13], hand_cds[14], hand_cds[17]) or\
        segments_cross_planes(hand_cds[3], hand_cds[4], hand_cds[14], hand_cds[17], hand_cds[18])) and\
        np.linalg.norm(hand_cds[8] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2 and\
        np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2 and \
        hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and hand_state[4] in [1, 2, -2]:
        gesture_name = "provocation"
    elif hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and hand_state[4] in [1, 2, -2] and np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length < 0.9 and\
            np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length < 0.9 and\
            angle_between_vectors(hand_cds[8] - hand_cds[7], hand_cds[7] - hand_cds[6]) + angle_between_vectors(hand_cds[7] - hand_cds[6], hand_cds[6] - hand_cds[5]) > 100 and\
            angle_between_vectors(hand_cds[12] - hand_cds[11], hand_cds[11] - hand_cds[10]) + angle_between_vectors(hand_cds[11] - hand_cds[10], hand_cds[10] - hand_cds[9]) > 100 and\
            angle_between_vectors(hand_cds[16] - hand_cds[15], hand_cds[15] - hand_cds[14]) + angle_between_vectors(hand_cds[15] - hand_cds[14], hand_cds[14] - hand_cds[13]) > 100 and\
            angle_between_vectors(hand_cds[20] - hand_cds[19], hand_cds[19] - hand_cds[18]) + angle_between_vectors(hand_cds[19] - hand_cds[18], hand_cds[18] - hand_cds[17]) > 100 and\
            np.linalg.norm(hand_cds[8] - hand_cds[0]) / mean_bone_length < 3 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 3 and\
            np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 3 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 3:

        
        
        if min_thumb_dis > 0.6 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) < 80 and\
            np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "good"
            
        elif min_thumb_dis > 1 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) > 110 and\
            np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "bad"

        elif angle_between_vectors(hand_cds[4] - hand_cds[3], hand_cds[3] - hand_cds[2]) > 20 and\
              np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1 and angle_between_vectors(hand_cds[4] - hand_cds[0], np.array([0, -1, 0])) < 80:
            gesture_name = "agree"
        elif hand_state[0] >= 1 or min_thumb_dis_2d < 0.8 or hand_state[0] == -2:
            gesture_name = "fist"
    
    if gesture_name == 'unknown' and (max(dis_m[0]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1) and \
        np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 1.8 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 1.8:
        gesture_name = "7"
    # elif np.linalg.norm(hand_cds[12,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
    #     np.linalg.norm(hand_cds[8,:2] - hand_cds[16,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[8,:2] - hand_cds[20,:2]) / mean_bone_length > 1 and\
    #     np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and np.linalg.norm(hand_cds[20,:2] - hand_cds[16,:2]) / mean_bone_length < 0.9 and\
    #     hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2 and np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2 and\
    #     np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 0.65:    
    #     pass
        
    elif hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and np.linalg.norm(hand_cds[12] - hand_cds[0]) / mean_bone_length < 2.5 and np.linalg.norm(hand_cds[16] - hand_cds[0]) / mean_bone_length < 2.5 and\
        np.linalg.norm(hand_cds[20] - hand_cds[0]) / mean_bone_length < 2.5 and hand_state[0] == 0 and hand_state[1] != 2:
        if intersection_in_line_segment(hand_cds[4], hand_cds[3] + 3 * (hand_cds[3] - hand_cds[4]), hand_cds[8], hand_cds[7] + 3 * (hand_cds[7] - hand_cds[8])) and np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length > 0.7 and\
            np.linalg.norm(hand_cds[8] - hand_cds[12]) / mean_bone_length > 0.7:
            gesture_name = "love"
        elif np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length < 0.7:
            gesture_name = "under_control"
    if  gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] in [0, -2] and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "1"

    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[4] in [0, -2] and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[1] >= 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[16,:2]) / mean_bone_length > 1:
        gesture_name = "rubbish"

    
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] >= 1 and hand_state[2] in [0, -2] and hand_state[3] >= 1 and hand_state[4] >= 1:
        gesture_name = "despise"
    
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] in [0, -2] and hand_state[2] in [0, -2] and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        if distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
            (not intersection_in_line_segment(hand_cds[12], hand_cds[9], hand_cds[8], hand_cds[5])):
            gesture_name = "2"

        elif intersection_in_line_segment(hand_cds[12], hand_cds[9], hand_cds[8], hand_cds[5]):
            gesture_name = "wish"
        else:
            gesture_name = "swordfinger"
    
    if gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] >= 1 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "3"
    
    elif hand_angle < 100 and (hand_state[0] >= 1 or min_thumb_dis < 0.5 or hand_state[0] == -2) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
        distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]) and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1:
        gesture_name = "4"
    
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] == 0 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1 and \
        not piont_in_triangle(hand_cds[4], hand_cds[8], hand_cds[0], hand_cds[12]) and\
        not piont_in_triangle(hand_cds[4], hand_cds[16], hand_cds[0], hand_cds[12]) and\
        not piont_in_triangle(hand_cds[4], hand_cds[16], hand_cds[0], hand_cds[20]):
        if distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and\
            distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
            distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]):
            gesture_name = "5"
        elif (distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) - distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) - distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])) / mean_bone_length > 0.5 and\
            (distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) - distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20])) / mean_bone_length < 0.2:
            gesture_name = "Vulcan_salute"
        elif (distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) - distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) - distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16])) / mean_bone_length < 0.2 and\
            (distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) - distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20])) / mean_bone_length < 0.2 and\
            distance_point_plane(hand_cds[4], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length < 1 :
            gesture_name = "stop"
    
    if gesture_name == 'unknown' and hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] >= 1 and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] == 0 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5:
        gesture_name = "6"
    
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and hand_state[1] == 0 and hand_state[2] >= 1 and hand_state[3] >= 1 and (hand_state[4] == 0 or hand_state[4] == -2) and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[4,:2]) / mean_bone_length > 1.5 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[16,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[16,:2] - hand_cds[20,:2]) / mean_bone_length > 1.5:
        gesture_name = "6+1"
    
    elif hand_angle < 100 and ((hand_state[0] >= 1 or hand_state[0] == -2) and hand_state[1] >= 1 and hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0) or\
        hand_state[2] == 0 and hand_state[3] == 0 and hand_state[4] == 0 and np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 0.5 and np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length > 2 and\
        np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length > 2 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 2 and\
        distance_point_line(hand_cds[12], hand_cds[13], hand_cds[16]) > distance_point_line(hand_cds[9], hand_cds[13], hand_cds[16]) and\
        distance_point_line(hand_cds[16], hand_cds[17], hand_cds[20]) > distance_point_line(hand_cds[13], hand_cds[17], hand_cds[20]):
        gesture_name = "ok"
    
    elif hand_angle < 100 and (np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length < 0.6) and \
        np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[12] - hand_cds[8]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12] - hand_cds[20]) / mean_bone_length > 1.5 and np.linalg.norm(hand_cds[12] - hand_cds[16]) / mean_bone_length > 1.5 and\
        hand_state[3] == 0 and hand_state[4] == 0:
        gesture_name = "Orchid1"

    elif hand_angle < 100 and (np.linalg.norm(hand_cds[4] - hand_cds[12]) / mean_bone_length < 0.6) and \
        (np.linalg.norm(hand_cds[4] - hand_cds[16]) / mean_bone_length < 0.6 or np.linalg.norm(hand_cds[3] - hand_cds[16]) / mean_bone_length < 0.6 or np.linalg.norm(hand_cds[2] - hand_cds[16]) / mean_bone_length < 0.6) and\
        np.linalg.norm(hand_cds[4] - hand_cds[8]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length > 1 and hand_state[4] == 0:

        gesture_name = "Orchid2"
    
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and (hand_state[1] == 0 or hand_state[1] == -2) and hand_state[2] >= 1 and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        np.linalg.norm(hand_cds[12,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "8"
    elif hand_angle < 100 and (hand_state[0] == 0 and min_thumb_dis >= 0.5) and (hand_state[1] == 0 or hand_state[1] == -2) and (hand_state[2] == 0 or hand_state[2] == -2) and hand_state[3] >= 1 and hand_state[4] >= 1 and\
        distance_point_line(hand_cds[8], hand_cds[9], hand_cds[12]) > distance_point_line(hand_cds[5], hand_cds[9], hand_cds[12]) and np.linalg.norm(hand_cds[16,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[20,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[12,:2] - hand_cds[16,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[20,:2] - hand_cds[12,:2]) / mean_bone_length > 1 and\
        np.linalg.norm(hand_cds[8,:2] - hand_cds[4,:2]) / mean_bone_length > 2 and np.linalg.norm(hand_cds[12,:2] - hand_cds[4,:2]) / mean_bone_length > 2:
            gesture_name = "3_variant"

    elif (hand_state[0] >= 1 or min_thumb_dis < 1 or hand_state[0] == -2) and hand_state[1] == 1 and hand_state[2] == 2 and hand_state[3] == 2 and hand_state[4] == 2 and \
        np.linalg.norm(hand_cds[8,:2] - hand_cds[12,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[20]) / mean_bone_length < 1.8:
        if hand_angle < 60:
            gesture_name = "9"
        else:
            gesture_name = "seduce"
        
    
    if  gesture_name == 'unknown' and hand_state[1] in [1, 2, -2] and hand_state[2] in [1, 2, -2] and hand_state[3] in [1, 2, -2] and\
        distance_point_plane(hand_cds[8], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[12], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[16], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7 and\
        distance_point_plane(hand_cds[20], hand_cds[0], hand_cds[5], hand_cds[17]) / mean_bone_length > 0.7:
        if np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[5]) / mean_bone_length > 1 and\
            np.linalg.norm(hand_cds[4] - hand_cds[9]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[13]) / mean_bone_length > 1 and np.linalg.norm(hand_cds[4] - hand_cds[17]) / mean_bone_length > 1:
            gesture_name = "catch"
        elif np.linalg.norm(hand_cds[4,:2] - hand_cds[8,:2]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[4,:2] - hand_cds[12,:2]) / mean_bone_length < 1 and\
            np.linalg.norm(hand_cds[4,:2] - hand_cds[16,:2]) / mean_bone_length < 1 and np.linalg.norm(hand_cds[4,:2] - hand_cds[20,:2]) / mean_bone_length < 1:    
            gesture_name = "0"

    if details:
        prompt = ["straight", "half bent", "fully bent", "base bent", "unknown"]
        print(f"mean bone length {mean_bone_length:.3f}",
              f"hand direction {hand_angle:.3f}",
              f"palm normal direction {hand_phi:.3f}",
                "finger 1 " + prompt[hand_state[0]],
                "finger 2 " + prompt[hand_state[1]],
                "finger 3 " + prompt[hand_state[2]],
                "finger 4 " + prompt[hand_state[3]],
                "finger 5 " + prompt[hand_state[4]],
                 "result:", gesture_name)

    return gesture_name

# Non-maximum suppression: group boxes that overlap and keep the largest box in each group
def non_max_suppression(boxes, overlap_thresh):

    splits = []
    for bbox in boxes:
        add_flag = False
        bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        for split in splits:
            for split_item in split:

                # intersection of the two bounding boxes
                xA = max(split_item[0], bbox[0])
                yA = max(split_item[1], bbox[1])
                xB = min(split_item[2], bbox[2])
                yB = min(split_item[3], bbox[3])

                # area of the intersection (zero if the boxes do not overlap)
                interArea = max(0, xB - xA) * max(0, yB - yA)
                
                item_area = (split_item[2] - split_item[0]) * (split_item[3] - split_item[1])
                # overlap = interArea / (item_area + bbox_area - interArea)
                overlap = interArea / min(item_area, bbox_area)
                if overlap > overlap_thresh:
                    add_flag = True
                    break
            if add_flag:
                split.append(bbox)
                break
        if not add_flag:
            splits.append([bbox])

    # print(splits)
    nms_boxes, nms_ids = [], []
    for split in splits:
        max_area = 0
        max_area_id = -1
        for split_item in split:
             item_area = (split_item[2] - split_item[0]) * (split_item[3] - split_item[1])
             if item_area > max_area:
                 max_area = item_area
                 max_area_id = boxes.index(split_item)

        # print(max_area_id)
        if max_area_id != -1:
            nms_boxes.append(boxes[max_area_id])
            nms_ids.append(max_area_id)

    # print(nms_boxes)
    # print(nms_ids)
    return nms_boxes, nms_ids
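
To sanity-check the finger-state encoding used by process_finger, here is a quick throwaway test with made-up joint coordinates (not real MediaPipe landmarks), run from the same directory as utils.py:

import numpy as np
from utils import process_finger

# joints lying on a straight line -> 0 (straight)
print(process_finger(np.array([0, 0, 0]), np.array([0, -1, 0]), np.array([0, -2, 0]), np.array([0, -3, 0])))
# a finger folding back towards the palm -> 2 (fully bent)
print(process_finger(np.array([0, 0, 0]), np.array([0, -1, 0]), np.array([1, -1, 0]), np.array([1, 0, 0])))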

Then create a file named hand_detection.py with the following contents:

import cv2
import mediapipe as mp
import time
import numpy as np
from utils import process_hand, non_max_suppression
import os

class HandDetection():
    def __init__(self, max_num_hands=2,                 # maximum number of hands to detect
                       min_detection_confidence=0.8,    # detection confidence threshold
                       min_tracking_confidence=0.5,     # tracking confidence threshold
                       overlap_thresh=0.8,              # NMS overlap threshold
                       ):
        self.mp_hands = mp.solutions.hands

        self.hands = self.mp_hands.Hands(static_image_mode=False,
                                        max_num_hands=max_num_hands,
                                        min_detection_confidence=min_detection_confidence,
                                        min_tracking_confidence=min_tracking_confidence
                                        )
        
        self.mpDraw = mp.solutions.drawing_utils
        self.overlap_thresh = overlap_thresh
        self.frame = None
        self.results = None
        self.gestures = []
        self.nms_id = []
        self.nms_gesture = []
        

    # Process a single frame: run detection, then draw boxes and labels
    def process_frame(self, img, details=False):

        self.get_gesture(img, details)
        img = self.frame
        if self.results.multi_hand_landmarks:  # if any hands were detected
            # iterate over every detected hand (after NMS)
            # for hand_idx in range(len(self.results.multi_hand_landmarks)):
            for hand_idx in self.nms_id:
                # self.mpDraw.plot_landmarks(self.results.multi_hand_landmarks[0], self.mp_hands.HAND_CONNECTIONS)
                if details:
                    hand_pixels = self.results.multi_hand_landmarks[hand_idx]  # all landmarks of this hand
                    self.mpDraw.draw_landmarks(img, hand_pixels, self.mp_hands.HAND_CONNECTIONS)  # draw the hand skeleton
 
                item = self.gestures[hand_idx]

                cv2.line(img, (item[0], item[1]), (item[0] + 20, item[1]), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[1]), (item[0], item[1] + 20), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[1]), (item[2] - 20, item[1]), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[1]), (item[2], item[1] + 20), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[3]), (item[0] + 20, item[3]), (255, 0, 0), 4)
                cv2.line(img, (item[0], item[3]), (item[0], item[3] - 20), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[3]), (item[2] - 20, item[3]), (255, 0, 0), 4)
                cv2.line(img, (item[2], item[3]), (item[2], item[3] - 20), (255, 0, 0), 4)

                # cv2.rectangle(img, (item[0], item[1]), (item[2], item[3]), (255, 0, 0), 2)
                cv2.putText(img, item[4], (item[0], item[1]-8), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 8, cv2.LINE_AA)
                cv2.putText(img, item[4], (item[0], item[1]-8), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2, cv2.LINE_AA)
                
        return img
    # Run detection on a frame and return the gestures after NMS
    def get_gesture(self, img, details=False):
        self.frame = img
        # MediaPipe expects an RGB image, while OpenCV frames are BGR
        self.results = self.hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        self.gestures = []
        # check whether any hands were detected
        if self.results.multi_hand_landmarks:
            for hand_landmarks in self.results.multi_hand_landmarks:
                # collect the 21 landmarks; x and y are normalized to [0, 1] by the image width/height, z is a relative depth
                hand_cds = np.zeros((21, 3), dtype='float32')
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    hand_cds[idx] = [landmark.x, landmark.y, landmark.z]
                
                x_min = min(hand_cds[:, 0])
                y_min = min(hand_cds[:, 1])
                x_max = max(hand_cds[:, 0])
                y_max = max(hand_cds[:, 1])
                # print(x_min, y_min, x_max, y_max)
                # x_min -= 0.1 * (x_max - x_min)
                # x_max += 0.1 * (x_max - x_min)
                # y_min -= 0.1 * (y_max - y_min)
                # y_max += 0.1 * (y_max - y_min)

                x_min = max(int(x_min * self.frame.shape[1]),0)
                y_min = max(int(y_min * self.frame.shape[0]),0)
                x_max = min(int(x_max * self.frame.shape[1]),self.frame.shape[1])
                y_max = min(int(y_max * self.frame.shape[0]),self.frame.shape[0])
                
                gesture = process_hand(hand_cds, details)
                self.gestures.append([x_min, y_min, x_max, y_max, gesture])

        # non-maximum suppression across the detected hands
        self.nms_gesture, self.nms_id = non_max_suppression(self.gestures, self.overlap_thresh)
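        # self.nms_gesture is a list of [x_min, y_min, x_max, y_max, gesture_name] entries;
        # self.nms_id holds the corresponding indices into self.gestures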
        # if len(self.nms_gesture) != len(self.gestures):
        #     print('nmsed!')
        return self.nms_gesture

    # Plot the detected hand as a 3D model
    def draw_3d_module(self, img):
        img = cv2.flip(img, 1)
        self.results = self.hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if self.results.multi_hand_landmarks:
            self.mpDraw.plot_landmarks(self.results.multi_hand_landmarks[0], self.mp_hands.HAND_CONNECTIONS)

def run():
    hand_detection = HandDetection()
    capture = cv2.VideoCapture(0)
    ref, frame = capture.read()
    if not ref:
        raise ValueError("Failed to read from the camera (or video). Check that the camera is connected correctly (or that the video path is correct).")

    fps = 0.0
    while(True):
        t1 = time.time()
        # read one frame
        ref, frame = capture.read()
        if not ref:
            break
        frame = cv2.flip(frame, 1)
        frame = hand_detection.process_frame(frame, details=True)        
        # if len(hand_detection.gestures):
        #     print(hand_detection.gestures)  
        alpha = 0.9
        if time.time()-t1 > 0:
            fps  = alpha*fps + (1-alpha)*(1./(time.time()-t1)) 

        frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 
        
        
        cv2.imshow("video",frame)
        key = cv2.waitKey(1) & 0xff
        if key == 27:       # Esc
            capture.release()
            break
        
        # press Q to save cropped images of the detected hands
        if key == ord('q'):
            save_root = './results/hand_gestures/'
            if not os.path.exists(save_root):
                # create the directory if it does not exist
                os.makedirs(save_root)
            for gesture in hand_detection.gestures:
                # find an unused file name for this gesture
                i = 0
                while os.path.exists(save_root+gesture[4]+'-'+str(i)+'.jpg'):
                    i += 1

                img_crop = frame[gesture[1]:gesture[3], gesture[0]:gesture[2]]
                cv2.imwrite(save_root+gesture[4]+'-'+str(i)+'.jpg', img_crop)
                print(save_root+gesture[4]+'-'+str(i)+'.jpg', 'Captured!')

    print("Video Detection Done!")
    capture.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    run()

Then just run hand_detection.py; it opens the default webcam and shows the detections live.
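
If you want to run the detector on a single image instead of the webcam loop, something like the following works ('test.jpg' is just a placeholder path for an image of your own):

import cv2
from hand_detection import HandDetection

detector = HandDetection(max_num_hands=2, min_detection_confidence=0.8)
img = cv2.imread('test.jpg')
img = detector.process_frame(img, details=True)   # draws corner markers and labels onto the image
print(detector.nms_gesture)                        # [[x_min, y_min, x_max, y_max, gesture_name], ...]
cv2.imwrite('test_result.jpg', img)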
