Goal: recognize hand actions on the face, e.g. applying eye cream, applying toner/lotion, wearing a face mask, or no action.
Reference links:
https://github.com/xinghaochen/awesome-hand-pose-estimation
https://github.com/lmb-freiburg/hand3d
https://github.com/FORTH-ModelBasedTracker/MonocularRGB_3D_Handpose_WACV18
openpose: https://blog.csdn.net/zziahgf/article/details/90706693
Method 1: SVM
Dataset: 20-30 subjects, 30,000+ images
Accuracy: 0.85 (the OpenCV hand keypoint detector is not very accurate)
Training tips:
1. Select a few stable keypoints to represent the whole hand, and likewise for the face
2. Clean out bad frames: automatically discard frames where no hand is detected or where blur makes the detection unreliable
3. Normalize the data
Features (a minimal sketch of these computations follows this list):
1. Euclidean distance between the hand (fingertip) center point and the face (eye) center point
2. Euclidean distance from the fingertip center point to the inner eye corner
3. y-axis distance between the fingertips and the eye
4. Euclidean distance between the selected hand and face keypoint vectors
5. Manhattan distance between the selected hand and face keypoint vectors
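To make the five features concrete, here is a minimal sketch with made-up pixel coordinates (the arrays stand in for detected keypoints; the actual point indices are chosen in step 4 below):

import numpy as np

# Hypothetical detected points, illustrative values only
hand_center = np.array([210, 180])        # fingertip center point
eye_center = np.array([200, 150])         # eye center point
inner_eye_corner = np.array([190, 152])   # inner eye corner
hand_vec = np.array([[208, 178], [212, 181], [215, 183]])  # fingertip keypoints
eye_vec = np.array([[198, 148], [201, 149], [203, 151]])   # matching eye keypoints

feat1 = np.linalg.norm(hand_center - eye_center)        # 1. Euclidean distance
feat2 = np.linalg.norm(hand_center - inner_eye_corner)  # 2. distance to inner eye corner
feat3 = abs(hand_center[1] - eye_center[1])             # 3. y-axis distance
feat4 = np.linalg.norm(hand_vec - eye_vec)              # 4. vector Euclidean distance
feat5 = np.abs(hand_vec - eye_vec).sum()                # 5. vector Manhattan distance
print(feat1, feat2, feat3, feat4, feat5)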
Data processing:
1. Save the video stream frame by frame:
import cv2

# Blur metric: variance of the Laplacian (higher = sharper)
def getImageVar(image):
    img2gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    imageVar = cv2.Laplacian(img2gray, cv2.CV_64F).var()
    return imageVar

video_path = 'data2/vedio/width/1.mp4'
save_path = 'data2/5/'
cap = cv2.VideoCapture(video_path)
hasFrame, frame = cap.read()
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
# loop over the frames
k = 0
while hasFrame:
    k += 1
    hasFrame, frame = cap.read()  # read one video frame
    if not hasFrame:              # the final read returns no frame
        break
    frame = frame[0:480, 150:530]  # crop to the region of interest
    h, w, _ = frame.shape
    frame = cv2.resize(frame, (int(w * 0.9), int(h * 0.9)))
    if k % 5 == 0:  # keep every 5th frame
        fm = getImageVar(frame)
        if fm >= 50:  # drop blurry frames
            print(k, fm)
            cv2.imwrite(save_path + 'frame_%d.jpg' % k, frame)  # save the frame
cap.release()
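As a sanity check on the fm >= 50 blur threshold, a minimal sketch with a synthetic image showing that Gaussian blur sharply lowers the Laplacian variance:

import cv2
import numpy as np

# Synthetic high-contrast image: every 10th row is white, giving strong edges
img = np.zeros((100, 100, 3), dtype=np.uint8)
img[::10] = 255
blurred = cv2.GaussianBlur(img, (9, 9), 0)

for name, im in [("sharp", img), ("blurred", blurred)]:
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    print(name, cv2.Laplacian(gray, cv2.CV_64F).var())
# The blurred copy scores far lower than the sharp one; the cutoff of 50
# used above is then picked empirically for the real footage.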
2. Remove frames where no face is detected:
import cv2
import os
import dlib

face_detector = dlib.get_frontal_face_detector()

# Delete the image unless exactly one face is detected
def geteye_rect(imgpath):
    bgrImg = cv2.imread(imgpath)
    if bgrImg is None:
        return False
    facesrect = face_detector(bgrImg, 1)
    if len(facesrect) != 1:
        print(imgpath, "none.")
        os.remove(imgpath)

n = 0
path = 'data2/5/'
for root, dirs, files in os.walk(path):
    for file in files:  # iterate over the files
        if file.endswith('jpg'):
            n += 1
            file_name = root + '/' + file
            print(n, file_name)
            geteye_rect(file_name)
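Since this script deletes files in place, a dry-run variant can be used first to report how many frames would be removed; a minimal sketch with the same detector:

import cv2
import os
import dlib

face_detector = dlib.get_frontal_face_detector()

# Dry run: count the frames that would be deleted, without touching the files
would_remove = 0
total = 0
for root, dirs, files in os.walk('data2/5/'):
    for file in files:
        if file.endswith('jpg'):
            total += 1
            img = cv2.imread(root + '/' + file)
            if img is None or len(face_detector(img, 1)) != 1:
                would_remove += 1
print("would remove %d of %d frames" % (would_remove, total))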
Feature extraction:
3. Keypoint detection (face/hand keypoints)
import os
import dlib
import cv2
import numpy as np
import time

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')

def face_detect(pic):
    img = np.copy(pic)
    # rects: detected faces
    rects = detector(img, 1)
    if len(rects) != 1:
        print(len(rects), "face detection fail!")
        exit()
    landmarks = np.matrix([[p.x, p.y] for p in predictor(img, rects[0]).parts()])
    for idx, point in enumerate(landmarks):
        x = point[0, 0]
        y = point[0, 1]
        f1.write(str(x) + "," + str(y) + " ")
        # draw the point and its index
        cv2.circle(img, (x, y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(idx), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1, cv2.LINE_AA)
    f1.write("\n")
    return img

def hand_detect(pic, label, img_face, file_name):
    protoFile = "class/hand/pose_deploy.prototxt"
    weightsFile = "class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    # hand skeleton connectivity (kept for reference; not drawn here)
    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    # note: the network is reloaded on every call; hoist this out for speed
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    img_hand = np.copy(img_face)  # image for drawing the hand keypoints
    frame = np.copy(pic)          # image fed to the hand detector
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight
    threshold = 0.1
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))
    for i in range(nPoints):
        # confidence map for keypoint i
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # cv2.minMaxLoc returns the min/max values and their locations;
        # the max location is the most confident position for this keypoint
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob >= threshold:
            x, y = (int(point[0]), int(point[1]))
            f2.write(str(x) + "," + str(y) + " ")
            # draw the point and its index
            cv2.circle(img_hand, (x, y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(img_hand, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
        else:
            # low confidence: record the keypoint as (0, 0)
            x, y = (0, 0)
            f2.write(str(x) + "," + str(y) + " ")
    # map the folder label to a class id
    if label == 'a':
        gg = 0
    elif label == 'b':
        gg = 1
    elif label == 'c':
        gg = 2
    else:
        gg = 9
    f2.write(str(gg) + " " + file_name + " \n")
    return img_hand

f1 = open("data2/val2/face_point.txt", "w+")
f2 = open("data2/val2/hand_point.txt", "w+")
path = "data2/val2/"
save_path = "data2/0/"
n = 0
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('jpg'):
            file_name = root + "/" + file
            label = root.split('/')[-1]
            n += 1
            print(n, file_name, label)
            # detect the keypoints
            pic = cv2.imread(file_name)
            img_face = face_detect(pic)                              # face landmarks
            img_hand = hand_detect(pic, label, img_face, file_name)  # hand keypoints
            cv2.imwrite(save_path + label + "/" + file, img_hand)
f1.close()
f2.close()
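For reference, the two files written above have the following line layout (coordinate values below are illustrative); undetected hand keypoints are stored as 0,0, and each hand line ends with the class id and the source path:

hand_point.txt: 22 "x,y" pairs, then class id and file path, e.g.
212,180 215,176 ... 0,0 1 data2/val2/b/frame_35.jpg
face_point.txt: 68 "x,y" pairs, e.g.
101,140 103,152 ... 168,143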
4. Compute the distance features
import numpy as np

# 4. compute the five distance features for every image
def feature(data_hand, label_hand, data_face, max_num):
    features = []
    num = data_hand.shape[0]
    for i in range(num):
        label = label_hand[i]
        # fingertip keypoints used for the hand center
        key_point_hand1 = [8, 12, 16]
        # fingertip center coordinate
        hand_point, mask = center_point(i, data_hand, key_point_hand1)
        # skip the image if too few fingertips were detected
        if mask <= 2:
            continue
        # eye contour points (for the eye center)
        left_eye = [37, 38, 40, 41]
        right_eye = [43, 44, 46, 47]
        # keypoints used for the vector distances (fingertips and eye)
        vector_point_hand = [8, 12, 16]
        vector_left_eye = [37, 38, 40]
        vector_right_eye = [43, 44, 46]
        vector_hand = vector(i, data_hand, vector_point_hand)  # fingertip vector
        # decide whether the hand is on the left or the right side,
        # by its distance to the outer eye corners (landmarks 36 and 45)
        if abs(hand_point[0] - data_face[i][36][0]) <= abs(hand_point[0] - data_face[i][45][0]):
            eye_point, _ = center_point(i, data_face, left_eye)  # eye center
            inner_eye_point = data_face[i][39]                   # inner eye corner
            vector_eye = vector(i, data_face, vector_left_eye)   # eye vector
        else:
            eye_point, _ = center_point(i, data_face, right_eye)
            inner_eye_point = data_face[i][42]
            vector_eye = vector(i, data_face, vector_right_eye)
        # feature 1: Euclidean distance between hand center and eye center
        feat1 = int(np.sqrt(np.sum(np.square(hand_point - eye_point))))
        # feature 2: Euclidean distance from fingertip center to inner eye corner
        feat2 = int(np.sqrt(np.sum(np.square(hand_point - inner_eye_point))))
        # feature 3: y-axis distance between fingertips and eye
        feat3 = int(abs(hand_point[1] - eye_point[1]))
        # feature 4: Euclidean distance between the keypoint vectors
        feat4 = int(np.sqrt(np.sum(np.square(np.array(vector_hand) - np.array(vector_eye)))))
        # feature 5: Manhattan distance between the keypoint vectors
        # (must be computed the same way as in the inference script)
        feat5 = int(np.sum(np.abs(np.array(vector_hand) - np.array(vector_eye))))
        # feature normalization (only applied when all five maxima are given)
        if len(max_num) >= 5:
            feat1 = feat1 / max_num[0]
            feat2 = feat2 / max_num[1]
            feat3 = feat3 / max_num[2]
            feat4 = feat4 / max_num[3]
            feat5 = feat5 / max_num[4]
        # write to txt
        f3.write(str(feat1) + " " + str(feat2) + " " + str(feat3) + " " + str(feat4) + " " + str(feat5) + " " + label + "\n")
        features.append((feat1, feat2, feat3, feat4, feat5))
    return features

# 3. collect the (x, y) coordinates of the given keypoints
def vector(i, data, key_point):
    vec = []
    for idx in key_point:
        x = data[i][idx][0]
        y = data[i][idx][1]
        vec.append((x, y))
    return vec

# 2. center point of the given keypoints (ignoring undetected ones)
def center_point(i, data, key_point):
    sum_x = 0
    sum_y = 0
    n = 0
    for idx in key_point:
        x = data[i][idx][0]
        y = data[i][idx][1]
        if x != 0:  # (0, 0) marks an undetected keypoint
            sum_x += x
            sum_y += y
            n += 1
    if n != 0:
        avg_x = int(sum_x / n)
        avg_y = int(sum_y / n)
    else:
        avg_x = avg_y = 0
    point = (avg_x, avg_y)
    return np.array(point), n

# 1. read a keypoint file
def read_data(file_name):
    with open(file_name, "r") as f:
        lines = f.readlines()
    pic_num = len(lines)                  # number of images (lines)
    point_num = len(lines[0].split(' '))  # tokens per line
    data = []   # keypoints of all images
    label = []
    for n in range(pic_num):
        point = []  # keypoints of one image
        line = lines[n].split(' ')
        if point_num == 69:  # face file: 68 landmarks
            for i in range(68):
                x, y = line[i].split(",")
                point.append([int(x), int(y)])
        else:                # hand file: 22 keypoints, then the class id
            for i in range(22):
                x, y = line[i].split(",")
                point.append([int(x), int(y)])
        data.append(point)
        label.append(line[22])  # only meaningful for hand files
    data = np.array(data)
    label = np.array(label)
    return data, label

# hand keypoints and labels
f1 = "data3/train/hand_point.txt"
data_hand, label_hand = read_data(f1)
# face landmarks
f2 = "data3/train/face_point.txt"
data_face, _ = read_data(f2)
print(data_hand.shape)
# compute the features
f3 = open("data3/train/feature.txt", "w+")
max_num = []  # per-feature maxima for normalization (empty = no normalization)
features = feature(data_hand, label_hand, data_face, max_num)
f3.close()
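Normalization above is effectively disabled because max_num is empty. A minimal sketch, under the assumption that max_num should hold the per-feature maxima of the unnormalized training features, of how it could be computed from feature.txt:

import numpy as np

# Collect the per-feature maxima from the unnormalized training features
vals = []
with open("data3/train/feature.txt") as f:
    for line in f:
        parts = line.strip().split(' ')
        vals.append([float(v) for v in parts[:5]])
max_num = np.max(np.array(vals), axis=0).tolist()
print(max_num)  # pass the same five maxima to feature() for train and test alike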
5. Train with SVM
import numpy as np
import cv2

def loadDataSet(fileName):
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        for line in fr.readlines():
            lineArr = line.strip().split(' ')
            # only the first three features are used here; for all five:
            # dataMat.append([float(lineArr[0]), float(lineArr[1]),
            #                 float(lineArr[2]), float(lineArr[3]), float(lineArr[4])])
            dataMat.append([float(lineArr[0]), float(lineArr[1]), float(lineArr[2])])
            labelMat.append([int(lineArr[5])])
    return dataMat, labelMat

# load the training set
train_data, train_label = loadDataSet('data3/train/feature-shuffle.txt')
train_data = np.array(train_data, dtype=np.float32)
train_label = np.array(train_label, dtype=np.int32)  # OpenCV expects int32 class labels
print(train_label.shape)
# load the test set
test_data, test_label = loadDataSet('data3/val2/feature.txt')
test_data = np.array(test_data, dtype=np.float32)
test_label = np.array(test_label, dtype=np.int32)
print(test_label.shape)
# create the classifier
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)     # SVM type
svm.setKernel(cv2.ml.SVM_LINEAR)  # linear kernel
svm.setC(1e-5)
# train
ret = svm.train(train_data, cv2.ml.ROW_SAMPLE, train_label)
svm.save('data2/train/hand_class.xml')
# support vectors
vec = svm.getSupportVectors()
print("support vectors:", vec)
# test
# svm = cv2.ml.SVM_load("weight/test3/hand_detect.xml")
(ret, res) = svm.predict(test_data)
# accuracy (misclassified frames could also be copied out here for inspection)
n = 0
lens = len(test_data)
for i in range(lens):
    if res[i] == test_label[i]:
        n = n + 1
Accuracy = n / lens
print("accuracy:", Accuracy)
Combined test: feature extraction + SVM prediction
import os
import dlib
import cv2
import numpy as np
import time

# 4. compute the five distance features for a single image
def feature(data_hand, data_face, max_num):
    features = []
    # fingertip keypoints
    key_point_hand = [8, 12, 16]
    # fingertip center coordinate
    hand_point, mask = center_point(data_hand, key_point_hand)
    # stop if too few fingertips were detected
    if mask <= 1:
        print("action_Z: no hand!", "mask:", mask)
        exit()
    # eye contour points (for the eye center)
    left_eye = [37, 38, 40, 41]
    right_eye = [43, 44, 46, 47]
    # keypoints used for the vector distances; these must match the
    # keypoints used when the training features were generated (step 4)
    vector_point_hand = [8, 12, 16]
    vector_left_eye = [37, 38, 40]
    vector_right_eye = [43, 44, 46]
    vector_hand = vector(data_hand, vector_point_hand)  # fingertip vector
    # decide whether the hand is on the left or the right side,
    # by its distance to the outer eye corners (landmarks 36 and 45)
    if abs(hand_point[0] - data_face[36][0]) <= abs(hand_point[0] - data_face[45][0]):
        eye_point, _ = center_point(data_face, left_eye)  # eye center
        inner_eye_point = data_face[39]                   # inner eye corner
        vector_eye = vector(data_face, vector_left_eye)   # eye vector
    else:
        eye_point, _ = center_point(data_face, right_eye)
        inner_eye_point = data_face[42]
        vector_eye = vector(data_face, vector_right_eye)
    # feature 1: Euclidean distance between hand center and eye center
    feat1 = int(np.sqrt(np.sum(np.square(hand_point - eye_point))))
    # feature 2: Euclidean distance from fingertip center to inner eye corner
    feat2 = int(np.sqrt(np.sum(np.square(hand_point - inner_eye_point))))
    # feature 3: y-axis distance between fingertips and eye
    feat3 = int(abs(hand_point[1] - eye_point[1]))
    # feature 4: Euclidean distance between the keypoint vectors
    feat4 = int(np.sqrt(np.sum(np.square(np.array(vector_hand) - np.array(vector_eye)))))
    # feature 5: Manhattan distance between the keypoint vectors
    feat5 = int(np.sum(np.abs(np.array(vector_hand) - np.array(vector_eye))))
    # feature normalization (only applied when all five maxima are given;
    # must use the same maxima as the training features)
    if len(max_num) >= 5:
        feat1 = feat1 / max_num[0]
        feat2 = feat2 / max_num[1]
        feat3 = feat3 / max_num[2]
        feat4 = feat4 / max_num[3]
        feat5 = feat5 / max_num[4]
    features.append((feat1, feat2, feat3, feat4, feat5))
    return features

# 3. collect the (x, y) coordinates of the given keypoints
def vector(data, key_point):
    vec = []
    for idx in key_point:
        vec.append((data[idx][0], data[idx][1]))
    return vec

# 2. center point of the given keypoints (ignoring undetected ones)
def center_point(data, key_point):
    sum_x = 0
    sum_y = 0
    n = 0
    for idx in key_point:
        x = data[idx][0]
        y = data[idx][1]
        if x != 0:  # (0, 0) marks an undetected keypoint
            sum_x += x
            sum_y += y
            n += 1
    if n != 0:
        avg_x = int(sum_x / n)
        avg_y = int(sum_y / n)
    else:
        avg_x = avg_y = 0
    return np.array((avg_x, avg_y)), n

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')

def face_detect(pic):
    img = np.copy(pic)
    # rects: detected faces
    rects = detector(img, 1)
    if len(rects) != 1:
        print("face detection fail!")
        exit()
    landmarks = np.matrix([[p.x, p.y] for p in predictor(img, rects[0]).parts()])
    points = []
    for idx, point in enumerate(landmarks):
        points.append([point[0, 0], point[0, 1]])
    return img, np.array(points)

# the hand network is loaded once, not per image
protoFile = "class/hand/pose_deploy.prototxt"
weightsFile = "class/hand/pose_iter_102000.caffemodel"
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

def hand_detect(pic, img_face):
    nPoints = 22
    img_hand = np.copy(img_face)  # image for drawing the hand keypoints
    frame = np.copy(pic)          # image fed to the hand detector
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight
    threshold = 0.1
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))
    points = []
    for i in range(nPoints):
        # confidence map for keypoint i
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # the max location is the most confident position for this keypoint
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob >= threshold:
            points.append([int(point[0]), int(point[1])])
        else:
            points.append([0, 0])  # low confidence, mark as undetected (as in training)
    return img_hand, np.array(points)

path = "data/test2/"
save_path = "data/0/"
svm_model = "data/train/hand_class.xml"
# load the trained model once
svm = cv2.ml.SVM_load(svm_model)
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('jpg'):
            file_name = root + "/" + file
            label = root.split('/')[-1]
            # detect the keypoints
            pic = cv2.imread(file_name)
            img_face, point_face = face_detect(pic)            # face landmarks
            img_hand, point_hand = hand_detect(pic, img_face)  # hand keypoints
            # cv2.imwrite(save_path + label + "/" + file, img_hand)  # optionally save the annotated image
            # compute the features
            max_num = []  # per-feature maxima for normalization (empty = no normalization)
            features = feature(point_hand, point_face, max_num)
            # keep the same feature columns that were used in training (the first three, per step 5)
            features = np.array(features, dtype=np.float32)[:, :3]
            print("features:", features)
            # predict
            (ret, res) = svm.predict(features)
            print("prediction:", res)