Goal: recognize hand actions on the face, e.g. applying eye cream, applying toner/lotion, wearing a face mask, or no action.
Reference links:
https://github.com/xinghaochen/awesome-hand-pose-estimation
https://github.com/lmb-freiburg/hand3d
https://github.com/FORTH-ModelBasedTracker/MonocularRGB_3D_Handpose_WACV18
openpose: https://blog.csdn.net/zziahgf/article/details/90706693
Method 1: SVM
Dataset: 20-30 subjects, 30,000+ images
Accuracy: 0.85 (the OpenCV hand keypoint detector is not very accurate)
Training tips:
1. Select a few stable keypoints to represent the whole hand, and likewise for the face
2. Clean out bad frames: automatically discard frames where no hand is detected or where blur makes the detection unreliable
3. Normalize the data
Features (a minimal sketch of these computations follows this list):
1. Euclidean distance between the hand (fingertip) center point and the face (eye) center point
2. Euclidean distance from the fingertip center point to the inner eye corner
3. y-axis distance between the fingertips and the eye
4. Euclidean distance between the selected hand and face keypoint vectors
5. Manhattan distance between the selected hand and face keypoint vectors
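To make the five features concrete, here is a minimal sketch with made-up pixel coordinates (the arrays stand in for detected keypoints; the actual point indices are chosen in step 4 below):

import numpy as np

# Hypothetical detected points, illustrative values only
hand_center = np.array([210, 180])        # fingertip center point
eye_center = np.array([200, 150])         # eye center point
inner_eye_corner = np.array([190, 152])   # inner eye corner
hand_vec = np.array([[208, 178], [212, 181], [215, 183]])  # fingertip keypoints
eye_vec = np.array([[198, 148], [201, 149], [203, 151]])   # matching eye keypoints

feat1 = np.linalg.norm(hand_center - eye_center)        # 1. Euclidean distance
feat2 = np.linalg.norm(hand_center - inner_eye_corner)  # 2. distance to inner eye corner
feat3 = abs(hand_center[1] - eye_center[1])             # 3. y-axis distance
feat4 = np.linalg.norm(hand_vec - eye_vec)              # 4. vector Euclidean distance
feat5 = np.abs(hand_vec - eye_vec).sum()                # 5. vector Manhattan distance
print(feat1, feat2, feat3, feat4, feat5)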
Data processing:
1. Save the video stream frame by frame:
import cv2

# Blur metric: variance of the Laplacian (higher = sharper)
def getImageVar(image):
    img2gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    imageVar = cv2.Laplacian(img2gray, cv2.CV_64F).var()
    return imageVar

video_path = 'data2/vedio/width/1.mp4'
save_path = 'data2/5/'
cap = cv2.VideoCapture(video_path)
hasFrame, frame = cap.read()
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
# loop over the frames
k = 0
while hasFrame:
    k += 1
    hasFrame, frame = cap.read()  # read one video frame
    if not hasFrame:              # the final read returns no frame
        break
    frame = frame[0:480, 150:530]  # crop to the region of interest
    h, w, _ = frame.shape
    frame = cv2.resize(frame, (int(w * 0.9), int(h * 0.9)))
    if k % 5 == 0:  # keep every 5th frame
        fm = getImageVar(frame)
        if fm >= 50:  # drop blurry frames
            print(k, fm)
            cv2.imwrite(save_path + 'frame_%d.jpg' % k, frame)  # save the frame
cap.release()
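As a sanity check on the fm >= 50 blur threshold, a minimal sketch with a synthetic image showing that Gaussian blur sharply lowers the Laplacian variance:

import cv2
import numpy as np

# Synthetic high-contrast image: every 10th row is white, giving strong edges
img = np.zeros((100, 100, 3), dtype=np.uint8)
img[::10] = 255
blurred = cv2.GaussianBlur(img, (9, 9), 0)

for name, im in [("sharp", img), ("blurred", blurred)]:
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    print(name, cv2.Laplacian(gray, cv2.CV_64F).var())
# The blurred copy scores far lower than the sharp one; the cutoff of 50
# used above is then picked empirically for the real footage.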
2. Remove frames where no face is detected:
import cv2
import os
import dlib

face_detector = dlib.get_frontal_face_detector()

# Delete the image unless exactly one face is detected
def geteye_rect(imgpath):
    bgrImg = cv2.imread(imgpath)
    if bgrImg is None:
        return False
    facesrect = face_detector(bgrImg, 1)
    if len(facesrect) != 1:
        print(imgpath, "none.")
        os.remove(imgpath)

n = 0
path = 'data2/5/'
for root, dirs, files in os.walk(path):
    for file in files:  # iterate over the files
        if file.endswith('jpg'):
            n += 1
            file_name = root + '/' + file
            print(n, file_name)
            geteye_rect(file_name)
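Since this script deletes files in place, a dry-run variant can be used first to report how many frames would be removed; a minimal sketch with the same detector:

import cv2
import os
import dlib

face_detector = dlib.get_frontal_face_detector()

# Dry run: count the frames that would be deleted, without touching the files
would_remove = 0
total = 0
for root, dirs, files in os.walk('data2/5/'):
    for file in files:
        if file.endswith('jpg'):
            total += 1
            img = cv2.imread(root + '/' + file)
            if img is None or len(face_detector(img, 1)) != 1:
                would_remove += 1
print("would remove %d of %d frames" % (would_remove, total))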
Feature extraction:
3. Keypoint detection (face/hand keypoints)
import os
import dlib
import cv2
import numpy as np
import time

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')

def face_detect(pic):
    img = np.copy(pic)
    # rects: detected faces
    rects = detector(img, 1)
    if len(rects) != 1:
        print(len(rects), "face detection fail!")
        exit()
    landmarks = np.matrix([[p.x, p.y] for p in predictor(img, rects[0]).parts()])
    for idx, point in enumerate(landmarks):
        x = point[0, 0]
        y = point[0, 1]
        f1.write(str(x) + "," + str(y) + " ")
        # draw the point and its index
        cv2.circle(img, (x, y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(idx), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1, cv2.LINE_AA)
    f1.write("\n")
    return img

def hand_detect(pic, label, img_face, file_name):
    protoFile = "class/hand/pose_deploy.prototxt"
    weightsFile = "class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    # hand skeleton connectivity (kept for reference; not drawn here)
    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    # note: the network is reloaded on every call; hoist this out for speed
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    img_hand = np.copy(img_face)  # image for drawing the hand keypoints
    frame = np.copy(pic)          # image fed to the hand detector
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight
    threshold = 0.1
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))
    for i in range(nPoints):
        # confidence map for keypoint i
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # cv2.minMaxLoc returns the min/max values and their locations;
        # the max location is the most confident position for this keypoint
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob >= threshold:
            x, y = (int(point[0]), int(point[1]))
            f2.write(str(x) + "," + str(y) + " ")
            # draw the point and its index
            cv2.circle(img_hand, (x, y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(img_hand, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
        else:
            # low confidence: record the keypoint as (0, 0)
            x, y = (0, 0)
            f2.write(str(x) + "," + str(y) + " ")
    # map the folder label to a class id
    if label == 'a':
        gg = 0
    elif label == 'b':
        gg = 1
    elif label == 'c':
        gg = 2
    else:
        gg = 9
    f2.write(str(gg) + " " + file_name + " \n")
    return img_hand

f1 = open("data2/val2/face_point.txt", "w+")
f2 = open("data2/val2/hand_point.txt", "w+")
path = "data2/val2/"
save_path = "data2/0/"
n = 0
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('jpg'):
            file_name = root + "/" + file
            label = root.split('/')[-1]
            n += 1
            print(n, file_name, label)
            # detect the keypoints
            pic = cv2.imread(file_name)
            img_face = face_detect(pic)                              # face landmarks
            img_hand = hand_detect(pic, label, img_face, file_name)  # hand keypoints
            cv2.imwrite(save_path + label + "/" + file, img_hand)
f1.close()
f2.close()
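For reference, the two files written above have the following line layout (coordinate values below are illustrative); undetected hand keypoints are stored as 0,0, and each hand line ends with the class id and the source path:

hand_point.txt: 22 "x,y" pairs, then class id and file path, e.g.
212,180 215,176 ... 0,0 1 data2/val2/b/frame_35.jpg
face_point.txt: 68 "x,y" pairs, e.g.
101,140 103,152 ... 168,143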
4. Compute the distance features
import numpy as np

# 4. compute the five distance features for every image
def feature(data_hand, label_hand, data_face, max_num):
    features = []
    num = data_hand.shape[0]
    for i in range(num):
        label = label_hand[i]
        # fingertip keypoints used for the hand center
        key_point_hand1 = [8, 12, 16]
        # fingertip center coordinate
        hand_point, mask = center_point(i, data_hand, key_point_hand1)
        # skip the image if too few fingertips were detected
        if mask <= 2:
            continue
        # eye contour points (for the eye center)
        left_eye = [37, 38, 40, 41]
        right_eye = [43, 44, 46, 47]
        # keypoints used for the vector distances (fingertips and eye)
        vector_point_hand = [8, 12, 16]
        vector_left_eye = [37, 38, 40]
        vector_right_eye = [43, 44, 46]
        vector_hand = vector(i, data_hand, vector_point_hand)  # fingertip vector
        # decide whether the hand is on the left or the right side,
        # by its distance to the outer eye corners (landmarks 36 and 45)
        if abs(hand_point[0] - data_face[i][36][0]) <= abs(hand_point[0] - data_face[i][45][0]):
            eye_point, _ = center_point(i, data_face, left_eye)  # eye center
            inner_eye_point = data_face[i][39]                   # inner eye corner
            vector_eye = vector(i, data_face, vector_left_eye)   # eye vector
        else:
            eye_point, _ = center_point(i, data_face, right_eye)
            inner_eye_point = data_face[i][42]
            vector_eye = vector(i, data_face, vector_right_eye)
        # feature 1: Euclidean distance between hand center and eye center
        feat1 = int(np.sqrt(np.sum(np.square(hand_point - eye_point))))
        # feature 2: Euclidean distance from fingertip center to inner eye corner
        feat2 = int(np.sqrt(np.sum(np.square(hand_point - inner_eye_point))))
        # feature 3: y-axis distance between fingertips and eye
        feat3 = int(abs(hand_point[1] - eye_point[1]))
        # feature 4: Euclidean distance between the keypoint vectors
        feat4 = int(np.sqrt(np.sum(np.square(np.array(vector_hand) - np.array(vector_eye)))))
        # feature 5: Manhattan distance between the keypoint vectors
        # (must be computed the same way as in the inference script)
        feat5 = int(np.sum(np.abs(np.array(vector_hand) - np.array(vector_eye))))
        # feature normalization (only applied when all five maxima are given)
        if len(max_num) >= 5:
            feat1 = feat1 / max_num[0]
            feat2 = feat2 / max_num[1]
            feat3 = feat3 / max_num[2]
            feat4 = feat4 / max_num[3]
            feat5 = feat5 / max_num[4]
        # write to txt
        f3.write(str(feat1) + " " + str(feat2) + " " + str(feat3) + " " + str(feat4) + " " + str(feat5) + " " + label + "\n")
        features.append((feat1, feat2, feat3, feat4, feat5))
    return features

# 3. collect the (x, y) coordinates of the given keypoints
def vector(i, data, key_point):
    vec = []
    for idx in key_point:
        x = data[i][idx][0]
        y = data[i][idx][1]
        vec.append((x, y))
    return vec

# 2. center point of the given keypoints (ignoring undetected ones)
def center_point(i, data, key_point):
    sum_x = 0
    sum_y = 0
    n = 0
    for idx in key_point:
        x = data[i][idx][0]
        y = data[i][idx][1]
        if x != 0:  # (0, 0) marks an undetected keypoint
            sum_x += x
            sum_y += y
            n += 1
    if n != 0:
        avg_x = int(sum_x / n)
        avg_y = int(sum_y / n)
    else:
        avg_x = avg_y = 0
    point = (avg_x, avg_y)
    return np.array(point), n

# 1. read a keypoint file
def read_data(file_name):
    with open(file_name, "r") as f:
        lines = f.readlines()
    pic_num = len(lines)                  # number of images (lines)
    point_num = len(lines[0].split(' '))  # tokens per line
    data = []   # keypoints of all images
    label = []
    for n in range(pic_num):
        point = []  # keypoints of one image
        line = lines[n].split(' ')
        if point_num == 69:  # face file: 68 landmarks
            for i in range(68):
                x, y = line[i].split(",")
                point.append([int(x), int(y)])
        else:                # hand file: 22 keypoints, then the class id
            for i in range(22):
                x, y = line[i].split(",")
                point.append([int(x), int(y)])
        data.append(point)
        label.append(line[22])  # only meaningful for hand files
    data = np.array(data)
    label = np.array(label)
    return data, label

# hand keypoints and labels
f1 = "data3/train/hand_point.txt"
data_hand, label_hand = read_data(f1)
# face landmarks
f2 = "data3/train/face_point.txt"
data_face, _ = read_data(f2)
print(data_hand.shape)
# compute the features
f3 = open("data3/train/feature.txt", "w+")
max_num = []  # per-feature maxima for normalization (empty = no normalization)
features = feature(data_hand, label_hand, data_face, max_num)
f3.close()
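Normalization above is effectively disabled because max_num is empty. A minimal sketch, under the assumption that max_num should hold the per-feature maxima of the unnormalized training features, of how it could be computed from feature.txt:

import numpy as np

# Collect the per-feature maxima from the unnormalized training features
vals = []
with open("data3/train/feature.txt") as f:
    for line in f:
        parts = line.strip().split(' ')
        vals.append([float(v) for v in parts[:5]])
max_num = np.max(np.array(vals), axis=0).tolist()
print(max_num)  # pass the same five maxima to feature() for train and test alike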
5. Train with SVM
import numpy as np
import cv2

def loadDataSet(fileName):
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        for line in fr.readlines():
            lineArr = line.strip().split(' ')
            # only the first three features are used here; for all five:
            # dataMat.append([float(lineArr[0]), float(lineArr[1]),
            #                 float(lineArr[2]), float(lineArr[3]), float(lineArr[4])])
            dataMat.append([float(lineArr[0]), float(lineArr[1]), float(lineArr[2])])
            labelMat.append([int(lineArr[5])])
    return dataMat, labelMat

# load the training set
train_data, train_label = loadDataSet('data3/train/feature-shuffle.txt')
train_data = np.array(train_data, dtype=np.float32)
train_label = np.array(train_label, dtype=np.int32)  # OpenCV expects int32 class labels
print(train_label.shape)
# load the test set
test_data, test_label = loadDataSet('data3/val2/feature.txt')
test_data = np.array(test_data, dtype=np.float32)
test_label = np.array(test_label, dtype=np.int32)
print(test_label.shape)
# create the classifier
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)     # SVM type
svm.setKernel(cv2.ml.SVM_LINEAR)  # linear kernel
svm.setC(1e-5)
# train
ret = svm.train(train_data, cv2.ml.ROW_SAMPLE, train_label)
svm.save('data2/train/hand_class.xml')
# support vectors
vec = svm.getSupportVectors()
print("support vectors:", vec)
# test
# svm = cv2.ml.SVM_load("weight/test3/hand_detect.xml")
(ret, res) = svm.predict(test_data)
# accuracy (misclassified frames could also be copied out here for inspection)
n = 0
lens = len(test_data)
for i in range(lens):
    if res[i] == test_label[i]:
        n = n + 1
Accuracy = n / lens
print("accuracy:", Accuracy)
Combined test: feature extraction + SVM prediction
import os
import dlib
import cv2
import numpy as np
import time

# 4. compute the five distance features for a single image
def feature(data_hand, data_face, max_num):
    features = []
    # fingertip keypoints
    key_point_hand = [8, 12, 16]
    # fingertip center coordinate
    hand_point, mask = center_point(data_hand, key_point_hand)
    # stop if too few fingertips were detected
    if mask <= 1:
        print("action_Z: no hand!", "mask:", mask)
        exit()
    # eye contour points (for the eye center)
    left_eye = [37, 38, 40, 41]
    right_eye = [43, 44, 46, 47]
    # keypoints used for the vector distances; these must match the
    # keypoints used when the training features were generated (step 4)
    vector_point_hand = [8, 12, 16]
    vector_left_eye = [37, 38, 40]
    vector_right_eye = [43, 44, 46]
    vector_hand = vector(data_hand, vector_point_hand)  # fingertip vector
    # decide whether the hand is on the left or the right side,
    # by its distance to the outer eye corners (landmarks 36 and 45)
    if abs(hand_point[0] - data_face[36][0]) <= abs(hand_point[0] - data_face[45][0]):
        eye_point, _ = center_point(data_face, left_eye)  # eye center
        inner_eye_point = data_face[39]                   # inner eye corner
        vector_eye = vector(data_face, vector_left_eye)   # eye vector
    else:
        eye_point, _ = center_point(data_face, right_eye)
        inner_eye_point = data_face[42]
        vector_eye = vector(data_face, vector_right_eye)
    # feature 1: Euclidean distance between hand center and eye center
    feat1 = int(np.sqrt(np.sum(np.square(hand_point - eye_point))))
    # feature 2: Euclidean distance from fingertip center to inner eye corner
    feat2 = int(np.sqrt(np.sum(np.square(hand_point - inner_eye_point))))
    # feature 3: y-axis distance between fingertips and eye
    feat3 = int(abs(hand_point[1] - eye_point[1]))
    # feature 4: Euclidean distance between the keypoint vectors
    feat4 = int(np.sqrt(np.sum(np.square(np.array(vector_hand) - np.array(vector_eye)))))
    # feature 5: Manhattan distance between the keypoint vectors
    feat5 = int(np.sum(np.abs(np.array(vector_hand) - np.array(vector_eye))))
    # feature normalization (only applied when all five maxima are given;
    # must use the same maxima as the training features)
    if len(max_num) >= 5:
        feat1 = feat1 / max_num[0]
        feat2 = feat2 / max_num[1]
        feat3 = feat3 / max_num[2]
        feat4 = feat4 / max_num[3]
        feat5 = feat5 / max_num[4]
    features.append((feat1, feat2, feat3, feat4, feat5))
    return features

# 3. collect the (x, y) coordinates of the given keypoints
def vector(data, key_point):
    vec = []
    for idx in key_point:
        vec.append((data[idx][0], data[idx][1]))
    return vec

# 2. center point of the given keypoints (ignoring undetected ones)
def center_point(data, key_point):
    sum_x = 0
    sum_y = 0
    n = 0
    for idx in key_point:
        x = data[idx][0]
        y = data[idx][1]
        if x != 0:  # (0, 0) marks an undetected keypoint
            sum_x += x
            sum_y += y
            n += 1
    if n != 0:
        avg_x = int(sum_x / n)
        avg_y = int(sum_y / n)
    else:
        avg_x = avg_y = 0
    return np.array((avg_x, avg_y)), n

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')

def face_detect(pic):
    img = np.copy(pic)
    # rects: detected faces
    rects = detector(img, 1)
    if len(rects) != 1:
        print("face detection fail!")
        exit()
    landmarks = np.matrix([[p.x, p.y] for p in predictor(img, rects[0]).parts()])
    points = []
    for idx, point in enumerate(landmarks):
        points.append([point[0, 0], point[0, 1]])
    return img, np.array(points)

# the hand network is loaded once, not per image
protoFile = "class/hand/pose_deploy.prototxt"
weightsFile = "class/hand/pose_iter_102000.caffemodel"
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

def hand_detect(pic, img_face):
    nPoints = 22
    img_hand = np.copy(img_face)  # image for drawing the hand keypoints
    frame = np.copy(pic)          # image fed to the hand detector
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight
    threshold = 0.1
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))
    points = []
    for i in range(nPoints):
        # confidence map for keypoint i
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # the max location is the most confident position for this keypoint
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob >= threshold:
            points.append([int(point[0]), int(point[1])])
        else:
            points.append([0, 0])  # low confidence, mark as undetected (as in training)
    return img_hand, np.array(points)

path = "data/test2/"
save_path = "data/0/"
svm_model = "data/train/hand_class.xml"
# load the trained model once
svm = cv2.ml.SVM_load(svm_model)
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('jpg'):
            file_name = root + "/" + file
            label = root.split('/')[-1]
            # detect the keypoints
            pic = cv2.imread(file_name)
            img_face, point_face = face_detect(pic)            # face landmarks
            img_hand, point_hand = hand_detect(pic, img_face)  # hand keypoints
            # cv2.imwrite(save_path + label + "/" + file, img_hand)  # optionally save the annotated image
            # compute the features
            max_num = []  # per-feature maxima for normalization (empty = no normalization)
            features = feature(point_hand, point_face, max_num)
            # keep the same feature columns that were used in training (the first three, per step 5)
            features = np.array(features, dtype=np.float32)[:, :3]
            print("features:", features)
            # predict
            (ret, res) = svm.predict(features)
            print("prediction:", res)