参考时空图卷积(ST-GCN):在关键点的空间信息(二维坐标)之上加入时间帧维度,形成三维的矩阵数据。
方法2:st-gcn、hcn、2s-agcn
数据集:20人,7k+段视频
准确度:0.95
训练技巧:
1.自动清洗数据,判断人脸检测是否变形,判断一段视频的有效帧是否足够
2.尝试了1s/2s/4s的视频片段进行预测,最终选择2s(64帧)作为一条数据;一条数据中至少有15帧有效才可用,否则视为无效数据
3.数据预处理,把一段视频的所有帧,脸部和手部的检测点转化成3维数据(2d坐标+置信度),等于加上时间信息
骨骼图:
试验方法:
数据处理步骤:
视频分段->计算特征->数据清洗->坐标归一化
1.视频分段
import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
def cut(file_name, save_name):
    """Split one video into consecutive 64-frame clips, rotating every frame.

    Clip ``i`` is written to ``<root>_<i>.mp4`` where ``<root>`` is
    ``save_name`` truncated at its first ``.``. Each frame goes through
    ``np.rot90`` before being written, which is why the writer is created with
    the swapped size ``(height, width)``.

    Args:
        file_name: Path of the source video.
        save_name: Path from which the clip file names are derived.

    Returns:
        None. Nothing is written when the video cannot be opened or read.
    """
    frames_per_clip = 64
    cap = cv2.VideoCapture(file_name)
    try:
        if not cap.isOpened():
            print("cannot open video:", file_name)
            return
        frame_number = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # NOTE(review): this first frame is only used to learn the frame size
        # and is never written to a clip - confirm that this is intended.
        has_frame, frame = cap.read()
        if not has_frame:
            return
        h, w = frame.shape[0], frame.shape[1]
        root = save_name.split(".")[0]
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        for i in range(int(frame_number / frames_per_clip)):
            writer = cv2.VideoWriter(root + '_' + str(i) + '.mp4', fourcc, 30, (h, w))
            try:
                n = 0
                while has_frame and n < frames_per_clip:
                    n += 1
                    has_frame, frame = cap.read()
                    if has_frame:
                        writer.write(np.rot90(frame))
            finally:
                # Release each writer so the clip file is finalised on disk.
                writer.release()
    finally:
        cap.release()
# Split every source video found in ``path`` into clips, one output folder per video.
path = "E:/data/src_vedio/6/img3/"
for file in os.listdir(path):
    # The folder is named after the part of the file name before its first dot.
    name = file.partition('.')[0]
    save_root = "E:/data/makeup_vedio/class2_2s/6/class3/" + str(name) + "/"
    if not os.path.exists(save_root):
        os.makedirs(save_root)
    file_name = path + file
    save_name = save_root + file
    cut(file_name, save_name)
    print(save_root)
2.计算特征(主要获取关键点的坐标)
import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
def crop(img, point_face):
    """Cut away the image rows lying more than 200 px above face landmark 24.

    Args:
        img: Image array indexed as ``[row, column, ...]``.
        point_face: Sequence of ``(x, y)`` landmarks; only entry 24 is read.

    Returns:
        The slice of ``img`` from the start row (clamped at 0) to the bottom,
        over the full width.
    """
    start_row = max(point_face[24][1] - 200, 0)
    height, width = img.shape[0], img.shape[1]
    return img[start_row:height, 0:width]
_FACE_MODELS = None  # lazily created (detector, predictor) pair shared by all calls


def _load_face_models():
    """Build the dlib face detector and landmark predictor once, then reuse them."""
    global _FACE_MODELS
    if _FACE_MODELS is None:
        _FACE_MODELS = (
            dlib.get_frontal_face_detector(),
            dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat'),
        )
    return _FACE_MODELS


def face_detect(pic):
    """Locate the facial landmarks of the single face in ``pic``.

    Args:
        pic: Image array; it is copied and never modified.

    Returns:
        ``(img, point_face)`` where ``img`` is the copy with every landmark
        drawn and numbered and ``point_face`` is a list of ``(x, y)`` integer
        tuples in predictor order. ``(-1, -1)`` when the detector does not
        report exactly one face.
    """
    # Loading the models is expensive and this runs for every frame, so the
    # instances are cached instead of being rebuilt on each call.
    detector, predictor = _load_face_models()
    img = np.copy(pic)
    rects = detector(img, 1)
    if len(rects) != 1:
        return -1, -1
    point_face = []
    for idx, part in enumerate(predictor(img, rects[0]).parts()):
        # Plain ints keep the OpenCV drawing calls independent of NumPy scalars.
        x, y = int(part.x), int(part.y)
        point_face.append((x, y))
        cv2.circle(img, (x, y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(idx), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1, cv2.LINE_AA)
    return img, point_face
_HAND_NET = None  # lazily loaded Caffe network shared by all calls


def _load_hand_net():
    """Load the hand key-point Caffe model once, then reuse it."""
    global _HAND_NET
    if _HAND_NET is None:
        _HAND_NET = cv2.dnn.readNetFromCaffe(
            "class/hand/pose_deploy.prototxt",
            "class/hand/pose_iter_102000.caffemodel",
        )
    return _HAND_NET


def hand_detect(pic, img_face):
    """Find the most confident location of each hand key point in ``pic``.

    Args:
        pic: Image the network runs on; it is copied, not modified.
        img_face: Image onto which the key points are drawn (a copy is used).

    Returns:
        ``(img_hand, point_hand)`` where ``img_hand`` is the annotated copy of
        ``img_face`` and ``point_hand`` holds 22 ``(x, y, confidence)`` tuples,
        one per output channel of the network.
    """
    n_points = 22
    # Reading the model from disk on every frame dominated the run time.
    net = _load_hand_net()
    img_hand = np.copy(img_face)  # canvas for drawing the hand points
    frame = np.copy(pic)          # input of the detector
    frame_width = frame.shape[1]
    frame_height = frame.shape[0]
    aspect_ratio = frame_width / frame_height

    # Network input: fixed height, width following the aspect ratio of the frame.
    in_height = 368
    in_width = int(((aspect_ratio * in_height) * 8) // 8)
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (in_width, in_height), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(blob)
    output = net.forward()

    point_hand = []
    for i in range(n_points):
        # Confidence map of channel i, scaled back to the frame size.
        prob_map = cv2.resize(output[0, i, :, :], (frame_width, frame_height))
        # The maximum of the map is taken as the location of the key point.
        _, prob, _, point = cv2.minMaxLoc(prob_map)
        x, y = int(point[0]), int(point[1])
        point_hand.append((x, y, prob))
        cv2.circle(img_hand, (x, y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img_hand, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
    return img_hand, point_hand
def get_feature(point_face, point_hand, img):
    """Append the 15 selected key points of one frame to ``f1`` and mark them on ``img``.

    Coordinates are written relative to face landmark 30 as ``x y confidence``
    triples separated by spaces, followed by a newline. Face points get a
    confidence of 1; hand points use the confidence stored in ``point_hand``.

    Args:
        point_face: Face landmarks as ``(x, y)`` entries.
        point_hand: Hand key points as ``(x, y, confidence)`` entries.
        img: Image that is annotated in place.

    Returns:
        ``img`` with the selected points drawn and numbered.
    """
    origin = point_face[30]
    selected_face = [36, 45, 2, 14, 39, 42, 30, 57]
    selected_hand = [8, 12, 16, 6, 10, 14, 9]

    def _emit(pt, idx, confidence, colour):
        # Write the relative coordinates first, then draw the absolute position.
        rel_x = pt[0] - origin[0]
        rel_y = pt[1] - origin[1]
        f1.write(str(rel_x) + ' ' + str(rel_y) + ' ' + str(confidence) + ' ')
        cv2.circle(img, (pt[0], pt[1]), 2, colour, thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(idx), (pt[0], pt[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)

    for idx in selected_face:
        _emit(point_face[idx], idx, 1, (255, 255, 0))
    for idx in selected_hand:
        _emit(point_hand[idx], idx, point_hand[idx][2], (0, 255, 0))
    f1.write('\n')
    return img
# # 2.取所有点
# face_len=len(point_face)
# hand_len=len(point_hand)
# for i in range(face_len):
# x=point_face[i][0]-circle_center[0]
# y=point_face[i][1]-circle_center[1]
# confidence=1
# f2.write(str(x)+' '+str(y)+' '+str(confidence)+' ')
# for i in range(hand_len):
# x=point_hand[i][0]-circle_center[0]
# y=point_hand[i][1]-circle_center[1]
# confidence=point_hand[i][2]
# f2.write(str(x)+' '+str(y)+' '+str(confidence)+' ')
# f2.write('\n')
def get_feature2(point_face, point_hand):
    """Append every face and hand point of one frame to ``f2``.

    All coordinates are written relative to face landmark 30 as
    ``x y confidence`` triples; face points get a confidence of 1. The line is
    terminated with a newline.

    Args:
        point_face: Face landmarks as ``(x, y)`` entries.
        point_hand: Hand key points as ``(x, y, confidence)`` entries.
    """
    origin = point_face[30]
    for pt in point_face:
        f2.write("%s %s %s " % (pt[0] - origin[0], pt[1] - origin[1], 1))
    for pt in point_hand:
        rel_x = pt[0] - origin[0]
        rel_y = pt[1] - origin[1]
        f2.write("%s %s %s " % (rel_x, rel_y, pt[2]))
    f2.write('\n')
def get_feature3(point_face, point_hand):
    """Write the face points to ``f3`` and the hand points to ``f4``.

    Each point is a comma-joined ``x,y,confidence`` token followed by a space.
    Face points keep their absolute coordinates with confidence 1; hand points
    are written relative to face landmark 30. Each file gets one newline.

    Args:
        point_face: Face landmarks as ``(x, y)`` entries.
        point_hand: Hand key points as ``(x, y, confidence)`` entries.
    """
    origin = point_face[30]
    for pt in point_face:
        f3.write(','.join((str(pt[0]), str(pt[1]), str(1))) + ' ')
    f3.write('\n')
    for pt in point_hand:
        rel = (pt[0] - origin[0], pt[1] - origin[1], pt[2])
        f4.write(','.join(str(value) for value in rel) + ' ')
    f4.write('\n')
def main(file_root, v_id, a_id, label):
    """Extract face and hand key points from every usable frame of one clip.

    A frame is used when ``face_detect`` succeeds both on the half-size frame
    and on the cropped image resized to 500x700. For each such frame one line
    starting with ``a_id v_id label`` is appended to the module-level files
    ``f1``..``f4`` through ``get_feature``, ``get_feature2`` and
    ``get_feature3``.

    Args:
        file_root: Path of the clip.
        v_id: Video id written as the second field of every line.
        a_id: Id written as the first field of every line.
        label: Class label written as the third field of every line.
    """
    cap = cv2.VideoCapture(file_root)
    try:
        # NOTE(review): the first frame only primes the loop and is never
        # processed - confirm that skipping it is intended.
        has_frame, _ = cap.read()
        while has_frame:
            has_frame, frame = cap.read()
            if not has_frame:
                break
            h, w, _ = frame.shape
            frame = cv2.resize(frame, (int(w * 0.5), int(h * 0.5)))
            _, point_face = face_detect(frame)
            if point_face == -1:
                continue
            img = cv2.resize(crop(frame, point_face), (500, 700))
            img_face2, point_face2 = face_detect(img)
            if point_face2 == -1:
                continue
            # Run the hand detector before writing anything so that a failure
            # here cannot leave header-only lines in the output files.
            _, point_hand = hand_detect(img, img_face2)
            header = str(a_id) + ' ' + str(v_id) + ' ' + str(label) + ' '
            for handle in (f1, f2, f3, f4):
                handle.write(header)
            get_feature(point_face2, point_hand, img)
            get_feature2(point_face2, point_hand)
            get_feature3(point_face2, point_hand)
    finally:
        cap.release()
# Walk ``path`` and extract the key points of every mp4 clip into four text files.
path = "E:/data/makeup_vedio/class2_2s/6/6.1/"
txt_path = "E:/data/makeup_vedio/class2_2s/6/6.1/"
n = 0
# The files stay module-level names because the feature writers use them as
# globals; the ``with`` block guarantees they are flushed and closed at the end.
with open(txt_path + "data_15.txt", "w+") as f1, \
        open(txt_path + "data_all.txt", "w+") as f2, \
        open(txt_path + "face.txt", "w+") as f3, \
        open(txt_path + "hand.txt", "w+") as f4:
    for root, dirs, files in os.walk(path):
        for file in files:
            if not file.endswith('mp4'):
                continue
            # NOTE(review): the label is the text after the last "/" of the
            # folder path; verify this with the separators os.walk produces
            # on the target platform.
            label = root.split("/")[-1]
            file_root = root + '/' + file
            n += 1
            star = time.time()
            # File names are expected to look like "<a_id>_<v_id>.mp4".
            name = file.split(".")[0]
            a_id = name.split("_")[0]
            v_id = name.split("_")[1]
            main(file_root, v_id, a_id, label)
            end = time.time() - star
            print(n, "---time:", end)
3.数据清洗v1
import shutil
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
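# Cleaning rules (as implemented below):
# 1. A frame passes when at least 4 of its 7 hand key points have confidence >= 0.1
#    and the face is wider than 50 px (label 3 skips the hand check).
# 2. A video is kept when at least 15 of its frames pass.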
def count_label(label_arr):
    """Count how many labels equal 0, 1 and 3.

    Args:
        label_arr: Iterable of labels; any other value is ignored.

    Returns:
        Tuple ``(count_of_0, count_of_1, count_of_3)``.
    """
    labels = list(label_arr)
    return tuple(
        sum(1 for label in labels if label == wanted)
        for wanted in (0, 1, 3)
    )
def count_pass(point, label):
    """Score one frame by its number of confident hand key points.

    Args:
        point: Rows of ``(x, y, confidence)`` values; rows from index 8 on are
            the hand points, rows 2 and 3 are the two face points whose x
            difference is used as the face width.
        label: Class label of the frame.

    Returns:
        0 when the face width is 50 or less; otherwise 7 for label 3, else the
        number of hand rows whose confidence is at least 0.1.
    """
    confident = sum(1 for row in point[8:] if float(row[2]) >= 0.1)
    # Label 3 is accepted without looking at the hand confidences.
    score = 7 if label == 3 else confident
    # A very narrow face is treated as a distorted detection.
    face_width = int(point[3][0]) - int(point[2][0])
    return 0 if face_width <= 50 else score
def clear(txt1, txt2):
    """Filter the per-frame feature file ``txt1`` video by video into ``txt2``.

    Consecutive lines sharing the same first three fields
    (``a_id v_id label``) form one video. A frame passes when ``count_pass``
    returns at least 4. A video with at least 15 passing frames is kept, and
    then *all* of its frames are written to ``txt2``. Blank lines are ignored,
    and the last video of the file is evaluated like every other one.

    Args:
        txt1: Path of the input feature file.
        txt2: Path of the cleaned output file (overwritten).

    Returns:
        Tuple ``(data_nums, new_nums, total1, total2, total3, label_old,
        label_new)``: input frame count, written frame count, number of
        videos, number of kept videos, number of videos with at least 15
        frames regardless of hand quality, and the ``count_label`` result
        before and after cleaning.
    """
    min_points = 4   # score a frame needs from count_pass
    min_frames = 15  # frames a video needs

    with open(txt1, "r") as src:
        lines1 = [line for line in src.readlines() if line.strip()]
    data_nums = len(lines1)

    # Group consecutive frames by video; each frame is scored with its own label.
    videos = []
    for line in lines1:
        fields = line.strip().split(" ")
        vid = fields[:3]
        label = int(fields[2])
        point = np.reshape(np.asarray(fields[3:]), (-1, 3))
        frame_ok = count_pass(point, label) >= min_points
        if not videos or videos[-1]["vid"] != vid:
            videos.append({"vid": vid, "label": label, "frames": [], "passed": 0})
        video = videos[-1]
        video["frames"].append(line)
        if frame_ok:
            video["passed"] += 1

    total1 = total2 = total3 = new_nums = 0
    label1 = []
    label2 = []
    with open(txt2, "w+") as dst:
        for video in videos:
            total1 += 1
            label1.append(video["label"])
            if video["passed"] >= min_frames:
                dst.writelines(video["frames"])
                new_nums += len(video["frames"])
                total2 += 1
                label2.append(video["label"])
            if len(video["frames"]) >= min_frames:
                total3 += 1

    label_old = count_label(label1)
    label_new = count_label(label2)
    return data_nums, new_nums, total1, total2, total3, label_old, label_new
# Clean the 15-key-point feature file and report what the cleaning removed.
txt1 = "txt2/src/data_15.txt"
txt2 = "txt2/clear/data_15.txt"
(data_nums, new_nums,
 total1, total2, total3,
 label_old, label_new) = clear(txt1, txt2)
face_dropped = total1 - total3
hand_dropped = (total1 - total2) - face_dropped
print("数据总量:", data_nums, new_nums)
print("视频数量:", total1, total2)
print("人脸检测问题减少:", face_dropped)
print("手部检测问题减少:", hand_dropped)
print("清洗前三类数据:", label_old)
print("清洗后三类数据:", label_new)
4.提取骨骼特征+关节特征(双流法2s-agcn时使用)
import shutil
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
# 获取原来的15个坐标点,不要做相对坐标
def gen_joint(txt1, txt2, txt3):
    """Build the 15-joint file with absolute coordinates from the face and hand files.

    Lines of ``txt1`` (face) and ``txt2`` (hand) are paired by position; a pair
    is used only when their first three fields match. Face tokens are copied
    as they are, hand tokens are shifted by face landmark 30 to undo the
    relative encoding. Blank lines are skipped and pairing stops at the end of
    the shorter file.

    Args:
        txt1: Face file; tokens are ``x,y,confidence`` with absolute coordinates.
        txt2: Hand file; tokens are ``x,y,confidence`` relative to landmark 30.
        txt3: Output path (overwritten); values are written space separated.
    """
    face_key_point = [36, 45, 2, 14, 39, 42, 30, 57]
    hand_key_point = [8, 12, 16, 6, 10, 14, 9]

    with open(txt1, "r") as face_file, open(txt2, "r") as hand_file:
        lines1 = face_file.readlines()
        lines2 = hand_file.readlines()
    print(len(lines1))

    with open(txt3, "w+") as out:
        for raw1, raw2 in zip(lines1, lines2):
            if not raw1.strip():
                continue
            line1 = raw1.strip().split(" ")
            line2 = raw2.strip().split(" ")
            if line1[:3] != line2[:3]:
                continue
            data_face = line1[3:]
            data_hand = line2[3:]
            center_point = data_face[30].split(',')
            center_x, center_y = int(center_point[0]), int(center_point[1])

            values = list(line1[:3])
            # Face key points: copied unchanged.
            for index in face_key_point:
                fields = data_face[index].split(',')
                values.extend((fields[0], fields[1], fields[2]))
            # Hand key points: converted back to absolute coordinates.
            for index in hand_key_point:
                fields = data_hand[index].split(',')
                values.append(str(int(fields[0]) + center_x))
                values.append(str(int(fields[1]) + center_y))
                values.append(fields[2])
            # Every value is followed by one space, as in the other feature files.
            out.write(" ".join(values) + " \n")
# 获取骨骼数据
def gen_bone(txt1, txt2, txt4):
    """Build the bone file: one ``far point - near point`` vector per bone.

    Lines of ``txt1`` (face) and ``txt2`` (hand) are paired by position and
    used only when their first three fields match. Hand points are shifted by
    face landmark 30 to get absolute coordinates before the subtraction.
    Blank lines are skipped and pairing stops at the end of the shorter file.

    Index 14 exists both as a face landmark and as a hand key point. The bones
    are therefore kept in two groups so that the hand bones ``(14, 9)`` and
    ``(16, 14)`` use hand point 14 rather than face landmark 14.

    Args:
        txt1: Face file; tokens are ``x,y,confidence`` with absolute coordinates.
        txt2: Hand file; tokens are ``x,y,confidence`` relative to landmark 30.
        txt4: Output path (overwritten); 15 ``dx dy dconfidence`` triples per line.
    """
    hand_key_point = [8, 12, 16, 6, 10, 14, 9]
    # (far point, near point); both ends are face landmarks.
    face_bones = [(39, 30), (36, 39), (42, 30), (45, 42), (2, 30), (14, 30), (57, 30), (30, 30)]
    # (far point, near point); ends are hand points, except 30 which is the face centre.
    hand_bones = [(9, 30), (14, 9), (16, 14), (10, 9), (12, 10), (6, 9), (8, 6)]

    def face_point(data_face, index):
        fields = data_face[index].split(",")
        return int(fields[0]), int(fields[1]), float(fields[2])

    def hand_point(data_hand, center, index):
        fields = data_hand[index].split(",")
        return int(fields[0]) + center[0], int(fields[1]) + center[1], float(fields[2])

    with open(txt1, "r") as face_file, open(txt2, "r") as hand_file:
        lines1 = face_file.readlines()
        lines2 = hand_file.readlines()
    print(len(lines1))

    with open(txt4, "w+") as out:
        for raw1, raw2 in zip(lines1, lines2):
            if not raw1.strip():
                continue
            line1 = raw1.strip().split(" ")
            line2 = raw2.strip().split(" ")
            if line1[:3] != line2[:3]:
                continue
            data_face = line1[3:]
            data_hand = line2[3:]
            center = face_point(data_face, 30)

            bones = [
                (face_point(data_face, far), face_point(data_face, near))
                for far, near in face_bones
            ]
            for far, near in hand_bones:
                ends = []
                for index in (far, near):
                    if index in hand_key_point:
                        ends.append(hand_point(data_hand, center, index))
                    else:
                        ends.append(face_point(data_face, index))
                bones.append((ends[0], ends[1]))

            txt = "".join(str(field) + " " for field in line1[:3])
            for far_pt, near_pt in bones:
                # Bone vector: far point minus near point.
                x = far_pt[0] - near_pt[0]
                y = far_pt[1] - near_pt[1]
                z = far_pt[2] - near_pt[2]
                txt += str(x) + " " + str(y) + " " + str(z) + " "
            out.write(txt + "\n")
# Generate the joint and bone files, then print the size of the bone file.
txt1 = "txt2/src/face.txt"
txt2 = "txt2/src/hand.txt"
txt3 = "txt2/src/data_joint.txt"
txt4 = "txt2/src/data_bone.txt"
gen_joint(txt1, txt2, txt3)
gen_bone(txt1, txt2, txt4)
# Close the handle after reading instead of leaving it open for the rest of the run.
with open(txt4, "r") as f4:
    lines3 = f4.readlines()
# Prints the line count and the field count of the first line (0 when the file is empty).
print(len(lines3), len(lines3[0].strip().split(" ")) if lines3 else 0)
5.数据清洗(双流法2s-agcn时使用)
import shutil
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
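# Cleaning rules (as implemented below):
# 1. A frame passes when at least 4 of its 7 hand key points have confidence >= 0.1
#    and the face is wider than 50 px (label 3 skips the hand check).
# 2. A video is kept when at least 15 of its frames pass.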
def count_label(label_arr):
    """Tally the labels 0, 1 and 3 found in ``label_arr``.

    Args:
        label_arr: Iterable of labels; values other than 0, 1 and 3 are skipped.

    Returns:
        Tuple ``(count_of_0, count_of_1, count_of_3)``.
    """
    wanted = (0, 1, 3)
    counts = [0, 0, 0]
    for label in label_arr:
        for slot, value in enumerate(wanted):
            if label == value:
                counts[slot] += 1
                break
    return tuple(counts)
def count_pass(point, label):
    """Return the quality score of one frame.

    Args:
        point: Rows of ``(x, y, confidence)`` values; rows from index 8 on are
            the hand points, rows 2 and 3 give the face width via their x
            values.
        label: Class label of the frame.

    Returns:
        Number of hand rows with confidence of at least 0.1, replaced by 7 for
        label 3, and forced to 0 when the face width is 50 or less.
    """
    hits = 0
    for row in point[8:]:
        if float(row[2]) >= 0.1:
            hits += 1
    if label == 3:
        # Label 3 frames bypass the hand check.
        hits = 7
    left_x = int(point[2][0])
    right_x = int(point[3][0])
    if right_x - left_x <= 50:
        # A face this narrow is treated as a failed detection.
        return 0
    return hits
def clear(txt1_1, txt1_2, txt2):
    """Filter ``txt1_2`` video by video, judging each frame on ``txt1_1``.

    The two input files are read line by line in parallel: line ``i`` of
    ``txt1_1`` decides whether line ``i`` of ``txt1_2`` passes. Consecutive
    lines of ``txt1_1`` sharing the same first three fields
    (``a_id v_id label``) form one video. A frame passes when ``count_pass``
    returns at least 4; a video with at least 15 passing frames is kept and
    only its passing frames are written to ``txt2``. Blank reference lines are
    ignored, and the last video is evaluated like every other one.

    Args:
        txt1_1: Reference feature file used for the quality check.
        txt1_2: File whose lines are copied to the output.
        txt2: Path of the cleaned output file (overwritten).

    Returns:
        Tuple ``(data_nums, new_nums, total1, total2, total3, label_old,
        label_new)``: input frame count, written frame count, number of
        videos, number of kept videos, number of videos with at least 15
        frames regardless of hand quality, and the ``count_label`` result
        before and after cleaning.
    """
    min_points = 4   # score a frame needs from count_pass
    min_frames = 15  # passing frames a video needs

    with open(txt1_1, "r") as ref_file, open(txt1_2, "r") as src_file:
        lines1 = ref_file.readlines()
        lines2 = src_file.readlines()

    # Group consecutive frames by video; each frame is scored with its own label.
    data_nums = 0
    videos = []
    for i, line in enumerate(lines1):
        if not line.strip():
            continue
        data_nums += 1
        fields = line.strip().split(" ")
        vid = fields[:3]
        label = int(fields[2])
        point = np.reshape(np.asarray(fields[3:]), (-1, 3))
        if not videos or videos[-1]["vid"] != vid:
            videos.append({"vid": vid, "label": label, "frames": 0, "kept": []})
        video = videos[-1]
        video["frames"] += 1
        if count_pass(point, label) >= min_points:
            video["kept"].append(lines2[i])

    total1 = total2 = total3 = new_nums = 0
    label1 = []
    label2 = []
    with open(txt2, "w+") as dst:
        for video in videos:
            total1 += 1
            label1.append(video["label"])
            if len(video["kept"]) >= min_frames:
                dst.writelines(video["kept"])
                new_nums += len(video["kept"])
                total2 += 1
                label2.append(video["label"])
            if video["frames"] >= min_frames:
                total3 += 1

    label_old = count_label(label1)
    label_new = count_label(label2)
    return data_nums, new_nums, total1, total2, total3, label_old, label_new
# Clean the joint file, using the same file both as reference and as source.
txt1_1 = "txt2/src/data_joint.txt"  # reference for the quality check
txt1_2 = "txt2/src/data_joint.txt"  # file being cleaned
txt2 = "txt2/clear/data_joint.txt"  # cleaned output
report = clear(txt1_1, txt1_2, txt2)
data_nums, new_nums, total1, total2, total3, label_old, label_new = report
print("数据总量:", data_nums, new_nums)
print("视频数量:", total1, total2)
print("人脸检测问题减少:", total1 - total3)
print("手部检测问题减少:", (total1 - total2) - (total1 - total3))
print("清洗前三类数据:", label_old)
print("清洗后三类数据:", label_new)
6.坐标归一化
import shutil
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
def face_normal(txt1, txt2, txt3):
    """Rescale the coordinates in ``txt2`` to a standard 350x500 face size.

    Lines of ``txt1`` (face) and ``txt2`` (features) are paired by position
    and used only when their first three fields match. The face width is the
    x distance between face landmarks 15 and 2, the height is the y distance
    between landmarks 8 and 19. Every x is multiplied by ``350 / width`` and
    every y by ``500 / height``, then truncated to an integer; confidences are
    copied unchanged. Blank lines and frames with a zero width or height are
    skipped, and pairing stops at the end of the shorter file.

    Args:
        txt1: Face file; tokens are ``x,y,confidence``.
        txt2: Feature file; space-separated ``x y confidence`` triples after
            the three header fields.
        txt3: Output path (overwritten), same layout as ``txt2``.
    """
    base_w = 350
    base_h = 500
    header_len = 3  # every line starts with "a_id v_id label"

    with open(txt1, "r") as face_file, open(txt2, "r") as data_file:
        lines1 = face_file.readlines()
        lines2 = data_file.readlines()
    print(len(lines1))

    with open(txt3, "w+") as out:
        for raw1, raw2 in zip(lines1, lines2):
            if not raw1.strip() or not raw2.strip():
                continue
            line1 = raw1.strip().split(" ")
            line2 = raw2.strip().split(" ")
            if line1[:header_len] != line2[:header_len]:
                continue
            w = int(line1[15 + header_len].split(',')[0]) - int(line1[2 + header_len].split(',')[0])
            h = int(line1[8 + header_len].split(',')[1]) - int(line1[19 + header_len].split(',')[1])
            if w == 0 or h == 0:
                # A degenerate face cannot be used as a scale reference.
                continue
            scale_x = base_w / w
            scale_y = base_h / h

            values = list(line2[:header_len])
            point = np.reshape(np.asarray(line2[header_len:]), (-1, 3))
            for row in point:
                # new = old * (standard size / face size)
                values.append(str(int(int(row[0]) * scale_x)))
                values.append(str(int(int(row[1]) * scale_y)))
                values.append(str(row[2]))
            # Every value is followed by one space, as in the input files.
            out.write(" ".join(values) + " \n")
# Normalise the 15-key-point file and print how many lines were produced.
txt1 = "txt/2s/src/9/face.txt"
txt2 = "txt/2s/src/9/data_15.txt"
txt3 = "txt/2s/src/9/data_15_v2.txt"
face_normal(txt1, txt2, txt3)
# Close the handle after reading instead of leaving it open for the rest of the run.
with open(txt3, "r") as f4:
    lines3 = f4.readlines()
print(len(lines3))