1.前言
本周做了一些音视频方面的算法工作,其中需要用 dlib 的 shape_predictor_68_face_landmarks 模型来提取人脸唇部区域的信息。不过该模型的提取效率较低,而且经常出现检测不到人脸或检测效果较差的情况,因此我们加入了一些实用的小技巧(trick)来改善提取效果。
2.算法
# Input clip to process.
VIDEO_PATH = '/data/voice1/yanglin/MISP_Data_Task1/MISP2021_AVWWS/negative/video/train/middle/R11_S189190191192_C0009_I1_Middle_189_573575_577529.mp4'

videoCapture = cv2.VideoCapture(VIDEO_PATH)
# Fail with a clear message instead of an AttributeError on `frame.shape`
# further down when the file is missing or cannot be decoded.
if not videoCapture.isOpened():
    raise IOError('Cannot open video: ' + VIDEO_PATH)

# Frame rate, frame size (width, height) and frame count of the input clip.
fps = videoCapture.get(cv2.CAP_PROP_FPS)
size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fNUMS = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
print(fps,size,fNUMS)

# Read the first frame; `frame` is None when the read fails.
success, frame = videoCapture.read()
if not success:
    raise IOError('Cannot read the first frame of: ' + VIDEO_PATH)
print(success,frame.shape)

# Writer for the output clip: XVID codec, 25 fps, 96x96 frames.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi',fourcc, 25.0, (96,96))
首先读取待识别的视频,获取其帧率、尺寸和总帧数,并创建一个用于保存处理结果的输出视频文件(output.avi,XVID 编码,这里固定为 25 帧/秒、分辨率 96×96,与后面裁剪出的唇部区域大小一致)。
# Location of the dlib 68-point landmark model used by detector_mouth.
_LANDMARK_MODEL_PATH = "/data/nfs_rt16/luyuan/code/AV-KWS/pre_face/shape_predictor_68_face_landmarks.dat"

# Landmarks whose index is greater than 49 are treated as the mouth.
# NOTE(review): dlib's 68-point layout is usually documented with the mouth at
# indices 48-67, so this selection leaves out points 48 and 49 -- confirm that
# this is intended before changing it.
_FIRST_MOUTH_INDEX = 50

# Side length, in pixels, of the square patch that is returned.
_CROP_SIZE = 96

# Number of rows kept above the top of the mouth box inside the patch.
_TOP_MARGIN = 50

# Weights used to blend the current mouth box with the previous frame's box.
_CURRENT_WEIGHT = 0.9
_PREVIOUS_WEIGHT = 0.1

# Lazily filled cache so the models are loaded once, not once per frame.
_face_models = {}


def _get_face_models():
    """Return the (face detector, landmark predictor) pair, loading on first use."""
    if "detector" not in _face_models:
        _face_models["detector"] = dlib.get_frontal_face_detector()
        _face_models["predictor"] = dlib.shape_predictor(_LANDMARK_MODEL_PATH)
    return _face_models["detector"], _face_models["predictor"]


def _mouth_bounds(predictor, frame, face_rect):
    """Return (max_x, min_x, max_y, min_y) of the mouth landmarks of one face."""
    landmarks = np.array([[p.x, p.y] for p in predictor(frame, face_rect).parts()])
    print(landmarks.shape)
    mouth = landmarks[_FIRST_MOUTH_INDEX:]
    xs = mouth[:, 0]
    ys = mouth[:, 1]
    return int(xs.max()), int(xs.min()), int(ys.max()), int(ys.min())


def _crop_with_padding(frame, top, left, size):
    """Copy a size x size window of `frame` starting at (top, left), zero-padded."""
    patch = np.zeros((size, size, 3), np.uint8)
    height, width = frame.shape[:2]
    # Part of the requested window that actually lies inside the frame.
    src_top = max(top, 0)
    src_left = max(left, 0)
    src_bottom = min(top + size, height)
    src_right = min(left + size, width)
    if src_bottom > src_top and src_right > src_left:
        patch[src_top - top:src_bottom - top,
              src_left - left:src_right - left] = frame[src_top:src_bottom,
                                                        src_left:src_right]
    return patch


def detector_mouth(frame,maxX_frotframe,minX_frotframe,maxY_frotframe,minY_frotframe):
    """Locate the mouth in `frame` and return a 96x96 patch around it.

    The face is detected with dlib's frontal face detector and the mouth box
    is the bounding box of the landmarks with index greater than 49. When
    exactly one face is found and a previous box exists (`maxX_frotframe` is
    not 0), each bound is blended as 0.9 * current + 0.1 * previous and
    truncated to int. When zero or several faces are found, the previous
    frame's box is reused unchanged.

    The patch starts 50 rows above the top of the mouth box and is centred
    horizontally on it. Parts of the window that fall outside the frame are
    left black instead of wrapping around or raising IndexError.

    Args:
        frame: 3-channel image; assumed to be in BGR order, as produced by
            cv2.VideoCapture.read().
        maxX_frotframe: Right bound of the previous frame's mouth box, or 0
            when there is no previous box.
        minX_frotframe: Left bound of the previous frame's mouth box.
        maxY_frotframe: Bottom bound of the previous frame's mouth box.
        minY_frotframe: Top bound of the previous frame's mouth box.

    Returns:
        Tuple (img_blank, maxX, minX, maxY, minY): the 96x96x3 uint8 patch and
        the mouth box to pass back in for the next frame.
    """
    detector, predictor = _get_face_models()
    # Frames decoded by OpenCV are BGR, so convert with the BGR code.
    img_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detected face rectangles (image upsampled once before detection).
    rects = detector(img_gray, 1)
    if len(rects) != 1:
        # No face, or an ambiguous detection: fall back to the previous box.
        maxX = maxX_frotframe
        minX = minX_frotframe
        maxY = maxY_frotframe
        minY = minY_frotframe
    else:
        maxX, minX, maxY, minY = _mouth_bounds(predictor, frame, rects[0])
        if maxX_frotframe != 0:
            # Smooth against the previous frame to reduce jitter.
            maxX = int(_CURRENT_WEIGHT*maxX+_PREVIOUS_WEIGHT*maxX_frotframe)
            minX = int(_CURRENT_WEIGHT*minX+_PREVIOUS_WEIGHT*minX_frotframe)
            maxY = int(_CURRENT_WEIGHT*maxY+_PREVIOUS_WEIGHT*maxY_frotframe)
            minY = int(_CURRENT_WEIGHT*minY+_PREVIOUS_WEIGHT*minY_frotframe)

    # Spare pixels around the mouth box inside the 96x96 window.
    spare_x = _CROP_SIZE - int(maxX-minX)
    left_margin = int(spare_x/2)
    right_margin = spare_x - left_margin
    spare_y = _CROP_SIZE - int(maxY-minY)
    bottom_margin = spare_y - _TOP_MARGIN
    print(minX-left_margin, maxY+bottom_margin ,maxX+right_margin, minY-_TOP_MARGIN)

    img_blank = _crop_with_padding(
        frame, int(minY-_TOP_MARGIN), int(minX-left_margin), _CROP_SIZE)
    return img_blank, maxX, minX, maxY, minY
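我们首先用 shape_predictor_68_face_landmarks 检测人脸的 68 个关键点,其中索引大于 49 的点属于唇部。取这些唇部点外接矩形对角线上的两个点(即 x、y 坐标的最小值和最大值),并基于这两个点定位一个大小为 96×96 的裁剪框。考虑到上一帧和这一帧的唇部位置是相关的,我们把当前帧的结果与上一帧的结果做加权平均(当前帧权重 0.9,上一帧权重 0.1);当检测不到人脸或检测到多张人脸时,则直接沿用上一帧的结果。最后把框内的像素逐点填充到 96×96 的空白图像中,完成输出。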