参考:
由6,14以及68点人脸关键点计算头部姿态
dlib https://www.zhihu.com/question/34524316
https://blog.csdn.net/ChuiGeDaQiQiu/article/details/88623267
3D数据
https://github.com/yinguobing/head-pose-estimation
https://www.learnopencv.com/facial-landmark-detection/
重磅!头部姿态估计「原理详解 + 实战代码」来啦!:
https://zhuanlan.zhihu.com/p/51208197
使用opencv和dlib进行人脸姿态估计(python)
https://blog.csdn.net/yuanlulu/article/details/82763170
head-pose-estimation--提供3D模型
https://github.com/YadiraF/face3d
https://github.com/lincolnhard/head-pose-estimation
其中关于可视化的一些问题:参考:https://github.com/lincolnhard/head-pose-estimation
比如
还有这种:
最近在弄代码:
PFLD-pytorch-github
使用数据集WFLW下载地址:
https://wywu.github.io/projects/LAB/WFLW.html
我刚开始不知道有官方的,自己写了一个,然后发现,有的landmark比框大,使用最下外接矩形改了一下,先上
原始的:
import os
import cv2
import numpy as np
# self.img = cv2.imread(self.line[0])
# self.landmark = np.asarray(self.line[1:197], dtype=np.float32)
# self.attribute = np.asarray(self.line[197:203], dtype=np.int32)
# self.euler_angle = np.asarray(self.line[203:206], dtype=np.float32)
def groberect(points, ww, hh):
x1 = points[0]
y1 = points[1]
x2 = points[2]
y2 = points[3]
w = x2 - x1 + 1
h = y2 - y1 + 1
px = float(x1 + x2) / 2
py = float(y1 + y2) / 2
w = w * 1.3
h = h * 1.3
l = max(0, px - w / 2)
r = min(ww - 1, px + w / 2)
t = max(0, py - h / 2)
b = min(hh - 1, py + h / 2)
# x1 y1 x2 y2
return [int(l), int(t), int(r), int(b)]
file_ynh = open("./WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt",'r')
lines = file_ynh.readlines()
count = len(lines)
num = 1
for line in lines:
print(num, "/", count)
num += 1
line = line.strip().split()
landmark = line[0:196]
detection = line[196:200]
attributes = line[200:206]
name = line[206:207]
img = cv2.imread("./WFLW_images/" + str(name[0]))
if img is None:
exit()
continue
h,w = img.shape[0:2]
# cv2.rectangle(img, (int(detection[0]),int(detection[1])), (int(detection[2]),int(detection[3])), (0, 255, 0), 2, 8)
# for index in range(0, len(landmark), 2):
# cv2.circle(img, (int(float(landmark[index])), int(float(landmark[index+1]))), 1, (255, 0, 0), -1)
# cv2.imshow("img.jpg", img)
# cv2.waitKey(0)
detection = list(map(int, detection))
rect = groberect(detection, w, h)
rectimg = img[rect[1]:rect[3],rect[0]:rect[2],:]
landmark = list(map(float, landmark))
for index in range(0, len(landmark), 2):
if( (landmark[index]-rect[0])<0 or (landmark[index+1]-rect[1])<0 ):
print("特征点超出扩展框,应该改变策略,使用特征点和原始框的最小外接矩形做扩展框")
print(landmark[index]-rect[0],"\n")
print(landmark[index+1]-rect[1], "\n")
cv2.circle(rectimg, (int(landmark[index]-rect[0]), int(landmark[index+1]-rect[1])), 1, (255, 0, 0), -1)
cv2.imwrite("./result/"+"img_%s.jpg"%(str(num)), rectimg)
# cv2.imshow("rectimg.jpg", rectimg)
# cv2.waitKey(0)
file_ynh.close()
写完自己的代码才发现,官方有,我,,,,,,内心羊驼在奔跑。。。。。。。。官方的放在了最后
最小外接矩形改进:
import os
import cv2
import numpy as np
# self.img = cv2.imread(self.line[0])
# self.landmark = np.asarray(self.line[1:197], dtype=np.float32)
# self.attribute = np.asarray(self.line[197:203], dtype=np.int32)
# self.euler_angle = np.asarray(self.line[203:206], dtype=np.float32)
def groberect(points, ww, hh):
x1 = points[0]
y1 = points[1]
x2 = points[2]
y2 = points[3]
w = x2 - x1 + 1
h = y2 - y1 + 1
px = float(x1 + x2) / 2
py = float(y1 + y2) / 2
w = w * 1.2
h = h * 1.2
l = max(0, px - w / 2)
r = min(ww - 1, px + w / 2)
t = max(0, py - h / 2)
b = min(hh - 1, py + h / 2)
# x1 y1 x2 y2
return [int(l), int(t), int(r), int(b)]
file_ynh = open("./WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt",'r')
lines = file_ynh.readlines()
count = len(lines)
num = 1
for line in lines:
print(num, "/", count)
num += 1
line = line.strip().split()
landmark = line[0:196]
detection = line[196:200]
attributes = line[200:206]
name = line[206:207]
img = cv2.imread("./WFLW_images/" + str(name[0]))
if img is None:
exit()
continue
h,w = img.shape[0:2]
# cv2.rectangle(img, (int(detection[0]),int(detection[1])), (int(detection[2]),int(detection[3])), (0, 255, 0), 2, 8)
# for index in range(0, len(landmark), 2):
# cv2.circle(img, (int(float(landmark[index])), int(float(landmark[index+1]))), 1, (255, 0, 0), -1)
# cv2.imshow("img.jpg", img)
# cv2.waitKey(0)
landmark = list(map(float, landmark))
new_landmark = []
for index in range(0, len(landmark), 2):
new_landmark.append([landmark[index], landmark[index+1]])
landmark_array = np.asarray(new_landmark)
xmax, ymax = landmark_array.max(axis=0)
xmin, ymin = landmark_array.min(axis=0)
#坐标中有负数,为了crop,应该判断
xmin = xmin if xmin > 0 else 0
ymin = ymin if ymin > 0 else 0
xmax = xmax if xmax < w-1 else w-1
ymax = ymax if ymax < h-1 else h-1
detection = list(map(int, detection))
new_detection = []
new_detection.append(detection[0] if detection[0] < int(xmin+0.5) else int(xmin+0.5))
new_detection.append(detection[1] if detection[1] < int(ymin+0.5) else int(ymin+0.5))
new_detection.append(detection[2] if detection[2] > int(xmax+0.5) else int(xmax+0.5))
new_detection.append(detection[3] if detection[3] > int(ymax+0.5) else int(ymax+0.5))
#new_detection = [detection[x] if detection[x] > landList[x] else landList[x] for x in range(len(detection))]
rect = groberect(new_detection, w, h)
rectimg = img[rect[1]:rect[3],rect[0]:rect[2],:]
for index in range(0, len(landmark), 2):
x,y = landmark[index], landmark[index+1]
x = x if x > 0 else 0
y = y if y > 0 else 0
x = x if x < w - 1 else w - 1
y = y if y < h - 1 else h - 1
if( (x-rect[0])<0 or (y-rect[1])<0 ):
print("特征点超出扩展框,应该改变策略,使用特征点和原始框的最小外接矩形做扩展框")
print(x-rect[0],"\n")
print(y-rect[1], "\n")
cv2.circle(rectimg, (int(x-rect[0]), int(y-rect[1])), 1, (255, 0, 0), -1)
cv2.imwrite("./result/"+"img_%s.jpg"%(str(num)), rectimg)
# cv2.imshow("rectimg.jpg", rectimg)
# cv2.waitKey(0)
file_ynh.close()
有小伙伴私信问了官方位置,,我是在:
https://github.com/mpatacchiola/deepgaze/blob/master/deepgaze/head_pose_estimation.py
看到的,很多代码都使用该处理文件,所以视为官方了
官方处理代码:
#-*- coding: utf-8 -*-
import os
import numpy as np
import cv2
import shutil
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
#from pfld.utils import calculate_pitch_yaw_roll
debug = False
import math
def calculate_pitch_yaw_roll(landmarks_2D, cam_w=256, cam_h=256,
radians=False):
""" Return the the pitch yaw and roll angles associated with the input image.
@param radians When True it returns the angle in radians, otherwise in degrees.
"""
assert landmarks_2D is not None, 'landmarks_2D is None'
# Estimated camera matrix values.
c_x = cam_w / 2
c_y = cam_h / 2
f_x = c_x / np.tan(60 / 2 * np.pi / 180)
f_y = f_x
camera_matrix = np.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
[0.0, 0.0, 1.0]])
camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])
# dlib (68 landmark) trached points
# TRACKED_POINTS = [17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8]
# wflw(98 landmark) trached points
# TRACKED_POINTS = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16]
# X-Y-Z with X pointing forward and Y on the left and Z up.
# The X-Y-Z coordinates used are like the standard coordinates of ROS (robotic operative system)
# OpenCV uses the reference usually used in computer vision:
# X points to the right, Y down, Z to the front
landmarks_3D = np.float32([
[6.825897, 6.760612, 4.402142], # LEFT_EYEBROW_LEFT,
[1.330353, 7.122144, 6.903745], # LEFT_EYEBROW_RIGHT,
[-1.330353, 7.122144, 6.903745], # RIGHT_EYEBROW_LEFT,
[-6.825897, 6.760612, 4.402142], # RIGHT_EYEBROW_RIGHT,
[5.311432, 5.485328, 3.987654], # LEFT_EYE_LEFT,
[1.789930, 5.393625, 4.413414], # LEFT_EYE_RIGHT,
[-1.789930, 5.393625, 4.413414], # RIGHT_EYE_LEFT,
[-5.311432, 5.485328, 3.987654], # RIGHT_EYE_RIGHT,
[-2.005628, 1.409845, 6.165652], # NOSE_LEFT,
[-2.005628, 1.409845, 6.165652], # NOSE_RIGHT,
[2.774015, -2.080775, 5.048531], # MOUTH_LEFT,
[-2.774015, -2.080775, 5.048531], # MOUTH_RIGHT,
[0.000000, -3.116408, 6.097667], # LOWER_LIP,
[0.000000, -7.415691, 4.070434], # CHIN
])
landmarks_2D = np.asarray(landmarks_2D, dtype=np.float32).reshape(-1, 2)
# Applying the PnP solver to find the 3D pose of the head from the 2D position of the landmarks.
# retval - bool
# rvec - Output rotation vector that, together with tvec, brings points from the world coordinate system to the camera coordinate system.
# tvec - Output translation vector. It is the position of the world origin (SELLION) in camera co-ords
_, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
camera_matrix, camera_distortion)
# Get as input the rotational vector, Return a rotational matrix
# const double PI = 3.141592653;
# double thetaz = atan2(r21, r11) / PI * 180;
# double thetay = atan2(-1 * r31, sqrt(r32*r32 + r33*r33)) / PI * 180;
# double thetax = atan2(r32, r33) / PI * 180;
rmat, _ = cv2.Rodrigues(rvec)
pose_mat = cv2.hconcat((rmat, tvec))
_, _, _, _, _, _, euler_angles = cv2.decomposeProjectionMatrix(pose_mat)
return map(lambda k: k[0], euler_angles) # euler_angles contain (pitch, yaw, roll)
def rotate(angle, center, landmark):
rad = angle * np.pi / 180.0
alpha = np.cos(rad)
beta = np.sin(rad)
M = np.zeros((2,3), dtype=np.float32)
M[0, 0] = alpha
M[0, 1] = beta
M[0, 2] = (1-alpha)*center[0] - beta*center[1]
M[1, 0] = -beta
M[1, 1] = alpha
M[1, 2] = beta*center[0] + (1-alpha)*center[1]
landmark_ = np.asarray([(M[0,0]*x+M[0,1]*y+M[0,2],
M[1,0]*x+M[1,1]*y+M[1,2]) for (x,y) in landmark])
return M, landmark_
class ImageDate():
def __init__(self, line, imgDir, image_size=112):
self.image_size = image_size
line = line.strip().split()
#0-195: landmark 坐标点 196-199: bbox 坐标点;
#200: 姿态(pose) 0->正常姿态(normal pose) 1->大的姿态(large pose)
#201: 表情(expression) 0->正常表情(normal expression) 1->夸张的表情(exaggerate expression)
#202: 照度(illumination) 0->正常照明(normal illumination) 1->极端照明(extreme illumination)
#203: 化妆(make-up) 0->无化妆(no make-up) 1->化妆(make-up)
#204: 遮挡(occlusion) 0->无遮挡(no occlusion) 1->遮挡(occlusion)
#205: 模糊(blur) 0->清晰(clear) 1->模糊(blur)
#206: 图片名称
assert(len(line) == 207)
self.list = line
self.landmark = np.asarray(list(map(float, line[:196])), dtype=np.float32).reshape(-1, 2)
self.box = np.asarray(list(map(int, line[196:200])),dtype=np.int32)
flag = list(map(int, line[200:206]))
flag = list(map(bool, flag))
self.pose = flag[0]
self.expression = flag[1]
self.illumination = flag[2]
self.make_up = flag[3]
self.occlusion = flag[4]
self.blur = flag[5]
self.path = os.path.join(imgDir, line[206])
self.img = None
self.imgs = []
self.landmarks = []
self.boxes = []
def load_data(self, is_train, repeat, mirror=None):
if (mirror is not None):
with open(mirror, 'r') as f:
lines = f.readlines()
assert len(lines) == 1
mirror_idx = lines[0].strip().split(',')
mirror_idx = list(map(int, mirror_idx))
xy = np.min(self.landmark, axis=0).astype(np.int32)
zz = np.max(self.landmark, axis=0).astype(np.int32)
wh = zz - xy + 1
center = (xy + wh/2).astype(np.int32)
img = cv2.imread(self.path)
boxsize = int(np.max(wh)*1.2)
xy = center - boxsize//2
x1, y1 = xy
x2, y2 = xy + boxsize
height, width, _ = img.shape
dx = max(0, -x1)
dy = max(0, -y1)
x1 = max(0, x1)
y1 = max(0, y1)
edx = max(0, x2 - width)
edy = max(0, y2 - height)
x2 = min(width, x2)
y2 = min(height, y2)
imgT = img[y1:y2, x1:x2]
if (dx > 0 or dy > 0 or edx > 0 or edy > 0):
imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0)
if imgT.shape[0] == 0 or imgT.shape[1] == 0:
imgTT = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for x, y in (self.landmark+0.5).astype(np.int32):
cv2.circle(imgTT, (x, y), 1, (0, 0, 255))
cv2.imshow('0', imgTT)
if cv2.waitKey(0) == 27:
exit()
imgT = cv2.resize(imgT, (self.image_size, self.image_size))
landmark = (self.landmark - xy)/boxsize
assert (landmark >= 0).all(), str(landmark) + str([dx, dy])
assert (landmark <= 1).all(), str(landmark) + str([dx, dy])
self.imgs.append(imgT)
self.landmarks.append(landmark)
if is_train:
while len(self.imgs) < repeat:
angle = np.random.randint(-30, 30)
cx, cy = center
cx = cx + int(np.random.randint(-boxsize*0.1, boxsize*0.1))
cy = cy + int(np.random.randint(-boxsize * 0.1, boxsize * 0.1))
M, landmark = rotate(angle, (cx,cy), self.landmark)
imgT = cv2.warpAffine(img, M, (int(img.shape[1]*1.1), int(img.shape[0]*1.1)))
wh = np.ptp(landmark, axis=0).astype(np.int32) + 1
size = np.random.randint(int(np.min(wh)), np.ceil(np.max(wh) * 1.25))
xy = np.asarray((cx - size // 2, cy - size//2), dtype=np.int32)
landmark = (landmark - xy) / size
if (landmark < 0).any() or (landmark > 1).any():
continue
x1, y1 = xy
x2, y2 = xy + size
height, width, _ = imgT.shape
dx = max(0, -x1)
dy = max(0, -y1)
x1 = max(0, x1)
y1 = max(0, y1)
edx = max(0, x2 - width)
edy = max(0, y2 - height)
x2 = min(width, x2)
y2 = min(height, y2)
imgT = imgT[y1:y2, x1:x2]
if (dx > 0 or dy > 0 or edx >0 or edy > 0):
imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0)
imgT = cv2.resize(imgT, (self.image_size, self.image_size))
if mirror is not None and np.random.choice((True, False)):
landmark[:,0] = 1 - landmark[:,0]
landmark = landmark[mirror_idx]
imgT = cv2.flip(imgT, 1)
self.imgs.append(imgT)
self.landmarks.append(landmark)
def save_data(self, path, prefix):
attributes = [self.pose, self.expression, self.illumination, self.make_up, self.occlusion, self.blur]
attributes = np.asarray(attributes, dtype=np.int32)
attributes_str = ' '.join(list(map(str, attributes)))
labels = []
TRACKED_POINTS = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16]
for i, (img, lanmark) in enumerate(zip(self.imgs, self.landmarks)):
assert lanmark.shape == (98, 2)
save_path = os.path.join(path, prefix+'_'+str(i)+'.png')
assert not os.path.exists(save_path), save_path
cv2.imwrite(save_path, img)
euler_angles_landmark = []
for index in TRACKED_POINTS:
euler_angles_landmark.append(lanmark[index])
euler_angles_landmark = np.asarray(euler_angles_landmark).reshape((-1, 28))
pitch, yaw, roll = calculate_pitch_yaw_roll(euler_angles_landmark[0])
euler_angles = np.asarray((pitch, yaw, roll), dtype=np.float32)
euler_angles_str = ' '.join(list(map(str, euler_angles)))
landmark_str = ' '.join(list(map(str,lanmark.reshape(-1).tolist())))
label = '{} {} {} {}\n'.format(save_path, landmark_str, attributes_str, euler_angles_str)
labels.append(label)
return labels
def get_dataset_list(imgDir, outDir, landmarkDir, is_train):
with open(landmarkDir,'r') as f:
lines = f.readlines()
labels = []
save_img = os.path.join(outDir, 'imgs')
if not os.path.exists(save_img):
os.mkdir(save_img)
if debug:
lines = lines[:100]
for i, line in enumerate(lines):
Img = ImageDate(line, imgDir)
img_name = Img.path
Img.load_data(is_train, 10, Mirror_file)
_, filename = os.path.split(img_name)
filename, _ = os.path.splitext(filename)
label_txt = Img.save_data(save_img, str(i)+'_' + filename)
labels.append(label_txt)
if ((i + 1) % 100) == 0:
print('file: {}/{}'.format(i+1, len(lines)))
with open(os.path.join(outDir, 'list.txt'),'w') as f:
for label in labels:
f.writelines(label)
if __name__ == '__main__':
root_dir = os.path.dirname(os.path.realpath(__file__))
imageDirs = 'WFLW/WFLW_images'
Mirror_file = 'WFLW/WFLW_annotations/Mirror98.txt'
landmarkDirs = ['WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_test.txt',
'WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt']
outDirs = ['test_data', 'train_data']
for landmarkDir, outDir in zip(landmarkDirs, outDirs):
outDir = os.path.join(root_dir, outDir)
print(outDir)
if os.path.exists(outDir):
shutil.rmtree(outDir)
os.mkdir(outDir)
if 'list_98pt_rect_attr_test.txt' in landmarkDir:
is_train = False
else:
is_train = True
imgs = get_dataset_list(imageDirs, outDir, landmarkDir, is_train)
print('end')
另外:
博客https://blog.csdn.net/ChuiGeDaQiQiu/article/details/88623267
中使用的是通用3D模型,相机内参和畸变也用的是通用方式,效果并不好:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
import cv2
import sys
import numpy as np
import math
class PoseEstimator:
"""Estimate head pose according to the facial landmarks"""
def __init__(self, img_size=(480, 640)):
self.size = img_size
# 3D model points.
self.model_points_6 = np.array([
(0.0, 0.0, 0.0), # Nose tip
(0.0, -330.0, -65.0), # Chin
(-225.0, 170.0, -135.0), # Left eye left corner
(225.0, 170.0, -135.0), # Right eye right corne
(-150.0, -150.0, -125.0), # Left Mouth corner
(150.0, -150.0, -125.0) # Right mouth corner
], dtype=float) / 4.5
self.model_points_14 = np.array([
(6.825897, 6.760612, 4.402142),
(1.330353, 7.122144, 6.903745),
(-1.330353, 7.122144, 6.903745),
(-6.825897, 6.760612, 4.402142),
(5.311432, 5.485328, 3.987654),
(1.789930, 5.393625, 4.413414),
(-1.789930, 5.393625, 4.413414),
(-5.311432, 5.485328, 3.987654),
(2.005628, 1.409845, 6.165652),
(-2.005628, 1.409845, 6.165652),
(2.774015, -2.080775, 5.048531),
(-2.774015, -2.080775, 5.048531),
(0.000000, -3.116408, 6.097667),
(0.000000, -7.415691, 4.070434)], dtype=float)
self.model_points_68 = np.array([
[-73.393523, -29.801432, -47.667532],
[-72.775014, -10.949766, -45.909403],
[-70.533638, 7.929818, -44.84258],
[-66.850058, 26.07428, -43.141114],
[-59.790187, 42.56439, -38.635298],
[-48.368973, 56.48108, -30.750622],
[-34.121101, 67.246992, -18.456453],
[-17.875411, 75.056892, -3.609035],
[0.098749, 77.061286, 0.881698],
[17.477031, 74.758448, -5.181201],
[32.648966, 66.929021, -19.176563],
[46.372358, 56.311389, -30.77057],
[57.34348, 42.419126, -37.628629],
[64.388482, 25.45588, -40.886309],
[68.212038, 6.990805, -42.281449],
[70.486405, -11.666193, -44.142567],
[71.375822, -30.365191, -47.140426],
[-61.119406, -49.361602, -14.254422],
[-51.287588, -58.769795, -7.268147],
[-37.8048, -61.996155, -0.442051],
[-24.022754, -61.033399, 6.606501],
[-11.635713, -56.686759, 11.967398],
[12.056636, -57.391033, 12.051204],
[25.106256, -61.902186, 7.315098],
[38.338588, -62.777713, 1.022953],
[51.191007, -59.302347, -5.349435],
[60.053851, -50.190255, -11.615746],
[0.65394, -42.19379, 13.380835],
[0.804809, -30.993721, 21.150853],
[0.992204, -19.944596, 29.284036],
[1.226783, -8.414541, 36.94806],
[-14.772472, 2.598255, 20.132003],
[-7.180239, 4.751589, 23.536684],
[0.55592, 6.5629, 25.944448],
[8.272499, 4.661005, 23.695741],
[15.214351, 2.643046, 20.858157],
[-46.04729, -37.471411, -7.037989],
[-37.674688, -42.73051, -3.021217],
[-27.883856, -42.711517, -1.353629],
[-19.648268, -36.754742, 0.111088],
[-28.272965, -35.134493, 0.147273],
[-38.082418, -34.919043, -1.476612],
[19.265868, -37.032306, 0.665746],
[27.894191, -43.342445, -0.24766],
[37.437529, -43.110822, -1.696435],
[45.170805, -38.086515, -4.894163],
[38.196454, -35.532024, -0.282961],
[28.764989, -35.484289, 1.172675],
[-28.916267, 28.612716, 2.24031],
[-17.533194, 22.172187, 15.934335],
[-6.68459, 19.029051, 22.611355],
[0.381001, 20.721118, 23.748437],
[8.375443, 19.03546, 22.721995],
[18.876618, 22.394109, 15.610679],
[28.794412, 28.079924, 3.217393],
[19.057574, 36.298248, 14.987997],
[8.956375, 39.634575, 22.554245],
[0.381549, 40.395647, 23.591626],
[-7.428895, 39.836405, 22.406106],
[-18.160634, 36.677899, 15.121907],
[-24.37749, 28.677771, 4.785684],
[-6.897633, 25.475976, 20.893742],
[0.340663, 26.014269, 22.220479],
[8.444722, 25.326198, 21.02552],
[24.474473, 28.323008, 5.712776],
[8.449166, 30.596216, 20.671489],
[0.205322, 31.408738, 21.90367],
[-7.198266, 30.844876, 20.328022]])
self.focal_length = self.size[1]
self.camera_center = (self.size[1] / 2, self.size[0] / 2)
self.camera_matrix = np.array(
[[self.focal_length, 0, self.camera_center[0]],
[0, self.focal_length, self.camera_center[1]],
[0, 0, 1]], dtype="double")
# Assuming no lens distortion
self.dist_coeefs = np.zeros((4, 1))
# Rotation vector and translation vector
self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
self.t_vec = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])
def get_euler_angle(self, rotation_vector):
# calc rotation angles
theta = cv2.norm(rotation_vector, cv2.NORM_L2)
# transform to quaterniond
w = math.cos(theta / 2)
x = math.sin(theta / 2) * rotation_vector[0][0] / theta
y = math.sin(theta / 2) * rotation_vector[1][0] / theta
z = math.sin(theta / 2) * rotation_vector[2][0] / theta
# pitch (x-axis rotation)
t0 = 2.0 * (w * x + y * z)
t1 = 1.0 - 2.0 * (x ** 2 + y ** 2)
pitch = math.atan2(t0, t1)
# yaw (y-axis rotation)
t2 = 2.0 * (w * y - z * x)
if t2 > 1.0:
t2 = 1.0
if t2 < -1.0:
t2 = -1.0
yaw = math.asin(t2)
# roll (z-axis rotation)
t3 = 2.0 * (w * z + x * y)
t4 = 1.0 - 2.0 * (y ** 2 + z ** 2)
roll = math.atan2(t3, t4)
return pitch, yaw, roll
def solve_pose_by_6_points(self, image_points):
"""
Solve pose from image points
Return (rotation_vector, translation_vector) as pose.
"""
points_6 = np.float32([
image_points[30], image_points[36], image_points[45],
image_points[48], image_points[54], image_points[8]])
_, rotation_vector, translation_vector = cv2.solvePnP(
self.model_points_6,
points_6,
self.camera_matrix,
self.dist_coeefs,
rvec=self.r_vec,
tvec=self.t_vec,
useExtrinsicGuess=True)
return rotation_vector, translation_vector
def solve_pose_by_14_points(self, image_points):
points_14 = np.float32([
image_points[17], image_points[21], image_points[22], image_points[26], image_points[36],
image_points[39], image_points[42], image_points[45], image_points[31], image_points[35],
image_points[48], image_points[54], image_points[57], image_points[8]])
_, rotation_vector, translation_vector = cv2.solvePnP(
self.model_points_14,
points_14,
self.camera_matrix,
self.dist_coeefs,
rvec=self.r_vec,
tvec=self.t_vec,
useExtrinsicGuess=True)
return rotation_vector, translation_vector
def solve_pose_by_68_points(self, image_points):
image_points = np.float32(np.expand_dims(image_points, axis=1))
_, rotation_vector, translation_vector = cv2.solvePnP(
self.model_points_68,
image_points,
self.camera_matrix,
self.dist_coeefs,
rvec=self.r_vec,
tvec=self.t_vec,
useExtrinsicGuess=True)
return rotation_vector, translation_vector
def draw_annotation_box(self, image, rotation_vector, translation_vector, color=(255, 255, 255), line_width=2):
"""Draw a 3D box as annotation of pose"""
point_3d = []
rear_size = 75
rear_depth = 0
point_3d.append((-rear_size, -rear_size, rear_depth))
point_3d.append((-rear_size, rear_size, rear_depth))
point_3d.append((rear_size, rear_size, rear_depth))
point_3d.append((rear_size, -rear_size, rear_depth))
point_3d.append((-rear_size, -rear_size, rear_depth))
front_size = 100
front_depth = 100
point_3d.append((-front_size, -front_size, front_depth))
point_3d.append((-front_size, front_size, front_depth))
point_3d.append((front_size, front_size, front_depth))
point_3d.append((front_size, -front_size, front_depth))
point_3d.append((-front_size, -front_size, front_depth))
point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3)
# Map to 2d image points
(point_2d, _) = cv2.projectPoints(point_3d,
rotation_vector,
translation_vector,
self.camera_matrix,
self.dist_coeefs)
point_2d = np.int32(point_2d.reshape(-1, 2))
# Draw all the lines
cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
cv2.line(image, tuple(point_2d[1]), tuple(
point_2d[6]), color, line_width, cv2.LINE_AA)
cv2.line(image, tuple(point_2d[2]), tuple(
point_2d[7]), color, line_width, cv2.LINE_AA)
cv2.line(image, tuple(point_2d[3]), tuple(
point_2d[8]), color, line_width, cv2.LINE_AA)
def run(pic_path):
import dlib
detector = dlib.get_frontal_face_detector() # 加载dlib自带的人脸检测器
img = cv2.imread(pic_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # opencv读到的是BGR的矩阵
faces = detector(img, 1) # 检测人脸,返回检出的人脸框,可能有多张
r = faces[0] # 只取第一张脸
predictor = dlib.shape_predictor('./shape_predictor_68_face_landmarks.dat') # 加载关键点检测模型
ldmk = predictor(img, faces[0]) # 对指定的人脸进行特征点检测
points_68 = np.matrix([[p.x, p.y] for p in ldmk.parts()])
# points_68 = [[p.x, p.y] for p in ldmk.parts()]
# x0, y0, x1, y1 = r.left(), r.top(), r.right(), r.bottom()
# cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2) # 画个人脸框框
# for p in points_68:
# cv2.circle(img, (int(p[0]), int(p[1])), 2, (0, 255, 0), -1, 0)
# cv2.imshow("img",cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
# cv2.waitKey()
pose_estimator = PoseEstimator(img_size=img.shape)
#pose = pose_estimator.solve_pose_by_6_points(points_68)
#pose = pose_estimator.solve_pose_by_14_points(points_68)
pose = pose_estimator.solve_pose_by_68_points(points_68)
pitch, yaw, roll = pose_estimator.get_euler_angle(pose[0])
def _radian2angle(r):
return (r / math.pi) * 180
Y, X, Z = map(_radian2angle, [pitch, yaw, roll])
line = 'Y:{:.1f}\nX:{:.1f}\nZ:{:.1f}'.format(Y, X, Z)
print('{},{}'.format(os.path.basename(pic_path), line.replace('\n', ',')))
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
y = 20
for _, txt in enumerate(line.split('\n')):
cv2.putText(img, txt, (20, y), cv2.FONT_HERSHEY_PLAIN, 1.3, (0, 0, 255), 1)
y = y + 15
ori_68 = [[p.x, p.y] for p in ldmk.parts()]
for p in ori_68:
cv2.circle(img, (int(p[0]), int(p[1])), 2, (0, 255, 0), -1, 0)
cv2.imshow('img', img)
if cv2.waitKey(-1) == 27:
pass
return 0
if __name__ == "__main__":
sys.exit(run("./8_Election_Campain_Election_Campaign_8_326.jpg"))