在 Jetson Nano 上下载并尝试一下 GitHub 上近 3.7w 星的代码.
https://github.com/ageitgey/face_recognition
一 通过demo分析一下人脸比对的算法流程.
a 输入几个要比对的人脸样本, 用 face_locations 定位人脸, 再用 face_encodings 对人脸区域编码
b 在待检测的图像里, 先用 face_locations 进行人脸定位, 再对定位出的人脸区域用 face_encodings 编码
c 将待检测人脸的 encodings 与录入样本的 face_encodings 逐一比较, 找到距离最小的(最像的)一个
d 若最小距离 <= 阈值 (代码中的 tolerance_thresh), 则标定为对应的人, 否则视为未找到.
二 安装 face_recognition
pip3 install face_recognition
三 实践
face_recognition 自带的 face_locations 算法在我的环境下都不大理想: hog 算法 cpu 占用较高且准确率不高, cnn 模型占用 gpu 内存较高.
于是在 3.2 的 demo2 里把它换掉了.
3.1 demo 1 猜猜哪个是"我"
输入三个人的照片,再从一堆照片里比对哪个是我们三个人之一.
import cv2
import face_recognition
import numpy as np
import traceback
class FaceInfo(object):
    """Reference photo of one known person, reduced to a thumbnail and an encoding.

    Attributes:
        name: label passed in by the caller.
        path: path of the reference photo.
        face_img: 64x64 crop of the first face detected in the photo.
        face_encoding: encoding of that same face, as returned by
            ``face_recognition.face_encodings``.
    """

    def __init__(self, path, name):
        """Load the photo at ``path`` and keep the first face found in it.

        Raises:
            ValueError: if no face is detected in the photo.
        """
        self.name = name
        self.path = path
        self.face_img, self.face_encoding = self.face_img_load(path)

    @classmethod
    def face_img_load(cls, path):
        """Return ``(thumbnail, encoding)`` for the first face in the image at ``path``.

        Raises:
            ValueError: if no face is detected in the image.
        """
        image = face_recognition.load_image_file(path)
        locations = face_recognition.face_locations(image)
        if not locations:
            # Without this guard the failure would be a bare IndexError that
            # does not say which photo was unusable.
            raise ValueError('no face found in reference image %r' % path)

        # Each location is (top, right, bottom, left); only the first face is kept,
        # so only that one needs to be encoded.
        first = locations[0]
        encoding = face_recognition.face_encodings(image, [first])[0]

        top, right, bottom, left = first
        # The crop keeps the channel order of the loaded image; the display loop
        # pastes it into the frame before converting the whole frame for imshow.
        face_image = cv2.resize(image[top:bottom, left:right], (64, 64))
        return face_image, encoding
# Enrol the three known people; each FaceInfo loads and encodes its photo on construction.
hu_face = FaceInfo('img/hu_face.jpg', 'hu')
cheng_face = FaceInfo('img/cheng_face.jpg', 'cheng')
han_face = FaceInfo('img/han_face.jpg', 'han')

# Parallel lists: position k in each list refers to the same person.
_known_faces = (hu_face, cheng_face, han_face)
known_face_encodings = [face.face_encoding for face in _known_faces]
known_face_names = [face.name for face in _known_faces]
face_templates = [face.face_img for face in _known_faces]

# Largest face distance that still counts as a match.
tolerance_thresh = 0.6

# Full-screen output window shared by every displayed image.
out_win = "foo"
cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
# Run every numbered test photo through detection + matching and display the result.
for i in range(1, 24):
    img_path = 'img/%d.jpg' % i
    try:
        test_image = face_recognition.load_image_file(img_path)

        # Shrink wide photos before detection: half size from 800 px wide,
        # quarter size from 1600 px wide.
        width = test_image.shape[1]
        if width >= 1600:
            scale = 0.25
        elif width >= 800:
            scale = 0.5
        else:
            scale = None
        if scale is not None:
            test_image = cv2.resize(test_image, (0, 0), fx=scale, fy=scale,
                                    interpolation=cv2.INTER_LINEAR)

        # Each location is (top, right, bottom, left).
        face_locations = face_recognition.face_locations(test_image)
        face_encodings = face_recognition.face_encodings(test_image, face_locations)
        print("face nums = %s" % (len(face_locations)))

        match_names = []
        match_face_imgs = []
        for face_encoding in face_encodings:
            # 'foo' / None mark a face that matched nobody.
            name = 'foo'
            face_img = None
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_idx = np.argmin(face_distances)
            # Accept the closest known face only if it is within the tolerance.
            if face_distances[best_idx] <= tolerance_thresh:
                name = known_face_names[best_idx]
                face_img = face_templates[best_idx]
            print('detect face samlike [%s] distane = %.3f, all = %s' % (name, face_distances[best_idx], face_distances))
            match_names.append(name)
            match_face_imgs.append(face_img)

        img_h, img_w = test_image.shape[:2]
        slot = 0  # next free thumbnail position along the top edge
        for loc, name, face_img in zip(face_locations, match_names, match_face_imgs):
            y0, x1, y1, x0 = loc
            cv2.putText(test_image, name, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 3)
            cv2.rectangle(test_image, (x0, y0), (x1, y1), (0, 0, 255), 2)
            if face_img is None:
                continue
            thumb_h, thumb_w = face_img.shape[:2]
            # Thumbnails are laid out left to right, so the horizontal offset must be
            # checked against the image WIDTH, and the image must be tall enough.
            if thumb_h <= img_h and (slot + 1) * thumb_w <= img_w:
                test_image[0:thumb_h, slot * thumb_w:(slot + 1) * thumb_w] = face_img
                slot += 1

        # The image was loaded as RGB; imshow expects BGR.
        test_image = cv2.cvtColor(test_image, cv2.COLOR_RGB2BGR)
        cv2.imshow(out_win, test_image)
        cv2.waitKey(3000)
    except Exception:
        # Best effort: one bad or missing photo must not stop the run,
        # but report it instead of hiding the failure.
        print('skip %s' % img_path)
        traceback.print_exc()
3.2 demo2 实时摄像头之 "xx" 你好
这里换了 opencv dnn yoloface darknet 作人脸检测器, 优化速度, 640x360分辨率人脸定位 在 40ms以内, 人脸比对在 30ms以内. 加了语音播报, 首次定位到某某人会说 XX你好
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import sys
sys.path.append('/workspace/hugo_py')
sys.path.append('./yoloface')
import os
cur_path=os.path.dirname(os.path.abspath(__file__))
import Speech
import cv2
import face_recognition
import numpy as np
import traceback
from camera import JetCamera
import traceback
import time
from yoloface.darknet_detect_live import DarkNetWrap
# Camera capture settings; main() passes them to JetCamera in this order.
# NOTE(review): presumably width and height in pixels and frames per second -
# confirm against JetCamera's signature.
cap_w = 640
cap_h = 360
cap_fps = 10
class FaceInfo(object):
    """Reference photo of one known person, located with an external face detector.

    Attributes:
        name: label passed in by the caller.
        path: path of the reference photo.
        face_detector: object whose ``detect(image)`` returns boxes as
            ``(x0, y0, x1, y1)``.
        face_img: 64x64 crop of the first detected face.
        face_encoding: encoding of that face from ``face_recognition.face_encodings``.
    """

    def __init__(self, path, name, face_detector):
        """Load the photo at ``path`` and keep the first face the detector reports.

        Raises:
            ValueError: if the detector finds no face in the photo.
        """
        self.name = name
        self.path = path
        self.face_detector = face_detector
        self.face_img, self.face_encoding = self.face_img_load(path)

    def face_img_load(self, path):
        """Return ``(thumbnail, encoding)`` for the first face detected in ``path``.

        Raises:
            ValueError: if the detector finds no face in the image.
        """
        image = face_recognition.load_image_file(path)
        # main() hands the detector the raw camera frame (the one it converts
        # BGR->RGB for face_recognition), so feed the detector BGR here as well.
        boxs = self.face_detector.detect(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        print('----------------- boxs', boxs)
        # len() rather than truthiness: the detector may return a numpy array.
        if len(boxs) == 0:
            raise ValueError('no face found in reference image %r' % path)

        x0, y0, x1, y1 = boxs[0]
        # face_recognition expects (top, right, bottom, left).
        location = (y0, x1, y1, x0)
        encoding = face_recognition.face_encodings(image, [location])[0]

        # Clamp the crop origin: a negative start index would wrap around and
        # silently select the wrong region.
        face_image = image[max(y0, 0): y1, max(x0, 0): x1]
        face_image = cv2.resize(face_image, (64, 64))
        return face_image, encoding
class FaceDetectDNN(object):
    """Face detector built on an OpenCV DNN network loaded from ``./fd_model``."""

    def __init__(self, DNN="TF", conf_threshold = 0.7):
        """Load the detection network.

        Args:
            DNN: ``"CAFFE"`` loads the FP16 Caffe model; any other value loads the
                8-bit quantized TensorFlow model.
            conf_threshold: detections with confidence at or below this are dropped.
        """
        # OpenCV DNN supports 2 networks.
        # 1. FP16 version of the original caffe implementation ( 5.4 MB )
        # 2. 8 bit Quantized version using Tensorflow ( 2.7 MB )
        self.conf_threshold = conf_threshold

        # Load the model. The DNN argument is honoured as passed; it used to be
        # overwritten with "TF" here, which made the Caffe branch unreachable.
        if DNN == "CAFFE":
            modelFile = "./fd_model/res10_300x300_ssd_iter_140000_fp16.caffemodel"
            configFile = "./fd_model/deploy.prototxt"
            self.net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
        else:
            modelFile = "./fd_model/opencv_face_detector_uint8.pb"
            configFile = "./fd_model/opencv_face_detector.pbtxt"
            self.net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

        # Enable OpenCV-DNN CUDA support.
        self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    def detect(self, frame):
        """Return face boxes found in ``frame`` as ``[x1, y1, x2, y2]`` pixel lists.

        Only detections whose confidence exceeds ``self.conf_threshold`` are returned.
        """
        frame_height, frame_width = frame.shape[:2]

        # Run detection: 300x300 input, fixed per-channel mean, no channel swap, no crop.
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123],
                                     swapRB=False, crop=False)
        self.net.setInput(blob)
        detections = self.net.forward()

        bboxes = []
        for i in range(detections.shape[2]):
            # Row layout used below: index 2 is the confidence, indices 3..6 are
            # x1, y1, x2, y2 as fractions of the frame size.
            confidence = detections[0, 0, i, 2]
            if confidence > self.conf_threshold:
                x1 = int(detections[0, 0, i, 3] * frame_width)
                y1 = int(detections[0, 0, i, 4] * frame_height)
                x2 = int(detections[0, 0, i, 5] * frame_width)
                y2 = int(detections[0, 0, i, 6] * frame_height)
                bboxes.append([x1, y1, x2, y2])
        return bboxes
def _match_faces(face_encodings, known_encodings, known_names, templates, tolerance):
    """Label each encoding with its closest known person, or 'foo'/None if none is close enough."""
    names = []
    thumbs = []
    for face_encoding in face_encodings:
        name = 'foo'
        face_img = None
        face_distances = face_recognition.face_distance(known_encodings, face_encoding)
        best_idx = np.argmin(face_distances)
        # Accept the closest known face only if it is within the tolerance.
        if face_distances[best_idx] <= tolerance:
            name = known_names[best_idx]
            face_img = templates[best_idx]
        print('detect face samlike [%s] distane = %.3f, all = %s' % (
            name, face_distances[best_idx], face_distances))
        names.append(name)
        thumbs.append(face_img)
    return names, thumbs


def _annotate(image, face_locations, names, thumbs):
    """Draw a box and label per face in place and tile matched thumbnails along the top edge."""
    img_h, img_w = image.shape[:2]
    slot = 0  # next free thumbnail position along the top edge
    for loc, name, thumb in zip(face_locations, names, thumbs):
        y0, x1, y1, x0 = loc
        cv2.putText(image, name, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 3)
        cv2.rectangle(image, (x0, y0), (x1, y1), (0, 0, 255), 2)
        if thumb is None:
            continue
        thumb_h, thumb_w = thumb.shape[:2]
        # Thumbnails run left to right, so the offset is bounded by the image
        # WIDTH, and the image must also be tall enough to hold one.
        if thumb_h <= img_h and (slot + 1) * thumb_w <= img_w:
            image[0:thumb_h, slot * thumb_w:(slot + 1) * thumb_w] = thumb
            slot += 1


def main():
    """Run the live demo: detect faces per camera frame, match, display and greet.

    The loop ends when the camera stops delivering frames or when processing a
    frame raises; the camera is closed in either case.
    """
    out_win = "foo"
    cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cam = JetCamera(cap_w, cap_h, cap_fps)
    cam.open()
    try:
        #fd = FaceDetectDNN()
        fd = DarkNetWrap()

        Speech.SetReader(Speech.Reader_Type["Reader_XuXiaoBao"])
        Speech.SetVolume(10)
        Speech.SetSpeed(5)
        last_person_str = ''

        # Reference photos are given relative to this script's directory.
        os.chdir(cur_path)
        hu_face = FaceInfo('img/hu_face.jpg', 'hu', face_detector=fd)
        cheng_face = FaceInfo('img/cheng_face.jpg', 'cheng', face_detector=fd)
        han_face = FaceInfo('img/han_face.jpg', 'han', face_detector=fd)

        # Parallel lists: position k in each list refers to the same person.
        known_faces = (hu_face, cheng_face, han_face)
        known_face_encodings = [face.face_encoding for face in known_faces]
        known_face_names = ["hu", "cheng", "han"]
        face_templates = [face.face_img for face in known_faces]
        # Spoken names; 'foo' is the label for a face that matched nobody.
        known_face_names_cn = {'hu': '哇哈哈', 'cheng': '郑鸭蛋', 'han': '胡涵茜', 'foo': '不认识'}

        tolerance_thresh = 0.6
        log_every = 1  # print timing every N frames
        cnt = 0
        while True:
            try:
                ret, frame = cam.read()
                if not ret:
                    break
                # face_recognition works on RGB; the detector gets the raw frame.
                detect_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                t0 = time.time()
                boxs = fd.detect(frame)
                # Detector boxes are (x0, y0, x1, y1); face_recognition wants
                # (top, right, bottom, left).
                face_locations = [[y0, x1, y1, x0] for x0, y0, x1, y1 in boxs]
                t1 = time.time()
                face_encodings = face_recognition.face_encodings(detect_img, face_locations)
                t2 = time.time()
                print("face nums = %s" % (len(face_locations)))

                match_names, match_face_imgs = _match_faces(
                    face_encodings, known_face_encodings, known_face_names,
                    face_templates, tolerance_thresh)
                _annotate(detect_img, face_locations, match_names, match_face_imgs)

                detect_img = cv2.cvtColor(detect_img, cv2.COLOR_RGB2BGR)
                cv2.imshow(out_win, detect_img)
                cv2.waitKey(1)
                cnt += 1

                # Greet only when the set of visible people changes, and never
                # send an empty string to the speech engine.
                person_str = ''.join(
                    '%s 你好! ' % known_face_names_cn[name] for name in match_names)
                if person_str != last_person_str:
                    if person_str:
                        Speech.Block_Speech_text(person_str)
                    last_person_str = person_str

                if cnt % log_every == 0:
                    print("frame cnt [%d] face recognition detect delay = %.1fms, %.1fms" % (cnt, (t1 - t0) * 1000, (t2 - t1) * 1000))
            except Exception:
                traceback.print_exc()
                break
    finally:
        # Release the camera even if enrolment or the loop fails.
        cam.close()


if __name__ == '__main__':
    main()