RK3588 Face Recognition (2)

Preface

This is the edge-side program for the RK3588 development board. It differs little from the server-side version: the server loads ONNX weight files, while the client loads RKNN weight files (I will finish this client-side post first and cover the NPU configuration and quantization process in a later one). My client code covers two modes: local comparison and a networked client.

Client

1. Dataset creation

You could capture faces directly from the camera, encode them, and save them on the fly. However, my work is mainly on the edge device and the database is only there to verify the pipeline, so I save the images locally first and then call a function to encode them and store the result as .npy files.

Create a face_dataset folder. Each image is named with the person's name, an underscore, and an index; the encoding script takes everything before the first underscore as the label (see the example below).
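
For example (hypothetical file names; the part before the underscore becomes the stored name):

face_dataset/
    zhangsan_1.jpg
    zhangsan_2.jpg
    lisi_1.jpg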

2. RKNN models

Take the previously trained ONNX weight files for face detection and face recognition, quantize them with rknn-toolkit2, and save them as RKNN-format weight files.
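
I will cover the quantization workflow in detail in a later post. As a rough sketch, the PC-side conversion with rknn-toolkit2 looks something like the following (the paths, preprocessing values, and the calibration list dataset.txt are placeholder assumptions, not my exact settings):

from rknn.api import RKNN

rknn = RKNN()
# Preprocessing here must match what the ONNX model expects; these values are placeholders
rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
rknn.load_onnx(model='facenet.onnx')
# dataset.txt lists calibration image paths, one per line
rknn.build(do_quantization=True, dataset='./dataset.txt')
rknn.export_rknn('model/facenet.rknn')
rknn.release()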

3. Database

Create a model_data folder to store the face feature vectors and the corresponding names.

4. Code

a. Database encoding script: encoding.py

import get_embedding
import os
from tqdm import tqdm
import retinaface
import rec
import cv2
import numpy as np

get_embedding.init()

list_dir = os.listdir("face_dataset")
image_paths = []
names = []
for name in list_dir:
    image_paths.append("face_dataset/"+name)
    names.append(name.split("_")[0])

def encode_face_dataset(image_paths, names):
    embedder_ret = []
    for path in tqdm(image_paths):
        image = cv2.imread(path)
        x1, y1, x2, y2, ret = retinaface.get_faces(image)
        if not ret:
            # Every dataset image must contain a detectable face, otherwise
            # the embeddings would no longer line up with the saved names
            print(f"No face detected in {path}, aborting")
            return None
        for face in ret:
            embedding = rec.get_feat(face['face'])
            embedder_ret.append(embedding)

    np.save("model_data/face_encoding.npy", embedder_ret)
    np.save("model_data/names.npy", names)
    return embedder_ret

encode_face_dataset(image_paths, names)

b. Face alignment: face_align.py

import cv2
import numpy as np

# Rotation-and-scale matrix centered on the nose, so that the eyes end up horizontal
def affineMatrix(nose, leftEyeCenter, rightEyeCenter, scale=2.5):
    nose = np.array(nose, dtype=np.float32)
    left_eye = np.array(leftEyeCenter, dtype=np.float32)
    right_eye = np.array(rightEyeCenter, dtype=np.float32)
    eye_width = right_eye - left_eye
    angle = np.arctan2(eye_width[1], eye_width[0])
    center = nose
    alpha = np.cos(angle)
    beta = np.sin(angle)
    w = np.sqrt(np.sum(eye_width**2)) * scale
    m = [[alpha, beta, -alpha * center[0] - beta * center[1] + w * 0.5],
        [-beta, alpha, beta * center[0] - alpha * center[1] + w * 0.5]]
    return np.array(m), (int(w), int(w))

def align(img, nose, leftEyeCenter, rightEyeCenter, target_size=(160, 160)):
    mat, size = affineMatrix(nose, leftEyeCenter, rightEyeCenter)
    img = cv2.warpAffine(img, mat, size)
    # img = cv2.resize(img, (112, 112))  # mobilefacenet expects 112x112
    img = letterbox_image(img, target_size)  # facenet expects 160x160
    img = img.astype(np.float32)
    return img

# Resize while keeping the aspect ratio, padding the rest with gray (128)
def letterbox_image(image, size):
    ih, iw, _   = np.shape(image)
    w, h        = size
    scale       = min(w/iw, h/ih)
    nw          = int(iw*scale)
    nh          = int(ih*scale)

    image       = cv2.resize(image, (nw, nh))
    new_image   = np.ones([size[1], size[0], 3]) * 128
    new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image
    return new_image

c. Core module: get_embedding.py

from retinaface import get_faces,init_rknn
import rec

def get_embeddings(image):
    x1, y1, x2, y2, ret = get_faces(image)
    # Only compute embeddings when exactly one face is detected;
    # otherwise return the raw detection results
    if len(x1) != 1:
        return x1, y1, x2, y2, ret
    embedder_ret = []
    for face in ret:
        embedding = rec.get_feat(face['face'])
        embedder_ret.append(embedding)
    return x1, y1, x2, y2, embedder_ret
    
def init():
    init_rknn()
    rec.init()

d. Face recognition: rec.py

import cv2
import numpy as np
from rknnlite.api import RKNNLite

rknn = RKNNLite()

def init():
    rknn.load_rknn('model/facenet.rknn')
    rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)

def get_feat(img):
    img = img[..., ::-1]  # BGR -> RGB
    blob = np.expand_dims(img, axis=0)  # add batch dimension
    net_out = rknn.inference(inputs=[blob])[0][0]
    return net_out

e. Face detection: retinaface.py

import numpy as np
import cv2
from math import ceil
from itertools import product
import face_align

from rknnlite.api import RKNNLite

rknn = RKNNLite(verbose=False)

def init_rknn():
    rknn.load_rknn('model/RetinaFace_3588.rknn')
    rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)

def letterbox_resize(image, size, bg_color):
    if isinstance(image, str):
        image = cv2.imread(image)

    target_width, target_height = size
    image_height, image_width, _ = image.shape

    # Calculate the adjusted image size
    aspect_ratio = min(target_width / image_width, target_height / image_height)
    new_width = int(image_width * aspect_ratio)
    new_height = int(image_height * aspect_ratio)

    # Use cv2.resize() for proportional scaling
    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Create a new canvas and fill it
    result_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * bg_color
    offset_x = (target_width - new_width) // 2
    offset_y = (target_height - new_height) // 2
    result_image[offset_y:offset_y + new_height, offset_x:offset_x + new_width] = image
    return result_image, aspect_ratio, offset_x, offset_y

def PriorBox(image_size):  # image_size supports (320, 320) and (640, 640)
    anchors = []
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    feature_maps = [[ceil(image_size[0] / step), ceil(image_size[1] / step)] for step in steps]
    for k, f in enumerate(feature_maps):
        min_sizes_ = min_sizes[k]
        for i, j in product(range(f[0]), range(f[1])):
            for min_size in min_sizes_:
                s_kx = min_size / image_size[1]
                s_ky = min_size / image_size[0]
                dense_cx = [x * steps[k] / image_size[1] for x in [j + 0.5]]
                dense_cy = [y * steps[k] / image_size[0] for y in [i + 0.5]]
                for cy, cx in product(dense_cy, dense_cx):
                    anchors += [cx, cy, s_kx, s_ky]
    output = np.array(anchors).reshape(-1, 4)
    return output

def box_decode(loc, priors):
    variances = [0.1, 0.2]
    boxes = np.concatenate((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), axis=1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes

def decode_landm(pre, priors):
    variances = [0.1, 0.2]
    landmarks = np.concatenate((
        priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]
    ), axis=1)
    return landmarks

def nms(dets, thresh):
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep

def get_faces(img):
    img_height, img_width, _ = img.shape
    model_height, model_width = (320, 320)
    letterbox_img, aspect_ratio, offset_x, offset_y = letterbox_resize(img, (model_width, model_height), 114)  # letterbox scaling
    infer_img = letterbox_img[..., ::-1]  # BGR -> RGB

    blob = np.expand_dims(infer_img, 0)
    outputs = rknn.inference(inputs=[blob])
    if outputs is None:  # inference failed; return empty results so callers can still unpack
        return [], [], [], [], []
    loc, conf, landmarks = outputs
    priors = PriorBox(image_size=(model_height, model_width))
    boxes = box_decode(loc.squeeze(0), priors)
    scale = np.array([model_width, model_height,
                      model_width, model_height])
    boxes = boxes * scale // 1
    boxes[...,0::2] =np.clip((boxes[...,0::2] - offset_x) / aspect_ratio, 0, img_width)  #letterbox
    boxes[...,1::2] =np.clip((boxes[...,1::2] - offset_y) / aspect_ratio, 0, img_height) #letterbox
    scores = conf.squeeze(0)[:, 1]
    landmarks = decode_landm(landmarks.squeeze(0), priors)
    scale_landmarks = np.array([model_width, model_height, model_width, model_height,
                                model_width, model_height, model_width, model_height,
                                model_width, model_height])
    landmarks = landmarks * scale_landmarks // 1
    landmarks[...,0::2] = np.clip((landmarks[...,0::2] - offset_x) / aspect_ratio, 0, img_width) #letterbox
    landmarks[...,1::2] = np.clip((landmarks[...,1::2] - offset_y) / aspect_ratio, 0, img_height) #letterbox

    inds = np.where(scores > 0.02)[0]
    boxes = boxes[inds]
    landmarks = landmarks[inds]
    scores = scores[inds]

    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landmarks = landmarks[order]
    scores = scores[order]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
        np.float32, copy=False)
    keep = nms(dets, 0.5)
    dets = dets[keep, :]
    landmarks = landmarks[keep]
    dets = np.concatenate((dets, landmarks), axis=1)
    ret = []
    x1_list, y1_list, x2_list, y2_list = [], [], [], []
    for data in dets:
        if float(data[4]) < 0.6:
            continue

        x1 = int(data[0])
        y1 = int(data[1])
        x2 = int(data[2])
        y2 = int(data[3])
        x3 = int(data[5])
        y3 = int(data[6])
        x4 = int(data[7])
        y4 = int(data[8])
        x5 = int(data[9])
        y5 = int(data[10])

        leftEyeCenter = np.array([x3, y3])
        rightEyeCenter = np.array([x4, y4])
        nose = np.array([x5, y5])

        face_aligned = face_align.align(img, nose, leftEyeCenter, rightEyeCenter)
        faces = {'face' : face_aligned, 'score' : data[4], 'point1' : x1, 'point2': y1 + 12}
        ret.append(faces)

        x1_list.append(x1)
        y1_list.append(y1)
        x2_list.append(x2)
        y2_list.append(y2)
    return x1_list, y1_list, x2_list, y2_list, ret

f. Local test script: test.py

import cv2
import time
import numpy as np
import get_embedding

get_embedding.init()
facenet_threshold = 0.95

def face_distance(face_encodings, face_to_compare):
    if len(face_encodings) == 0:
        return np.empty((0))
    return np.linalg.norm(face_encodings - face_to_compare, axis=1)

def compare_faces(known_face_encodings, face_encoding_to_check, tolerance=1):
    dis = face_distance(known_face_encodings, face_encoding_to_check)
    return list(dis <= tolerance), dis

cap = cv2.VideoCapture(21)
if not cap.isOpened():
    print("Cannot open camera")
    exit()

print("Press 'q' to quit")
prev_time = time.time()
fps = 0

# Load the face database once, outside the capture loop
known_face_encodings = np.load("model_data/face_encoding.npy")
known_face_names = np.load("model_data/names.npy")

while True:
    ret, frame = cap.read()
    if not ret:
        print("无法从摄像头读取画面")
        break

    frame = cv2.flip(frame, 1)
    old_image = frame.copy()

    current_time = time.time()
    elapsed_time = current_time - prev_time
    fps = 1 / elapsed_time if elapsed_time > 0 else 0
    prev_time = current_time

    try:
        x1, y1, x2, y2, embedder_ret = get_embedding.get_embeddings(old_image)
        if len(x1) > 0:
            if len(x1) > 1:
                for i in range(len(x1)):
                    cv2.rectangle(old_image, (x1[i], y1[i]), (x2[i], y2[i]), (255, 0, 0), 2)
            else:
                cv2.rectangle(old_image, (x1[0], y1[0]), (x2[0], y2[0]), (255, 0, 0), 2)
                face_names = []
                for face_encoding in embedder_ret:
                    matches, face_distances = compare_faces(known_face_encodings, face_encoding,
                                                            tolerance=facenet_threshold)
                    print(matches)
                    print(face_distances)
                    name = "Unknown"
                    best_match_index = np.argmin(face_distances)
                    if matches[best_match_index]:
                        name = known_face_names[best_match_index]
                        print(name)
                    face_names.append(name)
        else:
            print("No face detected")
    except Exception as e:
        print(f"获取人脸特征失败: {e}")
    cv2.putText(old_image, f'FPS: {fps:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
    cv2.imshow("Camera", old_image)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

At this point, the basic functionality is in place: encoding and saving face features, and comparing faces against the database.

Client code (only usable when connected to the server)

import socket
import pickle
import cv2
import time
import get_embedding
import struct
import numpy as np

SERVER_IP = 'xxx.xxx.xx.x'
SERVER_PORT = 12345
get_embedding.init()

def connect_to_server(ip, port):
    try:
        client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        client.connect((ip, port))
        print("成功连接到服务器")
        return client
    except Exception as e:
        print(f"连接服务器失败: {e}")
        return None

client_socket = connect_to_server(SERVER_IP, SERVER_PORT)

cap = cv2.VideoCapture(21)
if not cap.isOpened():
    print("无法打开摄像头")
    exit()

print("按下 'q' 键退出")
prev_time = time.time()
fps = 0

face_encoding_queue = []

while True:
    try:
        ret, frame = cap.read()
        if not ret:
            print("无法从摄像头读取画面")
            break

        frame = cv2.flip(frame, 1)
        old_image = frame.copy()

        current_time = time.time()
        elapsed_time = current_time - prev_time
        fps = 1 / elapsed_time if elapsed_time > 0 else 0
        prev_time = current_time

        x1, y1, x2, y2, embedder_ret = get_embedding.get_embeddings(old_image)
        if len(x1) > 0:
            for i in range(len(x1)):
                cv2.rectangle(old_image, (x1[i], y1[i]), (x2[i], y2[i]), (255, 0, 0), 2)

            for face_encoding in embedder_ret:
                if len(face_encoding) == 128:  # facenet outputs a 128-d embedding
                    face_encoding_queue.append(face_encoding)

                    if len(face_encoding_queue) >= 10:
                        # Average 10 embeddings to smooth out noise before sending
                        avg_face_encoding = np.mean(face_encoding_queue, axis=0)
                        face_encoding_queue.clear()

                        serialized_data = pickle.dumps(avg_face_encoding)
                        data_length = struct.pack("!I", len(serialized_data))

                        if client_socket:
                            try:
                                client_socket.sendall(data_length)
                                client_socket.sendall(serialized_data)

                                response = b""
                                while True:
                                    packet = client_socket.recv(4096)
                                    if not packet:
                                        break
                                    response += packet
                                    if len(packet) < 4096:
                                        break

                                if response:
                                    name = pickle.loads(response)
                                    print(f"匹配结果:{name}")
                                    cv2.putText(old_image, name, (x1[0], y1[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                (0, 255, 0), 2, cv2.LINE_AA)
                            except BrokenPipeError:
                                print("检测到断开的管道,正在重新连接...")
                                client_socket = connect_to_server(SERVER_IP, SERVER_PORT)
                            except Exception as e:
                                print(f"发送或接收数据失败: {e}")
                        else:
                            print("服务器未连接,正在尝试重新连接...")
                            client_socket = connect_to_server(SERVER_IP, SERVER_PORT)
                else:
                    print("提取的人脸特征无效,跳过此帧")
        else:
            print("未检测到人脸")

        # Display FPS
        cv2.putText(old_image, f'FPS: {fps:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow("Camera", old_image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    except Exception as e:
        print(f"发生错误: {e}")
        break

# Clean up resources
if client_socket:
    client_socket.close()
cap.release()
cv2.destroyAllWindows()
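
For reference, here is a minimal sketch of what the matching receive loop could look like on the server side, assuming the protocol used above (a 4-byte big-endian length prefix followed by a pickled numpy array). The recv_exact helper and the lookup placeholder are my assumptions, not the actual server code, which is covered separately:

import socket
import struct
import pickle

def recv_exact(conn, n):
    # Read exactly n bytes, or return None if the peer closed the connection
    buf = b""
    while len(buf) < n:
        chunk = conn.recv(n - len(buf))
        if not chunk:
            return None
        buf += chunk
    return buf

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(("0.0.0.0", 12345))
server.listen(1)
conn, addr = server.accept()

while True:
    header = recv_exact(conn, 4)
    if header is None:
        break
    (length,) = struct.unpack("!I", header)
    payload = recv_exact(conn, length)
    if payload is None:
        break
    face_encoding = pickle.loads(payload)
    # Placeholder: replace with a lookup against the ONNX-side face database,
    # e.g. nearest neighbor by L2 distance
    name = "Unknown"
    conn.sendall(pickle.dumps(name))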

If you have any questions, contact me: QQ: 3281428136
