Retinaface+Arcface实现视频人脸识别

Retinaface代码参考:https://blog.csdn.net/weixin_44791964/article/details/106214657

Arcface代码参考:https://blog.csdn.net/ssunshining/article/details/109613807

更改Retinaface.py中的detect_image的返回值如下:

import cv2
import numpy as np
import colorsys
import os
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from PIL import Image,ImageFont, ImageDraw
from torch.autograd import Variable
from retinaface_pytorch.nets.retinaface import RetinaFace
from retinaface_pytorch.utils.config import cfg_mnet,cfg_re50
from retinaface_pytorch.utils.anchors import Anchors
from retinaface_pytorch.utils.box_utils import decode, decode_landm, non_max_suppression

def preprocess_input(image):
    """Subtract the per-channel means (104, 117, 123) from ``image`` in place.

    The subtraction broadcasts over the last axis, so ``image`` needs a
    trailing dimension of size 3. The same array object that was passed in
    is modified and returned.
    """
    channel_means = np.array((104, 117, 123), np.float32)
    np.subtract(image, channel_means, out=image)
    return image

class Retinaface(object):
    """Wrapper around the RetinaFace network: loads weights once, then detects faces.

    Settings start from ``_defaults`` and can be overridden per instance with
    keyword arguments, e.g. ``Retinaface(confidence=0.7, cuda=False)``.
    """

    _defaults = {
        # Weights file handed to torch.load.
        "model_path": 'model_data/Retinaface_mobilenet0.25.pth',
        # Threshold passed to non_max_suppression.
        "confidence": 0.5,
        # "mobilenet" selects cfg_mnet; any other value selects cfg_re50.
        "backbone": "mobilenet",
        # When True the network and all tensors are moved to the GPU.
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value for setting ``n``.

        For an unknown name a descriptive message string is returned instead
        of raising.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Store the settings, pick the backbone config and build the network.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``
                (``model_path``, ``confidence``, ``backbone``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Apply caller overrides; previously kwargs were accepted but ignored.
        self.__dict__.update(kwargs)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.generate()

    #---------------------------------------------------#
    #   Build the network and load its weights
    #---------------------------------------------------#
    def generate(self):
        """Create the RetinaFace model in eval mode and load ``model_path`` into it."""
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        self.net = RetinaFace(cfg=self.cfg, phase='eval').eval()

        print('Loading weights into state dict...')

        # The model is still on the CPU at this point, so load the checkpoint
        # there too. This also keeps GPU-saved checkpoints loadable when
        # ``cuda`` is False.
        state_dict = torch.load(self.model_path, map_location='cpu')
        self.net.load_state_dict(state_dict)
        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finished!')

    #---------------------------------------------------#
    #   Detect faces in one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect faces in ``image`` and draw their bounding boxes.

        Args:
            image: Array of shape (height, width, channels). It must support
                ``.copy()`` and be drawable by ``cv2.rectangle``. The input
                itself is not modified.

        Returns:
            A tuple ``(annotated, boxes_conf_landms)``. ``annotated`` is a copy
            of the input with one red rectangle per detection.
            ``boxes_conf_landms`` is the output of ``non_max_suppression``;
            each row is read here as x1, y1, x2, y2, confidence, followed by
            the landmark coordinates (presumably five x/y pairs; confirm
            against ``decode_landm``).
        """
        # Drawing happens on a copy so the caller's image stays clean.
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers that map normalised box / landmark coordinates back to
        # pixel coordinates of the original image.
        scale = torch.Tensor([im_width, im_height] * 2)
        scale_for_landmarks = torch.Tensor([im_width, im_height] * 5)

        # HWC -> CHW after mean subtraction, then add the batch dimension.
        image = preprocess_input(image).transpose(2, 0, 1)
        image = torch.from_numpy(image).unsqueeze(0)
        # Prior boxes for this image size.
        anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            if self.cuda:
                scale = scale.cuda()
                scale_for_landmarks = scale_for_landmarks.cuda()
                image = image.cuda()
                anchors = anchors.cuda()

            loc, conf, landms = self.net(image)  # forward pass

            boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
            boxes = boxes * scale
            boxes = boxes.cpu().numpy()

            # Keep column 1 of the class scores as an (N, 1) array.
            conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])
            landms = landms * scale_for_landmarks
            landms = landms.cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)

            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

        for detection in boxes_conf_landms:
            x1, y1, x2, y2 = (int(v) for v in detection[:4])
            cv2.rectangle(old_image, (x1, y1), (x2, y2), (0, 0, 255), 2)

        # The detections are returned alongside the drawn image so callers
        # can crop the faces themselves.
        return old_image, boxes_conf_landms

 Retinaface+Arcface

import torchvision.models as models
from torch import nn
from torch.nn import functional as F
from Face_test.dataset import *
import torch
from PIL import Image, ImageDraw, ImageFont
import os
from retinaface_pytorch.retinaface import Retinaface
from PIL import Image
import numpy as np
import cv2
import time

class Arcsoftmax(nn.Module):
    """Angular-margin softmax head with a learnable (feature_num, cls_num) weight."""

    def __init__(self, feature_num, cls_num):
        super().__init__()
        # Wrapping the tensor in nn.Parameter registers it with the module
        # so it is trained and saved with it.
        initial_weights = torch.randn((feature_num, cls_num))
        self.w = nn.Parameter(initial_weights, requires_grad=True)
        # Constructed but never called in forward().
        self.func = nn.Softmax()

    def forward(self, x, s=64, m=0.5):
        """Return per-class margin-softmax scores for a batch of features.

        Args:
            x: Feature batch; rows are normalised along dim 1 and multiplied
                with ``self.w``, so the last dimension must equal feature_num.
            s: Scale hyper-parameter.
            m: Additive angular margin, in radians.
        """
        unit_x = F.normalize(x, dim=1)
        unit_w = F.normalize(self.w, dim=0)  # unit-length column per class

        # NOTE(review): the cosine is divided by s before acos and multiplied
        # by s again in the exponent; confirm this is the intended formula.
        cosa = torch.matmul(unit_x, unit_w) / s
        theta = torch.acos(cosa)

        plain_exp = torch.exp(s * cosa)
        margin_exp = torch.exp(s * torch.cos(theta + m))
        # Sum over all classes, with this class's plain term replaced by its
        # margin-penalised term.
        denominator = torch.sum(plain_exp, dim=1, keepdim=True) - plain_exp + margin_exp

        return margin_exp / denominator


class FaceNet(nn.Module):
    """MobileNetV2 backbone, a 512-d embedding head, and an Arcsoftmax classifier."""

    def __init__(self):
        super().__init__()
        # Backbone built with torchvision's default arguments. Its output
        # feeds BatchNorm1d(1000) below.
        backbone = models.mobilenet_v2()
        self.sub_net = nn.Sequential(backbone)

        # 1000 -> 512 projection; 0.1 is the LeakyReLU negative slope.
        embedding_layers = [
            nn.BatchNorm1d(1000),
            nn.LeakyReLU(0.1),
            nn.Linear(1000, 512, bias=False),
        ]
        self.feature_net = nn.Sequential(*embedding_layers)

        # 512 is the embedding width and 112 is the class count given to the
        # head; presumably the number of identities in the training set.
        self.arc_softmax = Arcsoftmax(512, 112)

    def forward(self, x):
        """Return ``(feature, scores)``: the embedding and its Arcsoftmax output."""
        feature = self.encode(x)
        return feature, self.arc_softmax(feature)

    def encode(self, x):
        """Return only the embedding produced by ``feature_net`` (no classifier)."""
        backbone_out = self.sub_net(x)
        return self.feature_net(backbone_out)

def compare(face1, face2):
    """Return the cosine-similarity matrix between two batches of features.

    Both inputs are L2-normalised along dim 1 (the ``F.normalize`` default).
    Entry ``[i, j]`` of the result is the cosine between row ``i`` of
    ``face1`` and row ``j`` of ``face2``.
    """
    unit_a = F.normalize(face1)
    unit_b = F.normalize(face2)
    return unit_a @ unit_b.T

if __name__ == '__main__':
    # Live face recognition: detect faces with RetinaFace, embed each crop
    # with FaceNet, and label it with the most similar enrolled person.
    retinaface = Retinaface()
    net = FaceNet().cuda()
    # NOTE(review): `param_path` (saved ArcFace weights) and `tf` (image
    # transform) are not defined in this file; presumably they come from
    # `from Face_test.dataset import *`. Define them if they do not.
    net.load_state_dict(torch.load(param_path))
    net.eval()

    # Enrolment: one sub-folder per person, each holding that person's photos.
    file_path = r"C:\Users\Administrator\Desktop\face"
    names = []       # names[i] is the person behind row i of `gallery`
    featuress = []   # one (1, D) embedding per enrolled photo
    with torch.no_grad():
        for person in os.listdir(file_path):
            person_dir = os.path.join(file_path, person)
            if not os.path.isdir(person_dir):
                continue
            for face in os.listdir(person_dir):
                person_picture = tf(Image.open(os.path.join(person_dir, face))).cuda()
                featuress.append(net.encode(person_picture[None, ...]))
                names.append(person)
    # Stack all embeddings once so each face is matched with one matrix
    # product, rather than re-uploading every feature on every frame.
    gallery = torch.cat(featuress, dim=0) if featuress else None

    # Open the default camera.
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        raise RuntimeError("Could not open camera 0")
    count = 0  # frames read so far, used for frame skipping

    try:
        while True:
            t1 = time.time()
            ref, frame = capture.read()
            if not ref:
                # Camera disconnected or stream ended.
                break

            # Run detection on every third frame only; the others are dropped.
            count += 1
            if count % 3 != 0:
                continue

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            annotated, boxes = retinaface.detect_image(rgb)
            # Keep a writable uint8 array for the cv2 drawing calls below.
            annotated = np.ascontiguousarray(annotated, dtype=np.uint8)
            # Crop from the clean frame so drawn boxes and labels do not leak
            # into the face crops.
            clean = Image.fromarray(rgb)
            height, width = rgb.shape[:2]

            for box in boxes:
                x1, y1, x2, y2 = (int(v) for v in box[:4])
                # Clamp to the frame and skip boxes that are empty afterwards.
                left, top = max(x1, 0), max(y1, 0)
                right, bottom = min(x2, width), min(y2, height)
                if right <= left or bottom <= top:
                    continue
                cropped = clean.crop((left, top, right, bottom))

                name, score = "unknown", 0.0
                with torch.no_grad():
                    person1_feature = net.encode(tf(cropped).cuda()[None, ...])
                    if gallery is not None:
                        sims = compare(person1_feature, gallery)  # shape (1, N)
                        best, best_idx = sims.max(dim=1)
                        # Only a strictly positive similarity counts as a match.
                        if best.item() > 0:
                            name = names[best_idx.item()]
                            score = best.item()

                cv2.putText(annotated, name + str(float("%.2f" % score)), (x1, y1 + 20),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # Guard against a zero time difference on very fast iterations.
            fps = 1 / max(time.time() - t1, 1e-6)
            cv2.putText(annotated, str("fps :%.2f" % fps), (0, 40),
                        cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0))

            cv2.imshow("video", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
            # Esc quits.
            if cv2.waitKey(1) & 0xff == 27:
                break
    finally:
        capture.release()
        cv2.destroyAllWindows()

 

  • 2
    点赞
  • 33
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
RetinaFace和ArcFace是两种常用于人脸识别的深度学习算法。RetinaFace是一种人脸检测算法,用于在图像中识别和定位人脸。它采用了全卷积神经网络结构,并利用了特征金字塔网络来多尺度地检测不同大小的人脸。RetinaFace使用了anchor-based方法,在不同尺度上生成候选框,并通过网络预测每个候选框的边界框和类别概率。 而ArcFace是一种人脸识别算法,用于在给定的人脸图像中提取具有高度可辨识性的人脸特征向量。ArcFace将人脸图像映射到一个高维特征空间,并通过特殊的角度余弦损失函数,在特征空间中将同一个人脸的特征向量拉近,并将不同人脸的特征向量推开。这样,在特征空间中,同一个人脸的特征向量之间的相似度会很高,而不同人脸的特征向量之间的相似度会很低。 结合RetinaFace和ArcFace,可以实现人脸检测和识别的整个流程。首先使用RetinaFace检测人脸,并获取人脸的位置和边界框。然后,将这些人脸区域图像输入ArcFace网络,提取每个人脸的特征向量。接下来,可以使用这些特征向量来比较和匹配不同人脸的相似度,以实现人脸的识别和验证。 RetinaFace和ArcFace在人脸识别领域具有较好的性能和广泛的应用。它们可以应用于人脸识别门禁系统、人脸支付、人脸考勤、人脸身份验证等多个领域。通过这两种算法的组合,可以提高人脸识别的准确性和鲁棒性,实现更加安全和高效的人脸识别技术。
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值