Retinaface+Arcface实现视频人脸识别

最新推荐文章于 2024-05-19 20:13:44 发布

前尘昨夜此刻

最新推荐文章于 2024-05-19 20:13:44 发布

阅读量2.4k

点赞数 2

文章标签：人脸识别 深度学习 cv pytorch 神经网络

本文链接：https://blog.csdn.net/ssunshining/article/details/110148066

版权

Retinaface代码参考：https://blog.csdn.net/weixin_44791964/article/details/106214657

Arcface代码参考：https://blog.csdn.net/ssunshining/article/details/109613807

更改 Retinaface.py 中 detect_image 的返回值，使其在返回绘制了人脸框的图像的同时，也返回检测结果 boxes_conf_landms，修改后的代码如下：

import cv2
import numpy as np
import colorsys
import os
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from PIL import Image,ImageFont, ImageDraw
from torch.autograd import Variable
from retinaface_pytorch.nets.retinaface import RetinaFace
from retinaface_pytorch.utils.config import cfg_mnet,cfg_re50
from retinaface_pytorch.utils.anchors import Anchors
from retinaface_pytorch.utils.box_utils import decode, decode_landm, non_max_suppression

def preprocess_input(image):
    """Subtract the per-channel offsets (104, 117, 123) from ``image``.

    The subtraction is performed in place: the array passed in is modified
    and that same object is returned.
    """
    channel_offsets = np.array((104, 117, 123), np.float32)
    image -= channel_offsets
    return image

class Retinaface(object):
    """RetinaFace detector wrapper: loads the weights and detects faces in one image.

    Settings start from ``_defaults`` and can be overridden per instance
    through keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/Retinaface_mobilenet0.25.pth',
        "confidence": 0.5,
        "backbone": "mobilenet",
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        For an unknown name a message string is returned instead of raising.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Apply the defaults, then any keyword overrides, and build the network.

        Args:
            **kwargs: optional overrides for the keys of ``_defaults``
                (``model_path``, ``confidence``, ``backbone``, ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Previously kwargs were accepted but ignored, so overrides such as
        # Retinaface(cuda=False) had no effect.
        self.__dict__.update(kwargs)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.generate()

    #---------------------------------------------------#
    #   Build the network and load its weights
    #---------------------------------------------------#
    def generate(self):
        """Create the RetinaFace network in eval mode and load ``model_path``."""
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        self.net = RetinaFace(cfg=self.cfg, phase='eval').eval()

        print('Loading weights into state dict...')

        # Map the checkpoint to CPU first so that a checkpoint saved from a
        # GPU can also be loaded on a CPU-only machine; the network is moved
        # to the GPU afterwards when requested.
        state_dict = torch.load(self.model_path, map_location='cpu')
        self.net.load_state_dict(state_dict)
        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
        print('Finished!')

    #---------------------------------------------------#
    #   Detect faces in one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect faces in ``image`` and draw a rectangle around each one.

        Args:
            image: height x width x channel image array; it is copied before
                drawing, so the caller's array is left untouched.

        Returns:
            A tuple ``(annotated, boxes_conf_landms)``. ``annotated`` is the
            copy of the input with the face rectangles drawn on it.
            ``boxes_conf_landms`` is the result of ``non_max_suppression``;
            each row holds the box corners in columns 0-3, the confidence in
            column 4 and the five landmark (x, y) pairs in columns 5-14, all
            scaled to pixel coordinates of the input image.
        """
        annotated = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers that turn the normalised box / landmark coordinates
        # back into pixel coordinates of the input image.
        scale = torch.Tensor([im_width, im_height] * 2)
        scale_for_landmarks = torch.Tensor([im_width, im_height] * 5)

        # Channel-first layout plus a leading batch dimension for PyTorch.
        image = preprocess_input(image).transpose(2, 0, 1)
        image = torch.from_numpy(image).unsqueeze(0)
        # Prior boxes for this image size.
        anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            if self.cuda:
                scale = scale.cuda()
                scale_for_landmarks = scale_for_landmarks.cuda()
                image = image.cuda()
                anchors = anchors.cuda()

            loc, conf, landms = self.net(image)  # forward pass

            boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
            boxes = (boxes * scale).cpu().numpy()

            # Keep only column 1 of the class scores as the confidence.
            conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])
            landms = (landms * scale_for_landmarks).cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

        for b in boxes_conf_landms:
            x1, y1, x2, y2 = (int(v) for v in b[:4])
            cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 0, 255), 2)

        # The detections are returned together with the annotated image so
        # that callers can crop the faces themselves.
        return annotated, boxes_conf_landms

Retinaface+Arcface 视频人脸识别的主程序如下：

import torchvision.models as models
from torch import nn
from torch.nn import functional as F
from Face_test.dataset import *
import torch
from PIL import Image, ImageDraw, ImageFont
import os
from retinaface_pytorch.retinaface import Retinaface
from PIL import Image
import numpy as np
import cv2
import time

class Arcsoftmax(nn.Module):
    """Classification head that adds an angular margin before a softmax-like ratio.

    Holds a trainable ``(feature_num, cls_num)`` weight matrix ``w``.
    """

    def __init__(self, feature_num, cls_num):
        super().__init__()
        # Registered as a parameter so that it is trained with the module.
        self.w = nn.Parameter(torch.randn((feature_num, cls_num)), requires_grad=True)
        # Not used by forward(); kept so the module's attributes are unchanged.
        self.func = nn.Softmax()

    def forward(self, x, s=64, m=0.5):
        """Return the margin-adjusted score of every class for each row of ``x``.

        Args:
            x: feature batch; rows are L2-normalised along dim 1.
            s: scale factor applied inside the exponentials.
            m: margin added to the angle ``a``.
        """
        unit_x = F.normalize(x, dim=1)
        unit_w = F.normalize(self.w, dim=0)  # each column of w to unit length

        # NOTE(review): the cosine is divided by s before acos, which differs
        # from the usual ArcFace formulation - confirm this is intended.
        cosa = torch.matmul(unit_x, unit_w) / s
        a = torch.acos(cosa)

        exp_plain = torch.exp(s * cosa)
        exp_margin = torch.exp(s * torch.cos(a + m))
        # For every class: replace its own plain term in the row sum with the
        # margin term, then take the ratio.
        denominator = torch.sum(exp_plain, dim=1, keepdim=True) - exp_plain + exp_margin

        return exp_margin / denominator


class FaceNet(nn.Module):
    """MobileNetV2 backbone followed by a 512-d embedding head and an Arcsoftmax classifier."""

    def __init__(self):
        super().__init__()
        # Backbone: torchvision's mobilenet_v2 with its default arguments,
        # wrapped in a Sequential (so it stays at index 0 of ``sub_net``).
        backbone = models.mobilenet_v2()
        self.sub_net = nn.Sequential(backbone)
        # Embedding head: 1000 backbone outputs -> 512-d feature.
        head_layers = [
            nn.BatchNorm1d(1000),
            nn.LeakyReLU(0.1),  # 0.1 is the slope used for negative inputs
            nn.Linear(1000, 512, bias=False),
        ]
        self.feature_net = nn.Sequential(*head_layers)
        # Classifier over the 512-d feature with 112 output classes.
        self.arc_softmax = Arcsoftmax(512, 112)

    def forward(self, x):
        """Return ``(feature, class_scores)`` for the batch ``x``."""
        feature = self.encode(x)
        return feature, self.arc_softmax(feature)

    def encode(self, x):
        """Return only the 512-d feature (output of ``feature_net``) for ``x``."""
        backbone_out = self.sub_net(x)
        return self.feature_net(backbone_out)

def compare(face1, face2):
    """Return the cosine similarity between the rows of ``face1`` and ``face2``.

    Both inputs are L2-normalised with ``F.normalize`` (default dimension),
    then ``face1`` is matrix-multiplied with the transpose of ``face2``, so
    entry (i, j) of the result compares row i of ``face1`` with row j of
    ``face2``.
    """
    unit_a = F.normalize(face1)
    unit_b = F.normalize(face2)
    return torch.matmul(unit_a, unit_b.T)

if __name__ == '__main__':
    # Face detector (Retinaface) and feature extractor (FaceNet / Arcface).
    retinaface = Retinaface()
    net = FaceNet().cuda()
    # NOTE(review): param_path (path of the saved Arcface parameters) is not
    # defined in the visible code - it must be supplied before running.
    net.load_state_dict(torch.load(param_path))
    net.eval()

    # Build the gallery: one sub-folder per person, every image in it becomes
    # one feature row. names[i] is the person that featuress[i] belongs to.
    # NOTE(review): tf is presumably the image transform provided by
    # `from Face_test.dataset import *` - confirm.
    file_path = r"C:\Users\Administrator\Desktop\face"
    names = []
    featuress = []
    with torch.no_grad():
        for person in os.listdir(file_path):
            person_dir = os.path.join(file_path, person)
            if not os.path.isdir(person_dir):
                continue  # stray files next to the person folders
            for face in os.listdir(person_dir):
                with Image.open(os.path.join(person_dir, face)) as img:
                    # The backbone expects 3-channel input.
                    person_picture = tf(img.convert("RGB")).cuda()
                featuress.append(net.encode(person_picture[None, ...]))
                names.append(person)

    # A single (num, 512) matrix lets every face be compared against the whole
    # gallery in one matrix product instead of one comparison per entry.
    num = len(featuress)
    gallery = torch.cat(featuress, dim=0) if num else None

    # Open the camera.
    capture = cv2.VideoCapture(0)
    fps = 0.0
    count = 0                        # number of frames read, used for frame skipping

    while True:
        t1 = time.time()

        # Always read the frame, so that skipped frames are really consumed
        # from the camera, and stop when the camera delivers nothing.
        ref, frame = capture.read()
        if not ref:
            capture.release()
            break

        count += 1
        if count % 3 != 0:           # run detection on every 3rd frame only
            continue

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame, boxes = retinaface.detect_image(frame)
        # Keep the frame as a writable uint8 array for cv2 drawing; faces are
        # cropped from a PIL snapshot taken before any text is drawn.
        frame = np.array(frame, dtype=np.uint8)
        snapshot = Image.fromarray(frame)
        height, width = frame.shape[:2]

        for box in boxes:
            x1, y1, x2, y2 = (int(v) for v in box[:4])
            # Clip to the image and drop degenerate boxes so that the crop is
            # never empty.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)
            if x2 <= x1 or y2 <= y1:
                continue
            cropped = snapshot.crop((x1, y1, x2, y2))

            with torch.no_grad():
                person1 = tf(cropped).cuda()
                person1_feature = net.encode(person1[None, ...])

                # Best gallery match; as before, a name is only assigned
                # when the similarity is greater than 0.
                name = "unknown"
                siam_last = 0.0
                if gallery is not None:
                    siam, index = compare(person1_feature, gallery)[0].max(dim=0)
                    if float(siam) > 0:
                        siam_last = float(siam)
                        name = names[int(index)]

            cv2.putText(frame, name + str(float("%.2f" % siam_last)), (x1, y1 + 20),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        t7 = time.time()
        fps = 1 / max(t7 - t1, 1e-6)
        cv2.putText(frame, str("fps :%.2f" % fps), (0, 40), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0))

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("video", np.uint8(frame))
        # NOTE(review): c is not used in the visible code - exit-key handling
        # presumably follows.
        c = cv2.waitKey(1) & 0xff