针对FFHQFaceAlignment只能裁剪对齐单个人脸的改进，使用dlib库

lang.y

已于 2024-07-16 18:33:09 修改

阅读量289

点赞数 6

文章标签： python pytorch numpy 计算机视觉

于 2024-07-16 18:32:00 首次发布

本文链接：https://blog.csdn.net/yue_la/article/details/140472230

版权

概述

在这篇文章（https://blog.csdn.net/yue_la/article/details/140404258）中，我写了FFHQFaceAlignment的代码解读。但是原项目只能裁剪对齐单个人脸：即使输入的是多人脸图像，也只能检测到其中一个人脸。我写的agent想实现这样一个功能：针对多人图像中的某一个人脸进行裁剪对齐，并通过新增的参数face_index来选择人脸。由于原代码中的模型并不支持多人脸检测，所以改用dlib库和相关模型，编写了下面的代码。

dlib库安装

linux系统可以使用下面的命令安装dlib库

pip install cmake
pip install boost
pip install dlib

如果安装失败，可以尝试使用conda安装

conda install -c conda-forge dlib

这行命令需要等待比较长时间，接近十分钟，按照提示选择y + 回车

代码

导库

import os
import os.path as osp
from tqdm import tqdm
import torch
import numpy as np
import cv2
import PIL.Image
import PIL.ImageFile
from PIL import Image
import scipy.ndimage
import dlib

align_crop_image函数

def align_crop_image(self, image, landmarks, transform_size=256):
        """Crop one face out of ``image`` and warp it to an aligned square.

        An oriented square is fitted around the face from the eye and mouth
        landmarks, the image is optionally shrunk, cropped and reflect-padded
        so the square fits, and the square is then warped to
        ``transform_size`` x ``transform_size`` pixels. ``self`` is not used.

        Args:
            image: Pixel array handed to ``Image.fromarray``. The padding
                step pads three axes, so a height x width x channel array is
                assumed -- TODO confirm against callers.
            landmarks: Indexable sequence of 2-D points, sliced up to index
                68 (presumably the 68-point dlib layout; the caller in this
                file builds exactly 68 ``[x, y]`` pairs).
            transform_size: Side length in pixels of the square output.

        Returns:
            The aligned face as a NumPy array (``np.array`` of the
            transformed PIL image).
        """
        # Split the landmarks into named groups by index range. Only the eye
        # groups and the outer-mouth group are used below.
        lm = landmarks
        lm_chin = lm[0: 17]
        lm_eyebrow_left = lm[17: 22]
        lm_eyebrow_right = lm[22: 27]
        lm_nose = lm[27: 31]
        lm_nostrils = lm[31: 36]
        lm_eye_left = lm[36: 42]
        lm_eye_right = lm[42: 48]
        lm_mouth_outer = lm[48: 60]
        lm_mouth_inner = lm[60: 68]

        # Reference points: eye centres (mean of each eye group), their
        # midpoint, and the midpoint of outer-mouth points 0 and 6.
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        eye_avg = (eye_left + eye_right) * 0.5
        eye_to_eye = eye_right - eye_left
        mouth_left = lm_mouth_outer[0]
        mouth_right = lm_mouth_outer[6]
        mouth_avg = (mouth_left + mouth_right) * 0.5
        eye_to_mouth = mouth_avg - eye_avg

        # x: half-side vector of the crop square. Its direction combines the
        # eye-to-eye vector with the perpendicular of the eye-to-mouth
        # vector; its length is the larger of 2.0 * |eye_to_eye| and
        # 1.8 * |eye_to_mouth|.
        x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
        x /= np.hypot(*x)
        x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
        # y: x rotated by 90 degrees, i.e. (-x[1], x[0]).
        y = np.flipud(x) * [-1, 1]
        # Centre of the square: eye midpoint shifted 10% towards the mouth.
        c = eye_avg + eye_to_mouth * 0.1
        # The four corners of the square, and its side length.
        quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
        qsize = np.hypot(*x) * 2

        img = Image.fromarray(image)
        # Integer downscale factor: floor(qsize / (2 * transform_size)).
        # When it exceeds 1 the image, quad and qsize are all scaled down.
        shrink = int(np.floor(qsize / transform_size * 0.5))
        if shrink > 1:
            rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
            img = img.resize(rsize, Image.Resampling.LANCZOS)
            quad /= shrink
            qsize /= shrink

        # Crop to the quad's bounding box grown by `border` pixels (10% of
        # qsize, at least 3) and clamped to the image bounds.
        border = max(int(np.rint(qsize * 0.1)), 3)
        crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
                int(np.ceil(max(quad[:, 1]))))
        crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
                min(crop[3] + border, img.size[1]))
        # Crop only when the clamped box is smaller than the image in at
        # least one dimension; the quad is shifted into the cropped frame.
        if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
            img = img.crop(crop)
            quad -= crop[0:2]

        # pad = (left, top, right, bottom): how far the quad's bounding box
        # plus border reaches outside the current image on each side.
        pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
               int(np.ceil(max(quad[:, 1]))))
        pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
               max(pad[3] - img.size[1] + border, 0))
        # Padding is always enabled here; the flag is a local constant.
        enable_padding = True
        if enable_padding and max(pad) > border - 4:
            # Every side is padded by at least 30% of qsize.
            pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
            # Reflect-pad rows by (top, bottom) and columns by (left, right);
            # the third axis is left unpadded.
            img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
            h, w, _ = img.shape
            # x and y are rebound to pixel-coordinate grids from here on; the
            # direction vectors above are no longer needed.
            y, x, _ = np.ogrid[:h, :w, :1]
            # mask is 1 at the outer edge of the padded image, falls to 0 one
            # pad width in from that edge, and is negative further inside.
            # The 1e-12 term guards against division by a zero pad width.
            mask = np.maximum(1.0 - np.minimum(np.float32(x) / (pad[0] + 1e-12), np.float32(w - 1 - x) / (pad[2] + 1e-12)),
                              1.0 - np.minimum(np.float32(y) / (pad[1] + 1e-12), np.float32(h - 1 - y) / (pad[3] + 1e-12)))

            blur = qsize * 0.01
            # Blend towards a Gaussian-blurred copy: full weight where
            # mask >= 0, fading to zero weight at mask = -1/3.
            img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
            # Blend towards the per-channel median colour, weighted by the
            # mask clipped to [0, 1].
            img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
            img = Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')

            # Shift the quad by the left/top padding that was added.
            quad += pad[:2]

        # Warp the quad to the output square with bilinear resampling.
        img = img.transform((transform_size, transform_size), Image.Transform.QUAD, (quad + 0.5).flatten(),
                            Image.Resampling.BILINEAR)

        return np.array(img)

代码解读可移步我的另一篇文章，有专门介绍

read_image_opencv函数

def read_image_opencv(self, image_path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The decoded colour image as a uint8 array, converted from OpenCV's
        BGR order to RGB.

    Raises:
        cv2.error: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None instead of raising. Fail
    # here with the offending path rather than with an opaque assertion from
    # cvtColor; the exception type is the one cvtColor would have raised.
    if img is None:
        raise cv2.error(f"Could not read image: {image_path}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('uint8')

_run函数

def _run(self, input_dir, face_index):
    # Side length, in pixels, of each cropped output image.
    transform_size = 256

    # Resolve the input directory to an absolute path and derive a sibling
    # output directory named "<input name>_aligned".
    input_dir = osp.abspath(osp.expanduser(input_dir))
    parent_dir, dir_name = osp.split(input_dir)
    output_dir = osp.join(parent_dir, "{}_aligned".format(dir_name))

    print(f"Creating output directory: {output_dir}")
    os.makedirs(output_dir, exist_ok=True)

    # Collect every regular file in input_dir whose extension is listed in
    # IMAGE_EXT (not defined in this snippet; the surrounding module must
    # provide it), then sort the paths.
    input_images = []
    for entry in os.listdir(input_dir):
        entry_path = osp.join(input_dir, entry)
        if not osp.isfile(entry_path):
            continue
        _, extension = osp.splitext(entry)
        if extension not in IMAGE_EXT:
            continue
        input_images.append(entry_path)
    input_images.sort()

    # dlib face detector and the 68-point landmark predictor loaded from disk.
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("/data/FFHQFaceAlignment/dlib_model/shape_predictor_68_face_landmarks.dat")

人脸检测器detector：

dlib.get_frontal_face_detector() 返回一个用于检测图像中正面人脸的对象（即人脸检测器）。该检测器用于在图像中定位人脸的位置：它能够检测图像中的人脸，并返回一个或多个边界框（bounding boxes），每个边界框表示一个检测到的人脸。

形状预测器predictor：

dlib.shape_predictor() 的参数是模型文件的路径，用于加载一个训练好的模型，该模型能够预测人脸关键点的位置。形状预测器接受图像和人脸检测器返回的人脸区域（bounding box），并对该区域内的关键点进行精细化定位。这些关键点包括眼睛、鼻子、嘴巴等面部特征的位置。

模型下载地址：http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 （下载得到的是 .bz2 压缩包，需要先解压，例如执行 bzip2 -d shape_predictor_68_face_landmarks.dat.bz2，得到代码中加载的 .dat 文件）

    #对所有的图片逐个处理
    for img_file in input_images:
        img = self.read_image_opencv(img_file).copy()
        #dlib只能处理灰度图
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # 使用 dlib 进行人脸检测，得到人脸boxes，faces是单张图片中的人脸列表
        faces = detector(gray_img)

        print(f"Detected {len(faces)} faces in {img_file}")
    
        for i, face in enumerate(faces):
            #对图片中的人脸进行处理，返回landmarks
            landmarks = predictor(gray_img, face)
            #将预测到的关键点信息存储在 landmarks_np 中
            landmarks_np = np.array([[landmarks.part(j).x, landmarks.part(j).y] for j in range(68)])

            if i == face_index:
               # 对于指定的人脸索引，进行对齐和裁剪
               img = self.align_crop_image(image=img, landmarks=landmarks_np, transform_size=transform_size)

        cv2.imwrite(osp.join(output_dir, osp.split(img_file)[-1]), cv2.cvtColor(img.copy(), cv2.COLOR_RGB2BGR))

    return output_dir

到此代码结束，可以实现对多人图片中指定的face_index人脸进行裁剪对齐，处理成FFHQ数据集中的样式

lang.y

关注

6
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
针对FFHQFaceAlignment只能裁剪对齐单个人脸的改进，使用dlib库

FFHQFaceAlignment只能裁剪对齐单个人脸，针对多人脸图像，只能检测一个人脸。我写的agent想实现一个功能：可以针对多人图像中的单个人脸进行裁剪对齐，添加一个参数face_index来进行人脸选择。所以编写了这个代码，原代码中的模型并不支持多人脸检测，改用dlib库和相关模型。
复制链接

扫一扫