CASME2图片的对齐与裁剪

^_^linger^_^
已于 2023-12-01 12:13:59 修改
阅读量318
点赞数
文章标签： python 开发语言
于 2023-05-17 11:28:29 首次发布
本文链接：https://blog.csdn.net/GYY8023/article/details/130635771
版权
CASME2图片的对齐与裁剪

CASME2图片的对齐与裁剪
参考链接
CASME2图片的对齐与裁剪

import os

import cv2
import numpy as np
import math
from collections import defaultdict
from PIL import Image,ImageDraw
from matplotlib.pyplot import imshow, show, grid
from datetime import datetime
import face_recognition  # install from https://github.com/ageitgey/face_recognition

#此删除文件夹内容的函数来源于网上
def del_file(filepath):
    """Remove every regular file located directly inside ``filepath``.

    Only entries for which ``os.path.isfile`` is true are deleted;
    sub-directories (and anything inside them) are left in place.

    :param filepath: path of the directory to clean
    :return: None
    """
    for entry_name in os.listdir(filepath):
        candidate = os.path.join(filepath, entry_name)
        if not os.path.isfile(candidate):
            # Directories and other non-file entries are skipped.
            continue
        os.remove(candidate)

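# Facial landmarks are detected with the face_recognition API: model="large" returns 68 key points, model="small" returns 5.
# The function below only plots landmarks that have already been detected.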
def visualize_landmark(image_array, landmarks):
    """Draw all landmark points on the image and hand it to matplotlib.

    :param image_array: numpy array of a single image
    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values
    :return: PIL image built from ``image_array`` with the landmark points drawn on it
    """
    canvas = Image.fromarray(image_array)
    pen = ImageDraw.Draw(canvas)
    for points in landmarks.values():
        # Each value is the sequence of (x, y) points of one facial part.
        pen.point(points)

    # Only imshow() is called here; displaying the figure (show()) is left to the caller.
    imshow(canvas)
    return canvas
# 对检测出的人脸进行关键点检测并用圈进行标记
def circle_landmark(img_cv2, landmarks):
    """Mark every landmark on the image with a circle and its 1-based index.

    The drawing is done directly on ``img_cv2`` through ``cv2.circle`` and
    ``cv2.putText``.

    :param img_cv2: image array to draw on (as read by OpenCV)
    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values
    :return: list with every landmark coordinate, in the order the points were
        numbered (the list was previously built but never returned)
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Flatten the per-part point lists into one list, keeping the dict order.
    coordinate_landmark = [p for points in landmarks.values() for p in points]
    for number, p in enumerate(coordinate_landmark, start=1):
        cv2.circle(img_cv2, p, 3, color=(0, 255, 0))
        # Label the point with its running number (1..N).
        cv2.putText(img_cv2, str(number), p, font, 0.3, (0, 0, 255), 1, cv2.LINE_AA)
    return coordinate_landmark
# 人脸对齐思路：
#
# 分别计算左、右眼中心坐标
# 计算左右眼中心坐标连线与水平方向的夹角θ
# 计算左右两眼整体中心坐标
# 以左右两眼整体中心坐标为基点，将图片array逆时针旋转θ
def align_face(image_array, landmarks):
    """Rotate the image so that the line through both eye centres is horizontal.

    :param image_array: numpy array of a single image
    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values;
        the 'left_eye' and 'right_eye' entries are used
    :return:
    rotated_img:  numpy array of aligned image
    eye_center: tuple of coordinates for eye center
    angle: degrees of rotation
    """
    # Centroid of the landmark points of each eye.
    left_centroid = np.mean(landmarks['left_eye'], axis=0)
    right_centroid = np.mean(landmarks['right_eye'], axis=0)

    # Angle between the line joining the two centroids and the horizontal axis.
    offset = right_centroid - left_centroid
    angle = math.atan2(offset[1], offset[0]) * 180. / math.pi

    # Midpoint between the two eyes (floor-divided), used as rotation centre.
    midpoint = (left_centroid + right_centroid) // 2
    eye_center = (midpoint[0], midpoint[1])

    # Rotate about the eye centre; the output keeps the input's width and height.
    height, width = image_array.shape[0], image_array.shape[1]
    rotate_matrix = cv2.getRotationMatrix2D(eye_center, angle, scale=1)
    rotated_img = cv2.warpAffine(image_array, rotate_matrix, (width, height))
    return rotated_img, eye_center, angle
# 定义旋转图片中坐标的函数，
# 另，由于图片和普通坐标系的原点不同，两者坐标点的旋转方式略有出入，
# 图片坐标旋转涉及y坐标在图片坐标系和普通坐标系之间的变换，
def rotate(origin, point, angle, row):
    """Rotate a point about ``origin`` in image coordinates.

    Image coordinates have y growing downwards, so both points are first
    flipped with ``row - y``, rotated by ``angle`` in that flipped system and
    the resulting y is flipped back.

    :param origin: tuple of coordinates, the rotation center
    :param point: tuple of coordinates, the point to rotate
    :param angle: degrees of rotation
    :param row: row size of the image
    :return: rotated coordinates of point as a tuple of ints (truncated with int())
    """
    point_x, point_y = point
    origin_x, origin_y = origin

    # Switch to a y-up coordinate system.
    point_y_up = row - point_y
    origin_y_up = row - origin_y

    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    dx = point_x - origin_x
    dy = point_y_up - origin_y_up

    new_x = origin_x + cos_t * dx - sin_t * dy
    new_y_up = origin_y_up + sin_t * dx + cos_t * dy

    # Flip y back to image coordinates before truncating.
    return int(new_x), int(row - new_y_up)
# 定义旋转图片中landmark的函数，以人脸双眼中心为基点，将每个人脸关键点逆时针旋转θ，该θ角度是人脸对齐的旋转角度
def rotate_landmarks(landmarks, eye_center, angle, row):
    """Rotate every landmark about the eye centre so it matches the aligned face.

    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values
    :param eye_center: tuple of coordinates for eye center
    :param angle: degrees of rotation
    :param row: row size of the image
    :return: defaultdict(list) with the same keys/order as ``landmarks`` holding the rotated points
    """
    rotated_landmarks = defaultdict(list)
    # Walk all (part name, point) pairs; a part only appears in the result
    # once at least one of its points has been rotated.
    labelled_points = (
        (part_name, pt)
        for part_name, part_points in landmarks.items()
        for pt in part_points
    )
    for part_name, pt in labelled_points:
        rotated_landmarks[part_name].append(rotate(eye_center, pt, angle, row))
    return rotated_landmarks

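# After alignment, the face is usually cropped to a fixed size based on the landmarks and then fed into a convolutional network. The cropping strategy here is:
#
# Horizontally, the midpoint between the left-most and right-most landmarks is the centre of the cropped image
# Vertically, the crop is split into three parts
# Middle: pixel distance from the centre of the eye landmarks to the centre of the mouth landmarks
# Bottom and top: (size - distance from the eye-landmark centre to the mouth-landmark centre) / 2 each, where size is the requested height
#
#
# Define the cropping function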
def corp_face(image_array, size, landmarks):
    """Crop a ``size`` x ``size`` region positioned from eye, lip and chin landmarks.

    Horizontally the crop is centred between the smallest and largest x of
    the chin landmarks. Vertically the eye-to-lip distance forms the middle
    band and the remaining ``size - mid_part`` pixels are split equally above
    the eye centre and below the lip centre.

    :param image_array: numpy array of a single image
    :param size: single int value, size for w and h after crop
    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values;
        the 'chin', 'left_eye', 'right_eye', 'top_lip' and 'bottom_lip' entries are used
    :return:
    cropped_img: numpy array of cropped image
    left, top: left and top coordinates of cropping (ints)
    """
    x_min = np.min(landmarks['chin'], axis=0)[0]
    x_max = np.max(landmarks['chin'], axis=0)[0]
    x_center = (x_max - x_min) / 2 + x_min
    left, right = (x_center - size / 2, x_center + size / 2)

    eye_landmark = list(landmarks['left_eye']) + list(landmarks['right_eye'])
    eye_center = np.mean(eye_landmark, axis=0).astype("int")
    # The lower-lip key is 'bottom_lip'. A misspelled key raises KeyError on a
    # plain dict and silently drops the lower lip on a defaultdict(list).
    lip_landmark = list(landmarks['top_lip']) + list(landmarks['bottom_lip'])
    lip_center = np.mean(lip_landmark, axis=0).astype("int")
    mid_part = lip_center[1] - eye_center[1]
    margin = (size - mid_part) / 2
    top, bottom = eye_center[1] - margin, lip_center[1] + margin

    pil_img = Image.fromarray(image_array)
    # PIL's crop box is given as whole pixels here; fractions are truncated.
    left, top, right, bottom = [int(i) for i in [left, top, right, bottom]]
    cropped_img = pil_img.crop((left, top, right, bottom))
    cropped_img = np.array(cropped_img)
    return cropped_img, left, top
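# At readers' request, the cropping logic was changed to adapt to input images of different sizes; the intended logic follows the PRN paper:
#
# Vertically, the crop is split into three parts
# Middle: pixel distance from the centre of the eye landmarks to the centre of the mouth landmarks, 35% of the height
# Bottom: 35% of the height
# Top: 30% of the height
# Horizontally, the midpoint between the left-most and right-most landmarks is the centre of the cropped image; the crop region has w = h
# NOTE(review): the function below actually uses top = mid_part and bottom = mid_part * 40 / 70 (about 39% / 39% / 22%), which does not match the percentages above - confirm which is intended.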
def corp_face_unsize(image_array, landmarks):
    """Crop a square face region whose size is derived from the landmarks.

    With ``mid_part`` being the vertical eye-centre to lip-centre distance,
    the crop starts ``mid_part`` above the eye centre and ends
    ``mid_part * 40 / 70`` below the lip centre. Its width equals that height
    and it is centred horizontally between the smallest and largest x of the
    chin landmarks.

    :param image_array: numpy array of a single image
    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values
    :return:
    cropped_img: numpy array of cropped image
    left, top: left and top coordinates of cropping (ints)
    """

    def _mean_point(first_part, second_part):
        # Integer-truncated mean of the points of two facial parts.
        stacked = np.concatenate([np.array(landmarks[first_part]),
                                  np.array(landmarks[second_part])])
        return np.mean(stacked, axis=0).astype("int")

    eye_y = _mean_point('left_eye', 'right_eye')[1]
    lip_y = _mean_point('top_lip', 'bottom_lip')[1]
    eye_to_lip = lip_y - eye_y

    top = eye_y - eye_to_lip
    bottom = lip_y + eye_to_lip * 40 / 70
    side = bottom - top  # the crop is square: width == height

    chin_x_min = np.min(landmarks['chin'], axis=0)[0]
    chin_x_max = np.max(landmarks['chin'], axis=0)[0]
    x_center = (chin_x_max - chin_x_min) / 2 + chin_x_min
    left = x_center - side / 2
    right = x_center + side / 2

    # Truncate the box to whole pixels before cropping with PIL.
    left, top, right, bottom = int(left), int(top), int(right), int(bottom)
    cropped_img = np.array(Image.fromarray(image_array).crop((left, top, right, bottom)))
    return cropped_img, left, top

# 定义landmark变换函数，由于图片裁剪，landmark坐标需要再次变换。
def transfer_landmark(landmarks, left, top):
    """Shift landmarks into the coordinate system of the cropped face.

    :param landmarks: dict of landmarks for facial parts as keys and tuple of coordinates as values
    :param left: left coordinates of cropping
    :param top: top coordinates of cropping
    :return: defaultdict(list) with the same structure as ``landmarks`` whose points
        are moved by ``(-left, -top)``
    """
    transferred_landmarks = defaultdict(list)
    # Walk all (part name, point) pairs; a part only appears in the result
    # once at least one of its points has been shifted.
    labelled_points = (
        (part_name, pt)
        for part_name, part_points in landmarks.items()
        for pt in part_points
    )
    for part_name, pt in labelled_points:
        transferred_landmarks[part_name].append((pt[0] - left, pt[1] - top))
    return transferred_landmarks

# Written 2023-11-30: align and crop every frame under CASME2-RAW-video2jpg
# and store the results in a parallel directory tree.
def _load_rgb(image_path):
    """Read an image with OpenCV and return it as an RGB array, or None if it cannot be read."""
    bgr = cv2.imread(image_path)  # OpenCV returns BGR, H x W x C (None on failure)
    if bgr is None:
        return None
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _process_clip(src_dir, dst_dir):
    """Align and crop all frames of one clip with the transform derived from its first frame.

    :param src_dir: directory holding the frames of one clip
    :param dst_dir: existing directory that receives the 224x224 cropped frames
    :return: None
    """
    # Sorted so that the choice of reference frame is deterministic.
    # NOTE(review): the order is lexicographic; confirm the first name is the
    # frame that should serve as reference.
    frame_names = sorted(os.listdir(src_dir))
    if not frame_names:
        print("No frames found, skipped:", src_dir)
        return

    reference_path = os.path.join(src_dir, frame_names[0])
    reference_img = _load_rgb(reference_path)
    if reference_img is None:
        print("Cannot read reference frame, skipped:", reference_path)
        return

    # Landmark detection is expensive and identical for every frame of the
    # clip, so it is done once here instead of once per frame.
    detections = face_recognition.face_landmarks(reference_img, model="large")
    if not detections:
        print("No face detected in reference frame, skipped:", reference_path)
        return
    reference_landmarks = detections[0]

    _, eye_center, angle = align_face(image_array=reference_img,
                                      landmarks=reference_landmarks)
    # Landmarks expressed in the rotated image; they define the crop box.
    rotated_reference = rotate_landmarks(landmarks=reference_landmarks,
                                         eye_center=eye_center, angle=angle,
                                         row=reference_img.shape[0])

    for frame_name in frame_names:
        frame_path = os.path.join(src_dir, frame_name)
        frame_img = _load_rgb(frame_path)
        if frame_img is None:
            print("Cannot read image, skipped:", frame_path)
            continue

        # Rotate with the ORIGINAL reference landmarks so each frame gets the
        # same rotation as the reference frame. Feeding the already-rotated
        # landmarks here would give an angle of about zero and leave the
        # frame unaligned with the crop box.
        aligned_face, _, _ = align_face(image_array=frame_img,
                                        landmarks=reference_landmarks)
        # Crop with the rotated landmarks, which live in the aligned image.
        cropped_face, _, _ = corp_face_unsize(image_array=aligned_face,
                                              landmarks=rotated_reference)

        cropped_bgr = cv2.cvtColor(cropped_face, cv2.COLOR_RGB2BGR)
        resized = cv2.resize(cropped_bgr, (224, 224))

        target_path = os.path.join(dst_dir, frame_name)
        print("Save into:", target_path)
        cv2.imwrite(target_path, resized)


def main():
    """Walk <image_root>/<subject>/<clip>/ and write cropped frames to the mirrored output tree."""
    t1 = datetime.now()
    image_root = 'D:/gyy_data/CASMEII/CASME2-RAW-video2jpg'
    image_root_cropped = 'D:/gyy_data/CASMEII/CASME2-RAW-video2jpg_cropped'

    for subject in os.listdir(image_root):
        # sub_dir: './CASME2-RAW-video2jpg/sub01/'
        sub_dir = os.path.join(image_root, subject) + '/'
        if not os.path.isdir(sub_dir):
            continue  # ignore stray files next to the subject folders
        # sub_file_dir: './CASME2-RAW-video2jpg_cropped/sub01/'
        sub_file_dir = os.path.join(image_root_cropped, subject) + '/'

        # Clip folders look like 'EP02_01f', 'EP03_02', ...
        for clip in os.listdir(sub_dir):
            # each_image_dir: './CASME2-RAW-video2jpg/sub01/EP02_01f/'
            each_image_dir = os.path.join(sub_dir, clip) + '/'
            if not os.path.isdir(each_image_dir):
                continue
            # jpg_dir_cropped: './CASME2-RAW-video2jpg_cropped/sub01/EP02_01f/'
            jpg_dir_cropped = os.path.join(sub_file_dir, clip) + '/'
            print(jpg_dir_cropped)
            if not os.path.exists(jpg_dir_cropped):
                os.makedirs(jpg_dir_cropped)
                print("-----OK-----")
            else:
                print("文件夹已经存在！")
                # An existing output folder is emptied before new frames are written.
                del_file(jpg_dir_cropped)
            print(each_image_dir)
            _process_clip(each_image_dir, jpg_dir_cropped)

    t2 = datetime.now()
    print("Time cost = ", (t2 - t1))
    print("SUCCEED !!!")


if __name__ == '__main__':
    main()