人脸表情分类算法设计


前言

表情分类是深度学习中一个重要的应用领域。本文介绍一个基于KDEF数据集的表情分类算法的完整设计流程。
github:link


一、KDEF数据集介绍

数据集链接link
KDEF全称为Karolinska Directed Emotional Faces。该数据集发布于1998年,最初用于心理学和医学研究。在数据集的采集过程中,特意使用了比较柔和、均匀的光照,志愿者穿着统一颜色的T恤,没有胡须、耳环和眼镜,也没有明显的化妆。每个志愿者拍摄7种不同的表情,每种表情包含5个角度。数据集中共有70名志愿者(35名男性、35名女性),年龄在20~30岁之间,共计4900张彩色图片,大小为562*762。(此处原文有示例图片)

二、数据集处理

1.数据集划分

原始的数据集按照志愿者的ID为每人单独建立了一个子文件夹,并使用“志愿者ID + 表情分类标签 + 头部姿势”的方式标记每一张图片。这不是我们目标数据集的编号方式。我们打算用“表情 + 序号”的方式对KDEF数据集重新标注,然后汇总到一个文件夹中。

2.数据清洗

完成第一步后,我们发现部分数据是空白的,即没有人的表情对象,无效数据,所以我们在数据的划分过程中,使用了对灰度值判断的方法,将无效数据进行删除

3.训练集和测试集划分

我们使用随机种子,将数据集划分成了训练集和测试集 。其中训练集 4000张,测试集888张。

数据集处理代码:

import os
import cv2 as cv
import numpy as np
import random

# Source and destination directories for the KDEF re-labelling pipeline.
path = "./data/KDEF/"                          # original KDEF tree: one sub-folder per volunteer
save_path  = './data/KDEF_ORDER/'              # flat folder: <emotion-code><running-index>.jpg
save_path_train  = './data/KDEF_ORDER_TRAIN/'
save_path_test = './data/KDEF_ORDER_TEST/'

# BUG FIX: the original listed `save_path` here, which is empty on a fresh
# run; the volunteer sub-folders actually live under `path`.
subject_dirs = os.listdir(path)

# Running counter per emotion code.  The code is characters 4:6 of a KDEF
# file name (e.g. AF01AFS.JPG -> 'AF').
abr_dict = {'AF':0,'AN':0,'DI':0,'HA':0,'NE':0,'SA':0,'SU':0}

#-------------------------------------------#
# Rename every valid image to <emotion><index>.jpg and copy it into the
# flat `save_path` folder; blank or unreadable images are skipped.
#-------------------------------------------#
for subject in subject_dirs:
    for sub_file in os.listdir(path + subject):
        abr = sub_file.split('.')[0][4:6]           # emotion code from the file name
        if abr in abr_dict:
            temp_img = cv.imread(path + subject + '/' + sub_file)
            # imread returns None for unreadable files; a mean grey value
            # <= 30 marks a (near-)blank capture with no face in it.
            if temp_img is not None and np.mean(temp_img) > 30:
                abr_dict[abr] += 1
                cv.imwrite(save_path + abr + str(abr_dict[abr]) + '.jpg', temp_img)
            else:
                print(path + subject + '/' + sub_file)   # report the discarded image

#------------------------------------------#
# Random train/test split: first 4000 shuffled images -> train, rest -> test.
#------------------------------------------#
# BUG FIX: re-list the flat folder that was just written.  The original
# reused the volunteer-folder listing (only 70 entries), so the split
# operated on directory names instead of the ~4900 renamed images.
ordered_files = os.listdir(save_path)
length = len(ordered_files)
shuffled = random.sample(range(length), length)   # random permutation of indices

for i in shuffled[:4000]:                         # training subset
    img = cv.imread(save_path + ordered_files[i])
    cv.imwrite(save_path_train + ordered_files[i], img)

for i in shuffled[4000:]:                         # test subset
    img = cv.imread(save_path + ordered_files[i])
    cv.imwrite(save_path_test + ordered_files[i], img)

三、数据增强

通过数据增强扩充数据集的数量。一共使用了四种数据增强方式:直方图均衡、GAMMA变换的图像增强、CLAHE对比度受限自适应直方图均衡(代码中记作CLHE),以及深度学习中常用的随机裁剪。

from torch.utils.data import Dataset
import os 
import cv2 as cv
import numpy as np
import torch
#----------------------------------------------------#
#在读取图片的时候把图片的预处理加入 原始图片输入562*762 统一转换到762*762
#----------------------------------------------------#
# Maps the two-letter KDEF emotion code to a class index 0-6.
emotion_label = {'AF':0,'AN':1,'DI':2,'HA':3,'NE':4,'SA':5,'SU':6}

def one_hot(x, class_count):
    """Return the one-hot encoding of label(s) *x* over *class_count* classes.

    Accepts a single int or a tensor/list of ints: rows of the identity
    matrix are selected, so the result has shape (*x.shape, class_count).
    (Original body mixed tabs and spaces for indentation; normalised to
    4-space indents.)
    """
    # torch.eye builds the class_count x class_count identity; indexing
    # keeps exactly the row(s) matching the label(s).
    return torch.eye(class_count)[x, :]

def gamma(image, exponent=0.4):
    """Gamma-correct an 8-bit image channel.

    The input is scaled to [0, 1] and raised element-wise to *exponent*
    (an exponent < 1 brightens dark regions).  Returns a float array in
    [0, 1] — note this differs from CLHE, which returns uint8.

    The exponent was previously hard-coded (in a local that shadowed the
    function name); it is now a parameter with the same default, so
    existing callers are unaffected.
    """
    scaled = image / 255.0
    return np.power(scaled, exponent)

def CLHE(image):
    """Apply CLAHE (contrast-limited adaptive histogram equalisation)
    to one 8-bit channel and return the enhanced channel (uint8)."""
    equaliser = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equaliser.apply(image)

class Face_Dataset(Dataset):
    """KDEF training dataset.

    Loads images from *img_dir* (file names start with the two-letter
    emotion code) and optionally applies per-channel histogram
    equalisation and/or alternating gamma / CLAHE contrast enhancement.

    NOTE(review): the gamma/CLAHE alternation is driven by the mutable
    flag ``self.a``, so which enhancement a given sample receives depends
    on access order and is not reproducible under shuffling or multiple
    DataLoader workers — confirm whether that is intended.  Behaviour
    kept as-is.
    """

    def __init__(self, img_dir=None, imgs_transform=None, equalize=False, contrast_enhanced=False):
        self.img_dir   = img_dir
        self.transform = imgs_transform
        self.filelist  = os.listdir(self.img_dir)
        self.equalize  = equalize
        self.contrast  = contrast_enhanced
        self.a         = 0   # toggles between gamma (0) and CLAHE (1)

    def __len__(self):
        return len(self.filelist)

    def __getitem__(self, index):
        temp_img = cv.imread(self.img_dir + self.filelist[index])

        if self.equalize:
            # Equalise each BGR channel independently.
            b, g, r  = cv.split(temp_img)
            temp_img = cv.merge([cv.equalizeHist(b), cv.equalizeHist(g), cv.equalizeHist(r)])

        if self.contrast:
            b, g, r = cv.split(temp_img)
            if self.a == 0:      # gamma correction this call ...
                temp_img = cv.merge([gamma(b), gamma(g), gamma(r)])
                self.a   = 1
            else:                # ... CLAHE the next call
                temp_img = cv.merge([CLHE(b), CLHE(g), CLHE(r)])
                self.a   = 0

        # Emotion code = first two characters of the file name.
        emotion = emotion_label[self.filelist[index].split('.')[0][:2]]
        # BUG FIX: the original returned the undefined name `gray_pic`
        # (NameError) when no transform was supplied; fall back to the
        # raw image instead.
        if self.transform is not None:
            temp_img = self.transform(temp_img)
        return temp_img, torch.LongTensor([emotion])


class Face_Test_Dataset(Dataset):
    """KDEF test dataset: plain image loading, no augmentation.

    The emotion label is parsed from the first two characters of the
    file name via the module-level ``emotion_label`` mapping.
    """

    def __init__(self, img_dir=None, imgs_transform=None):
        self.img_dir   = img_dir
        self.transform = imgs_transform
        self.filelist  = os.listdir(self.img_dir)

    def __len__(self):
        return len(self.filelist)

    def __getitem__(self, index):
        temp_img = cv.imread(self.img_dir + self.filelist[index])
        emotion  = emotion_label[self.filelist[index].split('.')[0][:2]]
        # BUG FIX: the original returned the undefined name `gray_pic`
        # (NameError) when no transform was supplied.
        if self.transform is not None:
            temp_img = self.transform(temp_img)
        return temp_img, torch.LongTensor([emotion])

四、训练代码

在本任务中,使用resnet50作为主干特征网络用于特征的提取,并使用了warm-up的策略进行学习率的调整。

from numpy.core.numeric import False_
from torch.utils.data import DataLoader,ConcatDataset
from torchvision.transforms import transforms
import numpy as np
from torch import nn,optim
import cv2 as cv
import torch
from torchvision.models import resnet50
import argparse
from datasets import Face_Dataset
from tqdm import tqdm
from tensorboardX import SummaryWriter, writer

# Train on the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0"if torch.cuda.is_available() else "cpu")
# Augmented pipeline: resize to 512, random-crop to 500, then resize back
# to 512 — the crop + re-resize acts as a small random zoom/shift jitter.
img_crop_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((512,512)),
    transforms.RandomCrop(500),
    transforms.Resize((512,512))

])

# Plain pipeline: tensor conversion and fixed resize only.
img_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((512,512))
])

#----------------------------------------#
#学习率调整函数
#----------------------------------------#
def warmup_learning_rate(optimizer, iteration, target_lr=None):
    """Linearly ramp the learning rate during warm-up.

    At step *iteration* (expected range 0-100) the lr of every param
    group is set to the linear interpolation between ``lr_ini`` and
    *target_lr*.  *target_lr* defaults to the module-level
    ``args.initial_lr`` (the original hard-wired behaviour) but can now
    be passed explicitly, removing the hidden global dependency.

    NOTE(review): this helper is defined but never called from train(),
    so warm-up is effectively disabled — confirm whether that is intended.
    """
    lr_ini = 0.0001                      # starting lr of the ramp
    if target_lr is None:
        target_lr = args.initial_lr      # backward-compatible default
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_ini + (target_lr - lr_ini) * iteration / 100

def cosin_deacy(optimizer, lr_base, current_epoch, warmup_epoch, global_epoch):
    """Cosine-annealing lr schedule applied after the warm-up phase.

    Decays from *lr_base* (at current_epoch == warmup_epoch) to 0 (at
    current_epoch == global_epoch) along a half cosine, writing the new
    lr into every param group of *optimizer*.
    """
    # BUG FIX: `np.float` was removed in NumPy 1.20+; the builtin float()
    # is the documented replacement and gives identical results.
    progress = (current_epoch - warmup_epoch) / float(global_epoch - warmup_epoch)
    lr_new = 0.5 * lr_base * (1 + np.cos(np.pi * progress))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_new

# NOTE(review): duplicates the `device` assignment made just after the
# imports; harmless but redundant.
device         = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train(args):
    """Train the ResNet-50 emotion classifier on the augmented KDEF set.

    Builds a 7-way head on top of torchvision's resnet50, trains with
    Adam + cross-entropy on four dataset views (original, random-crop,
    equalised, contrast-enhanced), cosine-decays the lr after
    ``args.warmup_epoch``, logs the last batch loss of each epoch to
    tensorboard and saves the final weights to ``args.weights``.
    """
    net = nn.Sequential(
        resnet50(),
        nn.Linear(1000, 100),   # 1000-d resnet output -> 100
        nn.Linear(100, 7)       # -> 7 emotion classes
    )
    # BUG FIX: hard-coding device_ids=[0,1] crashes on machines with
    # fewer than two GPUs.  The default wraps every visible GPU (and is
    # a pass-through on CPU) while keeping the 'module.' state-dict
    # prefix that the evaluation script expects.
    model     = torch.nn.DataParallel(net)
    model     = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)
    criterion = torch.nn.CrossEntropyLoss()
    writer    = SummaryWriter('./log')

    # The four dataset views are constant across epochs, so build them
    # once instead of re-listing the image directory every epoch.
    face_data = ConcatDataset([
        Face_Dataset(args.train_datasets, img_transforms),
        Face_Dataset(args.train_datasets, img_crop_transforms),
        Face_Dataset(args.train_datasets, imgs_transform=img_crop_transforms, equalize=True),
        Face_Dataset(args.train_datasets, imgs_transform=img_crop_transforms, contrast_enhanced=True),
    ])

    lr_base = args.initial_lr
    for epoch in range(args.epochs):
        face_dataloader = DataLoader(
            face_data,
            batch_size  = args.batch_size,
            shuffle     = True,
            num_workers = args.workers,
            drop_last   = False
        )
        print('#' + '_' * 40 + '#')
        for img, emotion in tqdm(face_dataloader):
            # BUG FIX: the original moved the batch to `device` and THEN
            # called .type(torch.FloatTensor), which silently moved it
            # back to the CPU — cast first, move second.
            img     = img.float().to(device)
            emotion = emotion.to(device)
            out  = model(img)
            loss = criterion(out, emotion.squeeze())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Capture the lr in force when warm-up ends, then cosine-decay
            # it over the remaining epochs.
            if epoch == args.warmup_epoch:
                lr_base = optimizer.state_dict()['param_groups'][0]['lr']
            if epoch >= args.warmup_epoch:
                cosin_deacy(optimizer, lr_base, epoch, args.warmup_epoch, args.epochs)
        # NOTE(review): CrossEntropyLoss already averages over the batch,
        # so dividing by batch_size again under-reports the loss; kept
        # for continuity with previously logged curves.
        writer.add_scalar('train_loss', loss / args.batch_size, global_step=epoch)
        print('epoch:{0},train_loss:{1},learning_rate:{2}'.format(
            epoch + 1,
            round(loss.item() / args.batch_size, 6),
            round(optimizer.state_dict()['param_groups'][0]['lr'], 6)))
    torch.save(model.state_dict(), '{0}EMC{1}.pth'.format(args.weights, epoch + 1))

def parse_args():
    """Collect the command-line options for a training run."""
    # (flag, type, default) for every supported option.
    options = [
        ('--workers',        int,   16),
        ('--initial_lr',     float, 0.0001),
        ('--epochs',         int,   100),
        ('--warmup_epoch',   int,   50),
        ('--batch_size',     int,   18),
        ('--weights',        str,   "./weights/"),
        ('--train_datasets', str,   './data/KDEF_ORDER_TRAIN/'),
    ]
    parser = argparse.ArgumentParser()
    for flag, kind, default in options:
        parser.add_argument(flag, type=kind, default=default)
    return parser.parse_args()

# Script entry point: parse CLI options and launch training.
if __name__ == '__main__':
    args = parse_args()  
    train(args)

五、测试代码

在测试代码中计算了模型的准确率,同时计算了各个分类之间的混淆矩阵

from operator import index
from unicodedata import normalize
from pandas.core.frame import DataFrame
from torch import nn 
import numpy as np
from torch.autograd.grad_mode import no_grad
from tqdm import tqdm
import torch
import cv2 as cv
import argparse
from torchvision.models import resnet50
from torch.utils.data.dataloader import DataLoader
from torchvision.transforms import transforms
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from tqdm.cli import main
from datasets import Face_Dataset,Face_Test_Dataset
import pandas as pd
import torchvision as tv

# Evaluate on the first GPU when available, otherwise on the CPU.
device = torch.device("cuda:0"if torch.cuda.is_available() else "cpu")
# Same preprocessing as training, minus the augmentation.
img_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((512,512))
])
# Human-readable class names indexed by the 0-6 label.
# BUG FIX: corrected the misspelled labels 'netural' -> 'neutral' and
# 'suprise' -> 'surprise'; these strings only appear in the saved
# wrong-prediction file names and the confusion-matrix axis labels.
emotion_list = ['afraid','angry','disgust','happy','neutral','sad','surprise']
# Two-letter KDEF code -> class index.
emotion_label = {'AF':0,'AN':1,'DI':2,'HA':3,'NE':4,'SA':5,'SU':6}
'''
  What this script does:
    1. overall prediction accuracy on the test set
    2. the per-class confusion matrix
    3. saves every wrongly classified image under a name that encodes the
       ground-truth ('origin') label, the predicted label and a running id
'''
def test(args):
    """Evaluate the trained classifier on the KDEF test set.

    Reports running accuracy, saves every misclassified image under a
    name encoding the ground-truth and predicted emotions, and renders
    the confusion matrix to a heat-map image.

    NOTE(review): the per-sample logic (scalar comparison, img.squeeze())
    assumes --batch_size 1, which matches the script default; larger
    batches would silently evaluate only part of each batch.
    """
    net = nn.Sequential(
        resnet50(),
        nn.Linear(1000, 100),
        nn.Linear(100, 7)
    )
    # BUG FIX: hard-coded device_ids=[0,1] plus unconditional .cuda()
    # crashed on machines with <2 GPUs (or none); the default wraps every
    # visible device and keeps the 'module.' prefix of the checkpoint
    # produced by the training script.
    model = torch.nn.DataParallel(net).to(device)
    model.load_state_dict(torch.load(args.weights))
    test_datasets   = Face_Test_Dataset(args.test_datasets, img_transforms)
    test_dataloader = DataLoader(test_datasets, batch_size=args.batch_size,
                                 num_workers=args.workers, drop_last=False)
    predict_correct_num = 0
    label_num       = 0
    wrong_num       = 0
    target_predict  = []
    target_label    = []
    bar = tqdm(test_dataloader)   # BUG FIX: the loader was wrapped in tqdm twice
    model.eval()
    with no_grad():
        for img, emotion in bar:
            # BUG FIX: cast to float BEFORE moving to the device (the
            # original cast afterwards, pulling the batch back to CPU).
            img = img.float().to(device)
            # BUG FIX: the original reused the name `emotion_label`,
            # shadowing the module-level code->index dict.
            gt = int(emotion.view(-1)[0])
            scores = model(img).squeeze().cpu().numpy()
            # BUG FIX: argmax instead of np.where(x == max(x)) — where()
            # returns a tuple of index arrays and yields several indices
            # on ties.
            predicted = int(np.argmax(scores))

            #----------------#
            # Overall accuracy; misclassified images are saved with their
            # ground-truth and predicted labels in the file name.
            #----------------#
            label_num += 1
            if predicted == gt:
                predict_correct_num += 1
            else:
                wrong_num += 1
                save_wrong_path = (args.predict_wrong + 'origin' + '_' +
                                   emotion_list[gt] + '_' + 'predict' + '_' +
                                   emotion_list[predicted] + '__' +
                                   str(wrong_num) + '.jpg')
                # Undo the tensor preprocessing (CHW float in [0,1]) so
                # the image can be written back at the original size.
                wrong_img = img.cpu().detach().numpy().squeeze().transpose(1, 2, 0)
                wrong_img = (wrong_img * 255).astype(np.uint8)
                cv.imwrite(save_wrong_path, cv.resize(wrong_img, (562, 762)))

            #------------------#
            # Collect labels for the confusion matrix.
            #------------------#
            target_label.append(str(gt))
            target_predict.append(str(predicted))
            bar.set_description('accuracy_rate of emotion classification is %f'
                                % (predict_correct_num / label_num))

    matrix    = confusion_matrix(np.array(target_label), np.array(target_predict))
    dataframe = pd.DataFrame(matrix, index=emotion_list, columns=emotion_list)
    C_M = sns.heatmap(dataframe, annot=True, cbar=None, cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('emotion_label')
    plt.xlabel('emotion_predict')
    plt.show()
    C_M = C_M.get_figure()
    C_M.savefig('Confusion_Matrix_Emotion_Classification.jpg', dpi=500)   # persist the heat-map

def parse_args():
    """Collect the command-line options for an evaluation run."""
    # (flag, type, default) for every supported option.
    options = [
        ('--workers',       int, 16),
        ('--batch_size',    int, 1),
        ('--test_datasets', str, './data/KDEF_ORDER_TEST/'),
        ('--weights',       str, './weights/EMC100_with_enhanced.pth'),
        ('--predict_wrong', str, './data/KDEF_PREDICT_WRONG/'),
    ]
    parser = argparse.ArgumentParser()
    for flag, kind, default in options:
        parser.add_argument(flag, type=kind, default=default)
    return parser.parse_args()

# Script entry point: parse CLI options and run the evaluation.
if __name__ == '__main__':
    args = parse_args()
    test(args)

六、结果

对888张图片的测试集进行分类,准确率为94.8%,各分类的混淆矩阵如下。
在这里插入图片描述

如果感觉有用,麻烦各位大佬点个赞,谢谢,best wishes!

  • 9
    点赞
  • 36
    收藏
    觉得还不错? 一键收藏
  • 5
    评论
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值