用深度学习创建自己的表情
翻译地址源连接
通过该深度学习(卷积神经网络)项目,对脸部表情分类在将分好类的情绪映射为为相应的表情。
数据集
FER2013数据集(面部表情识别)由48*48像素的灰度人脸图像组成。图像居中并占据等量的空间。本数据集由以下类别的面部情绪组成
0:angry(生气)
1:disgust(嫌弃)
2:fear(害怕)
3:happy(开心)
4:sad(伤心)
5:surprise(惊讶)
6:neutral
第一步 导入库
import tkinter as tk
from tkinter import *
import cv2
from PIL import Image, ImageTk
import os
import numpy as np
import cv2
# 序列模型
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
第二步 初始化训练集和验证集生成器
ImageDataGenerator()图片处理生成器
图片处理(对每一个批次的训练图片进行数据增强处理如旋转Rotation、归一化、平移shift、翻转flip、大小变换zoom等等)
生成器(每次给模型喂入一个batch_size大小的样本数据)
ImageDataGenerator的方法flow_from_directory()通过文件夹路径采集数据和标签数组,生成批量增强数据
target_size默认图片大小为256256,现将图片调整为4848
batch_size批量大小默认为32,现设置为64
color_model颜色模式,默认RGB,设置为灰度为单通道的图片
class_model决定了返回的标签组的形式,categorical会但会2维的独热编码标签,默认维categorical
train_dir = 'data/train'
val_dir = 'data/test'
# 归一化
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
#
train_generator = train_datagen.flow_from_directory(
train_dir,
target_size=(48,48),
batch_size=64,
color_mode="gray_framescale",
class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
val_dir,
target_size=(48,48),
batch_size=64,
color_mode="gray_framescale",
class_mode='categorical')
第三步 创建卷积神经网络
keras.layers.Conv2D(filters, kernel_size, strides=(1, 1), padding=‘valid’, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer=‘glorot_uniform’, bias_initializer=‘zeros’, kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None)
emotion_model = Sequential()
# 第一层需要指出输入的形状(样本数,行,列,通道数),只指出后三维即可,第一维按batch_size自动指定
# 卷积中滤波器的数量为32,即输出空间的维度
# kernel_size 卷积核大小
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
# 最大池化层
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
# 丢弃率
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
# 最终输出7类
emotion_model.add(Dense(7, activation='softmax'))
第四步 编译训练模型
emotion_model.compile(loss='categorical_crossentropy',optimizer=Adam(lr=0.0001, decay=1e-6),metrics=['accuracy'])
emotion_model_info = emotion_model.fit_generator(
train_generator,
steps_per_epoch=28709 // 64,
epochs=50,
validation_data=validation_generator,
validation_steps=7178 // 64)
##第五步 保存模型参数
emotion_model.save_weights('model.h5')
第六步
CascadeClassifier,是Opencv中做人脸检测的时候的一个级联分类器。并且既可以使用Haar,也可以使用LBP特征
级联分类器的策略是,将若干个强分类器由简单到复杂排列,希望经过训练使每个强分类器都有较高检测率,而误识率可以放低。
# 是否使用OpenCL技术
# OpenCL技术也是通过在GPU上写代码来加速
cv2.ocl.setUseOpenCL(False)
emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprised"}
def show_vid():
# 打开笔记本的内置摄像头
cap1 = cv2.VideoCapture(0)
if not cap1.isOpened():
print("cant open the camera1")
# 按帧读取视频,返回值ret是布尔值,读取帧是正确的则返回True,如果文件读取到结尾返回False
# frame是每一帧的图片是一个三维矩阵
flag1, frame1 = cap1.read()
frame1 = cv2.resize(frame1,(600,500))
# 改为级联分类的地址
bounding_box = cv2.CascadeClassifier('./lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml')
# 转为灰度图
gray_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
# detectMultiScale(待检测图像,scaleFactor前后两次相继的扫描中搜索框的比例系数,默认为1.1即每次搜索框扩大10%,
# minNeighbors表示每一个目标至少要被检测到5次才算是真的目标(因为周围的像素和不同的窗口大小都可以检测到人脸)) 返回人脸
num_faces = bounding_box.detectMultiScale(gray_frame,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in num_faces:
# 在frame上画矩形
cv2.rectangle(frame1, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray_frame = gray_frame[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
prediction = emotion_model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
# putText添加文字 (图片,添加的文字,左上角坐标,字体,字体大小,颜色,字体粗细)
# cv2.putText(frame1, emotion_dict[maxindex], (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
show_text[0]=maxindex
if flag1 is None:
print ("Major error!")
elif flag1:
global last_frame1
last_frame1 = frame1.copy()
pic = cv2.cvtColor(last_frame1, cv2.COLOR_BGR2RGB)
img = Image.fromarray(pic)
imgtk = ImageTk.PhotoImage(image=img)
lmain.imgtk = imgtk
lmain.configure(image=imgtk)
lmain.after(10, show_vid)
if cv2.waitKey(1) & 0xFF == ord('q'):
exit()
第七步 显示
show_text=[0]
emoji_dist={0:"./emojis/angry.png",2:"./emojis/disgusted.png",2:"./emojis/fearful.png",3:"./emojis/happy.png",4:"./emojis/neutral.png",5:"./emojis/sad.png",6:"./emojis/surpriced.png"}
def show_vid2():
# 读入是array
frame2=cv2.imread(emoji_dist[show_text[0]])
pic2=cv2.cvtColor(frame2,cv2.COLOR_BGR2RGB)
# 将array转换为image
img2=Image.fromarray(frame2)
imgtk2=ImageTk.PhotoImage(image=img2)
lmain2.imgtk2=imgtk2
lmain3.configure(text=emotion_dict[show_text[0]],font=('arial',45,'bold'))
lmain2.configure(image=imgtk2)
lmain2.after(10, show_vid2)
if __name__ == '__main__':
root=tk.Tk()
img = ImageTk.PhotoImage(Image.open("logo.png"))
heading = Label(root,image=img,bg='black')
heading.pack()
heading2=Label(root,text="Photo to Emoji",pady=20, font=('arial',45,'bold'),bg='black',fg='#CDCDCD')
heading2.pack()
lmain = tk.Label(master=root,padx=50,bd=10)
lmain2 = tk.Label(master=root,bd=10)
lmain3=tk.Label(master=root,bd=10,fg="#CDCDCD",bg='black')
lmain.pack(side=LEFT)
lmain.place(x=50,y=250)
lmain3.pack()
lmain3.place(x=960,y=250)
lmain2.pack(side=RIGHT)
lmain2.place(x=900,y=350)
root.title("Photo To Emoji")
root.geometry("1400x900+100+10")
root['bg']='black'
exitbutton = Button(root, text='Quit',fg="red",command=root.destroy,font=('arial',25,'bold')).pack(side = BOTTOM)
show_vid()
show_vid2()
root.mainloop()
所有代码
训练模型
import numpy as np
import cv2
from keras.emotion_models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
train_dir = 'data/train'
val_dir = 'data/test'
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
train_dir,
target_size=(48,48),
batch_size=64,
color_mode="gray_framescale",
class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
val_dir,
target_size=(48,48),
batch_size=64,
color_mode="gray_framescale",
class_mode='categorical')
emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
# emotion_model.load_weights('emotion_model.h5')
cv2.ocl.setUseOpenCL(False)
emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprised"}
emotion_model.compile(loss='categorical_crossentropy',optimizer=Adam(lr=0.0001, decay=1e-6),metrics=['accuracy'])
emotion_model_info = emotion_model.fit_generator(
train_generator,
steps_per_epoch=28709 // 64,
epochs=50,
validation_data=validation_generator,
validation_steps=7178 // 64)
emotion_model.save_weights('emotion_model.h5')
# start the webcam feed
cap = cv2.VideoCapture(0)
while True:
# Find haar cascade to draw bounding box around face
ret, frame = cap.read()
if not ret:
break
bounding_box = cv2.CascadeClassifier('/home/shivam/.local/lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml')
gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2gray_frame)
num_faces = bounding_box.detectMultiScale(gray_frame,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in num_faces:
cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray_frame = gray_frame[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
emotion_prediction = emotion_model.predict(cropped_img)
maxindex = int(np.argmax(emotion_prediction))
cv2.putText(frame, emotion_dict[maxindex], (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
cv2.imshow('Video', cv2.resize(frame,(1200,860),interpolation = cv2.INTER_CUBIC))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
显示
import tkinter as tk
from tkinter import *
import cv2
from PIL import Image, ImageTk
import os
import numpy as np
import numpy as np
import cv2
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
emotion_model.load_weights('model.h5')
cv2.ocl.setUseOpenCL(False)
emotion_dict = {0: " Angry ", 1: "Disgusted", 2: " Fearful ", 3: " Happy ", 4: " Neutral ", 5: " Sad ", 6: "Surprised"}
emoji_dist={0:"./emojis/angry.png",2:"./emojis/disgusted.png",2:"./emojis/fearful.png",3:"./emojis/happy.png",4:"./emojis/neutral.png",5:"./emojis/sad.png",6:"./emojis/surpriced.png"}
global last_frame1
last_frame1 = np.zeros((480, 640, 3), dtype=np.uint8)
global cap1
show_text=[0]
def show_vid():
cap1 = cv2.VideoCapture(0)
if not cap1.isOpened():
print("cant open the camera1")
flag1, frame1 = cap1.read()
frame1 = cv2.resize(frame1,(600,500))
bounding_box = cv2.CascadeClassifier('/home/shivam/.local/lib/python3.6/site-packages/cv2/data/haarcascade_frontalface_default.xml')
gray_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
num_faces = bounding_box.detectMultiScale(gray_frame,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in num_faces:
cv2.rectangle(frame1, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray_frame = gray_frame[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
prediction = emotion_model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
# cv2.putText(frame1, emotion_dict[maxindex], (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
show_text[0]=maxindex
if flag1 is None:
print ("Major error!")
elif flag1:
global last_frame1
last_frame1 = frame1.copy()
pic = cv2.cvtColor(last_frame1, cv2.COLOR_BGR2RGB)
img = Image.fromarray(pic)
imgtk = ImageTk.PhotoImage(image=img)
lmain.imgtk = imgtk
lmain.configure(image=imgtk)
lmain.after(10, show_vid)
if cv2.waitKey(1) & 0xFF == ord('q'):
exit()
def show_vid2():
frame2=cv2.imread(emoji_dist[show_text[0]])
pic2=cv2.cvtColor(frame2,cv2.COLOR_BGR2RGB)
img2=Image.fromarray(frame2)
imgtk2=ImageTk.PhotoImage(image=img2)
lmain2.imgtk2=imgtk2
lmain3.configure(text=emotion_dict[show_text[0]],font=('arial',45,'bold'))
lmain2.configure(image=imgtk2)
lmain2.after(10, show_vid2)
if __name__ == '__main__':
root=tk.Tk()
img = ImageTk.PhotoImage(Image.open("logo.png"))
heading = Label(root,image=img,bg='black')
heading.pack()
heading2=Label(root,text="Photo to Emoji",pady=20, font=('arial',45,'bold'),bg='black',fg='#CDCDCD')
heading2.pack()
lmain = tk.Label(master=root,padx=50,bd=10)
lmain2 = tk.Label(master=root,bd=10)
lmain3=tk.Label(master=root,bd=10,fg="#CDCDCD",bg='black')
lmain.pack(side=LEFT)
lmain.place(x=50,y=250)
lmain3.pack()
lmain3.place(x=960,y=250)
lmain2.pack(side=RIGHT)
lmain2.place(x=900,y=350)
root.title("Photo To Emoji")
root.geometry("1400x900+100+10")
root['bg']='black'
exitbutton = Button(root, text='Quit',fg="red",command=root.destroy,font=('arial',25,'bold')).pack(side = BOTTOM)
show_vid()
show_vid2()
root.mainloop()