Preface
This project has been granted a software copyright registration and is open-sourced on my GitHub. Feel free to use it as a reference for a course project or for your own study.
Environment: see the requirements.txt file in the GitHub repository. Make sure PyCharm and PyQt5 are installed; the repository includes the UI files.
Note: when uploading to GitHub I manually translated all the Chinese comments into English (to make it a bit more international), so they should be easy to follow. For the model files, see the README in the repository, which gives the download links.
Features
- Face recognition: identifies the person and draws a box around the face.
- Expression recognition: a pre-trained model that recognizes 7 expressions: angry, disgust, fear, happy, sad, surprise, and neutral.
- Real-time recognition from a webcam.
- Real-time recognition of a captured region at the top-left corner of the screen.
- Photo (screenshot) capture (the "咔一张" button in the UI).
Runtime framework
(framework diagram omitted in this text version)
Demo
(demo screenshots omitted in this text version)
GitHub link: Yangxinyee/Real-timeFaceMonitoringAndExpressionRecognitionSystemBasedOnCNN: Real-time face monitoring and expression recognition system based on CNN (github.com)
If you find it helpful, please give the repository a star on GitHub!
Face recognition model
The model is written with TensorFlow (Keras) and covers both the network definition and training.
import random
import os
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import load_model
from keras import backend as K
import numpy as np
from load_dataset import load_dataset, resize_image
IMAGE_SIZE = 64
class Dataset:
def __init__(self, path_name):
# training set
self.train_images = None
self.train_labels = None
# Validation set
self.valid_images = None
self.valid_labels = None
# test set
self.test_images = None
self.test_labels = None
# Data set load path
self.path_name = path_name
        # Number of classes (one sub-folder per person under path_name)
self.user_num = len(os.listdir(path_name))
# Current dimension order
self.input_shape = None
# Load the data set, divide the data set according to the principle of cross-validation,
# and carry out related preprocessing
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
img_channels=3):
# data category
nb_classes = self.user_num
# Load the data set into memory
images, labels = load_dataset(self.path_name)
train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3,
random_state=random.randint(0, 100))
        if K.image_data_format() == 'channels_first':
train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
# test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
self.input_shape = (img_channels, img_rows, img_cols)
else:
train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
# test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
self.input_shape = (img_rows, img_cols, img_channels)
        # Print the number of training, validation, and test samples
print(train_images.shape[0], 'train samples')
print(valid_images.shape[0], 'valid samples')
        # print(test_images.shape[0], 'test samples')
        """Our model uses categorical_crossentropy as its loss function,
        so the category labels must be one-hot encoded
        according to the number of categories nb_classes;
        after the transformation each label becomes an nb_classes-dimensional vector"""
train_labels = np_utils.to_categorical(train_labels, nb_classes)
valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
# test_labels = np_utils.to_categorical(test_labels, nb_classes)
        # Convert pixel values to float in preparation for normalization
train_images = train_images.astype('float32')
valid_images = valid_images.astype('float32')
# test_images = test_images.astype('float32')
# The value of each pixel of the image is normalized to the range of 0~1
train_images /= 255
valid_images /= 255
# test_images /= 255
self.train_images = train_images
self.valid_images = valid_images
# self.test_images = test_images
self.train_labels = train_labels
self.valid_labels = valid_labels
# self.test_labels = test_labels
# CNN network model class
class Model:
def __init__(self):
self.model = None
# modelling
def build_model(self, dataset, nb_classes=4):
self.model = Sequential()
self.model.add(Convolution2D(32, 3, 3, border_mode='same',
input_shape=dataset.input_shape))
self.model.add(Activation('relu'))
self.model.add(Convolution2D(32, 3, 3))
self.model.add(Activation('relu'))
self.model.add(MaxPooling2D(pool_size=(2, 2)))
self.model.add(Dropout(0.25))
self.model.add(Convolution2D(64, 3, 3, border_mode='same'))
self.model.add(Activation('relu'))
self.model.add(Convolution2D(64, 3, 3))
self.model.add(Activation('relu'))
self.model.add(MaxPooling2D(pool_size=(2, 2)))
self.model.add(Dropout(0.25))
self.model.add(Flatten())
self.model.add(Dense(512))
self.model.add(Activation('relu'))
self.model.add(Dropout(0.5))
self.model.add(Dense(nb_classes))
self.model.add(Activation('softmax'))
# Output model summary
self.model.summary()
# training model
def train(self, dataset, batch_size=20, nb_epoch=10, data_augmentation=True):
        sgd = SGD(lr=0.01, decay=1e-6,
                  momentum=0.9, nesterov=True)  # Train with SGD + momentum; first create the optimizer object
self.model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy']) # Complete the actual model configuration
if not data_augmentation:
self.model.fit(dataset.train_images,
dataset.train_labels,
batch_size=batch_size,
nb_epoch=nb_epoch,
validation_data=(dataset.valid_images, dataset.valid_labels),
shuffle=True)
# Use real-time data augmentation
else:
datagen = ImageDataGenerator(
featurewise_center=False, # Whether to decentralize input data (mean 0)
samplewise_center=False, # Whether to set the mean of each sample of input data to 0
featurewise_std_normalization=False, # Data normalization (input data divided by the standard deviation of the data set)
samplewise_std_normalization=False, # Whether to divide each sample data by its own standard deviation
zca_whitening=False, # Whether to apply ZCA whitening to input data
                rotation_range=20,  # Degree range for random rotations during augmentation (0~180)
                width_shift_range=0.2,  # Range for random horizontal shifts during augmentation
                # (a fraction of the image width, a float between 0 and 1)
                height_shift_range=0.2,  # Same as above, but vertical
horizontal_flip=True, # Whether to perform a random horizontal flip
vertical_flip=False) # Whether to perform a random vertical flip
            # Compute statistics over the whole training set, needed for featurewise normalization, ZCA whitening, etc.
datagen.fit(dataset.train_images)
# Use the generator to start training the model
self.model.fit_generator(datagen.flow(dataset.train_images, dataset.train_labels,
batch_size=batch_size),
samples_per_epoch=dataset.train_images.shape[0],
nb_epoch=nb_epoch,
validation_data=(dataset.valid_images, dataset.valid_labels))
MODEL_PATH = './model/aggregate.face.model1.h5'
def save_model(self, file_path=MODEL_PATH):
self.model.save(file_path)
def load_model(self, file_path=MODEL_PATH):
self.model = load_model(file_path)
    def evaluate(self, dataset):
        # Note: dataset.test_images is only populated if the commented-out test split in Dataset.load is enabled
        score = self.model.evaluate(dataset.test_images, dataset.test_labels, verbose=1)
print("%s: %.2f%%" % (self.model.metrics_names[1], score[1] * 100))
# Recognize faces
def face_predict(self, image):
        if K.image_data_format() == 'channels_first' and image.shape != (1, 3, IMAGE_SIZE, IMAGE_SIZE):
            image = resize_image(image)  # The size must match the training set: IMAGE_SIZE x IMAGE_SIZE
            image = image.reshape((1, 3, IMAGE_SIZE, IMAGE_SIZE))  # Unlike training, prediction is done on a single image
        elif K.image_data_format() == 'channels_last' and image.shape != (1, IMAGE_SIZE, IMAGE_SIZE, 3):
            image = resize_image(image)
            image = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
image = image.astype('float32')
image /= 255
# Gives the probability that the input belongs to each category
        pred = self.model.predict(image)
        # Accept the prediction only when the top-class probability is high enough
        if np.max(pred[0]) >= 0.9:
            result = np.argmax(pred, axis=1)
            print('result:', result)
            # Return the predicted category
            return result[0]
        else:
            print('result: none')
            return -1
if __name__ == '__main__':
user_num = len(os.listdir('./data/'))
dataset = Dataset('./data/')
dataset.load()
model = Model()
model.build_model(dataset, nb_classes=user_num)
model.train(dataset)
    model.save_model(file_path='./model/aggregate.face.model1.h5')
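The load_dataset module imported above is not listed in this post (it lives in the repository). For reference, here is a minimal sketch of what load_dataset and resize_image are assumed to do, inferred from how they are used: one sub-folder per person under the data path, each image padded to a square and resized to IMAGE_SIZE x IMAGE_SIZE, with labels taken from the folder index. Treat this as an assumption, not the repository's exact code.
# load_dataset.py -- a minimal sketch inferred from usage; the real file in the repository may differ
import os
import cv2
import numpy as np
IMAGE_SIZE = 64
def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    # Pad the image to a square with black borders, then resize to the target size
    h, w = image.shape[:2]
    longest = max(h, w)
    top = (longest - h) // 2
    bottom = longest - h - top
    left = (longest - w) // 2
    right = longest - w - left
    image = cv2.copyMakeBorder(image, top, bottom, left, right,
                               cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return cv2.resize(image, (width, height))
def load_dataset(path_name):
    # One sub-folder per person; the folder's position in os.listdir is its class label,
    # matching how run_me.py maps predicted indices back to names
    images, labels = [], []
    for label, person in enumerate(os.listdir(path_name)):
        person_dir = os.path.join(path_name, person)
        for file_name in os.listdir(person_dir):
            img = cv2.imread(os.path.join(person_dir, file_name))
            if img is None:
                continue
            images.append(resize_image(img))
            labels.append(label)
    return np.array(images), np.array(labels)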
Expression recognition model
The model is written with PyTorch. The expression dataset is FER2013; a quick search on Kaggle will find it.
Expression recognition CNN structure:
import torch.nn as nn
def gaussian_weights_init(m):
classname = m.__class__.__name__
    # find() returns -1 when the substring is absent, so != -1 means the class name contains 'Conv'
if classname.find('Conv') != -1:
m.weight.data.normal_(0.0, 0.04)
class FaceCNN(nn.Module):
# Initialize the network structure
def __init__(self):
super(FaceCNN, self).__init__()
# First convolution, pooling
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1), # convolutional layer
nn.BatchNorm2d(num_features=64), # normalization
nn.RReLU(inplace=True), # activation function
nn.MaxPool2d(kernel_size=2, stride=2), # Maximum pooling
)
# Second convolution, pooling
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=128),
nn.RReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
# The third convolution, pooling
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=256),
nn.RReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
# parameter initialization
self.conv1.apply(gaussian_weights_init)
self.conv2.apply(gaussian_weights_init)
self.conv3.apply(gaussian_weights_init)
# Fully connected layer
self.fc = nn.Sequential(
nn.Dropout(p=0.2),
nn.Linear(in_features=256 * 6 * 6, out_features=4096),
nn.RReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Linear(in_features=4096, out_features=1024),
nn.RReLU(inplace=True),
nn.Linear(in_features=1024, out_features=256),
nn.RReLU(inplace=True),
nn.Linear(in_features=256, out_features=7),
)
# Forward propagation
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
# flatten
x = x.view(x.shape[0], -1)
y = self.fc(x)
return y
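The post only lists the network; the FER2013 training code is in the repository. As a rough illustration, a training loop for FaceCNN could look like the sketch below. It assumes train_dataset is a PyTorch Dataset yielding (1, 48, 48) float tensors with integer labels 0-6; that loader, and the hyperparameters, are placeholders rather than the repository's actual training script.
import torch
from torch.utils.data import DataLoader
def train(train_dataset, epochs=20, batch_size=128, lr=0.01):
    # Assumption: train_dataset yields (1, 48, 48) grayscale tensors and labels 0-6 (FER2013)
    loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    model = FaceCNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                momentum=0.9, weight_decay=1e-6)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in loader:
            optimizer.zero_grad()
            outputs = model(images)            # forward pass
            loss = criterion(outputs, labels)  # cross-entropy over the 7 expression classes
            loss.backward()                    # backward pass
            optimizer.step()
            running_loss += loss.item()
        print('epoch %d, loss %.4f' % (epoch + 1, running_loss / len(loader)))
    # Save the whole model object, which is how run_me.py loads it with torch.load
    torch.save(model, './model/model_cnn.pkl')
    return model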
Data collection
The following script collects face images from your computer's camera.
"""-----------------------------------------
1. Collect the face data set
Collect 10,000 face images of myself,
using dlib to detect the faces;
it is slower than OpenCV's detector,
but the detection quality is better.
Size: 64*64
-----------------------------------------"""
import cv2
import dlib
import os
import random
faces_add_path = './data/'
size = 64
""" Change image parameters: brightness and contrast """
def img_change(img, light=1, bias=0):
width = img.shape[1]
height = img.shape[0]
for i in range(0, width):
for j in range(0, height):
for k in range(3):
tmp = int(img[j, i, k]*light + bias)
if tmp > 255:
tmp = 255
elif tmp < 0:
tmp = 0
img[j,i,k] = tmp
return img
"""Feature extractor :dlib comes with frontal_face_detector"""
detector = dlib.get_frontal_face_detector()
cap = cv2.VideoCapture(0)
num = 1
while True:
    print("Collect new faces? (y or n)")
if input() == 'y':
add_user_name = input("Please enter your name:")
print("Look at the camera.")
faces_add_path = faces_add_path + add_user_name
if not os.path.exists(faces_add_path):
os.makedirs(faces_add_path)
while (num <= 10000):
            print('Processing picture %s' % num)
success, img = cap.read()
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
""" Face detection using Feature Extractor """
dets = detector(gray_img, 1)
"""-------------------------------------------------------------------------------------------
The enumerate function is used to traverse the elements in the sequence and their subscripts.
i is the face number and d is the element corresponding to i.
left: The distance between the left side of the face and the left edge of the picture;
right: The distance between the right side of the face and the left edge of the image
top: the distance between the upper part of the face and the upper part of the image;
bottom: The distance between the bottom of the face and the top border of the picture
------------------------------------------------------------------------------------------------"""
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = img[x1:y1, x2:y2]
""" Adjust the contrast and brightness of the picture,
the contrast and brightness values are random numbers,
so as to increase the diversity of the sample """
face = img_change(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
face = cv2.resize(face, (size, size))
cv2.imshow('image', face)
cv2.imwrite(faces_add_path + '/' + str(num) + '.jpg', face)
num += 1
key = cv2.waitKey(30)
if key == 27:
break
else:
print('Finished!')
break
    else:
print("No collection, program over")
break
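One practical note: img_change above adjusts pixels one at a time in pure Python, which is slow when collecting 10,000 images. An equivalent vectorized version with NumPy (a sketch; results match up to float-vs-integer truncation) would be:
import numpy as np
def img_change_fast(img, light=1, bias=0):
    # Same brightness/contrast adjustment as img_change, applied to the whole array at once
    out = img.astype(np.float32) * light + bias
    return np.clip(out, 0, 255).astype(np.uint8)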
The following applies the same processing to images from the Yale face database. In fact an extra data set is not strictly required here, so this step can be skipped.
"""---------------------------------------------------------------
2. Collect other face data sets
Options include the Yale face database (Yale University),
the ORL face database (Cambridge University),
the FERET face database (US Department of Defense), and so on.
This system uses the face data set downloaded from: http://vis-www.cs.umass.edu/lfw/lfw.tgz
First put the downloaded photo set in the img_source directory,
then use dlib to batch-detect the face region in each image
and save the crops to the target directory faces_other.
size: 64*64
----------------------------------------------------------------"""
# -*- coding: utf-8 -*-
import sys
import cv2
import os
import dlib
source_path = './img_source'
faces_other_path = './data/faces_other'
size = 64
if not os.path.exists(faces_other_path):
os.makedirs(faces_other_path)
"""Feature extractor :dlib comes with frontal_face_detector"""
detector = dlib.get_frontal_face_detector()
num = 1
for (path, dirnames, filenames) in os.walk(source_path):
for filename in filenames:
if filename.endswith('.jpg'):
            print('Processing picture %s' % num)
img_path = path+'/'+filename
img = cv2.imread(img_path)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
"""Use detector for face detection dets as returned result """
dets = detector(gray_img, 1)
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = img[x1:y1,x2:y2]
face = cv2.resize(face, (size,size)) # Resize the picture
cv2.imshow('image',face)
cv2.imwrite(faces_other_path+'/'+str(num)+'.jpg', face) #save
num += 1
key = cv2.waitKey(30)
if key == 27:
sys.exit(0)
Main program integrating all features
The following is the run_me.py entry file in the GitHub repository.
from PyQt5 import uic
from PyQt5.QtWidgets import QApplication
from PyQt5.QtGui import QIcon
import win32gui,win32api,win32con
import sys
import os
from faces_train import Model
import cv2
import dlib
import torch
import torch.nn as nn
import numpy as np
from statistics import mode
from PIL import ImageGrab
# Global increment n used to name the screenshot file
n = 0
flag = 0
def gaussian_weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
m.weight.data.normal_(0.0, 0.04)
class FaceCNN(nn.Module):
def __init__(self):
super(FaceCNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=64),
nn.RReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=128),
nn.RReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=256),
nn.RReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.conv1.apply(gaussian_weights_init)
self.conv2.apply(gaussian_weights_init)
self.conv3.apply(gaussian_weights_init)
self.fc = nn.Sequential(
nn.Dropout(p=0.2),
nn.Linear(in_features=256 * 6 * 6, out_features=4096),
nn.RReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Linear(in_features=4096, out_features=1024),
nn.RReLU(inplace=True),
nn.Linear(in_features=1024, out_features=256),
nn.RReLU(inplace=True),
nn.Linear(in_features=256, out_features=7),
)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = x.view(x.shape[0], -1)
y = self.fc(x)
return y
# UI framework's classes
class Face:
def __init__(self):
# Load the UI definition from the file
self.ui = uic.loadUi("./ui/rec.ui")
        self.ui.setWindowTitle("face recognition system")  # Set the window title
self.ui.setFixedSize(515, 150)
self.ui.Cam_Start.clicked.connect(self.cam_rec)
self.ui.Picture.clicked.connect(self.take_picture)
self.ui.Screen_Start.clicked.connect(self.screen_rec)
def cam_rec(self):
def preprocess_input(images):
images = images / 255.0
return images
classification_model_path = './model/model_cnn.pkl'
# Load the expression recognition model
emotion_classifier = torch.load(classification_model_path)
frame_window = 10
        # Expression labels
        emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
emotion_window = []
if len(sys.argv) != 1:
print("Usage:%s camera_id\r\n" % (sys.argv[0]))
sys.exit(0)
# Loading model
model = Model()
model.load_model(file_path='./model/aggregate.face.model.h5')
# The color of the rectangular border that frames the face
color = (0, 255, 0)
# Captures a live video stream from a specified camera
cap = cv2.VideoCapture(0)
detector = dlib.get_frontal_face_detector()
while True:
# Capture a live video stream from a specified camera
ret, frame = cap.read() # Read a frame of video
if ret is True:
# Image graying reduces computational complexity
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
else:
continue
dets = detector(frame_gray, 1)
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
""" Face size 64*64"""
face = frame[x1:y1, x2:y2]
face = cv2.resize(face, (64, 64))
faceID = model.face_predict(face)
cv2.rectangle(frame, (x2 - 10, x1 - 10), (y2 + 10, y1 + 10), color, thickness=2)
for i in range(len(os.listdir('./data/'))):
if i == faceID:
cv2.putText(frame, os.listdir('./data/')[i],
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
if faceID == -1:
cv2.putText(frame, 'faces_other',
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
image_ = frame_gray[x1: y1, x2: y2]
                face = cv2.resize(image_, (48, 48))  # the expression model expects 48x48 grayscale input
face = np.expand_dims(face, 0)
face = np.expand_dims(face, 0)
face = preprocess_input(face)
new_face = torch.from_numpy(face)
new_new_face = new_face.float().requires_grad_(False)
emotion_arg = np.argmax(emotion_classifier.forward(new_new_face).detach().numpy())
emotion = emotion_labels[emotion_arg]
emotion_window.append(emotion)
if len(emotion_window) >= frame_window:
emotion_window.pop(0)
try:
emotion_mode = mode(emotion_window)
except:
continue
cv2.putText(frame, emotion_mode, (x2, x1 - 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
cv2.imshow("Real time monitoring system", frame)
# Wait 10 milliseconds to see if there is a key input
k = cv2.waitKey(10)
# If q is entered, the loop exits
if k & 0xFF == ord('q'):
break
# Release the camera and destroy all Windows
cap.release()
cv2.destroyAllWindows()
def screen_rec(self):
        def preprocess_input(images):
            """Preprocess input by scaling pixel values to the range [0, 1].
            # Arguments: images or image of any shape
            # Returns: the scaled images or image
            """
images = images / 255.0
return images
classification_model_path = './model/model_cnn.pkl'
emotion_classifier = torch.load(classification_model_path)
frame_window = 10
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
emotion_window = []
if len(sys.argv) != 1:
print("Usage:%s camera_id\r\n" % (sys.argv[0]))
sys.exit(0)
model = Model()
model.load_model(file_path='./model/aggregate.face.model.h5')
color = (0, 255, 0)
        BOX = (0, 40, 1000, 640)  # Screenshot region (left, top, right, bottom), near the top-left of the screen
detector = dlib.get_frontal_face_detector()
while True:
            frame = np.array(ImageGrab.grab(bbox=BOX))  # Grab the screen region as one frame (ImageGrab returns an RGB image)
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dets = detector(frame_gray, 1)
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = frame[x1:y1, x2:y2]
face = cv2.resize(face, (64, 64))
faceID = model.face_predict(face)
cv2.rectangle(frame, (x2 - 10, x1 - 10), (y2 + 10, y1 + 10), color, thickness=2)
for i in range(len(os.listdir('./data/'))):
if i == faceID:
cv2.putText(frame, os.listdir('./data/')[i],
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
if faceID == -1:
cv2.putText(frame, 'faces_other',
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
image_ = frame_gray[x1: y1, x2: y2]
face = cv2.resize(image_, (48, 48))
face = np.expand_dims(face, 0)
face = np.expand_dims(face, 0)
face = preprocess_input(face)
new_face = torch.from_numpy(face)
new_new_face = new_face.float().requires_grad_(False)
emotion_arg = np.argmax(emotion_classifier.forward(new_new_face).detach().numpy())
emotion = emotion_labels[emotion_arg]
emotion_window.append(emotion)
if len(emotion_window) >= frame_window:
emotion_window.pop(0)
try:
emotion_mode = mode(emotion_window)
except:
continue
cv2.putText(frame, emotion_mode, (x2, x1 - 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # ImageGrab frames are RGB; convert to BGR so cv2.imshow shows correct colors
cv2.imshow("Real time monitoring system", frame)
# Wait 10 milliseconds to see if there is a key input
k = cv2.waitKey(10)
# If q is entered, the loop exits
if k & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
    # Shared routine for both recognition modes, dispatched via the global flag (an alternative to cam_rec/screen_rec; not wired to the UI buttons)
def rec(self):
def preprocess_input(images):
images = images / 255.0
return images
classification_model_path = './model/model_cnn.pkl'
emotion_classifier = torch.load(classification_model_path)
frame_window = 1
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
emotion_window = []
if len(sys.argv) != 1:
print("Usage:%s camera_id\r\n" % (sys.argv[0]))
sys.exit(0)
model = Model()
model.load_model(file_path='./model/aggregate.face.model.h5')
color = (0, 255, 0)
if flag == 1:
cap = cv2.VideoCapture(0)
detector = dlib.get_frontal_face_detector()
while True:
                # Read a frame from the camera
ret, frame = cap.read()
if ret is True:
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
else:
continue
dets = detector(frame_gray, 1)
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = frame[x1:y1, x2:y2]
face = cv2.resize(face, (64, 64))
faceID = model.face_predict(face)
cv2.rectangle(frame, (x2 - 10, x1 - 10), (y2 + 10, y1 + 10), color, thickness=2)
for i in range(len(os.listdir('./data/'))):
if i == faceID:
cv2.putText(frame, os.listdir('./data/')[i],
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
if faceID == -1:
cv2.putText(frame, 'faces_other',
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
image_ = frame_gray[x1: y1, x2: y2]
face = cv2.resize(image_, (48, 48))
face = np.expand_dims(face, 0)
face = np.expand_dims(face, 0)
face = preprocess_input(face)
new_face = torch.from_numpy(face)
new_new_face = new_face.float().requires_grad_(False)
emotion_arg = np.argmax(emotion_classifier.forward(new_new_face).detach().numpy())
emotion = emotion_labels[emotion_arg]
emotion_window.append(emotion)
if len(emotion_window) >= frame_window:
emotion_window.pop(0)
try:
emotion_mode = mode(emotion_window)
except:
continue
cv2.putText(frame, emotion_mode, (x2, x1 - 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
cv2.imshow("Real time monitoring system", frame)
k = cv2.waitKey(10)
if k & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
print('1')
elif flag == 2:
# Specifies the capture desktop image window size
            BOX = (0, 40, 1100, 640)  # Screenshot region (left, top, right, bottom), near the top-left of the screen
detector = dlib.get_frontal_face_detector()
while True:
                frame = np.array(ImageGrab.grab(bbox=BOX))  # Grab the screen region as one frame (ImageGrab returns an RGB image)
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
dets = detector(frame_gray, 1)
for i, d in enumerate(dets):
x1 = d.top() if d.top() > 0 else 0
y1 = d.bottom() if d.bottom() > 0 else 0
x2 = d.left() if d.left() > 0 else 0
y2 = d.right() if d.right() > 0 else 0
face = frame[x1:y1, x2:y2]
face = cv2.resize(face, (64, 64))
faceID = model.face_predict(face)
cv2.rectangle(frame, (x2 - 10, x1 - 10), (y2 + 10, y1 + 10), color, thickness=2)
for i in range(len(os.listdir('./data/'))):
if i == faceID:
cv2.putText(frame, os.listdir('./data/')[i],
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
if faceID == -1:
cv2.putText(frame, 'faces_other',
(x2 + 30, x1 + 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 0, 255),
2)
image_ = frame_gray[x1: y1, x2: y2]
face = cv2.resize(image_, (48, 48))
face = np.expand_dims(face, 0)
face = np.expand_dims(face, 0)
face = preprocess_input(face)
new_face = torch.from_numpy(face)
new_new_face = new_face.float().requires_grad_(False)
emotion_arg = np.argmax(emotion_classifier.forward(new_new_face).detach().numpy())
emotion = emotion_labels[emotion_arg]
emotion_window.append(emotion)
if len(emotion_window) >= frame_window:
emotion_window.pop(0)
try:
emotion_mode = mode(emotion_window)
except:
continue
cv2.putText(frame, emotion_mode, (x2, x1 - 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # ImageGrab frames are RGB; convert to BGR so cv2.imshow shows correct colors
cv2.imshow("Real time monitoring system", frame)
k = cv2.waitKey(10)
if k & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
print('2')
    # Camera mode: set the flag to 1 and run the shared routine
def cam_flag(self):
global flag
flag = 1
Face.rec(self)
    # Screen mode: set the flag to 2 and run the shared routine
def screen_flag(self):
global flag
flag = 2
Face.rec(self)
    # Take a picture: capture the recognition window when the button is clicked
def take_picture(self):
global n
hwnd = win32gui.FindWindow(None, 'Real time monitoring system')
screen = QApplication.primaryScreen()
img = screen.grabWindow(hwnd).toImage()
img.save(str(n)+"faces_rec.jpg")
win32api.MessageBox(0, 'Photo taken successfully, please close the program to view them!', 'GOOD!', win32con.MB_DEFAULT_DESKTOP_ONLY)
n += 1
# UI main function
if __name__ == '__main__':
App = QApplication(sys.argv)
App.setWindowIcon(QIcon('logo.png'))
win = Face()
win.ui.show()
sys.exit(App.exec_())
GitHub link: see the repository link above.
If you find this project helpful, please give it a star on GitHub!