本篇博客主要记录一下自己的学习过程,希望能够为大家提供帮助。所含的内容包含图像对齐(人脸图像中的五个特征点的坐标);分块LBP特征提取,SVM分类。图像总共含有7种表情,最后识别的准确率并没有达到很高,能够在相同的方法达到更好的效果欢迎在评论区留言。数据集链接和自己写的报告链接,可自行查阅(侵权联系删除):
通过百度网盘分享的文件:
表情实验课题.zip
链接:https://pan.baidu.com/s/1DSkJSvF2bhF7y3d3WPRgeg
提取码:529j
复制这段内容后打开「百度网盘APP」即可获取
通过百度网盘分享的文件:表情实验报告.zip
链接:https://pan.baidu.com/s/1BvQjiX8qaiP0KOXfQvGYqQ
提取码:529j
复制这段内容后打开「百度网盘APP」即可获取
导包
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pickle
import sklearn
import seaborn
from skimage import feature as skif
from skimage.feature import hog
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from skimage import io
from PIL import Image
from sklearn.naive_bayes import GaussianNB
LBP特征提取,加入了分块
# Block grid for the LBP features: each (assumed 100x100) face image is split
# into num x num cells of wid x wid pixels.
num = 14
wid = 100 // num


def get_lbp_data(images_data, lbp_radius=2, lbp_point=10):
    """Extract block-wise uniform-LBP histograms from a list of grayscale images.

    Each image is divided into ``num`` x ``num`` cells; a 59-bin histogram of
    the uniform LBP codes is computed per cell, and the cell histograms are
    concatenated into a single feature vector per image.

    Parameters
    ----------
    images_data : sequence of 2-D arrays
        Grayscale face images; assumed at least 100x100 pixels — TODO confirm.
    lbp_radius : int
        Radius of the circular LBP neighbourhood.
    lbp_point : int
        Number of sampling points on the LBP circle.

    Returns
    -------
    numpy.ndarray of shape (n_images, num * num * 59)
        One row of concatenated, density-normalised cell histograms per image.
    """
    n_images = len(images_data)
    # NOTE(review): 59 bins matches the 'nri_uniform' pattern count for P=8;
    # with method='uniform' and P=10 the LBP codes only span 0..11, so most
    # bins stay empty — kept as-is to preserve the original feature layout.
    max_bins = 59
    hist = np.zeros((n_images, num * num * max_bins))
    for i in range(n_images):
        for row in range(num):
            for col in range(num):
                # Crop one cell; the +1 offset skips the image's first row/column.
                block = images_data[i][row * wid + 1:(row + 1) * wid + 1,
                                       col * wid + 1:(col + 1) * wid + 1]
                lbp = skif.local_binary_pattern(block, lbp_point, lbp_radius, 'uniform')
                # `normed` was deprecated and then removed from np.histogram
                # (NumPy >= 1.24); `density=True` is the supported equivalent.
                cell_hist, _ = np.histogram(lbp, density=True, bins=max_bins,
                                            range=(0, max_bins))
                index = row * num + col
                hist[i][index * max_bins:(index + 1) * max_bins] = cell_hist
    return hist
读取数据
def read_data(label2id, path='train'):
    """Read the expression images under ``path`` and return them with labels.

    The directory is expected to contain one sub-folder per class whose name
    is a key of ``label2id``; every image inside a sub-folder is assigned
    that class's integer id.

    Parameters
    ----------
    label2id : dict
        Maps class folder name (e.g. '1'..'7') to an integer label id.
    path : str
        Root directory to scan. Defaults to 'train' (original behaviour);
        pass another directory to reuse the loader.

    Returns
    -------
    (X, Y) : list of 2-D grayscale images, list of int labels.
    """
    X = []
    Y = []
    for label in os.listdir(path):  # one sub-folder per expression class
        for img_file in os.listdir(os.path.join(path, label)):
            image = cv2.imread(os.path.join(path, label, img_file))
            # cv2.imread returns BGR, so COLOR_BGR2GRAY is the correct code
            # (the original COLOR_RGB2GRAY swaps the R/B channel weights).
            # No /255 normalisation: the LBP features do not require it.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # The original called cv2.waitKey(0)/cv2.destroyAllWindows() here;
            # with no window open they are no-ops and have been removed.
            X.append(image)
            Y.append(label2id[label])
    return X, Y
def read_data2(label2id, path='test'):
    """Read the test-set expression images under ``path`` with their labels.

    Mirrors ``read_data`` but defaults to the 'test' directory: one
    sub-folder per class, folder name being a key of ``label2id``.

    Parameters
    ----------
    label2id : dict
        Maps class folder name (e.g. '1'..'7') to an integer label id.
    path : str
        Root directory to scan. Defaults to 'test' (original behaviour).

    Returns
    -------
    (X, Y) : list of 2-D grayscale images, list of int labels.
    """
    X = []
    Y = []
    for label in os.listdir(path):  # one sub-folder per expression class
        for img_file in os.listdir(os.path.join(path, label)):
            image = cv2.imread(os.path.join(path, label, img_file))
            # cv2.imread returns BGR, so COLOR_BGR2GRAY is the correct code
            # (the original COLOR_RGB2GRAY swaps the R/B channel weights).
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # Removed the no-op cv2.waitKey(0)/cv2.destroyAllWindows() calls.
            X.append(image)
            Y.append(label2id[label])
    return X, Y
# Map the expression-class folder names '1'..'7' to integer label ids 0..6.
label2id = {'1':0, '2':1, '3': 2,'4':3,'5':4,'6':5,'7':6}
# Load the training images and labels from the 'train' directory.
X, Y = read_data(label2id)
#X_features = extract_hog_features(X)
#X_features = get_lbp_data(X)
# Load the test images and labels from the 'test' directory.
X2, Y2 = read_data2(label2id)
SVM分类器
# --- SVM classification on the block-wise LBP features ---
train_hist_array = get_lbp_data(X)
test_hist_array = get_lbp_data(X2)

# NOTE: a RandomForestClassifier(n_estimators=180, random_state=0) reached
# roughly 0.87 accuracy on this feature set in earlier experiments.
# RBF-kernel SVM, one-vs-rest over the 7 expression classes.
svm = sklearn.svm.SVC(C=10, kernel='rbf', gamma=0.0998, decision_function_shape='ovr')
svm.fit(train_hist_array, Y)
Y_predict = svm.predict(test_hist_array)

# Evaluate: overall accuracy plus macro-averaged precision and recall.
acc = accuracy_score(Y2, Y_predict)
precision = precision_score(Y2, Y_predict, average='macro')
recall = recall_score(Y2, Y_predict, average='macro')
cm = confusion_matrix(Y2, Y_predict)
print(cm)
print('Acc: ', acc)
print('Precision: ', precision)
print('Recall: ', recall)

# Plot the confusion matrix as a labelled heatmap.
xtick = ['angry', 'fear', 'disgust', 'happy', 'sad', 'surprise', 'neutral']
ytick = xtick
f, ax = plt.subplots(figsize=(7, 5))
ax.tick_params(axis='y', labelsize=15)
ax.tick_params(axis='x', labelsize=15)
seaborn.set(font_scale=1.2)
plt.rc('font', family='Times New Roman', size=15)
seaborn.heatmap(cm, fmt='g', cmap='Blues', annot=True, cbar=True,
                xticklabels=xtick, yticklabels=ytick, ax=ax)
plt.title('Confusion Matrix', fontsize='x-large')
plt.show()
C:\Users\hp\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
[[ 38 0 1 17 6 0 23]
[ 2 0 0 6 5 1 4]
[ 1 0 1 16 10 0 17]
[ 2 0 1 272 19 3 14]
[ 7 0 1 43 49 1 27]
[ 0 0 1 24 6 7 7]
[ 10 0 0 39 22 2 94]]
Acc: 0.5769712140175219
Precision: 0.4156844676686868
Recall: 0.3493030605062493
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-p8xDbWFY-1664286398098)(output_7_2.png)]
from sklearn.model_selection import GridSearchCV

# Tune the RBF-SVM hyper-parameters with 5-fold cross-validated grid search.
# (A linear-kernel grid over C in [0.05, 0.8] was also tried previously.)
param_grid = [
    {'C': [10, 1, 0.1], 'gamma': [0.1, 0.01, 1], 'kernel': ['rbf']},
]
svm_model = sklearn.svm.SVC()
clf = GridSearchCV(svm_model, param_grid, cv=5)
clf.fit(train_hist_array, Y)
best_model = clf.best_estimator_
print(clf.best_params_)
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
使用人脸五点坐标对齐图像及文件相关操作
import cv2
import numpy as np
from skimage import transform as trans
import os
# Input: the original image plus its five facial landmarks.
# Output: the aligned face crop.
def process(img, landmark, align_img_size=(100, 100)):
    """Warp a face image so its five landmarks match a canonical template.

    Parameters
    ----------
    img : ndarray
        Input face image (any layout accepted by cv2.warpAffine).
    landmark : (5, 2) float array
        Detected landmark coordinates; assumed to be the same five points
        (eyes, nose tip, mouth corners) in the same order as ``src`` —
        TODO confirm against the landmark files.
    align_img_size : tuple (height, width)
        Size of the aligned output crop.

    Returns
    -------
    ndarray
        The aligned face of shape ``align_img_size``.
    """
    # Canonical five-point template (originally for a 112x96 aligned crop).
    src = np.array([
        [30.2946, 51.6963],
        [65.5318, 51.5014],
        [48.0252, 71.7366],
        [33.5493, 92.3655],
        [62.7299, 92.2041]], dtype=np.float32)
    if align_img_size[1] == 100 and align_img_size[0] == 100:
        # Shift the template horizontally to centre it in a 100x100 crop.
        src[:, 0] += 6.0
    # Estimate the similarity transform mapping the detected landmarks onto
    # the template, then warp with its 2x3 affine part.
    # (Removed the unused `M = None` / `image_size = []` initialisations.)
    tform = trans.SimilarityTransform()
    tform.estimate(landmark, src)
    M = tform.params[0:2, :]
    # Out-of-image pixels are filled with 0 (black).
    warped = cv2.warpAffine(img, M, (align_img_size[1], align_img_size[0]), borderValue=0.0)
    return warped
def align(landmark=None, orign_img=None, x=None, out_dir="processed_img"):
    """Align one face image with its five landmarks and save it to disk.

    Parameters
    ----------
    landmark : array-like
        Flattens to five (x, y) landmark points, e.g.
        [[42.667, 49.404], [59.511, 46.567], [52.596, 53.128],
         [44.939, 62.649], [61.638, 60.043]].
    orign_img : ndarray
        The original face image (e.g. from ``cv2.imread``).
    x : str
        Base name of the output file; saved as ``<out_dir>/<x>.jpg``.
    out_dir : str
        Destination directory (must already exist). Defaults to
        "processed_img", preserving the original hard-coded path.
    """
    landmark = np.array(landmark, dtype=np.float32).reshape((5, 2))
    warped = process(orign_img, landmark)
    # `process` returns a BGR image, which is what cv2.imwrite expects.
    cv2.imwrite("%s/%s.jpg" % (out_dir, x), warped)
# --- Align every image in `images` using its landmark file in `ldmk` ---
path = r"images"
path1 = r"ldmk"
files = os.listdir(path)
files1 = os.listdir(path1)

# Read the first five coordinate pairs from each landmark file;
# site[i] is the 5x2 landmark list for the i-th file.
site = []
for file1 in files1:
    location = os.path.join(path1, file1)
    with open(location, "r", encoding='utf-8') as f:
        dataset = []
        for line in f.readlines():
            dataset.append(list(map(float, line.split())))
            if len(dataset) == 5:  # only the first five lines are landmarks
                site.append(dataset)
                break
    # No explicit f.close(): the `with` block already closes the file
    # (the original's redundant close call has been removed).

# Warp each image with its landmarks; the output name is characters 6..10
# of the file name (e.g. 'train_02125.jpg' -> '02125').
# NOTE(review): this pairs site[i] with files[i], i.e. it assumes
# os.listdir returns `images` and `ldmk` in matching order — TODO confirm.
for idx, file in enumerate(files):
    img_path = os.path.join(path, file)
    name = file[6:11]
    print(name)
    align(landmark=site[idx], orign_img=cv2.imread(img_path), x=name)
import shutil  # required by shutil.copy below; was never imported originally

# --- Sort the aligned images into per-label test folders ---
# Each line of list_test_label.txt names an image and its class; characters
# 6..10 are the image id and character 16 is the one-digit label —
# NOTE(review): fixed-width parsing; TODO confirm the file layout.
path = r"list_test_label.txt"
with open(path, "r", encoding='utf-8') as f:
    lines = f.readlines()
print(lines)
for line in lines:
    name = line[6:11]
    label = line[16]
    imgpath = r'processed_img/%s.jpg' % (name)
    newpath = r'test/%s/' % (label)
    # Destination folders test/1 .. test/7 must already exist.
    shutil.copy(imgpath, newpath)