Dataset: http://vision.ucsd.edu/content/yale-face-database
Image pre-processing
step 1: convert images in gif to images in jpg
import os
from PIL import Image
import filetype
# Convert every GIF in the dataset directory to JPEG and delete the original.
images_dir = "./yalefaces/"
for file in os.listdir(images_dir):
    source_path = images_dir + file
    # Only regular files can be sniffed; sub-directories are skipped.
    if not os.path.isfile(source_path):
        continue
    # The content type is sniffed because a file's name may lack a '.gif' suffix.
    if filetype.guess_extension(source_path) != 'gif':
        continue
    stem, extension = os.path.splitext(file)
    # Replace a '.gif' suffix with '.jpg'; otherwise append '.jpg' to the full name.
    target_name = (stem if extension == '.gif' else file) + '.jpg'
    # The context manager closes the file handle before the source is removed.
    with Image.open(source_path) as gif_image:
        # JPEG cannot store palette-mode images, so anything that is not
        # already greyscale or RGB is converted first.
        if gif_image.mode not in ('L', 'RGB'):
            gif_image = gif_image.convert('RGB')
        gif_image.save(images_dir + target_name)
    os.remove(source_path)
step 2: move those images into directories named after each person
import os
import shutil
import filetype
# Move every JPEG into a sub-directory named after the part of its file name
# that precedes the first '.'.
images_dir = "./yalefaces/"
for file in os.listdir(images_dir):
    file_path = os.path.join(images_dir, file)
    # Skip directories (e.g. ones created by a previous run of this cell);
    # only regular files can be sniffed for their type.
    if not os.path.isfile(file_path):
        continue
    if filetype.guess_extension(file_path) != 'jpg':
        continue
    person_name = file.split('.')[0]
    person_dir = os.path.join(images_dir, person_name)
    # Create the per-person directory on first use.
    os.makedirs(person_dir, exist_ok=True)
    shutil.move(file_path, person_dir)
step 3: select ROI of faces from images and resize it
import os
import pickle
# Every sub-directory of the dataset directory is treated as one person.
images_dir = "./yalefaces/"
persons = [entry for entry in os.listdir(images_dir) if os.path.isdir(images_dir + entry)]

# Map each person name to its position in `persons` and persist the mapping.
persons_dict = {name: index for index, name in enumerate(persons)}
with open("persons_dict.pickle", 'wb') as handle:
    pickle.dump(persons_dict, handle)
import cv2
# Detect faces in every image of every person, crop each detection, resize it
# to 150x150 and record it together with the person's integer label.
faceCascade = cv2.CascadeClassifier("/usr/local/Cellar/opencv/4.0.1/share/opencv4/haarcascades/haarcascade_frontalface_default.xml")
# Fail early with a clear message if the cascade file could not be loaded.
if faceCascade.empty():
    raise IOError("could not load haarcascade_frontalface_default.xml; check the cascade path")
labels = []
faces = []
for person in persons:
    person_dir = os.path.join(images_dir, person)
    for file in os.listdir(person_dir):
        file_path = os.path.join(person_dir, file)
        image = cv2.imread(file_path)
        # cv2.imread returns None for files it cannot decode; skip those.
        if image is None:
            continue
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        fcs = faceCascade.detectMultiScale(
            image_gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=0,
        )
        # One image may yield zero or several detections; each becomes a sample.
        for (x, y, w, h) in fcs:
            faces.append(cv2.resize(image_gray[y:y+h, x:x+w], (150, 150)))
            labels.append(persons_dict[person])
import os
import filetype
import matplotlib.pyplot as plt
%matplotlib inline
def plot_images(images, labels, rows=11, cols=15, size=(20,20)):
    """Draw images with their labels on a rows x cols grid of grey-scale subplots.

    Args:
        images: sequence of 2-D arrays to display.
        labels: sequence of titles; labels[i] is shown above images[i].
        rows: number of grid rows.
        cols: number of grid columns.
        size: figure size passed to plt.figure(figsize=...).

    At most rows * cols items are drawn. When fewer images or labels are
    supplied, the remaining grid cells are left empty.
    """
    plt.figure(figsize=size)
    # Never index past the end of either sequence.
    count = min(rows * cols, len(images), len(labels))
    for i in range(count):
        plt.subplot(rows, cols, i + 1)
        plt.title(labels[i])
        plt.imshow(images[i], cmap=plt.cm.gray)
        # Hide the axis ticks.
        plt.xticks(())
        plt.yticks(())
# Reload the name -> index mapping from disk and invert it, so that
# persons_dict now maps an integer label back to the person's name.
with open('persons_dict.pickle','rb') as handle:
    name_to_index = pickle.load(handle)
persons_dict = {index: name for name, index in name_to_index.items()}
step 4: split faces and labels into training and test sets
import random
import numpy as np
# Draw 15 distinct sample indices at random for the test set.
ind = np.array(random.sample(range(len(labels)), 15))
labels_test = [labels[i] for i in ind]
faces_test = [faces[i] for i in ind]

# Every remaining index, in its original order, forms the training set.
held_out = set(ind.tolist())
kept = [i for i in range(len(labels)) if i not in held_out]
labels_trainning = [labels[i] for i in kept]
faces_trainning = [faces[i] for i in kept]

plot_images(faces_trainning, [persons_dict[label] for label in labels_trainning], rows=10)
Face Recognition using PCA
The data form a cloud of points in the $nd$-dimensional space. PCA computes the main axes of this cloud, allowing one to approximate any of the original points using a model with fewer than $nd$ parameters.
step 1: form the training faces matrix
$\begin{bmatrix} x_{1} & x_{2} & \dots & x_{n} \end{bmatrix}$
$x_{i}$ is image $i$ flattened into a column vector whose dimension is the number of pixel rows times the number of pixel columns.
# Flatten each training face and stack the results side by side, so that
# column i of the matrix is training image i. A single column_stack call
# avoids re-copying the growing matrix on every iteration.
faces_trainning_matrix = np.column_stack([face.reshape(-1) for face in faces_trainning])
step 2: calculate the mean
$\mu = \frac{1}{n}\sum_{i=1}^{n}x_{i}$
# Mean of all training columns, accumulated in float64.
# (The np.float alias used previously was removed in NumPy 1.24.)
average_trainning_face_vector = faces_trainning_matrix.mean(axis=1, dtype=np.float64)

# Reshape the mean vector back to image form for display.
average_trainning_face = average_trainning_face_vector.reshape(faces_trainning[0].shape).astype('uint8')
plt.imshow(average_trainning_face, cmap=plt.cm.gray)
plt.show()
step 3: form normalized training faces matrix
$X = \begin{bmatrix} x_{1}-\mu & x_{2}-\mu & \dots & x_{n}-\mu \end{bmatrix}$
# Turn the mean into a column vector so it broadcasts across all image columns.
average_trainning_face_vector = average_trainning_face_vector.reshape(-1, 1)
faces_trainning_normalized_matrix = faces_trainning_matrix - average_trainning_face_vector

# Display copies of the centred faces. The differences can be negative, and a
# direct cast to uint8 would wrap them around, so they are mapped linearly into
# 0..255 first. NOTE(review): the mapping assumes 8-bit pixel data, i.e.
# differences within [-255, 255] -- confirm against the loader.
faces_trainning_normalized = [
    ((column + 255.0) / 2.0).astype('uint8').reshape(faces_trainning[0].shape)
    for column in faces_trainning_normalized_matrix.T
]
plot_images(faces_trainning_normalized, [persons_dict[label] for label in labels_trainning], rows=10)
step 4: form covariance matrix, and calculate the principal components(eigenvectors)
$S = \frac{1}{n}XX^{T}$
Imagine we are given 400 images sized $100 \times 100$ pixels. Principal Component Analysis solves the eigenvalue problem of the covariance matrix $S = XX^{T}$, where $\mathrm{size}(X) = 10000 \times 400$ in our example. You would end up with a $10000 \times 10000$ matrix, roughly 0.8GB. Solving this problem isn't feasible, so we'll need to apply a trick. From your linear algebra lessons you know that an $M \times N$ matrix with $M > N$ can only have $N - 1$ non-zero eigenvalues. So it's possible to take the eigenvalue decomposition of $S = X^{T}X$, of size $N \times N$, instead:
$\frac{1}{n}X^{T}Xv_{i} = \lambda_{i}v_{i}$
and get the original eigenvectors of $S = XX^{T}$ with a left multiplication of the data matrix:
$\frac{1}{n}XX^{T}(Xv_{i}) = \lambda_{i}(Xv_{i})$
The resulting eigenvectors are orthogonal. To get orthonormal eigenvectors, they need to be normalized to unit length.
# Small (n x n) surrogate covariance matrix X^T X / n.
centered_faces = faces_trainning_normalized_matrix.astype('float')
covariance_matrix = np.dot(centered_faces.transpose(), centered_faces) / len(faces_trainning)

# X^T X is symmetric, so the symmetric solver applies. Unlike np.linalg.eig it
# always returns real eigenvalues and eigenvectors, which avoids the complex
# values that were later truncated with a ComplexWarning.
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

# Left-multiply by X to obtain eigenvectors of the full covariance X X^T / n.
eigenvectors = np.dot(centered_faces, eigenvectors)
$W = \begin{bmatrix} v_{1} & v_{2} & \dots & v_{k} \end{bmatrix}$
# Keep the eigenvectors belonging to the 50 largest eigenvalues. Columns are
# ordered by ascending eigenvalue (50th-largest first, largest last). One
# indexed slice replaces building the matrix column by column.
principal_components = eigenvectors[:, np.argsort(eigenvalues)[-50:]]
step 5: form eigenfaces
# Rescale all principal components jointly into 0..255 for display. Any
# imaginary part is dropped explicitly rather than implicitly during the
# uint8 cast.
components_real = np.real(principal_components)
lowest = np.min(components_real)
highest = np.max(components_real)
eigenfaces_matrix = 255 * (components_real - lowest) / (highest - lowest)

# Each column becomes one image with the same shape as a training face.
eigenfaces = [
    column.astype('uint8').reshape(faces_trainning_normalized[0].shape)
    for column in eigenfaces_matrix.T
]
# Components are stored by ascending eigenvalue; show the strongest first.
eigenfaces.reverse()
plot_images(eigenfaces, ['eigenface '+str(i) for i in range(len(eigenfaces))], rows=5, cols=10, size=(20,10))
/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:6: ComplexWarning: Casting complex values to real discards the imaginary part
step 6: normalize principal components(eigenvectors) to unit length
$v_{i} = \frac{v_{i}}{\parallel v_{i} \parallel}$
import math
# Scale every column (eigenvector) to unit Euclidean length in one vectorised
# step. np.linalg.norm is also correct for complex columns, where the previous
# math.sqrt of a plain dot product silently discarded the imaginary part.
principal_components = principal_components / np.linalg.norm(principal_components, axis=0)
/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:4: ComplexWarning: Casting complex values to real discards the imaginary part
after removing the cwd from sys.path.
step 7: projecting all training samples into the PCA subspace
$y = W^{T}(x-\mu)$
# Coordinates of every centred training face in the principal-component basis;
# column i holds the projection of training image i.
faces_trainning_PCA = principal_components.T @ faces_trainning_normalized_matrix.astype(float)
reconstruction from the PCA basis
$x = Wy + \mu$
# Reconstruct each training face from its projection: x = W y + mu.
faces_trainning_recovered_matrix = np.dot(principal_components, faces_trainning_PCA) + average_trainning_face_vector.astype('float')

# Convert each reconstructed column to a displayable image. The real part is
# taken explicitly and values are clipped to 0..255, so out-of-range
# reconstructions do not wrap around in the uint8 cast.
faces_trainning_recovered = [
    np.clip(np.real(column), 0, 255).astype('uint8').reshape(faces_trainning[0].shape)
    for column in faces_trainning_recovered_matrix.T
]
plot_images(faces_trainning_recovered, [persons_dict[label] for label in labels_trainning], rows=10)
/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:2: ComplexWarning: Casting complex values to real discards the imaginary part
step 8: projecting the query image samples into the PCA subspace
# Flatten each query face into one column of the test matrix. A single
# column_stack call avoids re-copying the growing matrix on every iteration.
faces_test_matrix = np.column_stack([face.reshape(-1) for face in faces_test])

# Centre the query faces with the training mean and project them onto the
# principal components.
faces_test_normalized_matrix = faces_test_matrix - average_trainning_face_vector
faces_test_PCA = np.dot(principal_components.transpose(), faces_test_normalized_matrix.astype('float'))
step 9: finding the nearest neighbor between the projected training images and the projected query image
def predict(faces_trainning_PCA, face_test_PCA, labels=None):
    """Return the label of the training projection nearest to a query projection.

    Args:
        faces_trainning_PCA: 2-D array whose column i is the projection of
            training sample i.
        face_test_PCA: projection of one query sample, with as many entries
            as faces_trainning_PCA has rows.
        labels: sequence giving the label of each training column. Defaults
            to the module-level ``labels_trainning``.

    Returns:
        The label of the training column with the smallest Euclidean distance
        to the query (the first such column on ties), or -1 when there are no
        training columns.
    """
    if labels is None:
        labels = labels_trainning
    train = np.asarray(faces_trainning_PCA)
    if train.shape[1] == 0:
        return -1
    # A column-shaped query broadcasts against every training column at once.
    query = np.asarray(face_test_PCA).reshape(-1, 1)
    # The norm is real-valued even for complex projections, so the comparison
    # below never has to order complex numbers.
    distances = np.linalg.norm(train - query, axis=0)
    return labels[int(np.argmin(distances))]
# Classify every query projection; each row of the transpose is one column
# of faces_test_PCA.
labels_predict = [
    predict(faces_trainning_PCA, query_projection)
    for query_projection in faces_test_PCA.T
]
step 10: check the predicted results
# Show the query faces twice: first titled with the true names, then with the
# predicted names.
for shown_labels in (labels_test, labels_predict):
    plot_images(faces_test, [persons_dict[label] for label in shown_labels], rows=1, size=(20,5))
Accuracy: 11/15