【机器学习】PCA主成分分析与人脸识别的简单实现_主成分分析算法在人脸识别中的应用代码-CSDN博客

本文链接：https://blog.csdn.net/m0_63567560/article/details/135349664

PCA简介

PCA（主成分分析）是一种主流的线性降维算法。它以“最小重构误差”为目标，通过投影把数据映射到低维子空间，用数据中最主要的信息来表达（代替）原数据，从而达到降维的目的。

特征维度约简

将高维特征向量映射到低维子空间中

典型的高维特征

基因数据

人脸数据

特征维度约简的目的

可视化：在 2D 或 3D 空间中的可视化

维度约简：高效的存储与检索

噪声消除：提升分类或识别精度

特征约简的方法

主成分分析（PCA）

基本思路

① 将原始数据组织成矩阵，每个样本（例如一张拉平成一维的人脸图像）作为一行

② 计算数据均值并对数据中心化

③ 计算协方差矩阵（散度矩阵）

④ 分解协方差矩阵得到按特征值从大到小排序的特征向量（也可用SVD分解）

⑤ 取出前k个特征向量组成投影矩阵，将原数据投影到这些方向上，实现由原本的n维数据降到k维

代码实现

import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
 
def trainFaceMat():
    """Load the training images as a matrix with one flattened image per row.

    Reads images 1-7 of each of the 40 subjects from
    ``./att_faces/s<i>/<j>.pgm`` as 8-bit single-channel grayscale.

    Returns:
        numpy.ndarray: 2-D array with one row per image (40 * 7 = 280 rows).
        Each row is the image flattened to 1-D, e.g. 10304 values for a
        112x92 image.

    Raises:
        FileNotFoundError: if an image file is missing or cannot be decoded.
    """
    path = "./att_faces/s"
    rows = []
    for i in range(1, 41):
        for j in range(1, 8):
            imgPath = path + str(i) + "/" + str(j) + ".pgm"
            # flag 0 -> read as grayscale (8-bit, 1 channel)
            img = cv2.imread(imgPath, 0)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than much later.
            if img is None:
                raise FileNotFoundError("cannot read image: " + imgPath)
            # Flatten the 2-D image into a single row vector.
            rows.append(img.reshape(-1))
    return np.array(rows)
 
def testFaceMat():
    """Load the test images as a matrix with one flattened image per row.

    Reads images 8-10 of each of the 40 subjects from
    ``./att_faces/s<i>/<j>.pgm`` as 8-bit single-channel grayscale. These are
    the images held out from training.

    Returns:
        numpy.ndarray: 2-D array with one row per image (40 * 3 = 120 rows).
        Each row is the image flattened to 1-D, e.g. 10304 values for a
        112x92 image.

    Raises:
        FileNotFoundError: if an image file is missing or cannot be decoded.
    """
    path = "./att_faces/s"
    rows = []
    for i in range(1, 41):
        for j in range(8, 11):
            imgPath = path + str(i) + "/" + str(j) + ".pgm"
            # flag 0 -> read as grayscale (8-bit, 1 channel)
            img = cv2.imread(imgPath, 0)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than much later.
            if img is None:
                raise FileNotFoundError("cannot read image: " + imgPath)
            # Flatten the 2-D image into a single row vector.
            rows.append(img.reshape(-1))
    return np.array(rows)
 
def meanFaceMat():
    """Return the mean training face as a single-row matrix.

    Averages the matrix returned by ``trainFaceMat()`` over its rows (axis 0)
    and keeps the reduced axis, so the result has shape ``(1, n_pixels)`` and
    broadcasts against a ``(n_samples, n_pixels)`` matrix.
    """
    # keepdims=True keeps the averaged axis as a length-1 leading dimension.
    return np.mean(trainFaceMat(), axis=0, keepdims=True)
 
def normTrainFaceMat():
    """Return the training matrix with the mean face subtracted from each row.

    The ``(1, n_pixels)`` mean from ``meanFaceMat()`` is broadcast across the
    rows of the matrix from ``trainFaceMat()``; the result has the same shape
    as the training matrix.
    """
    # Element-wise difference; the single mean row broadcasts over all samples.
    return np.subtract(trainFaceMat(), meanFaceMat())
 
def eigenface(k=100):
    """Build the eigenface basis and project the training samples onto it.

    Args:
        k: number of leading eigenvectors (largest eigenvalues) to keep.
            Defaults to 100.

    Returns:
        tuple: ``(eigenfaces, eigen_train_sample)`` where ``eigenfaces`` is the
        projection matrix with one column per kept component and
        ``eigen_train_sample`` is the centred training matrix multiplied by
        that projection matrix (one row per training sample).
    """
    # Load and centre the training data once; every call to
    # normTrainFaceMat() re-reads all training images from disk.
    centered = normTrainFaceMat()

    # np.cov treats each ROW as a variable by default, so for an
    # (n_samples, n_pixels) input this is the small n_samples x n_samples
    # matrix rather than the n_pixels x n_pixels one.
    # NOTE(review): np.cov also subtracts each row's own mean before forming
    # the product, so this is not exactly centered @ centered.T -- confirm
    # whether that extra centring is intended.
    covariance = np.cov(centered)

    # The covariance matrix is symmetric: eigh guarantees real eigenvalues and
    # eigenvectors, whereas the general-purpose eig may return complex output
    # caused by rounding error.
    eigenvalue, featurevector = np.linalg.eigh(covariance)

    # Indices of the k largest eigenvalues, in descending order.
    order = np.argsort(eigenvalue)[::-1][:k]
    topk_evecs = featurevector[:, order]

    # Map the small-matrix eigenvectors back to pixel space: the eigenfaces.
    eigenfaces = np.dot(np.transpose(centered), topk_evecs)
    # Projection of the training samples into eigenface space.
    eigen_train_sample = np.dot(centered, eigenfaces)
    return eigenfaces, eigen_train_sample
 
def prediction(eigen_test_sample, eigen_train_sample, samples_per_person=7):
    """Classify one projected test sample by its nearest training projection.

    The Euclidean distance from ``eigen_test_sample`` to every row of
    ``eigen_train_sample`` is computed; the row with the smallest distance
    wins (the first one on ties) and its row index is mapped to a person
    index by integer division.

    Args:
        eigen_test_sample: 1-D projection of a single test image.
        eigen_train_sample: 2-D array, one projected training image per row,
            with the rows grouped by person.
        samples_per_person: number of consecutive rows belonging to each
            person. Defaults to 7.

    Returns:
        float: zero-based person index of the nearest training sample.

    Raises:
        ValueError: if ``eigen_train_sample`` has no rows.
    """
    train = np.asarray(eigen_train_sample)
    if train.shape[0] == 0:
        raise ValueError("eigen_train_sample is empty")
    # Distance to every training row, including row 0.
    distances = np.linalg.norm(train - np.ravel(eigen_test_sample), axis=1)
    # argmin returns the first minimum and has no upper bound on distance.
    nearest = int(np.argmin(distances))
    # Kept as float to match the value callers previously received.
    return float(nearest // samples_per_person)
 
def readimagearray():
    """Read every image of the dataset as a 2-D grayscale array.

    Iterates subjects 1-40 and images 1-10 under ``./att_faces/s<i>/<j>.pgm``,
    loads each file with OpenCV's default read mode and converts it from BGR
    to grayscale. Images are kept 2-D (not flattened).

    Returns:
        numpy.ndarray: the images stacked in subject-major order, i.e. the
        image ``j`` of subject ``i`` is at index ``(i - 1) * 10 + (j - 1)``.
    """
    path = "./att_faces/s"
    gray_images = [
        cv2.cvtColor(
            cv2.imread(path + str(person) + "/" + str(shot) + ".pgm"),
            cv2.COLOR_BGR2GRAY,
        )
        for person in range(1, 41)
        for shot in range(1, 11)
    ]
    return np.array(gray_images)
 
if __name__ == "__main__":
    # Face-recognition test: project the held-out images into eigenface space,
    # classify each by its nearest training projection, save the test image
    # next to a reference image of the predicted person, and report accuracy.
    images_per_person = 10  # images per subject returned by readimagearray()
    test_per_person = 3     # images 8-10 of each subject form the test set
    # Index of the first test image inside a subject's block of 10 images.
    first_test_offset = images_per_person - test_per_person

    test = testFaceMat()
    mean = meanFaceMat()
    # Centre the test samples with the training mean face.
    normTestFaceMat = test - mean
    # eigenface() returns (projection matrix, projected training samples).
    eigen_basis, eigen_train_sample = eigenface()
    eigen_test_sample = np.dot(normTestFaceMat, eigen_basis)

    # Load the full image set once; it does not change between iterations.
    inputlist = readimagearray()

    right_num = 0
    total = eigen_test_sample.shape[0]
    # Start at 0 so the first test image is evaluated too; the accuracy below
    # is divided by the full number of test samples.
    for i in range(total):
        predict = int(prediction(eigen_test_sample[i], eigen_train_sample))
        fact = i // test_per_person   # true person index of test sample i
        shot = i % test_per_person    # which of that person's test images

        if predict == fact:
            right_num += 1
            print(f'第{i+1}张图片预测成功')
            output_path = r'./means_Output3'
        else:
            print(f'第{i+1}张图片预测失败')
            output_path = r'./means_Output4'
        os.makedirs(output_path, exist_ok=True)

        # The actual test image (images 8-10 of the person, offsets 7-9).
        imgfact = inputlist[fact * images_per_person + first_test_offset + shot]
        # First image of the predicted person, used as the visual reference.
        predictimg = inputlist[predict * images_per_person]

        name = '第' + str(int(fact)+1) + '个人的第' + str(i%3+1) + '张测试图'
        # Clear the figure before each draw so images do not accumulate on
        # the same axes across iterations.
        plt.clf()
        plt.axis('off')
        plt.imshow(imgfact, cmap="gray")
        plt.savefig(output_path + '/' + name, bbox_inches='tight', pad_inches=0)
        plt.clf()
        plt.axis('off')
        plt.imshow(predictimg, cmap="gray")
        plt.savefig(output_path + '/' + name + '的预测图', bbox_inches='tight', pad_inches=0)

    print("accuracy:%.2f%%" % (right_num / float(total) * 100))

取k=100时人脸特征如下