最近一段时间在做pca降维,就把网络上的资源看了不少,这里做个总结。
http://blog.csdn.net/weixingstudio/article/details/8234766
上面的连接中有我对pca的一个详细的介绍。这个文档里只给出一些其他的自己实现的pca程序。
上面的文档有正确运行pca降维的matlab程序,大家可以参考上面文档里面的。
这个文档里面的程序仅供大家学习研究用。。
一下内容,都来自网络资源。
算法一
这个算法注释很清楚,但是因为我没有具体的学习pca的原理,所以里面的白化矩阵我不知道做什么用的,还望大家多多指教
%程序说明:y = pca(mixedsig),程序中mixedsig为 n*T 阶混合数据矩阵,n为信号个数,T为采样点数
% y为 m*T 阶主分量矩阵。
function y = pca(mixedsig)
if nargin == 0
error('You must supply the mixed data as input argument.');
end
if length(size(mixedsig))>2
error('Input data can not have more than two dimensions. ');
end
if any(any(isnan(mixedsig)))
error('Input data contains NaN''s.');
end
%——————————————去均值————————————
meanValue = mean(mixedsig')';
mixedsig = mixedsig - meanValue * ones(1,size(meanValue,2));
[Dim,NumofSampl] = size(mixedsig);
oldDimension = Dim;
fprintf('Number of signals: %d\n',Dim);
fprintf('Number of samples: %d\n',NumofSampl);
fprintf('Calculate PCA...');
firstEig = 1;
lastEig = Dim;
covarianceMatrix = cov(mixedsig',1); %计算协方差矩阵
[E,D] = eig(covarianceMatrix); %计算协方差矩阵的特征值和特征向量
%———计算协方差矩阵的特征值大于阈值的个数lastEig———
rankTolerance = 1e-5;
maxLastEig = sum(diag(D)) > rankTolerance;
lastEig = maxLastEig;
%——————————降序排列特征值——————————
eigenvalues = flipud(sort(diag(D)));
%—————————去掉较小的特征值——————————
if lastEig < oldDimension
lowerLimitValue = (eigenvalues(lastEig) + eigenvalues(lastEig + 1))/2;
else
lowerLimitValue = eigenvalues(oldDimension) - 1;
end
lowerColumns = diag(D) > lowerLimitValue;
%—————去掉较大的特征值(一般没有这一步)——————
if firstEig > 1
higherLimitValue = (eigenvalues(firstEig - 1) + eigenvalues(firstEig))/2;
else
higherLimitValue = eigenvalues(1) + 1;
end
higherColumns = diag(D) < higherLimitValue;
%—————————合并选择的特征值——————————
selectedColumns =lowerColumns & higherColumns;
%—————————输出处理的结果信息—————————
fprintf('Selected[ %d ] dimensions.\n',sum(selectedColumns));
fprintf('Smallest remaining (non-zero) eigenvalue[ %g ]\n',eigenvalues(lastEig));
fprintf('Largest remaining (non-zero) eigenvalue[ %g ]\n',eigenvalues(firstEig));
fprintf('Sum of removed eigenvalue[ %g ]\n',sum(diag(D) .* (~selectedColumns)));
%———————选择相应的特征值和特征向量———————
E = selcol(E,selectedColumns);
D = selcol(selcol(D,selectedColumns)',selectedColumns);
%——————————计算白化矩阵———————————
whiteningMatrix = inv(sqrt(D)) * E';
dewhiteningMatrix = E * sqrt(D);
%——————————提取主分量————————————
y = whiteningMatrix * mixedsig;
%——————————行选择子程序———————————
function newMatrix = selcol(oldMatrix,maskVector)
if size(maskVector,1)~ = size(oldMatrix,2)
error('The mask vector and matrix are of uncompatible size.');
end
numTaken = 0;
for i = 1:size(maskVector,1)
if maskVector(i,1) == 1
takingMask(1,numTaken + 1) == i;
numTaken = numTaken + 1;
end
end
newMatrix = oldMatrix(:,takingMask);
算法二
这个算法是国外比较详细的算法,还有其他的文件,程序的原名叫:'Eigenface' Face Recognition System
function [m, A, Eigenfaces] = EigenfaceCore(T)
% Use Principle Component Analysis (PCA) to determine the most
% discriminating features between images of faces.
%
% Description: This function gets a 2D matrix, containing all training image vectors
% and returns 3 outputs which are extracted from training database.
%
% Argument: T - A 2D matrix, containing all 1D image vectors.
% Suppose all P images in the training database
% have the same size of MxN. So the length of 1D
% column vectors is M*N and 'T' will be a MNxP 2D matrix.
%
% Returns: m - (M*Nx1) Mean of the training database
% Eigenfaces - (M*Nx(P-1)) Eigen vectors of the covariance matrix of the training database
% A - (M*NxP) Matrix of centered image vectors
%
% See also: EIG
% Original version by Amir Hossein Omidvarnia, October 2007
% Email: aomidvar@ece.ut.ac.ir
%%%%%%%%%%%%%%%%%%%%%%%% Calculating the mean image
m = mean(T,2); % Computing the average face image m = (1/P)*sum(Tj's) (j = 1 : P)
Train_Number = size(T,2);
%%%%%%%%%%%%%%%%%%%%%%%% Calculating the deviation of each image from mean image
A = [];
for i = 1 : Train_Number
temp = double(T(:,i)) - m; % Computing the difference image for each image in the training set Ai = Ti - m
A = [A temp]; % Merging all centered images
end
%%%%%%%%%%%%%%%%%%%%%%%% Snapshot method