前面讲了许多PLS的性质,这里介绍一下PLS的matlab自带的PLS代码,对PLS有一个全面的认识
matlab自带的plsregress函数实现了SIMPLS算法,主要是对X0和Y0的协方差矩阵不断做正交分解
function [Xloadings,Yloadings,Xscores,Yscores, ...
beta,pctVar,mse,stats] = plsregress(X,Y,ncomp,varargin)
%PLSREGRESS Partial least squares regression.
....
% Center both predictors and response, and do PLS
%对X,Y进行中心化,在模型中去掉截距项
meanX = mean(X,1);
meanY = mean(Y,1);
X0 = bsxfun(@minus, X, meanX);
Y0 = bsxfun(@minus, Y, meanY);
% 根据输出的需要,返回不同的参数,下面重点关注SIMPLS,
%这是Dejone在九几年提出的,matlab用这个算法来实现PLS
,可见这个算法的意义。
if nargout <= 2
[Xloadings,Yloadings] = simpls(X0,Y0,ncomp);
elseif nargout <= 4
[Xloadings,Yloadings,Xscores,Yscores] = simpls(X0,Y0,ncomp);
....
end
%------------------------------------------------------------------------------
%SIMPLS Basic SIMPLS. Performs no error checking.
function [Xloadings,Yloadings,Xscores,Yscores,Weights] = simpls(X0,Y0,ncomp)
[n,dx] = size(X0);
dy = size(Y0,2);
% An orthonormal basis for the span of the X loadings, to make the successive
% deflation X0'*Y0 simple - each new basis vector can be removed from Cov
% separately.
%计算协方差矩阵
Cov = X0'*Y0;
for i = 1:ncomp
% Find unit length ti=X0*ri and ui=Y0*ci whose covariance, ri'*X0'*Y0*ci, is
% jointly maximized, subject to ti'*tj=0 for j=1:(i-1).
%计算协方差的主成分方向
[ri,si,ci] = svd(Cov,'econ');
ri = ri(:,1); ci = ci(:,1); si = si(1);
%计算得分
ti = X0*ri;
normti = norm(ti); ti = ti ./ normti; % ti'*ti == 1
%计算载荷
Xloadings(:,i) = X0'*ti;
qi = si*ci/normti; % = Y0'*ti
Yloadings(:,i) = qi;
if nargout > 2
Xscores(:,i) = ti;
Yscores(:,i) = Y0*qi; % = Y0*(Y0'*ti), and proportional to Y0*ci
if nargout > 4
Weights(:,i) = ri ./ normti; % rescaled to make ri'*X0'*X0*ri == ti'*ti == 1
end
end
% Update the orthonormal basis with modified Gram Schmidt (more stable),
% repeated twice (ditto).
vi = Xloadings(:,i);
for repeat = 1:2
for j = 1:i-1
vj = V(:,j);
vi = vi - (vj'*vi)*vj;
end
end
vi = vi ./ norm(vi);
V(:,i) = vi;
% Deflate Cov, i.e. project onto the ortho-complement of the X loadings.
% First remove projections along the current basis vector, then remove any
% component along previous basis vectors that's crept in as noise from
% previous deflations.
%更新协方差矩阵
Cov = Cov - vi*(vi'*Cov);
Vi = V(:,1:i);
Cov = Cov - Vi*(Vi'*Cov);
end
if nargout > 2
% By convention, orthogonalize the Y scores w.r.t. the preceding Xscores,
% i.e. XSCORES'*YSCORES will be lower triangular. This gives, in effect, only
% the "new" contribution to the Y scores for each PLS component. It is also
% consistent with the PLS-1/PLS-2 algorithms, where the Y scores are computed
% as linear combinations of a successively-deflated Y0. Use modified
% Gram-Schmidt, repeated twice.
for i = 1:ncomp
ui = Yscores(:,i);
for repeat = 1:2
for j = 1:i-1
tj = Xscores(:,j);
ui = ui - (tj'*ui)*tj;
end
end
Yscores(:,i) = ui;
end
end
%------------------------------------------------------------------------------
%PLSCV Efficient cross-validation for X and Y mean squared error in PLS.
function mse = plscv(X,Y,ncomp,cvp,mcreps,ParOptions)
[n,dx] = size(X);
% Return error for as many components as asked for; some columns may be NaN
% if ncomp is too large for CV.
mse = NaN(2,ncomp+1);
% The CV training sets are smaller than the full data; may not be able to fit as
% many PLS components. Do the best we can.
if isa(cvp,'cvpartition')
cvpType = 'partition';
maxncomp = min(min(cvp.TrainSize)-1,dx);
nTest = sum(cvp.TestSize);
else
cvpType = 'Kfold';
% maxncomp = min(min( floor((n*(cvp-1)/cvp)-1), dx));
maxncomp = min( floor((n*(cvp-1)/cvp)-1), dx);
nTest = n;
end
if ncomp > maxncomp
warning(message('stats:plsregress:MaxComponentsCV', maxncomp));
ncomp = maxncomp;
end
% Cross-validate sum of squared errors for models with 1:ncomp components,
% simultaneously. Sum the SSEs over CV sets, and compute the mean squared
% error
CVfun = @(Xtr,Ytr,Xtst,Ytst) sseCV(Xtr,Ytr,Xtst,Ytst,ncomp);
sumsqerr = crossval(CVfun,X,Y,cvpType,cvp,'mcreps',mcreps,'options',ParOptions);
mse(:,1:ncomp+1) = reshape(sum(sumsqerr,1)/(nTest*mcreps), [2,ncomp+1]);
%------------------------------------------------------------------------------
%SSECV Sum of squared errors for cross-validation
function sumsqerr = sseCV(Xtrain,Ytrain,Xtest,Ytest,ncomp)
XmeanTrain = mean(Xtrain);
YmeanTrain = mean(Ytrain);
X0train = bsxfun(@minus, Xtrain, XmeanTrain);
Y0train = bsxfun(@minus, Ytrain, YmeanTrain);
% Get and center the test data
X0test = bsxfun(@minus, Xtest, XmeanTrain);
Y0test = bsxfun(@minus, Ytest, YmeanTrain);
% Fit the full model, models with 1:(ncomp-1) components are nested within
[Xloadings,Yloadings,~,~,Weights] = simpls(X0train,Y0train,ncomp);
XscoresTest = X0test * Weights;
% Return error for as many components as the asked for.
outClass = superiorfloat(Xtrain,Ytrain);
sumsqerr = zeros(2,ncomp+1,outClass); % this will get reshaped to a row by CROSSVAL
% Sum of squared errors for the null model
sumsqerr(1,1) = sum(sum(abs(X0test).^2, 2));
sumsqerr(2,1) = sum(sum(abs(Y0test).^2, 2));
% Compute sum of squared errors for models with 1:ncomp components
for i = 1:ncomp
X0reconstructed = XscoresTest(:,1:i) * Xloadings(:,1:i)';
sumsqerr(1,i+1) = sum(sum(abs(X0test - X0reconstructed).^2, 2));
Y0reconstructed = XscoresTest(:,1:i) * Yloadings(:,1:i)';
sumsqerr(2,i+1) = sum(sum(abs(Y0test - Y0reconstructed).^2, 2));
end