- 算法步骤:
- 1)将原始数据按列组成n行m列矩阵X
- 2)特征中心化。即每一维的数据都减去该维的均值,使每一维的均值都为0
- 3)求出协方差矩阵
- 4)求出协方差矩阵的特征值及对应的特征向量
- 5)将特征向量按对应的特征值从大到小、自上而下按行排列成矩阵,取前k行组成矩阵P
- 6)Y=PX 即为降维到k维后的数据
PCA
public class PCA {
/**
 * Reduces {@code src} to {@code k} columns by projecting its column-centered data onto
 * {@code k} eigenvectors of its scatter matrix.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>Subtract each column's mean from that column (rows are treated as samples,
 *       columns as features).</li>
 *   <li>Build the scatter matrix {@code Xc^T * Xc} (size numCols x numCols).</li>
 *   <li>Run {@link #JacobiCount} on the scatter matrix with arguments 0.001 and 1000.</li>
 *   <li>Take columns {@code 0..k-1} of the matrix returned by {@code getValues()} as the
 *       projection basis and compute {@code Xc * basis}.</li>
 * </ol>
 *
 * <p>NOTE(review): this relies on {@code JacobiCount} returning eigenvectors as columns
 * already ordered by descending eigenvalue; that ordering is not visible from here and
 * should be confirmed.
 *
 * @param src input data; it is only read, never modified
 * @param k   number of components to keep; must satisfy {@code 0 <= k <= src.numCols}
 * @return a new {@code src.numRows x k} matrix holding the projected data
 * @throws IllegalArgumentException if {@code k} is negative or greater than {@code src.numCols}
 */
public static DenseMatrix64F runPCA(DenseMatrix64F src,int k) {
    final int rows = src.numRows;
    final int cols = src.numCols;

    // Fail fast: the scatter matrix is cols x cols, so at most `cols` components exist.
    // Checking here avoids running the whole decomposition before hitting an
    // out-of-bounds lookup further down.
    if (k < 0 || k > cols) {
        throw new IllegalArgumentException(
                "k must be in the range [0, " + cols + "] but was " + k);
    }

    // Center every column on zero by subtracting its mean.
    DenseMatrix64F centered = new DenseMatrix64F(rows, cols);
    for (int c = 0; c < cols; c++) {
        double mean = 0;
        for (int r = 0; r < rows; r++) {
            mean += src.get(r, c);
        }
        mean /= rows;
        for (int r = 0; r < rows; r++) {
            centered.set(r, c, src.get(r, c) - mean);
        }
    }

    // Scatter matrix: centered^T * centered (cols x cols).
    DenseMatrix64F centeredT = new DenseMatrix64F(cols, rows);
    CommonOps.transpose(centered, centeredT);
    DenseMatrix64F scatter = new DenseMatrix64F(cols, cols);
    CommonOps.mult(centeredT, centered, scatter);

    // `scatter` is not used again after this call, so it is handed over directly
    // instead of being copied first.
    EDInfo ed = JacobiCount(scatter, 0.001, 1000);

    // Copy the first k columns straight into a cols x k projection matrix; this is
    // the same matrix the original obtained by filling a k x cols matrix and
    // transposing it.
    DenseMatrix64F projection = new DenseMatrix64F(cols, k);
    for (int comp = 0; comp < k; comp++) {
        for (int c = 0; c < cols; c++) {
            projection.set(c, comp, ed.getValues().get(c, comp));
        }
    }

    // Project the centered data: (rows x cols) * (cols x k) -> rows x k.
    DenseMatrix64F rs = new DenseMatrix64F(rows, k);
    CommonOps.mult(centered, projection, rs);
    return rs;
}
public static EDInfo JacobiCount(DenseMatrix64F src, double diff, int iter) {
DenseMatrix64F values = new DenseMatrix64F(src.numRows,src.numCols);
for(int i=0;i<src.numRows;i++) {
for(int j=0;j<src.numCols;j++) {
if(i == j) {
values.set(i, j, 1);
}else {
values.set(i, j, 0);
}
}
}
int nCount = 0;
while(true)
{
double dbMax = Double.MIN_VALUE;
int nRow = 0;
int nCol = 1;
for(int i=0;i<src.numRows;i++) {
for(int j=0;j<src.numCols;j++) {
if(i != j && Math.abs(src.get(i, j)) &g