这是我自己在网上学习的一些关于矩阵分解的知识,不能保证其正确性,谨慎参考。
2、奇异值分解(SVD)是一种基于正交矩阵的分解方法,数值上非常稳定可靠。学习了奇异值分解之后,我把这种“分解”的思想联想到用户评分表上,用来预测用户对资料的评分:把P*Q的评分矩阵近似分解成P*N和N*Q两个矩阵的乘积,其中N是隐含“特征”的个数。需要注意,下面的代码并不是严格意义上的SVD,而是用梯度下降来拟合这两个矩阵。
2、梯度:在微积分里面,对多元函数的参数求偏导数,把求得的各个参数的偏导数以向量的形式写出来,就是梯度。
3、代码的流程:先初始化用户-资料评分矩阵(P*Q),再初始化分解得到的两个矩阵(P*N和N*Q;代码中第二个矩阵按“资料×特征”即Q*N的形式存储),这两个矩阵的初始值由Math.random()随机产生(在[0,1)内均匀分布)。然后采用梯度下降法迭代优化这两个矩阵。
/**
 * Toy recommender that factorizes a user-by-material rating matrix with batch gradient descent.
 *
 * <p>The {@code u x m} rating matrix is approximated by {@code x * thate^T}, where {@code x} is
 * {@code u x n} (one row of feature weights per user) and {@code thate} is {@code m x n} (one row
 * of feature weights per material). Ratings equal to {@code -1} are skipped by the gradient and
 * loss computations (presumably meaning "not rated").
 */
public class myMatrix {
    // NOTE: the author left u, m and n blank on purpose; fill in real sizes before compiling.
    private static int u= ;// number of users
    private static int m= ;// number of materials
    private static int n= ;// number of latent features
    private static double[][] x = new double[u][n];             // user-feature matrix
    private static double[][] thate = new double[m][n];         // material-feature matrix
    private static double[][] x_partial = new double[u][n];     // gradient of the objective w.r.t. x
    private static double[][] thate_partial = new double[m][n]; // gradient of the objective w.r.t. thate
    private static double lambda = 1;     // L2 regularization strength
    private static double alpha = 0.007;  // gradient-descent step size

    /**
     * Trains the factorization for 200 iterations, prints the loss after each one, prints and
     * saves the predicted rating matrix, then prints up to ten material ids per user ordered by
     * predicted rating (highest first).
     *
     * @param args unused
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        double[][] user_material = new double[u][m];
        // init(...) is not defined in this listing; the author expects the reader to supply it
        // and fill user_material with ratings.
        init(user_material);
        // Without random starting values both factors stay all-zero, every gradient is zero
        // and the training loop never changes anything.
        initPQ();

        System.out.println();
        for (int time = 1; time <= 200; time++) {
            // Both gradients are taken at the current parameters before either factor moves.
            setThate_partial(user_material);
            setX_partial(user_material);
            update();
            double loss = lossFunction(user_material, matrixProduct(x, thate));
            System.out.println("第"+time+"次的损失函数值:"+loss);
        }

        double[][] predicted = matrixProduct(x, thate);
        showMatrix(predicted);

        List<user> users = new ArrayList<user>();
        File fileName = new File("F:\\小程序将用户资料评分表转换成矩阵\\表转换成矩阵\\output3.txt");
        // FileWriter creates the file when it is missing, so no separate createNewFile() call.
        BufferedWriter out = new BufferedWriter(new FileWriter(fileName));
        try {
            for (int i = 0; i < predicted.length; i++) {
                for (int k = 0; k < predicted[i].length; k++) {
                    // Ids are 1-based in the output.
                    users.add(new user(i + 1, k + 1, predicted[i][k]));
                    out.write(predicted[i][k] + " ");
                }
                out.write("\r\n");
            }
        } finally {
            // Closing flushes the buffer; without it the file can end up empty or truncated.
            out.close();
        }

        sortGradeMethod(users);
        // NOTE: every material is ranked here, including ones the user has already rated.
        for (int q = 1; q <= u; q++) {
            System.out.print("第"+q+"个用户输出:");
            int printed = 0;
            for (user candidate : users) {
                if (candidate.getUserId() != q) {
                    continue;
                }
                System.out.print("("+candidate.getMaterialId()+","+")");
                if (++printed == 10) {
                    break;
                }
            }
            // End the line even when the user has fewer than ten entries.
            System.out.println();
        }
    }

    /**
     * Prints a matrix to standard output, one row per line, values separated by commas.
     * A row without columns prints as an empty line.
     *
     * @param a matrix to print
     */
    private static void showMatrix(double[][] a) {
        for (double[] row : a) {
            StringBuilder line = new StringBuilder();
            for (int k = 0; k < row.length; k++) {
                if (k > 0) {
                    line.append(',');
                }
                line.append(row[k]);
            }
            System.out.println(line);
        }
    }

    /** Fills both factor matrices with values drawn from {@link Math#random()}. */
    private static void initPQ() {
        for (int i = 0; i < u; i++) {
            for (int k = 0; k < n; k++) {
                x[i][k] = Math.random();
            }
        }
        for (int p = 0; p < m; p++) {
            for (int q = 0; q < n; q++) {
                thate[p][q] = Math.random();
            }
        }
    }

    /**
     * Computes {@code a * b^T}: entry {@code [i][k]} is the dot product of row {@code i} of
     * {@code a} with row {@code k} of {@code b}.
     *
     * @param a left factor; each row supplies the inner dimension
     * @param b right factor, stored row-wise; rows must be at least as long as the rows of a
     * @return a new {@code a.length x b.length} matrix
     */
    private static double[][] matrixProduct(double[][] a, double[][] b) {
        double[][] product = new double[a.length][b.length];
        for (int i = 0; i < a.length; i++) {
            for (int k = 0; k < b.length; k++) {
                double dot = 0;
                for (int q = 0; q < a[i].length; q++) {
                    dot += a[i][q] * b[k][q];
                }
                product[i][k] = dot;
            }
        }
        return product;
    }

    /**
     * Stores in {@code x_partial} the gradient of the regularized squared-error objective with
     * respect to every entry of {@code x}:
     * {@code sum over rated k of (predict(i,k) - rating[i][k]) * thate[k][j] + lambda * x[i][j]}.
     *
     * @param user_material rating matrix; entries equal to -1 are skipped
     */
    private static void setX_partial(double[][] user_material) {
        for (int i = 0; i < u; i++) {
            // The regularization term depends on the parameter itself and is added once.
            for (int j = 0; j < n; j++) {
                x_partial[i][j] = lambda * x[i][j];
            }
            for (int k = 0; k < m; k++) {
                if (user_material[i][k] == -1) {
                    continue;
                }
                // The prediction error is the same for every feature j, so compute it once.
                double error = getPredict(i, k) - user_material[i][k];
                for (int j = 0; j < n; j++) {
                    x_partial[i][j] += error * thate[k][j];
                }
            }
        }
    }

    /**
     * Stores in {@code thate_partial} the gradient of the regularized squared-error objective
     * with respect to every entry of {@code thate}:
     * {@code sum over rated k of (predict(k,i) - rating[k][i]) * x[k][j] + lambda * thate[i][j]}.
     *
     * @param user_material rating matrix; entries equal to -1 are skipped
     */
    private static void setThate_partial(double[][] user_material) {
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                thate_partial[i][j] = lambda * thate[i][j];
            }
            for (int k = 0; k < u; k++) {
                if (user_material[k][i] == -1) {
                    continue;
                }
                double error = getPredict(k, i) - user_material[k][i];
                for (int j = 0; j < n; j++) {
                    thate_partial[i][j] += error * x[k][j];
                }
            }
        }
    }

    /**
     * Predicted rating of material {@code k} by user {@code i}: the dot product of their
     * feature rows.
     *
     * @param i row index into x
     * @param k row index into thate
     * @return the dot product of {@code x[i]} and {@code thate[k]} over the n features
     */
    private static double getPredict(int i, int k) {
        double pre = 0;
        for (int p = 0; p < n; p++) {
            pre += x[i][p] * thate[k][p];
        }
        return pre;
    }

    /** Takes one gradient-descent step of size {@code alpha} on both factor matrices. */
    private static void update() {
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < x[i].length; j++) {
                x[i][j] -= alpha * x_partial[i][j];
            }
        }
        for (int p = 0; p < thate.length; p++) {
            for (int q = 0; q < thate[p].length; q++) {
                thate[p][q] -= alpha * thate_partial[p][q];
            }
        }
    }

    /**
     * Sum of squared differences between {@code a} and {@code b} over the entries of {@code a}
     * that are not -1. The regularization term is not included.
     *
     * @param a rating matrix
     * @param b predicted matrix of the same shape
     * @return the summed squared error
     */
    private static double lossFunction(double[][] a, double[][] b) {
        double lossPre = 0.0;
        for (int i = 0; i < a.length; i++) {
            for (int j = 0; j < a[i].length; j++) {
                if (a[i][j] != -1) {
                    double diff = a[i][j] - b[i][j];
                    lossPre += diff * diff;
                }
            }
        }
        return lossPre;
    }

    /**
     * Sorts the list in place by predicted grade, highest first.
     *
     * @param list entries to sort
     */
    private static void sortGradeMethod(List<user> list) {
        Collections.sort(list, new Comparator<user>() {
            @Override
            public int compare(user first, user second) {
                // Arguments are swapped to get descending order; Double.compare keeps the
                // comparator contract intact even for NaN grades.
                return Double.compare(second.getGrade(), first.getGrade());
            }
        });
    }
}
/**
 * One (user, material, grade) triple: the grade given for a material by a user.
 */
public class user {

    private int userId;
    private int materialId;
    private double grade;

    /**
     * Creates a triple from its three components.
     *
     * @param userId     id of the user
     * @param materialId id of the material
     * @param grade      grade associated with this user/material pair
     */
    public user(int userId, int materialId, double grade) {
        this.userId = userId;
        this.materialId = materialId;
        this.grade = grade;
    }

    /** @return the user id */
    public int getUserId() {
        return this.userId;
    }

    /** @param userId the new user id */
    public void setUserId(int userId) {
        this.userId = userId;
    }

    /** @return the material id */
    public int getMaterialId() {
        return this.materialId;
    }

    /** @param materialId the new material id */
    public void setMaterialId(int materialId) {
        this.materialId = materialId;
    }

    /** @return the grade */
    public double getGrade() {
        return this.grade;
    }

    /** @param grade the new grade */
    public void setGrade(double grade) {
        this.grade = grade;
    }
}
程序最后输出的是每个用户预测评分最高的前10个资料的编号。本意是只在用户未评分的资料中取Top10,但要注意:上面的代码是在全部资料上排序的,并没有排除用户已经评过分的资料,如需只推荐未评分的资料,还要自行过滤。