机器学习-矩阵分解

提出问题

在一个关于用户和物品评分的矩阵中(如下图所示:用户集U={u1,u2,u3,u4},项目集I={i1,i2,i3,i4}。矩阵中的对应元素代表评分,如:u1对i2的评分为2。?代表用户未对该项目进行评分),往往矩阵中的评分会较为稀疏。其中有很多物品用户并没有对其进行评分,我们的任务就是对这些没有评分的项目进行预测。

i1i2i3i4
u123
u242
u3433
u424

矩阵分解

矩阵分解是推荐系统的一种重要算法,矩阵分解可以理解为将一个矩阵分解为两个或者两个以上的低维矩阵。

假设有一个用户与项目的稀疏矩阵R_{m\times n},其中m表示用户个数,n表示项目个数。

我们现在假设用户对一个项目的喜爱程度会由k个因素决定,P_{m\times k}表示用户对这k个因素的偏好程度,Q_{k\times n}表示项目对这k个因素的满足程度。

我们将此矩阵分解为两个低维矩阵P_{m\times k}Q_{k\times n}的乘积,即R_{m\times n}=P_{m\times k}Q_{k\times n}

通过乘积我们可以得到实际评分值与预测值的差值,并计算损失。

为了使得我们的实际评分值与预测值的差值尽可能小,我们应最小化损失。

在机器学习算法中,在最小化损失函数时,可以通过梯度下降来一步步的求解。

矩阵分解步骤如下:

  1. 确定k值,将矩阵分解为两个低维矩阵P_{m\times k}Q_{k\times n}的乘积。
  2. 计算实际评分值与预测评分值的差值。
  3. 通过差值以及梯度下降调整P_{m\times k}Q_{k\times n}中的值。
  4. 重复2,3步直到到达设定的重复次数。

输入:数据集。

输出:平均绝对误差以及均方根误差。

优化目标:可能没有优化目标。

代码如下:
 

package knn5;

import java.io.*;
import java.util.Random;

public class MatrixFactorization {

	Random rand = new Random();

	int numUsers;

	int numItems;

	int numRatings;

	Triple[] dataset;

	double alpha;

	double lambda;

	int rank;

	double[][] userSubspace;

	double[][] itemSubspace;

	double ratingLowerBound;

	double ratingUpperBound;

	public MatrixFactorization(String paraFilename, int paraNumUsers, int paraNumItems,
			int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound) {
		numUsers = paraNumUsers;
		numItems = paraNumItems;
		numRatings = paraNumRatings;
		ratingLowerBound = paraRatingLowerBound;
		ratingUpperBound = paraRatingUpperBound;

		try {
			readData(paraFilename, paraNumUsers, paraNumItems, paraNumRatings);
		} catch (Exception ee) {
			System.out.println("File " + paraFilename + " cannot be read! " + ee);
			System.exit(0);
		} // Of try
	}// Of the first constructor


	public void setParameters(int paraRank, double paraAlpha, double paraLambda) {
		rank = paraRank;
		alpha = paraAlpha;
		lambda = paraLambda;
	}// Of setParameters

	public void readData(String paraFilename, int paraNumUsers, int paraNumItems,
			int paraNumRatings) throws IOException {
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			System.out.println("File " + paraFilename + " does not exists.");
			System.exit(0);
		} // Of if
		BufferedReader tempBufferReader = new BufferedReader(new FileReader(tempFile));

		// Allocate space.
		dataset = new Triple[paraNumRatings];
		String tempString;
		String[] tempStringArray;
		for (int i = 0; i < paraNumRatings; i++) {
			tempString = tempBufferReader.readLine();
			tempStringArray = tempString.split(",");
			dataset[i] = new Triple(Integer.parseInt(tempStringArray[0]),
					Integer.parseInt(tempStringArray[1]), Double.parseDouble(tempStringArray[2]));
		} // Of for i

		tempBufferReader.close();
	}// Of readData

	void initializeSubspaces() {
		userSubspace = new double[numUsers][rank];

		for (int i = 0; i < numUsers; i++) {
			for (int j = 0; j < rank; j++) {
				userSubspace[i][j] = rand.nextDouble();
			} // Of for j
		} // Of for i

		itemSubspace = new double[numItems][rank];
		for (int i = 0; i < numItems; i++) {
			for (int j = 0; j < rank; j++) {
				itemSubspace[i][j] = rand.nextDouble();
			} // Of for j
		} // Of for i
	}// Of initializeSubspaces


	public double predict(int paraUser, int paraItem) {
		double resultValue = 0;
		for (int i = 0; i < rank; i++) {
			resultValue += userSubspace[paraUser][i] * itemSubspace[paraItem][i];
		} // Of for i
		return resultValue;
	}// Of predict

	public void train(int paraRounds) {
		initializeSubspaces();

		for (int i = 0; i < paraRounds; i++) {
			updateNoRegular();
			if (i % 50 == 0) {
				// Show the process
				System.out.println("Round " + i);
				System.out.println("MAE: " + mae());
			} // Of if
		} // Of for i
	}// Of train

	
	public void updateNoRegular() {
		for (int i = 0; i < numRatings; i++) {
			int tempUserId = dataset[i].user;
			int tempItemId = dataset[i].item;
			double tempRate = dataset[i].rating;

			double tempResidual = tempRate - predict(tempUserId, tempItemId); // Residual

			// Update user subspace
			double tempValue = 0;
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * itemSubspace[tempItemId][j];
				userSubspace[tempUserId][j] += alpha * tempValue;
			} // Of for j

			// Update item subspace
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * userSubspace[tempUserId][j];

				itemSubspace[tempItemId][j] += alpha * tempValue;
			} // Of for j
		} // Of for i
	}// Of updateNoRegular

	
	public double rsme() {
		double resultRsme = 0;
		int tempTestCount = 0;

		for (int i = 0; i < numRatings; i++) {
			int tempUserIndex = dataset[i].user;
			int tempItemIndex = dataset[i].item;
			double tempRate = dataset[i].rating;

			double tempPrediction = predict(tempUserIndex, tempItemIndex);

			if (tempPrediction < ratingLowerBound) {
				tempPrediction = ratingLowerBound;
			} else if (tempPrediction > ratingUpperBound) {
				tempPrediction = ratingUpperBound;
			} // Of if

			double tempError = tempRate - tempPrediction;
			resultRsme += tempError * tempError;
			tempTestCount++;
		} // Of for i

		return Math.sqrt(resultRsme / tempTestCount);
	}// Of rsme

	public double mae() {
		double resultMae = 0;
		int tempTestCount = 0;

		for (int i = 0; i < numRatings; i++) {
			int tempUserIndex = dataset[i].user;
			int tempItemIndex = dataset[i].item;
			double tempRate = dataset[i].rating;

			double tempPrediction = predict(tempUserIndex, tempItemIndex);

			if (tempPrediction < ratingLowerBound) {
				tempPrediction = ratingLowerBound;
			} // Of if
			if (tempPrediction > ratingUpperBound) {
				tempPrediction = ratingUpperBound;
			} // Of if

			double tempError = tempRate - tempPrediction;

			resultMae += Math.abs(tempError);
			// System.out.println("resultMae: " + resultMae);
			tempTestCount++;
		} // Of for i

		return (resultMae / tempTestCount);
	}// Of mae


	public static void testTrainingTesting(String paraFilename, int paraNumUsers, int paraNumItems,
			int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound,
			int paraRounds) {
		try {
			// Step 1. read the training and testing data
			MatrixFactorization tempMF = new MatrixFactorization(paraFilename, paraNumUsers,
					paraNumItems, paraNumRatings, paraRatingLowerBound, paraRatingUpperBound);

			tempMF.setParameters(5, 0.0001, 0.005);

			// Step 3. update and predict
			System.out.println("Begin Training ! ! !");
			tempMF.train(paraRounds);

			double tempMAE = tempMF.mae();
			double tempRSME = tempMF.rsme();
			System.out.println("Finally, MAE = " + tempMAE + ", RSME = " + tempRSME);
		} catch (Exception e) {
			e.printStackTrace();
		} // Of try
	}// Of testTrainingTesting


	public static void main(String args[]) {
		testTrainingTesting("C:\\Users\\ASUS\\Desktop\\文件\\11.txt",5, 6, 30, 1, 5, 5000);
	}// Of main

	public class Triple {
		public int user;
		public int item;
		public double rating;
		public Triple() {
			user = -1;
			item = -1;
			rating = -1;
		}// Of the first constructor

		public Triple(int paraUser, int paraItem, double paraRating) {
			user = paraUser;
			item = paraItem;
			rating = paraRating;
		}// Of the first constructor

		public String toString() {
			return "" + user + ", " + item + ", " + rating;
		}// Of toString
	}// Of class Triple

}// Of class MatrixFactorization

运行截图:

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值